hexrays.hpp
Go to the documentation of this file.
1 /*
2  * Hex-Rays Decompiler project
3  * Copyright (c) 1990-2019 Hex-Rays
4  * ALL RIGHTS RESERVED.
5  *
6  * There are 2 representations of the binary code in the decompiler:
7  * - microcode: processor instructions are translated into it and then
8  * the decompiler optimizes and transforms it
9  * - ctree: ctree is built from the optimized microcode and represents
10  * AST-like tree with C statements and expressions. It can
11  * be printed as C code.
12  *
13  * Microcode is represented by the following classes:
14  * mbl_array_t keeps general info about the decompiled code and
15  * array of basic blocks. usually mbl_array_t is named 'mba'
16  * mblock_t a basic block. includes list of instructions
17  * minsn_t an instruction. contains 3 operands: left, right, and
18  * destination
19  * mop_t an operand. depending on its type may hold various info
20  * like a number, register, stack variable, etc.
21  * mlist_t list of memory or register locations; can hold vast areas
22  * of memory and multiple registers. this class is used
23  * very extensively in the decompiler. it may represent
24  * list of locations accessed by an instruction or even
25  * an entire basic block. it is also used as argument of
26  * many functions. for example, there is a function
27  * that searches for an instruction that refers to a mlist_t.
28  * See http://www.hexblog.com/?p=1232 for some pictures
29  *
30  * Ctree is represented by:
31  * cfunc_t keeps general info about the decompiled code, including a
32  * pointer to mbl_array_t. deleting cfunc_t will delete
33  * mbl_array_t too (however, decompiler returns cfuncptr_t,
34  * which is a reference counting object and deletes the
35  * underlying function as soon as all references to it go
36  * out of scope). cfunc_t has 'body', which represents the
37  * decompiled function body as cinsn_t.
38  * cinsn_t a C statement. can be a compound statement or any other
39  * legal C statements (like if, for, while, return,
40  * expression-statement, etc). depending on the statement
41  * type has pointers to additional info. for example, the
42  * 'if' statement has a pointer to cif_t, which holds the
43  * 'if' condition, 'then' branch, and optionally 'else'
44  * branch. Please note that despite the name cinsn_t
45  * we say "statements", not "instructions". For us
46  * instructions are part of microcode, not ctree.
47  * cexpr_t a C expression. is used as part of a C statement, when
48  * necessary. cexpr_t has 'type' field, which keeps the
49  * expression type.
50  * citem_t a base class for cinsn_t and cexpr_t, holds common info
51  * like the address, label, and opcode.
52  * cnumber_t a constant 64-bit number. in addition to its value also
53  * holds information how to represent it: decimal, hex, or
54  * as a symbolic constant (enum member). please note that
55  * numbers are represented by another class (mnumber_t)
56  * in microcode.
57  * See http://www.hexblog.com/?p=107 for some pictures and more details
58  *
59  * Both microcode and ctree use the following class:
60  * lvar_t a local variable. may represent a stack or register
61  * variable. a variable has a name, type, location, etc.
62  * the list of variables is stored in mba->vars.
63  * lvar_locator_t holds a variable location (vdloc_t) and its definition
64  * address.
65  * vdloc_t describes a variable location, like a register number,
66  * a stack offset, or, in complex cases, can be a mix of
67  * register and stack locations. very similar to argloc_t,
68  * which is used in ida. the differences between argloc_t
69  * and vdloc_t are:
70  * - vdloc_t never uses ARGLOC_REG2
71  * - vdloc_t uses micro register numbers instead of
72  * processor register numbers
73  * - the stack offsets are never negative in vdloc_t, while
74  * in argloc_t there can be negative offsets
75  *
76  * The above are the most important classes in this header file. There are
77  * many auxiliary classes, please see their definitions below.
78  *
79  */
80 
81 #ifndef __HEXRAYS_HPP
82 #define __HEXRAYS_HPP
83 
84 #include <pro.h>
85 #include <fpro.h>
86 #include <ida.hpp>
87 #include <idp.hpp>
88 #include <gdl.hpp>
89 #include <ieee.h>
90 #include <loader.hpp>
91 #include <kernwin.hpp>
92 #include <typeinf.hpp>
93 #include <set>
94 #include <map>
95 #include <deque>
96 #include <queue>
97 #include <algorithm>
98 
99 /*
100  * We can imagine a virtual micro machine that executes microcode.
101  * This virtual micro machine has many registers.
102  * Each register is 8 bits wide. During translation of processor
103  * instructions into microcode, multibyte processor registers are mapped
104  * to adjacent microregisters. Processor condition codes are also
105  * represented by microregisters. The microregisters are grouped
106  * into following groups:
107  * 0..7: condition codes
108  * 8..n: all processor registers (including fpu registers, if necessary)
109  * this range may also include temporary registers used during
110  * the initial microcode generation
111  * n.. : so called kernel registers; they are used during optimization
112  * see is_kreg()
113  *
114  * Each micro-instruction (minsn_t) has zero to three operands.
115  * Some of the possible operands types are:
116  * - immediate value
117  * - register
118  * - memory reference
119  * - result of another micro-instruction
120  *
121  * The operands (mop_t) are l (left), r (right), d (destination).
122  * An example of a microinstruction:
123  * add r0.4, #8.4, r2.4
124  * which means 'add constant 8 to r0 and place the result into r2'.
125  * where the left operand is 'r0', its size is 4 bytes (r0.4)
126  * the right operand is a constant '8', its size is 4 bytes (#8.4)
127  * the destination operand is 'r2', its size is 4 bytes (r2.4)
128  * 'd' is almost always the destination but there are exceptions.
129  * See mcode_modifies_d(). For example, stx does not modify 'd'.
130  * See the opcode map below for the list of microinstructions and their
131  * operands. Most instructions are very simple and do not need
132  * detailed explanations. There are no side effects in microinstructions.
133  *
134  * Each operand has a size specifier. The following sizes can be used in
135  * practically all contexts: 1, 2, 4, 8, 16 bytes. Floating types may have
136  * other sizes. Functions may return objects of arbitrary size, as well as
137  * operations upon UDTs (user-defined types, i.e. structs and unions).
138  *
139  * Memory is considered to consist of several segments.
140  * A memory reference is made using a (selector, offset) pair.
141  * A selector is always 2 bytes long. An offset can be 4 or 8 bytes long,
142  * depending on the bitness of the target processor.
143  * Currently the selectors are not used very much. The decompiler tries to
144  * resolve (selector, offset) pairs into direct memory references at each
145  * opportunity and then operates on mop_v operands. In other words,
146  * while the decompiler can handle segmented memory models, internally
147  * it still uses simple linear addresses.
148  *
149  * The following memory regions are recognized:
150  * - GLBLOW global memory: low part, everything below the stack
151  * - LVARS stack: local variables
152  * - RETADDR stack: return address
153  * - SHADOW stack: shadow arguments
154  * - ARGS stack: regular stack arguments
155  * - GLBHIGH global memory: high part, everything above the stack
156  * Any stack region may be empty. Objects residing in one memory region
157  * are considered to be completely distinct from objects in other regions.
158  * We allocate the stack frame in some memory region, which is not
159  * allocated for any purposes in IDA. This permits us to use linear addresses
160  * for all memory references, including the stack frame.
161  *
162  * If the operand size is bigger than 1 then the register
163  * operand references a block of registers. For example:
164  *
165  * ldc #1.4, r8.4
166  *
167  * loads the constant 1 to registers 8, 9, 10, 11:
168  *
169  * #1 -> r8
170  * #0 -> r9
171  * #0 -> r10
172  * #0 -> r11
173  *
174  * This example uses little-endian byte ordering.
175  * Big-endian byte ordering is supported too. Registers are always little-
176  * endian, regardless of the memory endianness.
177  *
178  * Each instruction has 'next' and 'prev' fields that are used to form
179  * a doubly linked list. Such lists are present for each basic block (mblock_t).
180  * Basic blocks have other attributes, including:
181  * - dead_at_start: list of dead locations at the block start
182  * - maybuse: list of locations the block may use
183  * - maybdef: list of locations the block may define (or spoil)
184  * - mustbuse: list of locations the block will certainly use
185  * - mustbdef: list of locations the block will certainly define
186  * - dnu: list of locations the block will certainly define
187  * but will not use (registers or non-aliasable stack vars)
188  *
189  * These lists are represented by the mlist_t class. It consists of 2 parts:
190  * - rlist_t: list of microregisters (possibly including virtual stack locations)
191  * - ivlset_t: list of memory locations represented as intervals
192  * we use linear addresses in this list.
193  * The mlist_t class is used quite often. For example, to find what an operand
194  * can spoil, we build its 'maybe-use' list. Then we can find out if this list
195  * is accessed using the is_accessed() or is_accessed_globally() functions.
196  *
197  * All basic blocks of the decompiled function constitute an array called
198  * mbl_array_t (array of microblocks). This is a huge class that has too
199  * many fields to describe here (some of the fields are not visible in the sdk)
200  * The most important ones are:
201  * - stack frame: frregs, stacksize, etc
202  * - memory: aliased, restricted, and other ranges
203  * - type: type of the current function, its arguments (argidx) and
204  * local variables (vars)
205  * - natural: array of pointers to basic blocks. the basic blocks
206  * are also accessible as a doubly linked list starting from 'blocks'.
207  * - bg: control flow graph. the graph gives access to the use-def
208  * chains that describe data dependencies between basic blocks
209  *
210  */
211 
212 #ifdef __NT__
213 #pragma warning(push)
214 #pragma warning(disable:4062) // enumerator 'x' in switch of enum 'y' is not handled
215 #pragma warning(disable:4265) // virtual functions without virtual destructor
216 #endif
217 
218 #define hexapi ///< Public functions are marked with this keyword
219 
220 // Warning suppressions for PVS Studio:
221 //-V:2:654 The condition '2' of loop is always true.
222 //-V::719 The switch statement does not cover all values
223 //-V:verify:678
224 //-V:chain_keeper_t:690 copy ctr will be generated
225 //-V:add_block:656 call to the same function
226 //-V:add:792 The 'add' function located to the right of the operator '|' will be called regardless of the value of the left operand
227 //-V:sub:792 The 'sub' function located to the right of the operator '|' will be called regardless of the value of the left operand
228 //-V:intersect:792 The 'intersect' function located to the right of the operator '|' will be called regardless of the value of the left operand
229 // Lint suppressions:
230 //lint -sem(mop_t::_make_cases, custodial(1))
231 //lint -sem(mop_t::_make_pair, custodial(1))
232 //lint -sem(mop_t::_make_callinfo, custodial(1))
233 //lint -sem(mop_t::_make_insn, custodial(1))
234 //lint -sem(mop_t::make_insn, custodial(1))
235 
236 // Microcode level forward definitions (the types themselves are defined later):
237 class mop_t; // microinstruction operand
238 class mop_pair_t; // pair of operands. example: :(edx.4,eax.4).8
239 class mop_addr_t; // address of an operand. example: &global_var
240 class mcallinfo_t; // function call info. example: <cdecl:"int x" #10.4>.8
241 class mcases_t; // jump table cases. example: {0 => 12, 1 => 13}
242 class minsn_t; // microinstruction
243 class mblock_t; // basic block
244 class mbl_array_t; // array of blocks, represents microcode for a function
245 class codegen_t; // helper class to generate the initial microcode
246 class mbl_graph_t; // control graph of microcode
247 struct vdui_t; // widget representing the pseudocode window
248 struct hexrays_failure_t; // decompilation failure object, thrown as an exception
249 struct mba_stats_t; // statistics about decompilation of a function
250 struct mlist_t; // list of memory and register locations
251 struct voff_t; // value offset (microregister number or stack offset)
252 typedef std::set<voff_t> voff_set_t; // set of value offsets
253 struct vivl_t; // value interval (register or stack range)
254 typedef int mreg_t; ///< Micro register number
255 
256 // Ctree level forward definitions (the types themselves are defined later):
257 struct cfunc_t; // result of decompilation, the highest level object
258 struct citem_t; // base class for cexpr_t and cinsn_t
259 struct cexpr_t; // C expression
260 struct cinsn_t; // C statement
261 struct cblock_t; // C statement block (sequence of statements)
262 struct cswitch_t; // C switch statement
263 struct carg_t; // call argument
264 struct carglist_t; // vector of call arguments
265 
266 typedef std::set<ea_t> easet_t; // set of ea_t values (addresses)
267 typedef std::set<minsn_t *> minsn_ptr_set_t; // set of microinstruction pointers
268 typedef std::set<qstring> strings_t; // set of strings
269 typedef qvector<minsn_t*> minsnptrs_t; // vector of microinstruction pointers
270 typedef qvector<mop_t*> mopptrs_t; // vector of operand pointers
271 typedef qvector<mop_t> mopvec_t; // vector of operands (by value)
272 typedef qvector<uint64> uint64vec_t; // vector of uint64 values
273 typedef qvector<mreg_t> mregvec_t; // vector of microregister numbers
274 
275 // Upper bound for the size of a function frame: frames must be smaller than
276 // this value, otherwise the decompiler will bail out with MERR_HUGESTACK
277 #define MAX_SUPPORTED_STACK_SIZE 0x100000 // 1MB
278 
279 //-------------------------------------------------------------------------
280 // HEXRAYS_MEMORY_ALLOCATION_FUNCS: counterpart of the macro DEFINE_MEMORY_ALLOCATION_FUNCS
281 // that routes class-level new/delete to hexrays_alloc()/hexrays_free()
282 #if defined(SWIG) // SWIG defined: the macro expands to nothing
283  #define HEXRAYS_MEMORY_ALLOCATION_FUNCS()
284 #elif defined(SWIGPYTHON) // SWIGPYTHON defined: fall back to DEFINE_MEMORY_ALLOCATION_FUNCS
285  #define HEXRAYS_MEMORY_ALLOCATION_FUNCS DEFINE_MEMORY_ALLOCATION_FUNCS
286 #else // otherwise: define the operators in terms of hexrays_alloc/hexrays_free
287  #define HEXRAYS_PLACEMENT_DELETE void operator delete(void *, void *) {}
288  #define HEXRAYS_MEMORY_ALLOCATION_FUNCS() \
289  void *operator new (size_t _s) { return hexrays_alloc(_s); } \
290  void *operator new[](size_t _s) { return hexrays_alloc(_s); } \
291  void *operator new(size_t /*size*/, void *_v) { return _v; } \
292  void operator delete (void *_blk) { hexrays_free(_blk); } \
293  void operator delete[](void *_blk) { hexrays_free(_blk); } \
294  HEXRAYS_PLACEMENT_DELETE
295 #endif // SWIG / SWIGPYTHON
296 
297 void *hexapi hexrays_alloc(size_t size); // allocator used by HEXRAYS_MEMORY_ALLOCATION_FUNCS
298 void hexapi hexrays_free(void *ptr); // deallocator used by HEXRAYS_MEMORY_ALLOCATION_FUNCS
299 
300 typedef uint64 uvlr_t; // unsigned value type used by valrng_t
301 typedef int64 svlr_t; // signed value type used by valrng_t
302 enum { MAX_VLR_SIZE = sizeof(uvlr_t) }; // size of uvlr_t in bytes
303 const uvlr_t MAX_VALUE = uvlr_t(-1); // all bits set
304 const svlr_t MAX_SVALUE = svlr_t(uvlr_t(-1) >> 1); // all bits set except the top one
305 const svlr_t MIN_SVALUE = ~MAX_SVALUE; // bitwise complement of MAX_SVALUE: only the top bit set
306 
307 enum cmpop_t // comparison operator, see valrng_t::set_cmp() and valrng_t::cvt_to_cmp()
308 { // the order of comparisons is the same as in microcode opcodes
309  CMP_NZ, // not equal (cf. m_setnz, m_jnz)
310  CMP_Z, // equal (cf. m_setz, m_jz)
311  CMP_AE, // above or equal (cf. m_setae, m_jae)
312  CMP_B, // below (cf. m_setb, m_jb)
313  CMP_A, // above (cf. m_seta, m_ja)
314  CMP_BE, // below or equal (cf. m_setbe, m_jbe)
315  CMP_GT, // greater (cf. m_setg, m_jg)
316  CMP_GE, // greater or equal (cf. m_setge, m_jge)
317  CMP_LT, // less (cf. m_setl, m_jl)
318  CMP_LE, // less or equal (cf. m_setle, m_jle)
319 };
320 
321 //-------------------------------------------------------------------------
322 // Value-range class: keeps the set of possible values of an operand.
323 class valrng_t
324 {
325 protected:
326  int flags; // range kind: one of the VLR_... codes below
327 #define VLR_TYPE 0x0F // valrng_t type
328 #define VLR_NONE 0x00 // no values
329 #define VLR_ALL 0x01 // all values
330 #define VLR_IVLS 0x02 // union of disjoint intervals
331 #define VLR_RANGE 0x03 // strided range
332 #define VLR_SRANGE 0x04 // strided range with signed bound
333 #define VLR_BITS 0x05 // known bits
334 #define VLR_SECT 0x06 // intersection of sub-ranges
335  // each sub-range should be simple or union
336 #define VLR_UNION 0x07 // union of sub-ranges
337  // each sub-range should be simple or
338  // intersection
339 #define VLR_UNK 0x08 // unknown value (like 'null' in SQL)
340  int size; // operand size: 1..8 bytes
341  // all values must fall within the size
342  union
343  {
344  struct // VLR_RANGE/VLR_SRANGE
345  { // values that are between VALUE and LIMIT
346  // and conform to: value+stride*N
347  uvlr_t value; // initial value
348  uvlr_t limit; // final value
349  // we adjust LIMIT to be on the STRIDE lattice
350  svlr_t stride; // stride between values
351  };
352  struct // VLR_BITS
353  {
354  uvlr_t zeroes; // bits known to be clear
355  uvlr_t ones; // bits known to be set
356  };
357  char reserved[sizeof(qvector<int>)];
358  // VLR_IVLS/VLR_SECT/VLR_UNION
359  };
360  void hexapi clear(void); // used by the destructor and by the set_...() helpers
361  void hexapi copy(const valrng_t &r); // used by the copy constructor
362  valrng_t &hexapi assign(const valrng_t &r); // used by operator=
363 
364 public:
365  explicit valrng_t(int size_ = MAX_VLR_SIZE) // construct an empty (VLR_NONE) range of SIZE_ bytes
366  : flags(VLR_NONE), size(size_), value(0), limit(0), stride(0) {}
367  valrng_t(const valrng_t &r) { copy(r); }
368  ~valrng_t(void) { clear(); }
369  valrng_t &operator=(const valrng_t &r) { return assign(r); }
370  void swap(valrng_t &r) { qswap(*this, r); } // exchange contents with R
371  DECLARE_COMPARISONS(valrng_t);
372  DEFINE_MEMORY_ALLOCATION_FUNCS()
373 
374  void set_none(void) { clear(); } // presumably leaves the range as VLR_NONE (clear() is not visible here)
375  void set_all(void) { clear(); flags = VLR_ALL; } // the range becomes "all values"
376  void set_unk(void) { clear(); flags = VLR_UNK; } // the range becomes "unknown value"
377  void hexapi set_eq(uvlr_t v); // set the range to the value V (see the example below)
378  void hexapi set_cmp(cmpop_t cmp, uvlr_t _value); // presumably: values X for which 'X cmp _value' holds - TODO confirm
379 
380  // reduce size
381  // it takes the low part of size NEW_SIZE
382  // it returns "true" if size is changed successfully.
383  // e.g.: valrng_t vr(2); vr.set_eq(0x1234);
384  // vr.reduce_size(1);
385  // uvlr_t v; vr.cvt_to_single_value(&v);
386  // assert(v == 0x34);
387  bool hexapi reduce_size(int new_size);
388 
389  // Perform intersection or union or inversion.
390  // \return did we change something in THIS?
391  bool hexapi intersect_with(const valrng_t &r);
392  bool hexapi unite_with(const valrng_t &r);
393  void hexapi inverse(); // works for VLR_IVLS only
394 
395  bool empty(void) const { return flags == VLR_NONE; } // no values at all?
396  bool all_values(void) const { return flags == VLR_ALL; } // every value possible?
397  bool is_unknown(void) const { return flags == VLR_UNK; } // unknown value?
398  bool hexapi has(uvlr_t v) const; // presumably: does the range contain V? - TODO confirm
399 
400  void hexapi print(qstring *vout) const;
401  const char *hexapi dstr(void) const;
402 
403  bool hexapi cvt_to_single_value(uvlr_t *v) const;
404  bool hexapi cvt_to_cmp(cmpop_t *cmp, uvlr_t *val, bool strict) const;
405 
406  int get_size() const { return size; } // operand size in bytes
407  static uvlr_t max_value(int size_) // largest unsigned value that fits into SIZE_ bytes
408  {
409  return size_ == MAX_VLR_SIZE
410  ? MAX_VALUE
411  : (uvlr_t(1) << (size_ * 8)) - 1;
412  }
413  static uvlr_t min_svalue(int size_) // bit pattern with only the top bit of SIZE_ bytes set
414  {
415  return size_ == MAX_VLR_SIZE
416  ? MIN_SVALUE
417  : (uvlr_t(1) << (size_ * 8 - 1));
418  }
419  static uvlr_t max_svalue(int size_) // all bits of SIZE_ bytes set except the top one
420  {
421  return size_ == MAX_VLR_SIZE
422  ? MAX_SVALUE
423  : (uvlr_t(1) << (size_ * 8 - 1)) - 1;
424  }
425  uvlr_t max_value() const { return max_value(size); } // same, for the size of this object
426  uvlr_t min_svalue() const { return min_svalue(size); } // same, for the size of this object
427  uvlr_t max_svalue() const { return max_svalue(size); } // same, for the size of this object
428 };
429 DECLARE_TYPE_AS_MOVABLE(valrng_t);
430 
431 //-------------------------------------------------------------------------
432 // Possible memory and register access types (READ and WRITE are separate bits).
433 enum access_type_t
434 {
435  NO_ACCESS = 0, // neither read nor written
436  WRITE_ACCESS = 1, // written
437  READ_ACCESS = 2, // read
438  RW_ACCESS = WRITE_ACCESS | READ_ACCESS, // both bits set
439 };
440 
441 // Are we looking for 'must access' or 'may access' information?
442 // 'must access' means that the code will always access the specified location(s)
443 // 'may access' means that the code may in some cases access the specified location(s)
444 // Example: ldx cs.2, r0.4, r1.4
445 // MUST_ACCESS: r0.4 and r1.4, usually displayed as r0.8 because r0 and r1 are adjacent
446 // MAY_ACCESS: r0.4 and r1.4, and all aliasable memory, because
447 // ldx may access any part of the aliasable memory
448 typedef int maymust_t; // combination of the constants below
449 const maymust_t
450  // One of the following two bits should be specified:
451  MUST_ACCESS = 0x00, // access information we can count on
452  MAY_ACCESS = 0x01, // access information we should take into account
453  // Optionally combined with the following bits:
454  MAYMUST_ACCESS_MASK = 0x01, // mask that selects MUST_ACCESS/MAY_ACCESS, see is_may_access()
455 
456  ONE_ACCESS_TYPE = 0x20, // for find_first_use():
457  // use only the specified maymust access type
458  // (by default it inverts the access type for def-lists)
459  INCLUDE_SPOILED_REGS = 0x40, // for build_def_list() with MUST_ACCESS:
460  // include spoiled registers in the list
461  EXCLUDE_PASS_REGS = 0x80, // for build_def_list() with MAY_ACCESS:
462  // exclude pass_regs from the list
463  FULL_XDSU = 0x100, // for build_def_list():
464  // if xds/xdu source and targets are the same
465  // treat it as if xdsu redefines the entire destination
466  WITH_ASSERTS = 0x200, // for find_first_use():
467  // do not ignore assertions
468  EXCLUDE_VOLATILE = 0x400, // for build_def_list():
469  // exclude volatile memory from the list
470  INCLUDE_UNUSED_SRC = 0x800, // for build_use_list():
471  // do not exclude unused source bytes for m_and/m_or insns
472  INCLUDE_DEAD_RETREGS = 0x1000, // for build_def_list():
473  // include dead returned registers in the list
474  INCLUDE_RESTRICTED = 0x2000,// for MAY_ACCESS: include restricted memory
475  CALL_SPOILS_ONLY_ARGS = 0x4000;// for build_def_list() & MAY_ACCESS:
476  // do not include global memory into the
477  // spoiled list of a call
476  // do not include global memory into the
477  // spoiled list of a call
478 
479 inline THREAD_SAFE bool is_may_access(maymust_t maymust) // is the may/must selector something other than MUST_ACCESS?
480 {
481  return MUST_ACCESS != (maymust & MAYMUST_ACCESS_MASK);
482 }
483 
484 //-------------------------------------------------------------------------
485 /// \defgroup MERR_ Microcode error codes
486 //@{
487 enum merror_t
488 {
489  MERR_OK = 0, ///< ok
490  MERR_BLOCK = 1, ///< no error, switch to new block
491  MERR_INTERR = -1, ///< internal error
492  MERR_INSN = -2, ///< cannot convert to microcode
493  MERR_MEM = -3, ///< not enough memory
494  MERR_BADBLK = -4, ///< bad block found
495  MERR_BADSP = -5, ///< positive sp value has been found
496  MERR_PROLOG = -6, ///< prolog analysis failed
497  MERR_SWITCH = -7, ///< wrong switch idiom
498  MERR_EXCEPTION = -8, ///< exception analysis failed
499  MERR_HUGESTACK = -9, ///< stack frame is too big
500  MERR_LVARS = -10, ///< local variable allocation failed
501  MERR_BITNESS = -11, ///< only 32/16bit functions can be decompiled
502  MERR_BADCALL = -12, ///< could not determine call arguments
503  MERR_BADFRAME = -13, ///< function frame is wrong
504  MERR_UNKTYPE = -14, ///< undefined type %s (currently unused error code)
505  MERR_BADIDB = -15, ///< inconsistent database information
506  MERR_SIZEOF = -16, ///< wrong basic type sizes in compiler settings
507  MERR_REDO = -17, ///< redecompilation has been requested
508  MERR_CANCELED = -18, ///< decompilation has been cancelled
509  MERR_RECDEPTH = -19, ///< max recursion depth reached during lvar allocation
510  MERR_OVERLAP = -20, ///< variables would overlap: %s
511  MERR_PARTINIT = -21, ///< partially initialized variable %s
512  MERR_COMPLEX = -22, ///< too complex function
513  MERR_LICENSE = -23, ///< no license available
514  MERR_ONLY32 = -24, ///< only 32-bit functions can be decompiled for the current database
515  MERR_ONLY64 = -25, ///< only 64-bit functions can be decompiled for the current database
516  MERR_BUSY = -26, ///< already decompiling a function
517  MERR_FARPTR = -27, ///< far memory model is supported only for pc
518  MERR_EXTERN = -28, ///< special segments cannot be decompiled
519  MERR_FUNCSIZE = -29, ///< too big function
520  MERR_BADRANGES = -30, ///< bad input ranges
521  MERR_STOP = -31, ///< no error, stop the analysis
522  MERR_MAX_ERR = 31, // NOTE(review): equals -MERR_STOP; presumably the count of reportable codes - confirm
523  MERR_LOOP = -32, ///< internal code: redo last loop (never reported)
524 };
525 //@}
526 
527 /// Get textual description of an error code
528 /// \param out the output buffer for the error description
529 /// \param code \ref MERR_
530 /// \param mba the microcode array
531 /// \return the error address
532 
533 ea_t hexapi get_merror_desc(qstring *out, merror_t code, mbl_array_t *mba);
534 
535 //-------------------------------------------------------------------------
536 // List of microinstruction opcodes.
537 // The order of setX and jX insns is important, it is used in the code.
538 
539 // Instructions marked with *F may have the FPINSN bit set and operate on fp values
540 // Instructions marked with +F must have the FPINSN bit set. They always operate on fp values
541 // Other instructions do not operate on fp values.
542 
543 enum mcode_t // microinstruction opcodes; comment columns: mnemonic and operands, then meaning
544 {
545  m_nop = 0x00, // nop // no operation
546  m_stx = 0x01, // stx l, {r=sel, d=off} // store register to memory *F
547  m_ldx = 0x02, // ldx {l=sel,r=off}, d // load register from memory *F
548  m_ldc = 0x03, // ldc l=const, d // load constant
549  m_mov = 0x04, // mov l, d // move *F
550  m_neg = 0x05, // neg l, d // negate
551  m_lnot = 0x06, // lnot l, d // logical not
552  m_bnot = 0x07, // bnot l, d // bitwise not
553  m_xds = 0x08, // xds l, d // extend (signed)
554  m_xdu = 0x09, // xdu l, d // extend (unsigned)
555  m_low = 0x0A, // low l, d // take low part
556  m_high = 0x0B, // high l, d // take high part
557  m_add = 0x0C, // add l, r, d // l + r -> dst
558  m_sub = 0x0D, // sub l, r, d // l - r -> dst
559  m_mul = 0x0E, // mul l, r, d // l * r -> dst
560  m_udiv = 0x0F, // udiv l, r, d // l / r -> dst (unsigned)
561  m_sdiv = 0x10, // sdiv l, r, d // l / r -> dst (signed)
562  m_umod = 0x11, // umod l, r, d // l % r -> dst (unsigned)
563  m_smod = 0x12, // smod l, r, d // l % r -> dst (signed)
564  m_or = 0x13, // or l, r, d // bitwise or
565  m_and = 0x14, // and l, r, d // bitwise and
566  m_xor = 0x15, // xor l, r, d // bitwise xor
567  m_shl = 0x16, // shl l, r, d // shift logical left
568  m_shr = 0x17, // shr l, r, d // shift logical right
569  m_sar = 0x18, // sar l, r, d // shift arithmetic right
570  m_cfadd = 0x19, // cfadd l, r, d=carry // calculate carry bit of (l+r)
571  m_ofadd = 0x1A, // ofadd l, r, d=overf // calculate overflow bit of (l+r)
572  m_cfshl = 0x1B, // cfshl l, r, d=carry // calculate carry bit of (l<<r)
573  m_cfshr = 0x1C, // cfshr l, r, d=carry // calculate carry bit of (l>>r)
574  m_sets = 0x1D, // sets l, d=byte SF=1 Sign
575  m_seto = 0x1E, // seto l, r, d=byte OF=1 Overflow of (l-r)
576  m_setp = 0x1F, // setp l, r, d=byte PF=1 Unordered/Parity *F
577  m_setnz = 0x20, // setnz l, r, d=byte ZF=0 Not Equal *F
578  m_setz = 0x21, // setz l, r, d=byte ZF=1 Equal *F
579  m_setae = 0x22, // setae l, r, d=byte CF=0 Above or Equal *F
580  m_setb = 0x23, // setb l, r, d=byte CF=1 Below *F
581  m_seta = 0x24, // seta l, r, d=byte CF=0 & ZF=0 Above *F
582  m_setbe = 0x25, // setbe l, r, d=byte CF=1 | ZF=1 Below or Equal *F
583  m_setg = 0x26, // setg l, r, d=byte SF=OF & ZF=0 Greater
584  m_setge = 0x27, // setge l, r, d=byte SF=OF Greater or Equal
585  m_setl = 0x28, // setl l, r, d=byte SF!=OF Less
586  m_setle = 0x29, // setle l, r, d=byte SF!=OF | ZF=1 Less or Equal
587  m_jcnd = 0x2A, // jcnd l, d // d is mop_v or mop_b
588  m_jnz = 0x2B, // jnz l, r, d // ZF=0 Not Equal *F
589  m_jz = 0x2C, // jz l, r, d // ZF=1 Equal *F
590  m_jae = 0x2D, // jae l, r, d // CF=0 Above or Equal *F
591  m_jb = 0x2E, // jb l, r, d // CF=1 Below *F
592  m_ja = 0x2F, // ja l, r, d // CF=0 & ZF=0 Above *F
593  m_jbe = 0x30, // jbe l, r, d // CF=1 | ZF=1 Below or Equal *F
594  m_jg = 0x31, // jg l, r, d // SF=OF & ZF=0 Greater
595  m_jge = 0x32, // jge l, r, d // SF=OF Greater or Equal
596  m_jl = 0x33, // jl l, r, d // SF!=OF Less
597  m_jle = 0x34, // jle l, r, d // SF!=OF | ZF=1 Less or Equal
598  m_jtbl = 0x35, // jtbl l, r=mcases // Table jump
599  m_ijmp = 0x36, // ijmp {r=sel, d=off} // indirect unconditional jump
600  m_goto = 0x37, // goto l // l is mop_v or mop_b
601  m_call = 0x38, // call l d // l is mop_v or mop_b or mop_h
602  m_icall = 0x39, // icall {l=sel, r=off} d // indirect call
603  m_ret = 0x3A, // ret
604  m_push = 0x3B, // push l
605  m_pop = 0x3C, // pop d
606  m_und = 0x3D, // und d // undefine
607  m_ext = 0x3E, // ext in1, in2, out1 // external insn, not microcode *F
608  m_f2i = 0x3F, // f2i l, d int(l) => d; convert fp -> integer +F
609  m_f2u = 0x40, // f2u l, d uint(l)=> d; convert fp -> uinteger +F
610  m_i2f = 0x41, // i2f l, d fp(l) => d; convert integer -> fp +F
611  m_u2f = 0x42, // u2f l, d fp(l) => d; convert uinteger -> fp +F
612  m_f2f = 0x43, // f2f l, d l => d; change fp precision +F
613  m_fneg = 0x44, // fneg l, d -l => d; change sign +F
614  m_fadd = 0x45, // fadd l, r, d l + r => d; add +F
615  m_fsub = 0x46, // fsub l, r, d l - r => d; subtract +F
616  m_fmul = 0x47, // fmul l, r, d l * r => d; multiply +F
617  m_fdiv = 0x48, // fdiv l, r, d l / r => d; divide +F
618 #define m_max 0x49 // first unused opcode
619 };
620 
621 /// Must an instruction with the given opcode be the last one in a block?
622 /// Such opcodes are called closing opcodes.
623 /// \param mcode instruction opcode
624 /// \param including_calls should m_call/m_icall be considered as the closing opcodes?
625 /// If this function returns true, the opcode cannot appear in the middle
626 /// of a block. Calls are a special case because before MMAT_CALLS they are
627 /// closing opcodes. After MMAT_CALLS they are not considered closing opcodes.
628 
629 THREAD_SAFE bool hexapi must_mcode_close_block(mcode_t mcode, bool including_calls);
630 
631 
632 /// May opcode be propagated?
633 /// Such opcodes can be used in sub-instructions (nested instructions)
634 /// There is a handful of non-propagatable opcodes, like jumps, ret, nop, etc
635 /// All other regular opcodes are propagatable and may appear in a nested
636 /// instruction.
637 
638 THREAD_SAFE bool hexapi is_mcode_propagatable(mcode_t mcode);
639 
640 
641 // Is the opcode m_add or m_sub?
642 inline THREAD_SAFE bool is_mcode_addsub(mcode_t mcode) { return !(mcode != m_add && mcode != m_sub); }
643 // Is the opcode a signed or unsigned extension (m_xds or m_xdu)? 'xdsu' is a shortcut for 'xds or xdu'
644 inline THREAD_SAFE bool is_mcode_xdsu(mcode_t mcode) { return !(mcode != m_xds && mcode != m_xdu); }
645 // Is the opcode one of the 'set' instructions (m_sets..m_setle), i.e. does it set a condition code?
646 inline THREAD_SAFE bool is_mcode_set(mcode_t mcode) { return m_sets <= mcode && mcode <= m_setle; }
647 // Is the opcode a 'set' instruction with a single source operand? m_sets is the only one
648 inline THREAD_SAFE bool is_mcode_set1(mcode_t mcode) { return m_sets == mcode; }
649 // Is the opcode a conditional jump with a single source operand? m_jcnd is the only one
650 inline THREAD_SAFE bool is_mcode_j1(mcode_t mcode) { return m_jcnd == mcode; }
651 // Is the opcode a conditional jump (m_jcnd..m_jle)?
652 inline THREAD_SAFE bool is_mcode_jcond(mcode_t mcode) { return m_jcnd <= mcode && mcode <= m_jle; }
653 // Is the opcode a 'set' instruction that has a conditional jump counterpart (m_setnz..m_setle)?
654 inline THREAD_SAFE bool is_mcode_convertible_to_jmp(mcode_t mcode) { return m_setnz <= mcode && mcode <= m_setle; }
655 // Is the opcode a conditional jump that has a 'set' counterpart (m_jnz..m_jle)?
656 inline THREAD_SAFE bool is_mcode_convertible_to_set(mcode_t mcode) { return m_jnz <= mcode && mcode <= m_jle; }
657 // Is the opcode a call, either direct (m_call) or indirect (m_icall)?
658 inline THREAD_SAFE bool is_mcode_call(mcode_t mcode) { return !(mcode != m_call && mcode != m_icall); }
659 // Must the opcode be an FPU instruction? True for m_f2i and every opcode after it
660 inline THREAD_SAFE bool is_mcode_fpu(mcode_t mcode) { return !(mcode < m_f2i); }
661 // Does the opcode denote a commutative instruction?
662 inline THREAD_SAFE bool is_mcode_commutative(mcode_t mcode)
663 {
664  switch ( mcode )
665  {
666  case m_add: case m_mul:
667  case m_or: case m_and: case m_xor:
668  case m_setz: case m_setnz:
669  case m_cfadd: case m_ofadd:
670  return true;
671  default: return false;
672  }
673 }
674 // Is the opcode a shift instruction (m_shl, m_shr, or m_sar)?
675 inline THREAD_SAFE bool is_mcode_shift(mcode_t mcode)
676 {
677  if ( mcode == m_shl ) return true;
678  if ( mcode == m_shr ) return true;
679  return mcode == m_sar;
680 }
681 // Is the opcode a division or modulo instruction (m_udiv, m_sdiv, m_umod, or m_smod)?
682 inline THREAD_SAFE bool is_mcode_divmod(mcode_t op)
683 {
684  return !(op != m_udiv && op != m_sdiv && op != m_umod && op != m_smod);
685 }
686 
687 // Convert setX opcode into corresponding jX opcode
688 // This function relies on the order of setX and jX opcodes: the result is code - m_setnz + m_jnz
689 inline THREAD_SAFE mcode_t set2jcnd(mcode_t code)
690 {
691  return mcode_t(code - m_setnz + m_jnz);
692 }
693 
694 // Convert jX opcode into corresponding setX opcode
695 // This function relies on the order of setX and jX opcodes: the result is code + m_setnz - m_jnz
696 inline THREAD_SAFE mcode_t jcnd2set(mcode_t code)
697 {
698  return mcode_t(code + m_setnz - m_jnz);
699 }
700 
701 // Negate a conditional opcode.
702 // Conditional jumps can be negated, example: jle -> jg
703 // 'Set' instructions can be negated, example: seta -> setbe
704 // If the opcode cannot be negated, return m_nop
705 THREAD_SAFE mcode_t hexapi negate_mcode_relation(mcode_t code);
706
707
708 // Swap a conditional opcode.
709 // Only conditional jumps and set instructions can be swapped.
710 // The returned opcode is the one required for swapped operands.
711 // Example: "x > y" is the same as "y < x", therefore swap(m_jg) is m_jl.
712 // If the opcode cannot be swapped, return m_nop
713
714 THREAD_SAFE mcode_t hexapi swap_mcode_relation(mcode_t code);
715 
716 // Return the opcode that performs signed operation.
717 // Examples: jae -> jge; udiv -> sdiv
718 // If the opcode cannot be transformed into signed form, simply return it.
719
720 THREAD_SAFE mcode_t hexapi get_signed_mcode(mcode_t code);
721
722
723 // Return the opcode that performs unsigned operation.
724 // Examples: jl -> jb; xds -> xdu
725 // If the opcode cannot be transformed into unsigned form, simply return it.
726
727 THREAD_SAFE mcode_t hexapi get_unsigned_mcode(mcode_t code);
728
729 // Does the opcode perform a signed operation? (true if its unsigned form is a different opcode)
730 inline THREAD_SAFE bool is_signed_mcode(mcode_t code) { return get_unsigned_mcode(code) != code; }
731 // Does the opcode perform an unsigned operation? (true if its signed form is a different opcode)
732 inline THREAD_SAFE bool is_unsigned_mcode(mcode_t code) { return get_signed_mcode(code) != code; }
733 
734 
735 // Does the 'd' operand get modified by the instruction?
736 // Example: "add l,r,d" modifies d, while instructions
737 // like jcnd, ijmp, stx do not modify it.
738 // Note: this function returns 'true' for m_ext but it may be wrong.
739 // Use minsn_t::modifes_d() if you have minsn_t.
740
741 THREAD_SAFE bool hexapi mcode_modifies_d(mcode_t mcode);
742 
743 
744 // Processor condition codes are mapped to the first microregisters
745 // The order is important, see mop_t::is_cc()
746 const mreg_t mr_none = mreg_t(-1); // presumably the 'no register' marker - confirm
747 const mreg_t mr_cf = mreg_t(0); // carry bit
748 const mreg_t mr_zf = mreg_t(1); // zero bit
749 const mreg_t mr_sf = mreg_t(2); // sign bit
750 const mreg_t mr_of = mreg_t(3); // overflow bit
751 const mreg_t mr_pf = mreg_t(4); // parity bit
752 const int cc_count = mr_pf - mr_cf + 1; // number of condition code registers (mr_cf..mr_pf, i.e. 5)
753 const mreg_t mr_cc = mreg_t(5); // synthetic condition code, used internally
754 const mreg_t mr_first = mreg_t(8); // the first processor specific register (values 6 and 7 get no name here)
755 
756 //-------------------------------------------------------------------------
757 /// Operand locator.
758 /// It is used to denote a particular operand in the ctree, for example,
759 /// when the user right clicks on a constant and requests to represent it, say,
760 /// as a hexadecimal number.
762 { // NOTE(review): listing line 761 is missing; presumably it declared 'struct operand_locator_t' (see the constructors) - confirm against the original header
763 private:
764  // forbid the default constructor, force the user to initialize objects of this class.
765  operand_locator_t(void) {} // private; leaves ea and opnum without an initializer
766 public:
767  ea_t ea; ///< address of the original processor instruction
768  int opnum; ///< operand number in the instruction
769  operand_locator_t(ea_t _ea, int _opnum) : ea(_ea), opnum(_opnum) {}
770  DECLARE_COMPARISONS(operand_locator_t);
771  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
772 };
773 
774 //-------------------------------------------------------------------------
775 /// Number representation.
776 /// This structure holds information about a number format.
778 { // NOTE(review): listing line 777 is missing; presumably it declared 'struct number_format_t' (see the constructor) - confirm against the original header
779  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
780  flags_t flags; ///< ida flags, which describe number radix, enum, etc
781  char opnum; ///< operand number: 0..UA_MAXOP
782  char props; ///< properties: combination of NF_ bits (\ref NF_)
783 /// \defgroup NF_ Number format property bits
784 /// Used in number_format_t::props
785 //@{
786 #define NF_FIXED 0x01 ///< number format has been defined by the user
787 #define NF_NEGDONE 0x02 ///< temporary internal bit: negation has been performed
788 #define NF_BINVDONE 0x04 ///< temporary internal bit: inverting bits is done
789 #define NF_NEGATE 0x08 ///< The user asked to negate the constant
790 #define NF_BITNOT 0x10 ///< The user asked to invert bits of the constant
791 #define NF_STROFF 0x20 ///< internal bit: used as stroff, valid iff is_stroff()
792 //@}
793  uchar serial; ///< for enums: constant serial number
794  char org_nbytes; ///< original number size in bytes
795  qstring type_name; ///< for stroffs: structure for offsetof()\n
796  ///< for enums: enum name
797  /// Constructor. All fields except opnum start as zero/empty.
798  number_format_t(int _opnum=0)
799  : flags(0), opnum(char(_opnum)), props(0), serial(0), org_nbytes(0) {}
800  /// Get number radix
801  /// \return 2,8,10, or 16
802  int get_radix(void) const { return ::get_radix(flags, opnum); }
803  /// Is number representation fixed?
804  /// Fixed representation cannot be modified by the decompiler
805  bool is_fixed(void) const { return props != 0; } // NOTE(review): true when ANY NF_ bit is set, not only NF_FIXED - confirm this is intended
806  /// Is a hexadecimal number?
807  bool is_hex(void) const { return ::is_numop(flags, opnum) && get_radix() == 16; }
808  /// Is a decimal number?
809  bool is_dec(void) const { return ::is_numop(flags, opnum) && get_radix() == 10; }
810  /// Is an octal number?
811  bool is_oct(void) const { return ::is_numop(flags, opnum) && get_radix() == 8; }
812  /// Is a symbolic constant?
813  bool is_enum(void) const { return ::is_enum(flags, opnum); }
814  /// Is a character constant?
815  bool is_char(void) const { return ::is_char(flags, opnum); }
816  /// Is a structure field offset?
817  bool is_stroff(void) const { return ::is_stroff(flags, opnum); }
818  /// Is a number? (i.e. not an enum, a character constant, or a structure field offset)
819  bool is_numop(void) const { return !is_enum() && !is_char() && !is_stroff(); }
820  /// Does the number need to be negated or bitwise negated?
821  /// Returns true if the user requested a negation but it is not done yet
822  bool needs_to_be_inverted(void) const
823  {
824  return (props & (NF_NEGATE|NF_BITNOT)) != 0 // the user requested it
825  && (props & (NF_NEGDONE|NF_BINVDONE)) == 0; // not done yet
826  }
827 };
828 
829 // Number formats are attached to (ea,opnum) pairs
830 typedef std::map<operand_locator_t, number_format_t> user_numforms_t;
831 
832 //-------------------------------------------------------------------------
833 /// Base helper class to convert binary data structures into text.
834 /// Other classes are derived from this class.
836 { // NOTE(review): listing line 835 is missing; presumably it declared 'struct vd_printer_t' (the base named by vc_printer_t) - confirm against the original header
837  qstring tmpbuf;
838  int hdrlines; ///< number of header lines (prototype+typedef+lvars)
839  ///< valid at the end of print process
840  /// Print.
841  /// This function is called to generate a portion of the output text.
842  /// The output text may contain color codes.
843  /// The arguments after FORMAT are the values for the printf-style specifiers.
844  /// \param indent number of spaces to generate as prefix
845  /// \param format printf-style format specifier
846  /// \return length of printed string
847  AS_PRINTF(3, 4) virtual int hexapi print(int indent, const char *format,...);
848  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
849 };
850 
851 /// Helper class to convert cfunc_t into text.
852 struct vc_printer_t : public vd_printer_t
853 {
854  const cfunc_t *func; ///< cfunc_t to generate text for
855  char lastchar; ///< internal: last printed character
856  /// Constructor. Stores F in 'func' and resets 'lastchar' to 0.
857  vc_printer_t(const cfunc_t *f) : func(f), lastchar(0) {}
858  /// Are we generating one-line text representation?
859  /// \return \c true if the output will occupy one line without line breaks (this implementation returns false)
860  virtual bool idaapi oneliner(void) const { return false; }
861 };
862 
863 /// Helper class to convert binary data structures into text and put into a file.
865 { // NOTE(review): listing line 864 is missing; presumably it declared 'struct file_printer_t' with a printer base class (see the constructor) - confirm against the original header
866  FILE *fp; ///< Output file pointer
867  /// Print.
868  /// This function is called to generate a portion of the output text.
869  /// The output text may contain color codes.
870  /// The arguments after FORMAT are the values for the printf-style specifiers.
871  /// \param indent number of spaces to generate as prefix
872  /// \param format printf-style format specifier
873  /// \return length of printed string
874  AS_PRINTF(3, 4) int hexapi print(int indent, const char *format, ...);
875  /// Constructor. Only stores the file pointer.
876  file_printer_t(FILE *_fp) : fp(_fp) {}
877 };
878 
879 /// Helper class to convert cfunc_t into a text string
881 { // NOTE(review): listing line 880 is missing; presumably 'struct qstring_printer_t : public vc_printer_t' (see the constructor's initializer list) - confirm against the original header
882  bool with_tags; ///< Generate output with color tags
883  qstring &s; ///< Reference to the output string
884  /// Constructor. 's' is bound to _S by reference, so _S must outlive the printer.
885  qstring_printer_t(const cfunc_t *f, qstring &_s, bool tags)
886  : vc_printer_t(f), with_tags(tags), s(_s) {}
887  /// Print.
888  /// This function is called to generate a portion of the output text.
889  /// The output text may contain color codes.
890  /// The arguments after FORMAT are the values for the printf-style specifiers.
891  /// \param indent number of spaces to generate as prefix
892  /// \param format printf-style format specifier
893  /// \return length of the printed string
894  AS_PRINTF(3, 4) int hexapi print(int indent, const char *format, ...);
895 };
896 
897 //-------------------------------------------------------------------------
898 /// \defgroup type Type string related declarations
899 /// Type related functions and class.
900 //@{
901 
902 /// Print the specified type info.
903 /// This function can be used from a debugger by typing "tif->dstr()"
904
905 const char *hexapi dstr(const tinfo_t *tif);
906
907
908 /// Verify a type string.
909 /// \return true if type string is correct
910
911 bool hexapi is_type_correct(const type_t *ptr);
912
913
914 /// Is a small structure or union?
915 /// \return true if the type is a small UDT (user defined type).
916 /// Small UDTs fit into a register (or pair of registers) as a rule.
917
918 bool hexapi is_small_udt(const tinfo_t &tif);
919
920
921 /// Is definitely a non-boolean type?
922 /// \return true if the type is a non-boolean type (non bool and well defined)
923
924 bool hexapi is_nonbool_type(const tinfo_t &type);
925
926
927 /// Is a boolean type?
928 /// \return true if the type is a boolean type
929
930 bool hexapi is_bool_type(const tinfo_t &type);
931 
932 
933 /// Is a pointer or array type? (is_type_ptr() or is_type_array())
934 inline THREAD_SAFE bool is_ptr_or_array(type_t t)
935 {
936  return is_type_ptr(t) || is_type_array(t);
937 }
938
939 /// Is a pointer, array, or function type? ('paf' = pointer/array/function)
940 inline THREAD_SAFE bool is_paf(type_t t)
941 {
942  return is_ptr_or_array(t) || is_type_func(t);
943 }
944
945 /// Is struct/union/enum definition (not declaration)?
946 inline THREAD_SAFE bool is_inplace_def(const tinfo_t &type)
947 {
948  return type.is_decl_complex() && !type.is_typeref(); // complex type that is not a typeref
949 }
950 
951 /// Calculate number of partial subtypes.
952 /// \return number of partial subtypes. The bigger this number is, the uglier the type is.
953
954 int hexapi partial_type_num(const tinfo_t &type);
955
956
957 /// Get a type of a floating point value with the specified width
958 /// \returns type info object
959 /// \param width width of the desired type
960
961 tinfo_t hexapi get_float_type(int width);
962
963
964 /// Create a type info by width and sign.
965 /// Returns a simple type (examples: int, short) with the given width and sign.
966 /// \param srcwidth size of the type in bytes
967 /// \param sign sign of the type
968
969 tinfo_t hexapi get_int_type_by_width_and_sign(int srcwidth, type_sign_t sign);
970
971
972 /// Create a partial type info by width.
973 /// Returns a partially defined type (examples: _DWORD, _BYTE) with the given width.
974 /// \param size size of the type in bytes
975
976 tinfo_t hexapi get_unk_type(int size);
977
978
979 /// Generate a dummy pointer type
980 /// \param ptrsize size of pointed object
981 /// \param isfp is floating point object?
982
983 tinfo_t hexapi dummy_ptrtype(int ptrsize, bool isfp);
984
985
986 /// Get type of a structure field.
987 /// This function performs validity checks of the field type. Wrong types are rejected.
988 /// \param mptr structure field
989 /// \param type pointer to the variable where the type is returned. This parameter can be NULL.
990 /// \return false if failed
991
992 bool hexapi get_member_type(const member_t *mptr, tinfo_t *type);
993
994
995 /// Create a pointer type.
996 /// This function performs the following conversion: "type" -> "type*"
997 /// \param type object type.
998 /// \return "type*". for example, if 'char' is passed as the argument,
999 /// the function will return 'char *'
1000
1001 tinfo_t hexapi make_pointer(const tinfo_t &type);
1002
1003
1004 /// Create a reference to a named type.
1005 /// \param name type name
1006 /// \return type which refers to the specified name. For example, if name is "DWORD",
1007 /// the type info which refers to "DWORD" is created.
1008
1009 tinfo_t hexapi create_typedef(const char *name);
1010 
1011 
1012 /// Create a reference to an ordinal type.
1013 /// \param n ordinal number of the type
1014 /// \return type which refers to the specified ordinal. For example, if n is 1,
1015 /// the type info which refers to ordinal type 1 is created.
1016
1017 inline tinfo_t create_typedef(int n)
1018 {
1019  tinfo_t tif;
1020  tif.create_typedef(NULL, n); // the result of tinfo_t::create_typedef() is not checked
1021  return tif;
1022 }
1023 
1024 /// Type source (where the type information comes from)
1026 { // NOTE(review): listing line 1025 is missing; presumably 'enum type_source_t' (the type taken by get_type()/set_type()) - confirm against the original header
1027  GUESSED_NONE, // not guessed, specified by the user
1028  GUESSED_WEAK, // not guessed, comes from idb
1029  GUESSED_FUNC, // guessed as a function
1030  GUESSED_DATA, // guessed as a data item
1031  TS_NOELL = 0x8000000, // can be used in set_type() to avoid merging into ellipsis
1032  TS_SHRINK = 0x4000000, // can be used in set_type() to prefer smaller arguments
1033  TS_DONTREF = 0x2000000, // do not mark type as referenced (referenced_types)
1034  TS_MASK = 0xE000000, // all high bits (TS_NOELL|TS_SHRINK|TS_DONTREF)
1035 };
1036 
1037 
1038 /// Get a global type.
1039 /// Global types are types of addressable objects and struct/union/enum types
1040 /// \param id address or id of the object
1041 /// \param tif buffer for the answer
1042 /// \param guess what kind of types to consider
1043 /// \return success
1044
1045 bool hexapi get_type(uval_t id, tinfo_t *tif, type_source_t guess);
1046
1047
1048 /// Set a global type.
1049 /// \param id address or id of the object
1050 /// \param tif new type info
1051 /// \param source where the type comes from
1052 /// \param force true means to set the type as is, false (the default) means to merge the
1053 /// new type with the possibly existing old type info.
1054 /// \return success
1055
1056 bool hexapi set_type(uval_t id, const tinfo_t &tif, type_source_t source, bool force=false);
1057 
1058 //@}
1059 
1060 //-------------------------------------------------------------------------
1061 // We use our own class to store argument and variable locations.
1062 // It is called vdloc_t that stands for 'vd location'.
1063 // 'vd' is the internal name of the decompiler, it stands for 'visual decompiler'.
1064 // The main differences between vdloc and argloc_t:
1065 // ALOC_REG1: the offset is always 0, so it is not used. the register number
1066 // uses the whole ~VLOC_MASK field.
1067 // ALOC_STACK: stack offsets are always positive because they are based on
1068 // the lowest value of sp in the function.
1069 class vdloc_t : public argloc_t
1070 {
1071  int regoff(void); // inaccessible & undefined: regoff() should not be used
1072 public:
1073  // Get the register number.
1074  // This function works only for ALOC_REG1 and ALOC_REG2 location types.
1075  // It uses all available bits for register number for ALOC_REG1
1076  int reg1(void) const { return atype() == ALOC_REG2 ? argloc_t::reg1() : get_reginfo(); }
1077
1078  // Set vdloc to point to the specified register without cleaning it up.
1079  // This is a dangerous function, use set_reg1() instead unless you understand
1080  // what it means to cleanup an argloc.
1081  void _set_reg1(int r1) { argloc_t::_set_reg1(r1, r1>>16); } // bits 16 and up of r1 go into argloc_t's second argument (presumably the offset field - confirm)
1082
1083  // Set vdloc to point to the specified register (cleans up the old contents first).
1084  void set_reg1(int r1) { cleanup_argloc(this); _set_reg1(r1); }
1085
1086  // Use member functions of argloc_t for other location types.
1087
1088  // Return textual representation.
1089  // Note: this and all other dstr() functions can be used from a debugger.
1090  // It is much easier than to inspect the memory contents byte by byte.
1091  const char *hexapi dstr(int width=0) const;
1092  DECLARE_COMPARISONS(vdloc_t);
1093  bool hexapi is_aliasable(const mbl_array_t *mb, int size) const;
1094 };
1095 
1096 /// Print vdloc.
1097 /// Since vdloc does not always carry the size info, we pass it as NBYTES.
1098 void hexapi print_vdloc(qstring *vout, const vdloc_t &loc, int nbytes);
1099
1100 //-------------------------------------------------------------------------
1101 /// Do two arglocs overlap? W1 and W2 are the sizes of LOC1 and LOC2 (lvar_t passes its 'width', i.e. bytes).
1102 bool hexapi arglocs_overlap(const vdloc_t &loc1, size_t w1, const vdloc_t &loc2, size_t w2);
1103 
1104 /// Local variable locator.
1105 /// Local variables are located using definition ea and location.
1106 /// Each variable must have a unique locator, this is how we tell them apart.
1108 { // NOTE(review): listing line 1107 is missing; presumably it declared 'struct lvar_locator_t' (see the constructors) - confirm against the original header
1109  vdloc_t location; ///< Variable location.
1110  ea_t defea; ///< Definition address. The address of an instruction
1111  ///< that initializes the variable. This value is
1112  ///< assigned to each lvar by lvar allocator.
1113  ///< BADADDR for function arguments
1114  lvar_locator_t(void) : defea(BADADDR) {}
1115  lvar_locator_t(const vdloc_t &loc, ea_t ea) : location(loc), defea(ea) {}
1116  /// Get offset of the variable in the stack frame.
1117  /// \return a non-negative value for stack variables. The value is
1118  /// an offset from the bottom of the stack frame in terms of
1119  /// vd-offsets.
1120  /// negative values mean error (not a stack variable)
1121  sval_t get_stkoff(void) const
1122  {
1123  return location.is_stkoff() ? location.stkoff() : -1;
1124  }
1125  /// Is variable located on one register?
1126  bool is_reg1(void) const { return location.is_reg1(); }
1127  /// Is variable located on two registers?
1128  bool is_reg2(void) const { return location.is_reg2(); }
1129  /// Is variable located on register(s)?
1130  bool is_reg_var(void) const { return location.is_reg(); }
1131  /// Is variable located on the stack?
1132  bool is_stk_var(void) const { return location.is_stkoff(); }
1133  /// Is variable scattered?
1134  bool is_scattered(void) const { return location.is_scattered(); }
1135  /// Get the register number of the variable
1136  mreg_t get_reg1(void) const { return location.reg1(); }
1137  /// Get the number of the second register (works only for ALOC_REG2 lvars)
1138  mreg_t get_reg2(void) const { return location.reg2(); }
1139  /// Get information about scattered variable (const and non-const access)
1140  const scattered_aloc_t &get_scattered(void) const { return location.scattered(); }
1141  scattered_aloc_t &get_scattered(void) { return location.scattered(); }
1142  DECLARE_COMPARISONS(lvar_locator_t);
1143  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
1144  // Debugging: get textual representation of a lvar locator.
1145  const char *hexapi dstr(void) const;
1146 };
1147 
1148 /// Definition of a local variable (register or stack) #var #lvar
1149 class lvar_t : public lvar_locator_t
1150 {
1151  friend class mbl_array_t;
1152  int flags; ///< \ref CVAR_
1153 /// \defgroup CVAR_ Local variable property bits
1154 /// Used in lvar_t::flags
1155 //@{
1156 #define CVAR_USED 0x00000001 ///< is used in the code?
1157 #define CVAR_TYPE 0x00000002 ///< the type is defined?
1158 #define CVAR_NAME 0x00000004 ///< has nice name?
1159 #define CVAR_MREG 0x00000008 ///< corresponding mregs were replaced?
1160 #define CVAR_NOWD 0x00000010 ///< width is unknown
1161 #define CVAR_UNAME 0x00000020 ///< user-defined name
1162 #define CVAR_UTYPE 0x00000040 ///< user-defined type
1163 #define CVAR_RESULT 0x00000080 ///< function result variable
1164 #define CVAR_ARG 0x00000100 ///< function argument
1165 #define CVAR_FAKE 0x00000200 ///< fake variable (return var or va_list)
1166 #define CVAR_OVER 0x00000400 ///< overlapping variable
1167 #define CVAR_FLOAT 0x00000800 ///< used in a fpu insn
1168 #define CVAR_SPOILED 0x00001000 ///< internal flag, do not use: spoiled var
1169 #define CVAR_MAPDST 0x00002000 ///< other variables are mapped to this var
1170 #define CVAR_PARTIAL 0x00004000 ///< variable type is partially defined
1171 #define CVAR_THISARG 0x00008000 ///< 'this' argument of c++ member functions
1172 #define CVAR_FORCED 0x00010000 ///< variable was created by an explicit request
1173  ///< otherwise we could reuse an existing var
1174 #define CVAR_REGNAME 0x00020000 ///< has a register name (like _RAX)
1175 #define CVAR_NOPTR 0x00040000 ///< variable cannot be a pointer (user choice)
1176 #define CVAR_DUMMY 0x00080000 ///< dummy argument (added to fill a hole in
1177  ///< the argument list)
1178 #define CVAR_NOTARG 0x00100000 ///< variable cannot be an input argument
1179 #define CVAR_AUTOMAP 0x00200000 ///< variable was automatically mapped
1180 //@}
1181
1182 public:
1183  qstring name; ///< variable name.
1184  ///< use mbl_array_t::set_nice_lvar_name() and
1185  ///< mbl_array_t::set_user_lvar_name() to modify it
1186  qstring cmt; ///< variable comment string
1187  tinfo_t tif; ///< variable type
1188  int width; ///< variable size in bytes
1189  int defblk; ///< first block defining the variable.
1190  ///< 0 for args, -1 if unknown
1191  uint64 divisor; ///< max known divisor of the variable
1192
1193  lvar_t(void) : flags(CVAR_USED), width(0), defblk(-1), divisor(0) {}
1194  lvar_t(const qstring &n, const vdloc_t &l, ea_t e, const tinfo_t &t, int w, int db)
1195  : lvar_locator_t(l, e), flags(CVAR_USED), name(n), tif(t), width(w),
1196  defblk(db), divisor(0) {}
1197  lvar_t(mreg_t reg, int width, const tinfo_t &type, int nblock, ea_t defea);
1198  // Debugging: get textual representation of a local variable.
1199  const char *hexapi dstr(void) const;
1200
1201  /// Is the variable used in the code?
1202  bool used(void) const { return (flags & CVAR_USED) != 0; }
1203  /// Has the variable a type?
1204  bool typed(void) const { return (flags & CVAR_TYPE) != 0; }
1205  /// Have corresponding microregs been replaced by references to this variable?
1206  bool mreg_done(void) const { return (flags & CVAR_MREG) != 0; }
1207  /// Does the variable have a nice name?
1208  bool has_nice_name(void) const { return (flags & CVAR_NAME) != 0; }
1209  /// Is the width of the variable unknown? (CVAR_NOWD is set)
1210  bool is_unknown_width(void) const { return (flags & CVAR_NOWD) != 0; }
1211  /// Has any user-defined information? (user name, user type, 'no pointer' mark, or a comment)
1212  bool has_user_info(void) const { return (flags & (CVAR_UNAME|CVAR_UTYPE|CVAR_NOPTR)) != 0 || !cmt.empty(); }
1213  /// Has user-defined name?
1214  bool has_user_name(void) const { return (flags & CVAR_UNAME) != 0; }
1215  /// Has user-defined type?
1216  bool has_user_type(void) const { return (flags & CVAR_UTYPE) != 0; }
1217  /// Is the function result?
1218  bool is_result_var(void) const { return (flags & CVAR_RESULT) != 0; }
1219  /// Is the function argument?
1220  bool is_arg_var(void) const { return (flags & CVAR_ARG) != 0; }
1221  /// Is the promoted function argument?
1222  bool hexapi is_promoted_arg(void) const;
1223  /// Is fake return variable?
1224  bool is_fake_var(void) const { return (flags & CVAR_FAKE) != 0; }
1225  /// Is overlapped variable?
1226  bool is_overlapped_var(void) const { return (flags & CVAR_OVER) != 0; }
1227  /// Used by a fpu insn?
1228  bool is_floating_var(void) const { return (flags & CVAR_FLOAT) != 0; }
1229  /// Is spoiled var? (meaningful only during lvar allocation)
1230  bool is_spoiled_var(void) const { return (flags & CVAR_SPOILED) != 0; }
1231  /// Variable type should be handled as a partial one
1232  bool is_partialy_typed(void) const { return (flags & CVAR_PARTIAL) != 0; }
1233  /// Variable type should not be a pointer
1234  bool is_noptr_var(void) const { return (flags & CVAR_NOPTR) != 0; }
1235  /// Other variable(s) map to this var?
1236  bool is_mapdst_var(void) const { return (flags & CVAR_MAPDST) != 0; }
1237  /// Is 'this' argument of a C++ member function?
1238  bool is_thisarg(void) const { return (flags & CVAR_THISARG) != 0; }
1239  /// Is a forced variable?
1240  bool is_forced_var(void) const { return (flags & CVAR_FORCED) != 0; }
1241  /// Has a register name? (like _RAX)
1242  bool has_regname(void) const { return (flags & CVAR_REGNAME) != 0; }
1243  /// Is a dummy argument (added to fill a hole in the argument list)
1244  bool is_dummy_arg(void) const { return (flags & CVAR_DUMMY) != 0; }
1245  /// Is a local variable? (local variable cannot be an input argument)
1246  bool is_notarg(void) const { return (flags & CVAR_NOTARG) != 0; }
1247  /// Was a local variable automatically mapped to another variable?
1248  bool is_automapped(void) const { return (flags & CVAR_AUTOMAP) != 0; }
1249  void set_used(void) { flags |= CVAR_USED; }
1250  void clear_used(void) { flags &= ~CVAR_USED; }
1251  void set_typed(void) { flags |= CVAR_TYPE; clr_noptr_var(); } // also drops the 'no pointer' mark
1252  void set_non_typed(void) { flags &= ~CVAR_TYPE; }
1253  void clr_user_info(void) { flags &= ~(CVAR_UNAME|CVAR_UTYPE|CVAR_NOPTR); } // flags only: 'cmt' is left untouched
1254  void set_user_name(void) { flags |= CVAR_NAME|CVAR_UNAME; } // a user name also counts as a nice name
1255  void set_user_type(void) { flags |= CVAR_TYPE|CVAR_UTYPE; } // a user type also marks the variable as typed
1256  void clr_user_type(void) { flags &= ~CVAR_UTYPE; }
1257  void clr_user_name(void) { flags &= ~CVAR_UNAME; }
1258  void set_mreg_done(void) { flags |= CVAR_MREG; }
1259  void clr_mreg_done(void) { flags &= ~CVAR_MREG; }
1260  void set_unknown_width(void) { flags |= CVAR_NOWD; }
1261  void clr_unknown_width(void) { flags &= ~CVAR_NOWD; }
1262  void set_arg_var(void) { flags |= CVAR_ARG; }
1263  void clr_arg_var(void) { flags &= ~(CVAR_ARG|CVAR_THISARG); } // also clears the 'this' argument mark
1264  void set_fake_var(void) { flags |= CVAR_FAKE; }
1265  void clr_fake_var(void) { flags &= ~CVAR_FAKE; }
1266  void set_overlapped_var(void) { flags |= CVAR_OVER; }
1267  void clr_overlapped_var(void) { flags &= ~CVAR_OVER; }
1268  void set_floating_var(void) { flags |= CVAR_FLOAT; }
1269  void clr_floating_var(void) { flags &= ~CVAR_FLOAT; }
1270  void set_spoiled_var(void) { flags |= CVAR_SPOILED; }
1271  void clr_spoiled_var(void) { flags &= ~CVAR_SPOILED; }
1272  void set_mapdst_var(void) { flags |= CVAR_MAPDST; }
1273  void clr_mapdst_var(void) { flags &= ~CVAR_MAPDST; }
1274  void set_partialy_typed(void) { flags |= CVAR_PARTIAL; }
1275  void clr_partialy_typed(void) { flags &= ~CVAR_PARTIAL; }
1276  void set_noptr_var(void) { flags |= CVAR_NOPTR; }
1277  void clr_noptr_var(void) { flags &= ~CVAR_NOPTR; }
1278  void set_thisarg(void) { flags |= CVAR_THISARG; }
1279  void clr_thisarg(void) { flags &= ~CVAR_THISARG; }
1280  void set_forced_var(void) { flags |= CVAR_FORCED; }
1281  void clr_forced_var(void) { flags &= ~CVAR_FORCED; }
1282  void set_dummy_arg(void) { flags |= CVAR_DUMMY; }
1283  void clr_dummy_arg(void) { flags &= ~CVAR_DUMMY; }
1284  void set_notarg(void) { clr_arg_var(); flags |= CVAR_NOTARG; } // the argument marks are cleared first
1285  void clr_notarg(void) { flags &= ~CVAR_NOTARG; }
1286  void set_automapped(void) { flags |= CVAR_AUTOMAP; }
1287  void clr_automapped(void) { flags &= ~CVAR_AUTOMAP; }
1288
1289  /// Do variables overlap?
1290  bool has_common(const lvar_t &v) const
1291  {
1292  return arglocs_overlap(location, width, v.location, v.width);
1293  }
1294  /// Does the variable overlap with the specified location?
1295  bool has_common_bit(const vdloc_t &loc, asize_t width2) const
1296  {
1297  return arglocs_overlap(location, width, loc, width2);
1298  }
1299  /// Get variable type (const and non-const access to 'tif')
1300  const tinfo_t &type(void) const { return tif; }
1301  tinfo_t &type(void) { return tif; }
1302
1303  /// Check if the variable accepts the specified type.
1304  /// Some types are forbidden (void, function types, wrong arrays, etc)
1305  bool hexapi accepts_type(const tinfo_t &t, bool may_change_thisarg=false);
1306  /// Set variable type
1307  /// Note: this function does not modify the idb, only the lvar instance
1308  /// in the memory. For permanent changes see modify_user_lvars()
1309  /// Also, the variable type is not considered as final by the decompiler
1310  /// and may be modified later by the type derivation.
1311  /// In some cases set_final_var_type() may work better, but it does not
1312  /// do persistent changes to the database either.
1313  /// \param t new type
1314  /// \param may_fail if false and type is bad, interr
1315  /// \return success
1316  bool hexapi set_lvar_type(const tinfo_t &t, bool may_fail=false);
1317
1318  /// Set final variable type.
1319  void set_final_lvar_type(const tinfo_t &t)
1320  {
1321  set_lvar_type(t); // may_fail is left at its default (false); the result is ignored
1322  set_typed();
1323  }
1324
1325  /// Change the variable width.
1326  /// We call the variable size 'width', it represents the number of bytes.
1327  /// This function may change the variable type using set_lvar_type().
1328  /// \param w new width
1329  /// \param svw_flags combination of SVW_... bits
1330  /// \return success
1331  bool hexapi set_width(int w, int svw_flags=0);
1332 #define SVW_INT 0x00 // integer value
1333 #define SVW_FLOAT 0x01 // floating point value
1334 #define SVW_SOFT 0x02 // may fail and return false;
1335  // if this bit is not set and the type is bad, interr
1336
1337  /// Append local variable to mlist.
1338  /// \param lst list to append to
1339  /// \param pad_if_scattered if true, append padding bytes in case of scattered lvar
1340  void hexapi append_list(mlist_t *lst, bool pad_if_scattered=false) const;
1341
1342  /// Is the variable aliasable?
1343  /// \param mba ptr to the current mbl_array_t
1344  /// Aliasable variables may be modified indirectly (through a pointer)
1345  bool is_aliasable(const mbl_array_t *mba) const
1346  {
1347  return location.is_aliasable(mba, width);
1348  }
1349
1350 };
1351 DECLARE_TYPE_AS_MOVABLE(lvar_t);
1352 
1353 /// Vector of local variables
1354 struct lvars_t : public qvector<lvar_t>
1355 {
1356  /// Find input variable at the specified location (find_lvar() with defblk=0).
1357  /// \param argloc variable location
1358  /// \param _size variable size
1359  /// \return -1 if failed, otherwise the index into the variables vector.
1360  int find_input_lvar(const vdloc_t &argloc, int _size) { return find_lvar(argloc, _size, 0); }
1361
1362
1363  /// Find stack variable at the specified location.
1364  /// \param spoff offset from the minimal sp
1365  /// \param width variable size
1366  /// \return -1 if failed, otherwise the index into the variables vector.
1367  int hexapi find_stkvar(int32 spoff, int width);
1368
1369
1370  /// Find variable at the specified location.
1371  /// \param ll variable location
1372  /// \return pointer to variable or NULL
1373  lvar_t *hexapi find(const lvar_locator_t &ll);
1374
1375
1376  /// Find variable at the specified location.
1377  /// \param location variable location
1378  /// \param width variable size
1379  /// \param defblk definition block of the lvar. -1 (the default) means any block
1380  /// \return -1 if failed, otherwise the index into the variables vector.
1381  int hexapi find_lvar(const vdloc_t &location, int width, int defblk=-1);
1382 };
1383 
1384 /// Saved user settings for local variables: name, type, comment.
1386 { // NOTE(review): listing line 1385 is missing; presumably it declared 'struct lvar_saved_info_t' (see the constructor) - confirm against the original header
1387  lvar_locator_t ll; ///< Variable locator
1388  qstring name; ///< Name
1389  tinfo_t type; ///< Type
1390  qstring cmt; ///< Comment
1391  ssize_t size; ///< Type size (if not initialized then -1)
1392  int flags; ///< \ref LVINF_
1393 /// \defgroup LVINF_ saved user lvar info property bits
1394 /// Used in lvar_saved_info_t::flags
1395 //@{
1396 #define LVINF_KEEP 0x0001 ///< preserve saved user settings regardless of vars
1397  ///< for example, if a var loses all its
1398  ///< user-defined attributes or even gets
1399  ///< destroyed, keep its lvar_saved_info_t.
1400  ///< this is used for ephemeral variables that
1401  ///< get destroyed by macro recognition.
1402 #define LVINF_FORCE 0x0002 ///< force allocation of a new variable.
1403  ///< forces the decompiler to create a new
1404  ///< variable at ll.defea
1405 #define LVINF_NOPTR 0x0004 ///< variable type should not be a pointer
1406 #define LVINF_NOMAP 0x0008 ///< forbid automatic mapping of the variable
1407 //@}
1408  lvar_saved_info_t(void) : size(BADSIZE), flags(0) {}
1409  bool has_info(void) const // any name/type/comment or FORCE/NOPTR/NOMAP bit; LVINF_KEEP and 'size' are not considered
1410  {
1411  return !name.empty()
1412  || !type.empty()
1413  || !cmt.empty()
1414  || is_forced_lvar()
1415  || is_noptr_lvar()
1416  || is_nomap_lvar();
1417  }
1418  bool operator==(const lvar_saved_info_t &r) const // compares name, cmt, ll and type only; 'size' and 'flags' are ignored
1419  {
1420  return name == r.name
1421  && cmt == r.cmt
1422  && ll == r.ll
1423  && type == r.type;
1424  }
1425  bool operator!=(const lvar_saved_info_t &r) const { return !(*this == r); }
1426  bool is_kept(void) const { return (flags & LVINF_KEEP) != 0; }
1427  void clear_keep(void) { flags &= ~LVINF_KEEP; }
1428  void set_keep(void) { flags |= LVINF_KEEP; }
1429  bool is_forced_lvar(void) const { return (flags & LVINF_FORCE) != 0; }
1430  void set_forced_lvar(void) { flags |= LVINF_FORCE; }
1431  void clr_forced_lvar(void) { flags &= ~LVINF_FORCE; }
1432  bool is_noptr_lvar(void) const { return (flags & LVINF_NOPTR) != 0; }
1433  void set_noptr_lvar(void) { flags |= LVINF_NOPTR; }
1434  void clr_noptr_lvar(void) { flags &= ~LVINF_NOPTR; }
1435  bool is_nomap_lvar(void) const { return (flags & LVINF_NOMAP) != 0; }
1436  void set_nomap_lvar(void) { flags |= LVINF_NOMAP; }
1437  void clr_nomap_lvar(void) { flags &= ~LVINF_NOMAP; }
1438 };
1439 DECLARE_TYPE_AS_MOVABLE(lvar_saved_info_t);
1440 typedef qvector<lvar_saved_info_t> lvar_saved_infos_t;
1441 
1442 /// Local variable mapping (is used to merge variables)
1443 typedef std::map<lvar_locator_t, lvar_locator_t> lvar_mapping_t;
1444 
1445 /// All user-defined information about local variables
1447 {
1448  /// User-specified names, types, comments for lvars. Variables without
1449  /// user-specified info are not present in this vector.
1450  lvar_saved_infos_t lvvec;
1451 
1452  /// Local variable mapping (used for merging variables)
1453  lvar_mapping_t lmaps;
1454 
1455  /// Delta to add to IDA stack offset to calculate Hex-Rays stack offsets.
1456  /// Should be set by the caller before calling save_user_lvar_settings();
1458 
1459  /// Various flags. Possible values are from \ref ULV_
1461 /// \defgroup ULV_ lvar_uservec_t property bits
1462 /// Used in lvar_uservec_t::ulv_flags
1463 //@{
1464 #define ULV_PRECISE_DEFEA 0x0001 ///< Use precise defea's for lvar locations
1465 //@}
1466 
1467  lvar_uservec_t(void) : stkoff_delta(0), ulv_flags(ULV_PRECISE_DEFEA) {}
1468  void swap(lvar_uservec_t &r)
1469  {
1470  lvvec.swap(r.lvvec);
1471  lmaps.swap(r.lmaps);
1472  std::swap(stkoff_delta, r.stkoff_delta);
1473  std::swap(ulv_flags, r.ulv_flags);
1474  }
1475  void clear()
1476  {
1477  lvvec.clear();
1478  lmaps.clear();
1479  stkoff_delta = 0;
1480  ulv_flags = ULV_PRECISE_DEFEA;
1481  }
1482 
1483  /// find saved user settings for given var
1485  {
1486  for ( lvar_saved_infos_t::iterator p=lvvec.begin(); p != lvvec.end(); ++p )
1487  {
1488  if ( p->ll == vloc )
1489  return p;
1490  }
1491  return NULL;
1492  }
1493 
1494  /// Preserve user settings for given var
1495  void keep_info(const lvar_t &v)
1496  {
1497  lvar_saved_info_t *p = find_info(v);
1498  if ( p != NULL )
1499  p->set_keep();
1500  }
1501 };
1502 
1503 /// Restore user defined local variable settings in the database.
1504 /// \param func_ea entry address of the function
1505 /// \param lvinf ptr to output buffer
1506 /// \return success
1507 
1508 bool hexapi restore_user_lvar_settings(lvar_uservec_t *lvinf, ea_t func_ea);
1509 
1510 
1511 /// Save user defined local variable settings into the database.
1512 /// \param func_ea entry address of the function
1513 /// \param lvinf user-specified info about local variables
1514 
1515 void hexapi save_user_lvar_settings(ea_t func_ea, const lvar_uservec_t &lvinf);
1516 
1517 
1518 /// Helper class to modify saved local variable settings.
1520 {
1521  /// Modify lvar settings.
1522  /// Returns: true-modified
1523  virtual bool idaapi modify_lvars(lvar_uservec_t *lvinf) = 0;
1524 };
1525 
1526 /// Modify saved local variable settings.
1527 /// \param entry_ea function start address
1528 /// \param mlv local variable modifier
1529 /// \return true if modified variables
1530 
1531 bool hexapi modify_user_lvars(ea_t entry_ea, user_lvar_modifier_t &mlv);
1532 
1533 
1534 /// Modify saved local variable settings.
1535 /// \param func_ea function start address
1536 /// \param info local variable info attrs
1537 /// \param mli_flags bits that specify which attrs defined by INFO are to be set
1538 /// \return true if modified, false if invalid MLI_FLAGS passed
1539 
1540 bool hexapi modify_user_lvar_info(ea_t func_ea, uint mli_flags, const lvar_saved_info_t &info);
1541 /// \defgroup MLI_ user info bits
1542 //@{
1543 #define MLI_NAME 0x01 ///< apply lvar name
1544 #define MLI_TYPE 0x02 ///< apply lvar type
1545 #define MLI_CMT 0x04 ///< apply lvar comment
1546 #define MLI_SET_FLAGS 0x08 ///< set LVINF_... bits
1547 #define MLI_CLR_FLAGS 0x10 ///< clear LVINF_... bits
1548 //@}
1549 
1550 //-------------------------------------------------------------------------
1551 /// User-defined function calls
1552 struct udcall_t
1553 {
1554  qstring name; // name of the function
1555  tinfo_t tif; // function prototype
1556  DECLARE_COMPARISONS(udcall_t)
1557  {
1558  int code = ::compare(name, r.name);
1559  if ( code == 0 )
1560  code = ::compare(tif, r.tif);
1561  return 0;
1562  }
1563 };
1564 
1565 // All user-defined function calls (map address -> udcall)
1566 typedef std::map<ea_t, udcall_t> udcall_map_t;
1567 
1568 /// Restore user defined function calls from the database.
1569 /// \param udcalls ptr to output buffer
1570 /// \param func_ea entry address of the function
1571 /// \return success
1572 
1573 bool hexapi restore_user_defined_calls(udcall_map_t *udcalls, ea_t func_ea);
1574 
1575 
1576 /// Save user defined local function calls into the database.
1577 /// \param func_ea entry address of the function
1578 /// \param udcalls user-specified info about user defined function calls
1579 
1580 void hexapi save_user_defined_calls(ea_t func_ea, const udcall_map_t &udcalls);
1581 
1582 
1583 /// Convert function type declaration into internal structure
1584 /// \param udc - pointer to output structure
1585 /// \param decl - function type declaration
1586 /// \param silent - if TRUE: do not show warning in case of incorrect type
1587 /// \return success
1588 
1589 bool hexapi parse_user_call(udcall_t *udc, const char *decl, bool silent);
1590 
1591 
1592 /// try to generate user-defined call for an instruction
1593 /// \return \ref MERR_ code:
1594 /// MERR_OK - user-defined call generated
1595 /// else - error (MERR_INSN == unacceptable udc.tif)
1596 
1598 
1599 
1600 //-------------------------------------------------------------------------
1601 /// Generic microcode generator class.
1602 /// An instance of a derived class can be registered to be used for
1603 /// non-standard microcode generation. Before microcode generation for an
1604 /// instruction all registered object will be visited by the following way:
1605 /// if ( filter->match(cdg) )
1606 /// code = filter->apply(cdg);
1607 /// if ( code == MERR_OK )
1608 /// continue; // filter generated microcode, go to the next instruction
1610 {
1611  /// check if the filter object is to be applied
1612  /// \return success
1613  virtual bool match(codegen_t &cdg) = 0;
1614 
1615  /// generate microcode for an instruction
1616  /// \return MERR_... code:
1617  /// MERR_OK - user-defined call generated, go to the next instruction
1618  /// MERR_INSN - not generated - the caller should try the standard way
1619  /// else - error
1620  virtual merror_t apply(codegen_t &cdg) = 0;
1621 };
1622 
1623 /// register/unregister non-standard microcode generator
1624 /// \param filter - microcode generator object
1625 /// \param install - TRUE - register the object, FALSE - unregister
1626 void hexapi install_microcode_filter(microcode_filter_t *filter, bool install=true);
1627 
1628 //-------------------------------------------------------------------------
1629 /// Abstract class: User-defined call generator
1630 /// derived classes should implement method 'match'
1632 {
1633  udcall_t udc;
1634 
1635 public:
1636  /// return true if the filter object should be applied to given instruction
1637  virtual bool match(codegen_t &cdg) = 0;
1638 
1639  bool hexapi init(const char *decl);
1640  virtual merror_t hexapi apply(codegen_t &cdg);
1641 };
1642 
1643 //-------------------------------------------------------------------------
// Storage word used for bitmaps.
typedef size_t mbitmap_t;
// Number of bits in one mbitmap_t word.
const size_t bitset_width = CHAR_BIT * sizeof(mbitmap_t);
// bitset_width - 1
const size_t bitset_align = bitset_width - 1;
// NOTE(review): presumably log2(bitset_width); 6 matches a 64-bit
// mbitmap_t only - confirm for other word sizes
const size_t bitset_shift = 6;
1648 
1649 /// Bit set class. See https://en.wikipedia.org/wiki/Bit_array
1651 {
1652  mbitmap_t *bitmap; ///< pointer to bitmap
1653  size_t high; ///< highest bit+1 (multiply of bitset_width)
1654 
1655 public:
1656  bitset_t(void) : bitmap(NULL), high(0) {}
1657  hexapi bitset_t(const bitset_t &m); // copy constructor
1658  ~bitset_t(void)
1659  {
1660  qfree(bitmap);
1661  bitmap = NULL;
1662  }
1663  void swap(bitset_t &r)
1664  {
1665  std::swap(bitmap, r.bitmap);
1666  std::swap(high, r.high);
1667  }
1668  bitset_t &operator=(const bitset_t &m) { return copy(m); }
1669  bitset_t &hexapi copy(const bitset_t &m); // assignment operator
1670  bool hexapi add(int bit); // add a bit
1671  bool hexapi add(int bit, int width); // add bits
1672  bool hexapi add(const bitset_t &ml); // add another bitset
1673  bool hexapi sub(int bit); // delete a bit
1674  bool hexapi sub(int bit, int width); // delete bits
1675  bool hexapi sub(const bitset_t &ml); // delete another bitset
1676  bool hexapi cut_at(int maxbit); // delete bits >= maxbit
1677  void hexapi shift_down(int shift); // shift bits down
1678  bool hexapi has(int bit) const; // test presence of a bit
1679  bool hexapi has_all(int bit, int width) const; // test presence of bits
1680  bool hexapi has_any(int bit, int width) const; // test presence of bits
1681  void print(
1682  qstring *vout,
1683  int (*get_bit_name)(qstring *out, int bit, int width, void *ud)=NULL,
1684  void *ud=NULL) const;
1685  const char *hexapi dstr(void) const;
1686  bool hexapi empty(void) const; // is empty?
1687  int hexapi count(void) const; // number of set bits
1688  int hexapi count(int bit) const; // get number set bits starting from 'bit'
1689  int hexapi last(void) const; // get the number of the last bit (-1-no bits)
1690  void clear(void) { high = 0; } // make empty
1691  void hexapi fill_with_ones(int maxbit);
1692  bool fill_gaps(int total_nbits);
1693  bool hexapi has_common(const bitset_t &ml) const; // has common elements?
1694  bool hexapi intersect(const bitset_t &ml); // intersect sets. returns true if changed
1695  bool hexapi is_subset_of(const bitset_t &ml) const; // is subset of?
1696  bool includes(const bitset_t &ml) const { return ml.is_subset_of(*this); }
1697  void extract(intvec_t &out) const;
1698  DECLARE_COMPARISONS(bitset_t);
1699  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
1700  class iterator
1701  {
1702  friend class bitset_t;
1703  int i;
1704  public:
1705  iterator(int n=-1) : i(n) {}
1706  bool operator==(const iterator &n) const { return i == n.i; }
1707  bool operator!=(const iterator &n) const { return i != n.i; }
1708  int operator*(void) const { return i; }
1709  };
1710  typedef iterator const_iterator;
1711  iterator itat(int n) const { return iterator(goup(n)); }
1712  iterator begin(void) const { return itat(0); }
1713  iterator end(void) const { return iterator(high); }
1714  int front(void) const { return *begin(); }
1715  int back(void) const { return *end(); }
1716  void inc(iterator &p, int n=1) const { p.i = goup(p.i+n); }
1717 private:
1718  int hexapi goup(int reg) const;
1719 };
1720 DECLARE_TYPE_AS_MOVABLE(bitset_t);
1721 typedef qvector<bitset_t> array_of_bitsets;
1722 
1723 //-------------------------------------------------------------------------
1724 template <class T>
1725 struct ivl_tpl // an interval
1726 {
1727 protected:
1728  // forbid the default constructor
1729  ivl_tpl(void) {}
1730 public:
1731  T off;
1732  T size;
1733  ivl_tpl(T _off, T _size) : off(_off), size(_size) {}
1734  bool valid() const { return last() >= off; }
1735  T end() const { return off + size; }
1736  T last() const { return off + size - 1; }
1737 
1738  DEFINE_MEMORY_ALLOCATION_FUNCS()
1739 };
1740 
1741 //-------------------------------------------------------------------------
1742 typedef ivl_tpl<uval_t> uval_ivl_t;
1743 struct ivl_t : public uval_ivl_t
1744 {
1745 private:
1746  typedef ivl_tpl<uval_t> inherited;
1747  // forbid the default constructor
1748  ivl_t(void) {}
1749  // ...except for use in a vector
1750  friend class qvector<ivl_t>;
1751 
1752 public:
1753  ivl_t(uval_t _off, uval_t _size) : inherited(_off,_size) {}
1754  bool empty(void) const { return size == 0; }
1755  void clear(void) { size = 0; }
1756  void print(qstring *vout) const;
1757  const char *hexapi dstr(void) const;
1758 
1759  bool extend_to_cover(const ivl_t &r) // extend interval to cover 'r'
1760  {
1761  uval_t new_end = end();
1762  bool changed = false;
1763  if ( off > r.off )
1764  {
1765  off = r.off;
1766  changed = true;
1767  }
1768  if ( new_end < r.end() )
1769  {
1770  new_end = r.end();
1771  changed = true;
1772  }
1773  if ( changed )
1774  size = new_end - off;
1775  return changed;
1776  }
1777  void intersect(const ivl_t &r)
1778  {
1779  uval_t new_off = qmax(off, r.off);
1780  uval_t new_end = end();
1781  if ( new_end > r.end() )
1782  new_end = r.end();
1783  if ( new_off < new_end )
1784  {
1785  off = new_off;
1786  size = new_end - off;
1787  }
1788  else
1789  {
1790  size = 0;
1791  }
1792  }
1793 
1794  // do *this and ivl overlap?
1795  bool overlap(const ivl_t &ivl) const
1796  {
1797  return interval::overlap(off, size, ivl.off, ivl.size);
1798  }
1799  // does *this include ivl?
1800  bool includes(const ivl_t &ivl) const
1801  {
1802  return interval::includes(off, size, ivl.off, ivl.size);
1803  }
1804  // does *this contain off2?
1805  bool contains(uval_t off2) const
1806  {
1807  return interval::contains(off, size, off2);
1808  }
1809 
1810  DECLARE_COMPARISONS(ivl_t);
1811  static const ivl_t allmem;
1812 #define ALLMEM ivl_t::allmem
1813 };
1814 DECLARE_TYPE_AS_MOVABLE(ivl_t);
1815 
1816 //-------------------------------------------------------------------------
1818 {
1819  ivl_t ivl;
1820  const char *whole; // name of the whole interval
1821  const char *part; // prefix to use for parts of the interval (e.g. sp+4)
1822  ivl_with_name_t(): ivl(0, BADADDR), whole("<unnamed inteval>"), part(NULL) {}
1823  DEFINE_MEMORY_ALLOCATION_FUNCS()
1824 };
1825 
1826 //-------------------------------------------------------------------------
1827 template <class Ivl, class T>
1828 class ivlset_tpl // set of intervals
1829 {
1830 public:
1831  typedef qvector<Ivl> bag_t;
1832 
1833 protected:
1834  bag_t bag;
1835  bool verify(void) const;
1836  // we do not store the empty intervals in bag so size == 0 denotes
1837  // MAX_VALUE<T>+1, e.g. 0x100000000 for uint32
1838  static bool ivl_all_values(const Ivl &ivl) { return ivl.off == 0 && ivl.size == 0; }
1839 
1840 public:
1841  ivlset_tpl(void) {}
1842  ivlset_tpl(const Ivl &ivl) { if ( ivl.valid() ) bag.push_back(ivl); }
1843  DEFINE_MEMORY_ALLOCATION_FUNCS()
1844 
1845  void swap(ivlset_tpl &r) { bag.swap(r.bag); }
1846  const Ivl &getivl(int idx) const { return bag[idx]; }
1847  const Ivl &lastivl(void) const { return bag.back(); }
1848  size_t nivls(void) const { return bag.size(); }
1849  bool empty(void) const { return bag.empty(); }
1850  void clear(void) { bag.clear(); }
1851  void qclear(void) { bag.qclear(); }
1852  bool all_values() const { return nivls() == 1 && ivl_all_values(bag[0]); }
1853  void set_all_values() { clear(); bag.push_back(Ivl(0, 0)); }
1854  bool single_value(T v) const { return nivls() == 1 && bag[0].off == v && bag[0].size == 1; }
1855 
1856  bool operator==(const Ivl &v) const { return nivls() == 1 && bag[0] == v; }
1857  bool operator!=(const Ivl &v) const { return !(*this == v); }
1858 
1859  typedef typename bag_t::iterator iterator;
1860  typedef typename bag_t::const_iterator const_iterator;
1861  const_iterator begin(void) const { return bag.begin(); }
1862  const_iterator end(void) const { return bag.end(); }
1863  iterator begin(void) { return bag.begin(); }
1864  iterator end(void) { return bag.end(); }
1865 };
1866 
1867 //-------------------------------------------------------------------------
1868 /// Set of address intervals.
1869 /// Bit arrays are efficient only for small sets. Potentially huge
1870 /// sets, like memory ranges, require another representation.
1871 /// ivlset_t is used for a list of memory locations in our decompiler.
1873 struct ivlset_t : public uval_ivl_ivlset_t
1874 {
1876  ivlset_t() {}
1877  ivlset_t(const ivl_t &ivl) : inherited(ivl) {}
1878  bool hexapi add(const ivl_t &ivl);
1879  bool add(ea_t ea, asize_t size) { return add(ivl_t(ea, size)); }
1880  bool hexapi add(const ivlset_t &ivs);
1881  bool hexapi addmasked(const ivlset_t &ivs, const ivl_t &mask);
1882  bool hexapi sub(const ivl_t &ivl);
1883  bool sub(ea_t ea, asize_t size) { return sub(ivl_t(ea, size)); }
1884  bool hexapi sub(const ivlset_t &ivs);
1885  bool hexapi has_common(const ivl_t &ivl, bool strict=false) const;
1886  void hexapi print(qstring *vout) const;
1887  const char *hexapi dstr(void) const;
1888  asize_t hexapi count(void) const;
1889  bool hexapi has_common(const ivlset_t &ivs) const;
1890  bool hexapi contains(uval_t off) const;
1891  bool hexapi includes(const ivlset_t &ivs) const;
1892  bool hexapi intersect(const ivlset_t &ivs);
1893 
1894  DECLARE_COMPARISONS(ivlset_t);
1895 
1896 };
1897 DECLARE_TYPE_AS_MOVABLE(ivlset_t);
1898 typedef qvector<ivlset_t> array_of_ivlsets;
1899 //-------------------------------------------------------------------------
1900 // We use bitset_t to keep list of registers.
1901 // This is the most optimal storage for them.
1902 class rlist_t : public bitset_t
1903 {
1904 public:
1905  rlist_t(void) {}
1906  rlist_t(const rlist_t &m) : bitset_t(m)
1907  {
1908  }
1909  rlist_t(mreg_t reg, int width) { add(reg, width); }
1910  ~rlist_t(void) {}
1911  void hexapi print(qstring *vout) const;
1912  const char *hexapi dstr(void) const;
1913 };
1914 DECLARE_TYPE_AS_MOVABLE(rlist_t);
1915 
1916 //-------------------------------------------------------------------------
1917 // Microlist: list of register and memory locations
1918 struct mlist_t
1919 {
1920  rlist_t reg; // registers
1921  ivlset_t mem; // memory locations
1922 
1923  mlist_t(void) {}
1924  mlist_t(const ivl_t &ivl) : mem(ivl) {}
1925  mlist_t(mreg_t r, int size) : reg(r, size) {}
1926 
1927  void swap(mlist_t &r) { reg.swap(r.reg); mem.swap(r.mem); }
1928  bool hexapi addmem(ea_t ea, asize_t size);
1929  bool add(mreg_t r, int size) { return add(mlist_t(r, size)); } // also see append_def_list()
1930  bool add(const rlist_t &r) { return reg.add(r); }
1931  bool add(const ivl_t &ivl) { return add(mlist_t(ivl)); }
1932  bool add(const mlist_t &lst) { return reg.add(lst.reg) | mem.add(lst.mem); }
1933  bool sub(mreg_t r, int size) { return sub(mlist_t(r, size)); }
1934  bool sub(const ivl_t &ivl) { return sub(mlist_t(ivl)); }
1935  bool sub(const mlist_t &lst) { return reg.sub(lst.reg) | mem.sub(lst.mem); }
1936  asize_t count(void) const { return reg.count() + mem.count(); }
1937  void hexapi print(qstring *vout) const;
1938  const char *hexapi dstr(void) const;
1939  bool empty(void) const { return reg.empty() && mem.empty(); }
1940  void clear(void) { reg.clear(); mem.clear(); }
1941  bool has(mreg_t r) const { return reg.has(r); }
1942  bool has_all(mreg_t r, int size) const { return reg.has_all(r, size); }
1943  bool has_any(mreg_t r, int size) const { return reg.has_any(r, size); }
1944  bool has_memory(void) const { return !mem.empty(); }
1945  bool has_allmem(void) const { return mem == ALLMEM; }
1946  bool has_common(const mlist_t &lst) const { return reg.has_common(lst.reg) || mem.has_common(lst.mem); }
1947  bool includes(const mlist_t &lst) const { return reg.includes(lst.reg) && mem.includes(lst.mem); }
1948  bool intersect(const mlist_t &lst) { return reg.intersect(lst.reg) | mem.intersect(lst.mem); }
1949  bool is_subset_of(const mlist_t &lst) const { return lst.includes(*this); }
1950 
1951  DECLARE_COMPARISONS(mlist_t);
1952  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
1953 };
1954 DECLARE_TYPE_AS_MOVABLE(mlist_t);
1955 typedef qvector<mlist_t> mlistvec_t;
1956 DECLARE_TYPE_AS_MOVABLE(mlistvec_t);
1957 
1958 ///------------------------------------------------------------------------
1959 /// Map a processor register to a microregister.
1960 /// \param reg processor register number
1961 /// \return microregister register id or mr_none
1962 
1963 mreg_t hexapi reg2mreg(int reg);
1964 
1965 
1966 /// Map a microregister to a processor register.
1967 /// \param reg microregister number
1968 /// \param width size of microregister in bytes
1969 /// \return processor register id or -1
1970 
1971 int hexapi mreg2reg(mreg_t reg, int width);
1972 
1973 
1974 /// Get the microregister name
1975 /// \param out output buffer, may be nullptr
1976 /// \param reg microregister number
1977 /// \param width size of microregister in bytes. may be bigger than the real
1978 /// register size.
1979 /// \param ud reserved, must be nullptr
1980 /// \return width of the printed register. this value may be less than
1981 /// the WIDTH argument.
1982 
1983 int hexapi get_mreg_name(qstring *out, mreg_t reg, int width, void *ud=nullptr);
1984 
1985 //-------------------------------------------------------------------------
1986 /// User defined callback to optimize individual microcode instructions
1988 {
1989  /// Optimize an instruction.
1990  /// \param blk current basic block. maybe NULL, which means that
1991  /// the instruction must be optimized without context
1992  /// \param ins instruction to optimize; it is always a top-level instruction.
1993  /// the callback may not delete the instruction but may
1994  /// convert it into nop (see mblock_t::make_nop). to optimize
1995  /// sub-instructions, visit them using minsn_visitor_t.
1996  /// sub-instructions may not be converted into nop but
1997  /// can be converted to "mov x,x". for example:
1998  /// add x,0,x => mov x,x
1999  /// \return number of changes made to the instruction.
2000  /// if after this call the instruction's use/def lists have changed,
2001  /// you must mark the block level lists as dirty (see mark_lists_dirty)
2002  virtual int idaapi func(mblock_t *blk, minsn_t *ins) = 0;
2003 };
2004 
2005 /// Install an instruction level custom optimizer
2006 /// \param opt an instance of optinsn_t. cannot be destroyed before calling
2007 /// remove_optinsn_handler().
2009 
2010 /// Remove an instruction level custom optimizer
2012 
2013 /// User defined callback to optimize microcode blocks
2015 {
2016  /// Optimize a block.
2017  /// This function usually performs the optimizations that require analyzing
2018  /// the entire block and/or its neighbors. For example it can recognize
2019  /// patterns and perform conversions like:
2020  /// b0: b0:
2021  /// ... ...
2022  /// jnz x, 0, @b2 => jnz x, 0, @b2
2023  /// b1: b1:
2024  /// add x, 0, y mov x, y
2025  /// ... ...
2026  /// \param blk Basic block to optimize as a whole.
2027  /// \return number of changes made to the block. See also mark_lists_dirty.
2028  virtual int idaapi func(mblock_t *blk) = 0;
2029 };
2030 
2031 /// Install a block level custom optimizer.
2032 /// \param opt an instance of optblock_t. cannot be destroyed before calling
2033 /// remove_optblock_handler().
2035 
2036 /// Remove a block level custom optimizer
2038 
2039 
2040 //-------------------------------------------------------------------------
2041 // abstract graph interface
2042 class simple_graph_t : public gdl_graph_t
2043 {
2044 public:
2045  qstring title;
2046  bool colored_gdl_edges;
2047 private:
2048  friend class iterator;
2049  virtual int goup(int node) const;
2050 };
2051 
2052 //-------------------------------------------------------------------------
2053 // Since our data structures are quite complex, we use the visitor pattern
2054 // in many of our algorithms. This functionality is available for plugins too.
2055 // https://en.wikipedia.org/wiki/Visitor_pattern
2056 
2057 // All our visitor callbacks return an integer value.
2058 // Visiting is interrupted as soon as the return value is non-zero.
2059 // This non-zero value is returned as the result of the for_all_... function.
2060 // If for_all_... returns 0, it means that it successfully visited all items.
2061 
2062 /// The context info used by visitors
2064 {
2065  mbl_array_t *mba; // current block array
2066  mblock_t *blk; // current block
2067  minsn_t *topins; // top level instruction (parent of curins or curins itself)
2068  minsn_t *curins; // currently visited instruction
2070  mbl_array_t *_mba=NULL,
2071  mblock_t *_blk=NULL,
2072  minsn_t *_topins=NULL)
2073  : mba(_mba), blk(_blk), topins(_topins), curins(NULL) {}
2074  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2075  bool really_alloc(void) const;
2076 };
2077 
2078 /// Micro instruction visitor.
2079 /// See mbl_array_t::for_all_topinsns, minsn_t::for_all_insns,
2080 /// mblock_t::for_all_insns, mbl_array_t::for_all_insns
2082 {
2084  mbl_array_t *_mba=NULL,
2085  mblock_t *_blk=NULL,
2086  minsn_t *_topins=NULL)
2087  : op_parent_info_t(_mba, _blk, _topins) {}
2088  virtual int idaapi visit_minsn(void) = 0;
2089 };
2090 
2091 /// Micro operand visitor.
2092 /// See mop_t::for_all_ops, minsn_t::for_all_ops, mblock_t::for_all_insns,
2093 /// mbl_array_t::for_all_insns
2095 {
2096  mop_visitor_t(
2097  mbl_array_t *_mba=NULL,
2098  mblock_t *_blk=NULL,
2099  minsn_t *_topins=NULL)
2100  : op_parent_info_t(_mba, _blk, _topins), prune(false) {}
2101  /// Should skip sub-operands of the current operand?
2102  /// visit_mop() may set 'prune=true' for that.
2103  bool prune;
2104  virtual int idaapi visit_mop(mop_t *op, const tinfo_t *type, bool is_target) = 0;
2105 };
2106 
2107 /// Scattered mop: visit each of the scattered locations as a separate mop.
2108 /// See mop_t::for_all_scattered_submops
2110 {
2111  virtual int idaapi visit_scif_mop(const mop_t &r, int off) = 0;
2112 };
2113 
2114 // Used operand visitor.
2115 // See mblock_t::for_all_uses
2117 {
2118  minsn_t *topins;
2119  minsn_t *curins;
2120  bool changed;
2121  mlist_t *list;
2122  mlist_mop_visitor_t(void): topins(NULL), curins(NULL), changed(false), list(NULL) {}
2123  virtual int idaapi visit_mop(mop_t *op) = 0;
2124 };
2125 
2126 //-------------------------------------------------------------------------
2127 /// Instruction operand types
2128 
2129 typedef uint8 mopt_t;
2130 const mopt_t
2131  mop_z = 0, ///< none
2132  mop_r = 1, ///< register (they exist until MMAT_LVARS)
2133  mop_n = 2, ///< immediate number constant
2134  mop_str = 3, ///< immediate string constant
2135  mop_d = 4, ///< result of another instruction
2136  mop_S = 5, ///< local stack variable (they exist until MMAT_LVARS)
2137  mop_v = 6, ///< global variable
2138  mop_b = 7, ///< micro basic block (mblock_t)
2139  mop_f = 8, ///< list of arguments
2140  mop_l = 9, ///< local variable
2141  mop_a = 10, ///< mop_addr_t: address of operand (mop_l, mop_v, mop_S, mop_r)
2142  mop_h = 11, ///< helper function
2143  mop_c = 12, ///< mcases
2144  mop_fn = 13, ///< floating point constant
2145  mop_p = 14, ///< operand pair
2146  mop_sc = 15; ///< scattered
2147 
2148 const int NOSIZE = -1; ///< wrong or unexisting operand size
2149 
2150 //-------------------------------------------------------------------------
2151 /// Reference to a local variable. Used by mop_l
2153 {
2154  /// Pointer to the parent mbl_array_t object.
2155  /// Since we need to access the 'mba->vars' array in order to retrieve
2156  /// the referenced variable, we keep a pointer to mbl_array_t here.
2157  /// Note: this means this class and consequently mop_t, minsn_t, mblock_t
2158  /// are specific to a mbl_array_t object and cannot migrate between
2159  /// them. fortunately this is not something we need to do.
2160  /// second, lvar_ref_t's appear only after MMAT_LVARS.
2162  sval_t off; ///< offset from the beginning of the variable
2163  int idx; ///< index into mba->vars
2164  lvar_ref_t(mbl_array_t *m, int i, sval_t o=0) : mba(m), off(o), idx(i) {}
2165  lvar_ref_t(const lvar_ref_t &r) : mba(r.mba), off(r.off), idx(r.idx) {}
2166  lvar_ref_t &operator=(const lvar_ref_t &r)
2167  {
2168  off = r.off;
2169  idx = r.idx;
2170  return *this;
2171  }
2172  DECLARE_COMPARISONS(lvar_ref_t);
2173  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2174  void swap(lvar_ref_t &r)
2175  {
2176  std::swap(off, r.off);
2177  std::swap(idx, r.idx);
2178  }
2179  lvar_t &hexapi var(void) const; ///< Retrieve the referenced variable
2180 };
2181 
2182 //-------------------------------------------------------------------------
2183 /// Reference to a stack variable. Used for mop_S
2185 {
2186  /// Pointer to the parent mbl_array_t object.
2187  /// We need it in order to retrieve the referenced stack variable.
2188  /// See notes for lvar_ref_t::mba.
2190 
2191  /// Offset to the stack variable from the bottom of the stack frame.
2192  /// It is called 'decompiler stkoff' and it is different from IDA stkoff.
2193  /// See a note and a picture about 'decompiler stkoff' below.
2194  sval_t off;
2195 
2196  stkvar_ref_t(mbl_array_t *m, sval_t o) : mba(m), off(o) {}
2197  DECLARE_COMPARISONS(stkvar_ref_t);
2198  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2199  void swap(stkvar_ref_t &r)
2200  {
2201  std::swap(off, r.off);
2202  }
2203  /// Retrieve the referenced stack variable.
2204  /// \param p_off if specified, will hold IDA stkoff after the call.
2205  /// \return pointer to the stack variable
2206  member_t *hexapi get_stkvar(uval_t *p_off=NULL) const;
2207 };
2208 
2209 //-------------------------------------------------------------------------
2210 /// Scattered operand info. Used for mop_sc
2211 struct scif_t : public vdloc_t
2212 {
2213  /// Pointer to the parent mbl_array_t object.
2214  /// Some operations may convert a scattered operand into something simpler,
2215  /// (a stack operand, for example). We will need to create stkvar_ref_t at
2216  /// that moment, this is why we need this pointer.
2217  /// See notes for lvar_ref_t::mba.
2219 
2220  /// Usually scattered operands are created from a function prototype,
2221  /// which has the name information. We preserve it and use it to name
2222  /// the corresponding local variable.
2223  qstring name;
2224 
2225  /// Scattered operands always have type info assigned to them
2226  /// because without it we won't be able to manipulate them.
2227  tinfo_t type;
2228 
2229  scif_t(mbl_array_t *_mba, qstring *n, tinfo_t *tif) : mba(_mba)
2230  {
2231  n->swap(name);
2232  tif->swap(type);
2233  }
2234  scif_t &operator =(const vdloc_t &loc)
2235  {
2236  *(vdloc_t *)this = loc;
2237  return *this;
2238  }
2239 };
2240 
2241 //-------------------------------------------------------------------------
2242 /// An integer constant. Used for mop_n
2243 /// We support 64-bit values but 128-bit values can be represented with mop_p
2245 {
2246  uint64 value;
2247  uint64 org_value; // original value before changing the operand size
2248  mnumber_t(uint64 v, ea_t _ea=BADADDR, int n=0)
2249  : operand_locator_t(_ea, n), value(v), org_value(v) {}
2250  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2251  DECLARE_COMPARISONS(mnumber_t)
2252  {
2253  if ( value < r.value )
2254  return -1;
2255  if ( value > r.value )
2256  return -1;
2257  return 0;
2258  }
2259  // always use this function instead of manually modifying the 'value' field
2260  void update_value(uint64 val64)
2261  {
2262  value = val64;
2263  org_value = val64;
2264  }
2265 };
2266 
2267 //-------------------------------------------------------------------------
2268 /// Floating point constant. Used for mop_fn
2269 /// For more details, please see the ieee.h file from IDA SDK.
2271 {
2272  uint16 fnum[6]; ///< Internal representation of the number
2273  int nbytes; ///< Original size of the constant in bytes
2274  operator uint16 *(void) { return fnum; }
2275  operator const uint16 *(void) const { return fnum; }
2276  void hexapi print(qstring *vout) const;
2277  const char *hexapi dstr(void) const;
2278  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2279  DECLARE_COMPARISONS(fnumber_t)
2280  {
2281  return ecmp(fnum, r.fnum);
2282  }
2283 };
2284 
//-------------------------------------------------------------------------
/// \defgroup SHINS_ Bits to control how we print instructions
//@{
#define SHINS_NUMADDR 0x01 ///< display definition addresses for numbers
#define SHINS_VALNUM  0x02 ///< display value numbers
#define SHINS_SHORT   0x04 ///< do not display use-def chains and other attrs
#define SHINS_LDXEA   0x08 ///< display address of ldx expressions (not used)
//@}
2293 
//-------------------------------------------------------------------------
/// How to handle side effect of change_size()
/// Sometimes we need to create a temporary operand and change its size in order
/// to check some hypothesis. If we revert our changes, we do not want that the
/// database (global variables, stack frame, etc) changes in any manner.
enum side_effect_t
{
  NO_SIDEFF,            ///< change operand size but ignore side effects
                        ///< if you decide to keep the changed operand,
                        ///< handle_new_size() must be called
  WITH_SIDEFF,          ///< change operand size and handle side effects
  ONLY_SIDEFF,          ///< only handle side effects
  ANY_REGSIZE = 0x80,   ///< any register size is permitted
};
2308 
2309 // Max size of simple operands.
2310 // Please note there are some exceptions: udts, floating point, xmm/ymm, etc
2311 const int MAX_OPSIZE = 2 * sizeof(ea_t);
2312 const int DOUBLE_OPSIZE = 2 * MAX_OPSIZE;
2313 //-------------------------------------------------------------------------
2314 /// A microinstruction operand.
2315 /// This is the smallest building block of our microcode.
2316 /// Operands will be part of instructions, which are then grouped into basic blocks.
2317 /// The microcode consists of an array of such basic blocks + some additional info.
2318 class mop_t
2319 {
2320  void hexapi copy(const mop_t &rop);
2321 public:
2322  /// Operand type.
2323  mopt_t t;
2324 
2325  /// Operand properties.
2326  uint8 oprops;
2327 #define OPROP_IMPDONE 0x01 ///< imported operand (a pointer) has been dereferenced
2328 #define OPROP_UDT 0x02 ///< a struct or union
2329 #define OPROP_FLOAT 0x04 ///< possibly floating value
2330 #define OPROP_CCFLAGS 0x08 ///< condition codes register value
2331 #define OPROP_UDEFVAL 0x10 ///< uses undefined value
2332 
2333  /// Value number.
2334  /// Zero means unknown.
2335  /// Operands with the same value number are equal.
2336  uint16 valnum;
2337 
2338  /// Operand size.
2339  /// Usually it is 1,2,4,8 or NOSIZE but for UDTs other sizes are permitted
2340  int size;
2341 
2342  /// The following union holds additional details about the operand.
2343  /// Depending on the operand type different kinds of info are stored.
2344  /// You should access these fields only after verifying the operand type.
2345  /// All pointers are owned by the operand and are freed by its destructor.
2346  union
2347  {
2348  mreg_t r; // mop_r register number
2349  mnumber_t *nnn; // mop_n immediate value
2350  minsn_t *d; // mop_d result (destination) of another instruction
2351  stkvar_ref_t *s; // mop_S stack variable
2352  ea_t g; // mop_v global variable (its linear address)
2353  int b; // mop_b block number (used in jmp,call instructions)
2354  mcallinfo_t *f; // mop_f function call information
2355  lvar_ref_t *l; // mop_l local variable
2356  mop_addr_t *a; // mop_a variable whose address is taken
2357  char *helper; // mop_h helper function name
2358  char *cstr; // mop_str string constant
2359  mcases_t *c; // mop_c cases
2360  fnumber_t *fpc; // mop_fn floating point constant
2361  mop_pair_t *pair; // mop_p operand pair
2362  scif_t *scif; // mop_sc scattered operand info
2363  };
2364  // -- End of data fields, member function declarations follow:
2365 
2366  void set_impptr_done(void) { oprops |= OPROP_IMPDONE; }
2367  void set_udt(void) { oprops |= OPROP_UDT; }
2368  void set_undef_val(void) { oprops |= OPROP_UDEFVAL; }
2369  bool is_impptr_done(void) const { return (oprops & OPROP_IMPDONE) != 0; }
2370  bool is_udt(void) const { return (oprops & OPROP_UDT) != 0; }
2371  bool probably_floating(void) const { return (oprops & OPROP_FLOAT) != 0; }
2372  bool is_ccflags(void) const { return (oprops & OPROP_CCFLAGS) != 0; }
2373  bool is_undef_val(void) const { return (oprops & OPROP_UDEFVAL) != 0; }
2374 
2375  mop_t(void) { zero(); }
2376  mop_t(const mop_t &rop) { copy(rop); }
2377  mop_t(mreg_t _r, int _s) : t(mop_r), oprops(0), valnum(0), size(_s), r(_r) {}
2378  mop_t &operator=(const mop_t &rop) { return assign(rop); }
2379  mop_t &hexapi assign(const mop_t &rop);
2380  ~mop_t(void)
2381  {
2382  erase();
2383  }
2384  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2385  void zero(void) { t = mop_z; oprops = 0; valnum = 0; size = NOSIZE; nnn = NULL; }
2386  void hexapi swap(mop_t &rop);
2387  void hexapi erase(void);
2388  void erase_but_keep_size(void) { int s2 = size; erase(); size = s2; }
2389 
2390  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
2391  const char *hexapi dstr(void) const; // use this function for debugging
2392 
2393  //-----------------------------------------------------------------------
2394  // Operand creation
2395  //-----------------------------------------------------------------------
2396  /// Create operand from mlist_t.
2397  /// Example: if LST contains 4 bits for R0.4, our operand will be
2398  /// (t=mop_r, r=R0, size=4)
2399  /// \param mba pointer to microcode
2400  /// \param lst list of locations
2401  /// \param fullsize mba->fullsize
2402  /// \return success
2403  bool hexapi create_from_mlist(mbl_array_t *mba, const mlist_t &lst, sval_t fullsize);
2404 
2405  /// Create operand from ivlset_t.
2406  /// Example: if IVS contains [glbvar..glbvar+4), our operand will be
2407  /// (t=mop_v, g=&glbvar, size=4)
2408  /// \param mba pointer to microcode
2409  /// \param ivs set of memory intervals
2410  /// \param fullsize mba->fullsize
2411  /// \return success
2412  bool hexapi create_from_ivlset(mbl_array_t *mba, const ivlset_t &ivs, sval_t fullsize);
2413 
2414  /// Create operand from vdloc_t.
2415  /// Example: if LOC contains (type=ALOC_REG1, r=R0), our operand will be
2416  /// (t=mop_r, r=R0, size=_SIZE)
2417  /// \param mba pointer to microcode
2418  /// \param loc location
2419  /// \param fullsize mba->fullsize
2420  /// Note: this function cannot handle scattered locations.
2421  /// \return success
2422  void hexapi create_from_vdloc(mbl_array_t *mba, const vdloc_t &loc, int _size);
2423 
2424  /// Create operand from scattered vdloc_t.
2425  /// Example: if LOC is (ALOC_DIST, {EAX.4, EDX.4}) and TYPE is _LARGE_INTEGER,
2426  /// our operand will be
2427  /// (t=mop_sc, scif={EAX.4, EDX.4})
2428  /// \param mba pointer to microcode
2429  /// \param name name of the operand, if available
2430  /// \param type type of the operand, must be present
2431  /// \param loc a scattered location
2432  /// \return success
2433  void hexapi create_from_scattered_vdloc(
2434  mbl_array_t *mba,
2435  const char *name,
2436  tinfo_t type,
2437  const vdloc_t &loc);
2438 
2439  /// Create operand from an instruction.
2440  /// This function creates a nested instruction that can be used as an operand.
2441  /// Example: if m="add x,y,z", our operand will be (t=mop_d,d=m).
2442  /// The destination operand of 'add' (z) is lost.
2443  /// \param m instruction to embed into operand. may not be NULL.
2444  void hexapi create_from_insn(const minsn_t *m);
2445 
2446  /// Create an integer constant operand.
2447  /// \param _value value to store in the operand
2448  /// \param _size size of the value in bytes (1,2,4,8)
2449  /// \param _ea address of the processor instruction that made the value
2450  /// \param opnum operand number of the processor instruction
2451  void hexapi make_number(uint64 _value, int _size, ea_t _ea=BADADDR, int opnum=0);
2452 
2453  /// Create a floating point constant operand.
2454  /// \param bytes pointer to the floating point value as used by the current
2455  /// processor (e.g. for x86 it must be in IEEE 754)
2456  /// \param _size number of bytes occupied by the constant.
2457  /// \return success
2458  bool hexapi make_fpnum(const void *bytes, size_t _size);
2459 
2460  /// Create a register operand without erasing previous data.
2461  /// \param reg micro register number
2462  /// Note: this function does not erase the previous contents of the operand;
2463  /// call erase() if necessary
2464  void _make_reg(mreg_t reg)
2465  {
2466  t = mop_r;
2467  r = reg;
2468  }
2469  void _make_reg(mreg_t reg, int _size)
2470  {
2471  t = mop_r;
2472  r = reg;
2473  size = _size;
2474  }
2475  /// Create a register operand.
2476  void make_reg(mreg_t reg) { erase(); _make_reg(reg); }
2477  void make_reg(mreg_t reg, int _size) { erase(); _make_reg(reg, _size); }
2478 
2479  /// Create a local variable operand.
2480  /// \param mba pointer to microcode
2481  /// \param idx index into mba->vars
2482  /// \param off offset from the beginning of the variable
2483  /// Note: this function does not erase the previous contents of the operand;
2484  /// call erase() if necessary
2485  void _make_lvar(mbl_array_t *mba, int idx, sval_t off=0)
2486  {
2487  t = mop_l;
2488  l = new lvar_ref_t(mba, idx, off);
2489  }
2490 
2491  /// Create a global variable operand without erasing previous data.
2492  /// \param ea address of the variable
2493  /// Note: this function does not erase the previous contents of the operand;
2494  /// call erase() if necessary
2495  void _make_gvar(ea_t ea)
2496  {
2497  t = mop_v;
2498  g = ea;
2499  }
2500  /// Create a global variable operand.
2501  void make_gvar(ea_t ea) { erase(); _make_gvar(ea); }
2502 
2503  /// Create a stack variable operand.
2504  /// \param mba pointer to microcode
2505  /// \param off decompiler stkoff
2506  /// Note: this function does not erase the previous contents of the operand;
2507  /// call erase() if necessary
2508  void _make_stkvar(mbl_array_t *mba, sval_t off)
2509  {
2510  t = mop_S;
2511  s = new stkvar_ref_t(mba, off);
2512  }
2513 
2514  /// Create pair of registers.
2515  /// \param loreg register holding the low part of the value
2516  /// \param hireg register holding the high part of the value
2517  /// \param halfsize the size of each of loreg/hireg
2518  void hexapi make_reg_pair(int loreg, int hireg, int halfsize);
2519 
2520  /// Create a nested instruction without erasing previous data.
2521  /// \param ea address of the nested instruction
2522  /// Note: this function does not erase the previous contents of the operand;
2523  /// call erase() if necessary
2524  /// See also create_from_insn, which is higher level
2525  void _make_insn(minsn_t *ins);
2526  /// Create a nested instruction.
2527  void make_insn(minsn_t *ins) { erase(); _make_insn(ins); }
2528 
2529  /// Create a block reference operand without erasing previous data.
2530  /// \param blknum block number
2531  /// Note: this function does not erase the previous contents of the operand;
2532  /// call erase() if necessary
2533  void _make_blkref(int blknum)
2534  {
2535  t = mop_b;
2536  b = blknum;
2537  }
2538  /// Create a global variable operand.
2539  void make_blkref(int blknum) { erase(); _make_blkref(blknum); }
2540 
2541  /// Create a helper operand.
2542  /// A helper operand usually keeps a built-in function name like "va_start"
2543  /// It is essentially just an arbitrary identifier without any additional info.
2544  void hexapi make_helper(const char *name);
2545 
2546  /// Create a constant string operand.
2547  void _make_strlit(const char *str)
2548  {
2549  t = mop_str;
2550  cstr = ::qstrdup(str);
2551  }
2552  void _make_strlit(qstring *str) // str is consumed
2553  {
2554  t = mop_str;
2555  cstr = str->extract();
2556  }
2557 
2558  /// Create a call info operand without erasing previous data.
2559  /// \param fi callinfo
2560  /// Note: this function does not erase the previous contents of the operand;
2561  /// call erase() if necessary
2563  {
2564  t = mop_f;
2565  f = fi;
2566  }
2567 
2568  /// Create a 'switch cases' operand without erasing previous data.
2569  /// Note: this function does not erase the previous contents of the operand;
2570  /// call erase() if necessary
2571  void _make_cases(mcases_t *_cases)
2572  {
2573  t = mop_c;
2574  c = _cases;
2575  }
2576 
2577  /// Create a pair operand without erasing previous data.
2578  /// Note: this function does not erase the previous contents of the operand;
2579  /// call erase() if necessary
2580  void _make_pair(mop_pair_t *_pair)
2581  {
2582  t = mop_p;
2583  pair = _pair;
2584  }
2585 
2586  //-----------------------------------------------------------------------
2587  // Various operand tests
2588  //-----------------------------------------------------------------------
2589  bool empty(void) const { return t == mop_z; }
2590  /// Is a register operand?
2591  /// See also get_mreg_name()
2592  bool is_reg(void) const { return t == mop_r; }
2593  /// Is the specified register?
2594  bool is_reg(mreg_t _r) const { return t == mop_r && r == _r; }
2595  /// Is the specified register of the specified size?
2596  bool is_reg(mreg_t _r, int _size) const { return t == mop_r && r == _r && size == _size; }
2597  /// Is a list of arguments?
2598  bool is_arglist(void) const { return t == mop_f; }
2599  /// Is a condition code?
2600  bool is_cc(void) const { return is_reg() && r >= mr_cf && r < mr_first; }
2601  /// Is a bit register?
2602  /// This includes condition codes and eventually other bit registers
2603  static bool hexapi is_bit_reg(mreg_t reg);
2604  bool is_bit_reg(void) const { return is_reg() && is_bit_reg(r); }
2605  /// Is a kernel register?
2606  bool is_kreg(void) const;
2607  /// Is a block reference to the specified block?
2608  bool is_mob(int serial) const { return t == mop_b && b == serial; }
2609  /// Is a scattered operand?
2610  bool is_scattered(void) const { return t == mop_sc; }
2611  /// Is address of a global memory cell?
2612  bool is_glbaddr() const;
2613  /// Is address of the specified global memory cell?
2614  bool is_glbaddr(ea_t ea) const;
2615  /// Is address of a stack variable?
2616  bool is_stkaddr() const;
2617  /// Is a sub-instruction?
2618  bool is_insn(void) const { return t == mop_d; }
2619  /// Is a sub-instruction with the specified opcode?
2620  bool is_insn(mcode_t code) const;
2621  /// Has any side effects?
2622  /// \param include_ldx_and_divs consider ldx/div/mod as having side effects?
2623  bool has_side_effects(bool include_ldx_and_divs=false) const;
2624  /// Is it possible for the operand to use aliased memory?
2625  bool hexapi may_use_aliased_memory(void) const;
2626 
2627  /// Are the possible values of the operand only 0 and 1?
2628  /// This function returns true for 0/1 constants, bit registers,
2629  /// the result of 'set' insns, etc.
2630  bool hexapi is01(void) const;
2631 
2632  /// Does the high part of the operand consist of the sign bytes?
2633  /// \param nbytes number of bytes that were sign extended.
2634  /// the remaining size-nbytes high bytes must be sign bytes
2635  /// Example: is_sign_extended_from(xds.4(op.1), 1) -> true
2636  /// because the high 3 bytes are certainly sign bits
2637  bool hexapi is_sign_extended_from(int nbytes) const;
2638 
2639  /// Does the high part of the operand consist of zero bytes?
2640  /// \param nbytes number of bytes that were zero extended.
2641  /// the remaining size-nbytes high bytes must be zero
2642  /// Example: is_zero_extended_from(xdu.8(op.1), 2) -> true
2643  /// because the high 6 bytes are certainly zero
2644  bool hexapi is_zero_extended_from(int nbytes) const;
2645 
2646  /// Does the high part of the operand consist of zero or sign bytes?
2647  bool is_extended_from(int nbytes, bool is_signed) const
2648  {
2649  if ( is_signed )
2650  return is_sign_extended_from(nbytes);
2651  else
2652  return is_zero_extended_from(nbytes);
2653  }
2654 
2655  //-----------------------------------------------------------------------
2656  // Comparisons
2657  //-----------------------------------------------------------------------
2658  /// Compare operands.
2659  /// This is the main comparison function for operands.
2660  /// \param rop operand to compare with
2661  /// \param eqflags combination of \ref EQ_ bits
2662  bool hexapi equal_mops(const mop_t &rop, int eqflags) const;
2663  bool operator==(const mop_t &rop) const { return equal_mops(rop, 0); }
2664  bool operator!=(const mop_t &rop) const { return !equal_mops(rop, 0); }
2665 
2666  /// Lexographical operand comparison.
2667  /// It can be used to store mop_t in various containers, like std::set
2668  bool operator <(const mop_t &rop) const { return lexcompare(rop) < 0; }
2669  friend int lexcompare(const mop_t &a, const mop_t &b) { return a.lexcompare(b); }
2670  int hexapi lexcompare(const mop_t &rop) const;
2671 
2672  //-----------------------------------------------------------------------
2673  // Visiting operand parts
2674  //-----------------------------------------------------------------------
2675  /// Visit the operand and all its sub-operands.
2676  /// This function visits the current operand as well.
2677  /// \param mv visitor object
2678  /// \param type operand type
2679  /// \param is_target is a destination operand?
2680  int hexapi for_all_ops(
2681  mop_visitor_t &mv,
2682  const tinfo_t *type=NULL,
2683  bool is_target=false);
2684 
2685  /// Visit all sub-operands of a scattered operand.
2686  /// This function does not visit the current operand, only its sub-operands.
2687  /// All sub-operands are synthetic and are destroyed after the visitor.
2688  /// This function works only with scattered operands.
2689  /// \param sv visitor object
2690  int hexapi for_all_scattered_submops(scif_visitor_t &sv) const;
2691 
2692  //-----------------------------------------------------------------------
2693  // Working with mop_n operands
2694  //-----------------------------------------------------------------------
2695  /// Retrieve value of a constant integer operand.
2696  /// These functions can be called only for mop_n operands.
2697  /// See is_constant() that can be called on any operand.
2698  uint64 value(bool is_signed) const { return extend_sign(nnn->value, size, is_signed); }
2699  int64 signed_value(void) const { return value(true); }
2700  uint64 unsigned_value(void) const { return value(false); }
2701 
2702  /// Retrieve value of a constant integer operand.
2703  /// \param out pointer to the output buffer
2704  /// \param is_signed should treat the value as signed
2705  /// \return true if the operand is mop_n
2706  bool hexapi is_constant(uint64 *out=NULL, bool is_signed=true) const;
2707 
2708  bool is_equal_to(uint64 n, bool is_signed=true) const
2709  {
2710  uint64 v;
2711  return is_constant(&v, is_signed) && v == n;
2712  }
2713  bool is_zero(void) const { return is_equal_to(0, false); }
2714  bool is_one(void) const { return is_equal_to(1, false); }
2715  bool is_positive_constant(void) const
2716  {
2717  uint64 v;
2718  return is_constant(&v, true) && int64(v) > 0;
2719  }
2720  bool is_negative_constant(void) const
2721  {
2722  uint64 v;
2723  return is_constant(&v, true) && int64(v) < 0;
2724  }
2725 
2726  //-----------------------------------------------------------------------
2727  // Working with mop_S operands
2728  //-----------------------------------------------------------------------
2729  /// Retrieve the referenced stack variable.
2730  /// \param p_off if specified, will hold IDA stkoff after the call.
2731  /// \return pointer to the stack variable
2732  member_t *get_stkvar(uval_t *p_off) const { return s->get_stkvar(p_off); }
2733 
2734  /// Get the referenced stack offset.
2735  /// This function can also handle mop_sc if it is entirely mapped into
2736  /// a continuous stack region.
2737  /// \param p_off the output buffer
2738  /// \return success
2739  bool hexapi get_stkoff(sval_t *p_off) const;
2740 
2741  //-----------------------------------------------------------------------
2742  // Working with mop_d operands
2743  //-----------------------------------------------------------------------
2744  /// Get subinstruction of the operand.
2745  /// If the operand has a subinstruction with the specified opcode, return it.
2746  /// \param code desired opcode
2747  /// \return pointer to the instruction or NULL
2748  const minsn_t *get_insn(mcode_t code) const;
2749  minsn_t *get_insn(mcode_t code);
2750 
2751  //-----------------------------------------------------------------------
2752  // Transforming operands
2753  //-----------------------------------------------------------------------
2754  /// Make the low part of the operand.
2755  /// This function takes into account the memory endianness (byte sex)
2756  /// \param width the desired size of the operand part in bytes
2757  /// \return success
2758  bool hexapi make_low_half(int width);
2759 
2760  /// Make the high part of the operand.
2761  /// This function takes into account the memory endianness (byte sex)
2762  /// \param width the desired size of the operand part in bytes
2763  /// \return success
2764  bool hexapi make_high_half(int width);
2765 
2766  /// Make the first part of the operand.
2767  /// This function does not care about the memory endianness
2768  /// \param width the desired size of the operand part in bytes
2769  /// \return success
2770  bool hexapi make_first_half(int width);
2771 
2772  /// Make the second part of the operand.
2773  /// This function does not care about the memory endianness
2774  /// \param width the desired size of the operand part in bytes
2775  /// \return success
2776  bool hexapi make_second_half(int width);
2777 
2778  /// Shift the operand.
2779  /// This function shifts only the beginning of the operand.
2780  /// The operand size will be changed.
2781  /// Examples: shift_mop(AH.1, -1) -> AX.2
2782  /// shift_mop(qword_00000008.8, 4) -> dword_0000000C.4
2783  /// shift_mop(xdu.8(op.4), 4) -> #0.4
2784  /// shift_mop(#0x12345678.4, 3) -> #12.1
2785  /// \param offset shift count (the number of bytes to shift)
2786  /// \return success
2787  bool hexapi shift_mop(int offset);
2788 
2789  /// Change the operand size.
2790  /// Examples: change_size(AL.1, 2) -> AX.2
2791  /// change_size(qword_00000008.8, 4) -> dword_00000008.4
2792  /// change_size(xdu.8(op.4), 4) -> op.4
2793  /// change_size(#0x12345678.4, 1) -> #0x78.1
2794  /// \param nsize new operand size
2795  /// \param sideff may modify the database because of the size change?
2796  /// \return success
2797  bool hexapi change_size(int nsize, side_effect_t sideff=WITH_SIDEFF);
2798  bool double_size(side_effect_t sideff=WITH_SIDEFF) { return change_size(size*2, sideff); }
2799 
2800  /// Move subinstructions with side effects out of the operand.
2801  /// If we decide to delete an instruction operand, it is a good idea to
2802  /// call this function. Alternatively we should skip such operands
2803  /// by calling mop_t::has_side_effects()
2804  /// For example, if we transform: jnz x, x, @blk => goto @blk
2805  /// then we must call this function before deleting the X operands.
2806  /// \param blk current block
2807  /// \param top top level instruction that contains our operand
2808  /// \param moved_calls pointer to the boolean that will track if all side
2809  /// effects get handled correctly. must be false initially.
2810  /// \return false failed to preserve a side effect, it is not safe to
2811  /// delete the operand
2812  /// true no side effects or successfully preserved them
2813  bool hexapi preserve_side_effects(
2814  mblock_t *blk,
2815  minsn_t *top,
2816  bool *moved_calls=NULL);
2817 
2818  /// Apply a unary opcode to the operand.
2819  /// \param mcode opcode to apply. it must accept 'l' and 'd' operands
2820  /// but not 'r'. examples: m_low/m_high/m_xds/m_xdu
2821  /// \param ea value of minsn_t::ea for the newly created insruction
2822  /// \param newsize new operand size
2823  /// Example: apply_ld_mcode(m_low) will convert op => low(op)
2824  void hexapi apply_ld_mcode(mcode_t mcode, ea_t ea, int newsize);
2825  void apply_xdu(ea_t ea, int newsize) { apply_ld_mcode(m_xdu, ea, newsize); }
2826  void apply_xds(ea_t ea, int newsize) { apply_ld_mcode(m_xds, ea, newsize); }
2827 };
2828 DECLARE_TYPE_AS_MOVABLE(mop_t);
2829 
2830 /// Pair of operands
2832 {
2833 public:
2834  mop_t lop; ///< low operand
2835  mop_t hop; ///< high operand
2836  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2837 };
2838 
2839 /// Address of an operand (mop_l, mop_v, mop_S, mop_r)
2840 class mop_addr_t : public mop_t
2841 {
2842 public:
2843  int insize; // how many bytes of the pointed operand can be read
2844  int outsize; // how many bytes of the pointed operand can be written
2845 
2846  mop_addr_t(): insize(NOSIZE), outsize(NOSIZE) {}
2847  mop_addr_t(const mop_addr_t &ra)
2848  : mop_t(ra), insize(ra.insize), outsize(ra.outsize) {}
2849  mop_addr_t(const mop_t &ra, int isz, int osz)
2850  : mop_t(ra), insize(isz), outsize(osz) {}
2851 
2852  mop_addr_t &operator=(const mop_addr_t &rop)
2853  {
2854  *(mop_t *)this = mop_t(rop);
2855  insize = rop.insize;
2856  outsize = rop.outsize;
2857  return *this;
2858  }
2859  int lexcompare(const mop_addr_t &ra) const
2860  {
2861  int code = mop_t::lexcompare(ra);
2862  return code != 0 ? code
2863  : insize != ra.insize ? (insize-ra.insize)
2864  : outsize != ra.outsize ? (outsize-ra.outsize)
2865  : 0;
2866  }
2867 };
2868 
2869 /// A call argument
2870 class mcallarg_t : public mop_t // #callarg
2871 {
2872 public:
2873  ea_t ea; ///< address where the argument was initialized.
2874  ///< BADADDR means unknown.
2875  tinfo_t type; ///< formal argument type
2876  qstring name; ///< formal argument name
2877  argloc_t argloc; ///< ida argloc
2878 
2879  mcallarg_t(void) : ea(BADADDR) {}
2880  mcallarg_t(const mop_t &rarg) : mop_t(rarg), ea(BADADDR) {}
2881  void copy_mop(const mop_t &op) { *(mop_t *)this = op; }
2882  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
2883  const char *hexapi dstr(void) const;
2884  void hexapi set_regarg(mreg_t mr, int sz, const tinfo_t &tif);
2885  void set_regarg(mreg_t mr, const tinfo_t &tif)
2886  {
2887  set_regarg(mr, tif.get_size(), tif);
2888  }
2889  void set_regarg(mreg_t mr, char dt, type_sign_t sign = type_unsigned)
2890  {
2891  int sz = get_dtype_size(dt);
2892  set_regarg(mr, sz, get_int_type_by_width_and_sign(sz, sign));
2893  }
2894  void make_int(int val, ea_t val_ea, int opno = 0)
2895  {
2896  type = tinfo_t(BTF_INT);
2897  make_number(val, inf_get_cc_size_i(), val_ea, opno);
2898  }
2899  void make_uint(int val, ea_t val_ea, int opno = 0)
2900  {
2901  type = tinfo_t(BTF_UINT);
2902  make_number(val, inf_get_cc_size_i(), val_ea, opno);
2903  }
2904 };
2905 DECLARE_TYPE_AS_MOVABLE(mcallarg_t);
2906 typedef qvector<mcallarg_t> mcallargs_t;
2907 
/// Function roles.
/// They are used to calculate use/def lists and to recognize functions
/// without using string comparisons.
enum funcrole_t
{
  ROLE_UNK,                  ///< unknown function role
  ROLE_EMPTY,                ///< empty, does not do anything (maybe spoils regs)
  ROLE_MEMSET,               ///< memset(void *dst, uchar value, size_t count);
  ROLE_MEMSET32,             ///< memset32(void *dst, uint32 value, size_t count);
  ROLE_MEMSET64,             ///< memset64(void *dst, uint64 value, size_t count);
  ROLE_MEMCPY,               ///< memcpy(void *dst, const void *src, size_t count);
  ROLE_STRCPY,               ///< strcpy(char *dst, const char *src);
  ROLE_STRLEN,               ///< strlen(const char *src);
  ROLE_STRCAT,               ///< strcat(char *dst, const char *src);
  ROLE_TAIL,                 ///< char *tail(const char *str);
  ROLE_BUG,                  ///< BUG() helper macro: never returns, causes exception
  ROLE_ALLOCA,               ///< alloca() function
  ROLE_BSWAP,                ///< bswap() function (any size)
  ROLE_PRESENT,              ///< present() function (used in patterns)
  ROLE_CONTAINING_RECORD,    ///< CONTAINING_RECORD() macro
  ROLE_FASTFAIL,             ///< __fastfail()
  ROLE_READFLAGS,            ///< __readeflags, __readcallersflags
  ROLE_IS_MUL_OK,            ///< is_mul_ok
  ROLE_SATURATED_MUL,        ///< saturated_mul
  ROLE_BITTEST,              ///< [lock] bt
  ROLE_BITTESTANDSET,        ///< [lock] bts
  ROLE_BITTESTANDRESET,      ///< [lock] btr
  ROLE_BITTESTANDCOMPLEMENT, ///< [lock] btc
  ROLE_VA_ARG,               ///< va_arg() macro
  ROLE_VA_COPY,              ///< va_copy() function
  ROLE_VA_START,             ///< va_start() function
  ROLE_VA_END,               ///< va_end() function
  ROLE_ROL,                  ///< rotate left
  ROLE_ROR,                  ///< rotate right
  ROLE_CFSUB3,               ///< carry flag after subtract with carry
  ROLE_OFSUB3,               ///< overflow flag after subtract with carry
  ROLE_ABS,                  ///< integer absolute value
};
2946 
/// \defgroup FUNC_NAME_ Well known function names
//@{
#define FUNC_NAME_MEMCPY            "memcpy"
#define FUNC_NAME_MEMSET            "memset"
#define FUNC_NAME_MEMSET32          "memset32"
#define FUNC_NAME_MEMSET64          "memset64"
#define FUNC_NAME_STRCPY            "strcpy"
#define FUNC_NAME_STRLEN            "strlen"
#define FUNC_NAME_STRCAT            "strcat"
#define FUNC_NAME_TAIL              "tail"
#define FUNC_NAME_VA_ARG            "va_arg"
#define FUNC_NAME_EMPTY             "$empty"
#define FUNC_NAME_PRESENT           "$present"
#define FUNC_NAME_CONTAINING_RECORD "CONTAINING_RECORD"
//@}
2962 
2963 
// the default 256 function arguments is too big, we use a lower value
#undef MAX_FUNC_ARGS
#define MAX_FUNC_ARGS 64
2967 
2968 /// Information about a call
/// Holds the callee address, the argument and return value descriptions,
/// the location lists affected by the call, and the call property bits.
2969 class mcallinfo_t // #callinfo
2970 {
2971 public:
2972  ea_t callee; ///< address of the called function, if known
2973  int solid_args; ///< number of solid args.
2974  ///< there may be variadic args in addition
2975  int call_spd; ///< sp value at call insn
2976  int stkargs_top; ///< first offset past stack arguments
2977  cm_t cc; ///< calling convention
2978  mcallargs_t args; ///< call arguments
2979  mopvec_t retregs; ///< return register(s) (e.g., AX, AX:DX, etc.)
2980  ///< this vector is built from return_regs
2981  tinfo_t return_type; ///< type of the returned value
2982  argloc_t return_argloc; ///< location of the returned value
2983 
2984  mlist_t return_regs; ///< list of values returned by the function
2985  mlist_t spoiled; ///< list of spoiled locations (includes return_regs)
2986  mlist_t pass_regs; ///< passthrough registers: registers that depend on input
2987  ///< values (subset of spoiled)
2988  ivlset_t visible_memory; ///< what memory is visible to the call?
2989  mlist_t dead_regs; ///< registers defined by the function but never used.
2990  ///< upon propagation we do the following:
2991  ///< - dead_regs += return_regs
2992  ///< - retregs.clear() since the call is propagated
2993  int flags; ///< combination of \ref FCI_ bits
2994 /// \defgroup FCI_ Call properties
2995 //@{
2996 #define FCI_PROP 0x001 ///< call has been propagated
2997 #define FCI_DEAD 0x002 ///< some return registers were determined dead
2998 #define FCI_FINAL 0x004 ///< call type is final, should not be changed
2999 #define FCI_NORET 0x008 ///< call does not return
3000 #define FCI_PURE 0x010 ///< pure function
3001 #define FCI_NOSIDE 0x020 ///< call does not have side effects
3002 #define FCI_SPLOK 0x040 ///< spoiled/visible_memory lists have been
3003  ///< optimized. for some functions we can reduce them
3004  ///< as soon as information about the arguments becomes
3005  ///< available. in order not to try to optimize them again
3006  ///< we use this bit.
3007 #define FCI_HASCALL 0x080 ///< The function is a synthetic helper combined
3008  ///< from several instructions and at least one
3009  ///< of them was a call to a real function
3010 #define FCI_HASFMT 0x100 ///< A variadic function with recognized
3011  ///< printf- or scanf-style format string
3012 //@}
3013  funcrole_t role; ///< function role
3014  type_attrs_t fti_attrs; ///< extended function attributes
3015 
 /// Constructor.
 /// \param _callee address of the called function (BADADDR by default)
 /// \param _sargs number of solid arguments
 /// call_spd, stkargs_top and flags start as zero, the calling convention
 /// as CM_CC_INVALID and the role as ROLE_UNK.
3016  mcallinfo_t(ea_t _callee=BADADDR, int _sargs=0)
3017  : callee(_callee), solid_args(_sargs), call_spd(0), stkargs_top(0),
3018  cc(CM_CC_INVALID), flags(0), role(ROLE_UNK) {}
3019  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3020  int hexapi lexcompare(const mcallinfo_t &f) const;
3021  bool hexapi set_type(const tinfo_t &type);
3022  tinfo_t hexapi get_type(void) const;
3023  bool is_vararg(void) const { return is_vararg_cc(cc); } ///< result of is_vararg_cc() for the calling convention 'cc'
3024  void hexapi print(qstring *vout, int size=-1, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
3025  const char *hexapi dstr(void) const;
3026 };
3027 
3028 /// List of switch cases and targets
3029 class mcases_t // #cases
3030 {
3031 public:
3032  casevec_t values; ///< expression values for each target
3033  intvec_t targets; ///< target block numbers
3034 
3035  void swap(mcases_t &r) { values.swap(r.values); targets.swap(r.targets); }
3036  DECLARE_COMPARISONS(mcases_t);
3037  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3038  bool empty(void) const { return targets.empty(); }
3039  size_t size(void) const { return targets.size(); }
3040  void resize(int s) { values.resize(s); targets.resize(s); }
3041  void hexapi print(qstring *vout) const;
3042  const char *hexapi dstr(void) const;
3043 };
3044 
3045 //-------------------------------------------------------------------------
3046 /// Value offset (microregister number or stack offset)
3047 struct voff_t
3048 {
3049  sval_t off; ///< register number or stack offset
3050  mopt_t type; ///< mop_r - register, mop_S - stack, mop_z - undefined
3051 
3052  voff_t() : off(-1), type(mop_z) {}
3053  voff_t(mopt_t _type, sval_t _off) : off(_off), type(_type) {}
3054  voff_t(const mop_t &op) : off(-1), type(mop_z)
3055  {
3056  if ( op.is_reg() || op.t == mop_S )
3057  set(op.t, op.is_reg() ? op.r : op.s->off);
3058  }
3059 
3060  void set(mopt_t _type, sval_t _off) { type = _type; off = _off; }
3061  void set_stkoff(sval_t stkoff) { set(mop_S, stkoff); }
3062  void set_reg (mreg_t mreg) { set(mop_r, mreg); }
3063  void undef() { set(mop_z, -1); }
3064 
3065  bool defined() const { return type != mop_z; }
3066  bool is_reg() const { return type == mop_r; }
3067  bool is_stkoff() const { return type == mop_S; }
3068  mreg_t get_reg() const { QASSERT(51892, is_reg()); return off; }
3069  sval_t get_stkoff() const { QASSERT(51893, is_stkoff()); return off; }
3070 
3071  void inc(sval_t delta) { off += delta; }
3072  voff_t add(int width) const { return voff_t(type, off+width); }
3073  sval_t diff(const voff_t &r) const { QASSERT(51894, type == r.type); return off - r.off; }
3074 
3075 
3076  DECLARE_COMPARISONS(voff_t)
3077  {
3078  int code = ::compare(type, r.type);
3079  return code != 0 ? code : ::compare(off, r.off);
3080  }
3081 };
3082 
3083 //-------------------------------------------------------------------------
3084 /// Value interval (register or stack range)
3085 struct vivl_t : voff_t
3086 {
3087  int size; ///< Interval size in bytes
3088 
3089  vivl_t(mopt_t _type = mop_z, sval_t _off = -1, int _size = 0)
3090  : voff_t(_type, _off), size(_size) {}
3091  vivl_t(const class chain_t &ch);
3092  vivl_t(const mop_t &op) : voff_t(op), size(op.size) {}
3093 
3094  // Make a value interval
3095  void set(mopt_t _type, sval_t _off, int _size = 0)
3096  { voff_t::set(_type, _off); size = _size; }
3097  void set(const voff_t &voff, int _size)
3098  { set(voff.type, voff.off, _size); }
3099  void set_stkoff(sval_t stkoff, int sz = 0) { set(mop_S, stkoff, sz); }
3100  void set_reg (mreg_t mreg, int sz = 0) { set(mop_r, mreg, sz); }
3101 
3102  /// Extend a value interval using another value interval of the same type
3103  /// \return success
3104  bool hexapi extend_to_cover(const vivl_t &r);
3105 
3106  /// Intersect value intervals the same type
3107  /// \return size of the resulting intersection
3108  uval_t hexapi intersect(const vivl_t &r);
3109 
3110  /// Do two value intervals overlap?
3111  bool overlap(const vivl_t &r) const
3112  {
3113  return type == r.type
3114  && interval::overlap(off, size, r.off, r.size);
3115  }
3116  /// Does our value interval include another?
3117  bool includes(const vivl_t &r) const
3118  {
3119  return type == r.type
3120  && interval::includes(off, size, r.off, r.size);
3121  }
3122 
3123  /// Does our value interval contain the specified value offset?
3124  bool contains(const voff_t &voff2) const
3125  {
3126  return type == voff2.type
3127  && interval::contains(off, size, voff2.off);
3128  }
3129 
3130  // Comparisons
3131  DECLARE_COMPARISONS(vivl_t)
3132  {
3133  int code = voff_t::compare(r);
3134  return code; //return code != 0 ? code : ::compare(size, r.size);
3135  }
3136  bool operator==(const mop_t &mop) const
3137  {
3138  return type == mop.t && off == (mop.is_reg() ? mop.r : mop.s->off);
3139  }
3140  void hexapi print(qstring *vout) const;
3141  const char *hexapi dstr(void) const;
3142 };
3143 
3144 //-------------------------------------------------------------------------
3145 /// ud (use->def) and du (def->use) chain.
3146 /// We store in chains only the block numbers, not individual instructions
3147 /// See https://en.wikipedia.org/wiki/Use-define_chain
3148 class chain_t : public intvec_t // sequence of block numbers
3149 {
3150  voff_t k; ///< Value offset of the chain.
3151  ///< (what variable is this chain about)
3152 
3153 public:
3154  int width; ///< size of the value in bytes
3155  int varnum; ///< allocated variable index (-1 - not allocated yet)
3156  uchar flags; ///< combination of \ref CHF_ bits
3157 /// \defgroup CHF_ Chain properties
3158 //@{
3159 #define CHF_INITED 0x01 ///< is chain initialized? (valid only after lvar allocation)
3160 #define CHF_REPLACED 0x02 ///< chain operands have been replaced?
3161 #define CHF_OVER 0x04 ///< overlapped chain
3162 #define CHF_FAKE 0x08 ///< fake chain created by widen_chains()
3163 #define CHF_PASSTHRU 0x10 ///< pass-thru chain, must use the input variable to the block
3164 #define CHF_TERM 0x20 ///< terminating chain; the variable does not survive across the block
3165 //@}
 // All constructors start with the CHF_INITED bit set and, unless 'v' is
 // given, with no allocated variable (varnum == -1).
3166  chain_t() : width(0), varnum(-1), flags(CHF_INITED) {}
3167  chain_t(mopt_t t, sval_t off, int w=1, int v=-1)
3168  : k(t, off), width(w), varnum(v), flags(CHF_INITED) {}
3169  chain_t(const voff_t &_k, int w=1)
3170  : k(_k), width(w), varnum(-1), flags(CHF_INITED) {}
 /// Copy the width, variable index, flags and block numbers from another
 /// chain. The key (value offset) of this chain is not modified.
3171  void set_value(const chain_t &r)
3172  { width = r.width; varnum = r.varnum; flags = r.flags; *(intvec_t *)this = (intvec_t &)r; }
3173  const voff_t &key() const { return k; } ///< value offset the chain is about
3174  bool is_inited(void) const { return (flags & CHF_INITED) != 0; }
3175  bool is_reg(void) const { return k.is_reg(); }
3176  bool is_stkoff(void) const { return k.is_stkoff(); }
3177  bool is_replaced(void) const { return (flags & CHF_REPLACED) != 0; }
3178  bool is_overlapped(void) const { return (flags & CHF_OVER) != 0; }
3179  bool is_fake(void) const { return (flags & CHF_FAKE) != 0; }
3180  bool is_passreg(void) const { return (flags & CHF_PASSTHRU) != 0; }
3181  bool is_term(void) const { return (flags & CHF_TERM) != 0; }
3182  void set_inited(bool b) { setflag(flags, CHF_INITED, b); }
3183  void set_replaced(bool b) { setflag(flags, CHF_REPLACED, b); }
3184  void set_overlapped(bool b) { setflag(flags, CHF_OVER, b); }
3185  void set_term(bool b) { setflag(flags, CHF_TERM, b); }
3186  mreg_t get_reg() const { return k.get_reg(); } ///< register number of the key (see voff_t::get_reg)
3187  sval_t get_stkoff() const { return k.get_stkoff(); } ///< stack offset of the key (see voff_t::get_stkoff)
 /// Do the chains overlap? Both keys must be of the same type; the ranges
 /// [offset, width) are checked with interval::overlap()
3188  bool overlap(const chain_t &r) const
3189  { return k.type == r.k.type && interval::overlap(k.off, width, r.k.off, r.width); }
 /// Does our chain include another one? Both keys must be of the same type;
 /// the ranges [offset, width) are checked with interval::includes()
3190  bool includes(const chain_t &r) const
3191  { return k.type == r.k.type && interval::includes(k.off, width, r.k.off, r.width); }
3192  const voff_t endoff() const { return k.add(width); } ///< key advanced by 'width'
3193 
 /// Chains are ordered by their keys only
3194  bool operator<(const chain_t &r) const { return key() < r.key(); }
3195 
3196  void hexapi print(qstring *vout) const;
3197  const char *hexapi dstr(void) const;
3198  /// Append the contents of the chain to the specified list of locations.
3199  void hexapi append_list(mlist_t *list) const;
 /// Forget the allocated variable and clear the CHF_REPLACED bit
3200  void clear_varnum(void) { varnum = -1; set_replaced(false); }
3201 };
3202 
3203 //-------------------------------------------------------------------------
3204 #if defined(__NT__)
3205 #define SIZEOF_BLOCK_CHAINS 24
3206 #elif defined(__MAC__)
3207 #define SIZEOF_BLOCK_CHAINS 32
3208 #else
3209 #define SIZEOF_BLOCK_CHAINS 56
3210 #endif
3211 /// Chains of one block.
3212 /// Please note that this class is based on std::map and it must be accessed
3213 /// using the block_chains_begin(), block_chains_find() and similar functions.
3214 /// This is required because different compilers use different implementations
3215 /// of std::map. However, since the size of std::map depends on the compilation
3216 /// options, we replace it with a byte array.
3218 {
3219  size_t body[SIZEOF_BLOCK_CHAINS/sizeof(size_t)]; // opaque std::set, uncopyable
3220 public:
3221 
3222  /// Get chain for the specified register
3223  /// \param reg register number
3224  /// \param width size of register in bytes
3225  const chain_t *get_reg_chain(mreg_t reg, int width=1) const
3226  { return get_chain((chain_t(mop_r, reg, width))); }
3227  chain_t *get_reg_chain(mreg_t reg, int width=1)
3228  { return get_chain((chain_t(mop_r, reg, width))); }
3229 
3230  /// Get chain for the specified stack offset
3231  /// \param off stack offset
3232  /// \param width size of stack value in bytes
3233  const chain_t *get_stk_chain(sval_t off, int width=1) const
3234  { return get_chain(chain_t(mop_S, off, width)); }
3235  chain_t *get_stk_chain(sval_t off, int width=1)
3236  { return get_chain(chain_t(mop_S, off, width)); }
3237 
3238  /// Get chain for the specified value offset.
3239  /// \param k value offset (register number or stack offset)
3240  /// \param width size of value in bytes
3241  const chain_t *get_chain(const voff_t &k, int width=1) const
3242  { return get_chain(chain_t(k, width)); }
3243  chain_t *get_chain(const voff_t &k, int width=1)
3244  { return (chain_t*)((const block_chains_t *)this)->get_chain(k, width); }
3245 
3246  /// Get chain similar to the specified chain
3247  /// \param ch chain to search for. only its 'k' and 'width' are used.
3248  const chain_t *hexapi get_chain(const chain_t &ch) const;
3249  chain_t *get_chain(const chain_t &ch)
3250  { return (chain_t*)((const block_chains_t *)this)->get_chain(ch); }
3251 
3252  void hexapi print(qstring *vout) const;
3253  const char *hexapi dstr(void) const;
3254  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3255 };
3256 //-------------------------------------------------------------------------
3257 /// Chain visitor class
3259 {
3260  block_chains_t *parent; ///< parent of the current chain
3261  chain_visitor_t(void) : parent(NULL) {}
3262  virtual int idaapi visit_chain(int nblock, chain_t &ch) = 0;
3263 };
3264 
3265 //-------------------------------------------------------------------------
3266 /// Graph chains.
3267 /// This class represents all ud and du chains of the decompiled function
/// It is a vector of block_chains_t plus a lock counter.
3268 typedef qvector<block_chains_t> block_chains_vec_t;
3269 class graph_chains_t : public block_chains_vec_t
3270 {
3271  int lock; ///< are chains locked? (in-use); incremented by acquire()
3272 public:
3273  graph_chains_t(void) : lock(0) {}
3274  ~graph_chains_t(void) { QASSERT(50444, !lock); } // the chains must be unlocked when destroyed
3275  /// Visit all chains
3276  /// \param cv chain visitor
3277  /// \param gca_flags combination of GCA_ bits
3278  int hexapi for_all_chains(chain_visitor_t &cv, int gca_flags);
3279  /// \defgroup GCA_ chain visitor flags
3280  //@{
3281 #define GCA_EMPTY 0x01 ///< include empty chains
3282 #define GCA_SPEC 0x02 ///< include chains for special registers
3283 #define GCA_ALLOC 0x04 ///< enumerate only allocated chains
3284 #define GCA_NALLOC 0x08 ///< enumerate only non-allocated chains
3285 #define GCA_OFIRST 0x10 ///< consider only chains of the first block
3286 #define GCA_OLAST 0x20 ///< consider only chains of the last block
3287  //@}
3288  /// Are the chains locked?
3289  /// It is a good idea to lock the chains before using them. This ensures
3290  /// that they won't be recalculated and reallocated during the use.
3291  /// See the \ref chain_keeper_t class for that.
3292  bool is_locked(void) const { return lock != 0; }
3293  /// Lock the chains
 /// (increments the lock counter)
3294  void acquire(void) { lock++; }
3295  /// Unlock the chains
3296  void hexapi release(void);
 /// Exchange the chains and the lock counters of two objects
3297  void swap(graph_chains_t &r)
3298  {
3299  qvector<block_chains_t>::swap(r);
3300  std::swap(lock, r.lock);
3301  }
3302 };
3303 //-------------------------------------------------------------------------
3304 /// Microinstruction class #insn
/// An instruction has an opcode, property bits, an address, three operands
/// (left, right, destination) and links to its neighbors in a doubly
/// linked list.
3305 class minsn_t
3306 {
3307  void hexapi init(ea_t _ea); // used by the minsn_t(ea_t) constructor
3308  void hexapi copy(const minsn_t &m); // used by the copy constructor and operator=
3309 public:
3310  mcode_t opcode; ///< instruction opcode
3311  int iprops; ///< combination of \ref IPROP_ bits
3312  minsn_t *next; ///< next insn in doubly linked list. check also nexti()
3313  minsn_t *prev; ///< prev insn in doubly linked list. check also previ()
3314  ea_t ea; ///< instruction address
3315  mop_t l; ///< left operand
3316  mop_t r; ///< right operand
3317  mop_t d; ///< destination operand
3318 
3319  /// \defgroup IPROP_ instruction property bits
3320  //@{
3321  // bits to be used in patterns:
3322 #define IPROP_OPTIONAL 0x0001 ///< optional instruction
3323 #define IPROP_PERSIST 0x0002 ///< persistent insn; they are not destroyed
3324 #define IPROP_WILDMATCH 0x0004 ///< match multiple insns
3325 
3326  // instruction attributes:
3327 #define IPROP_CLNPOP 0x0008 ///< the purpose of the instruction is to clean stack
3328  ///< (e.g. "pop ecx" is often used for that)
3329 #define IPROP_FPINSN 0x0010 ///< floating point insn
3330 #define IPROP_FARCALL 0x0020 ///< call of a far function using push cs/call sequence
3331 #define IPROP_TAILCALL 0x0040 ///< tail call
3332 #define IPROP_ASSERT 0x0080 ///< assertion: usually mov #val, op.
3333  ///< assertions are used to help the optimizer.
3334  ///< assertions are ignored when generating ctree
3335 
3336  // instruction history:
3337 #define IPROP_SPLIT 0x0700 ///< the instruction has been split:
3338 #define IPROP_SPLIT1 0x0100 ///< into 1 byte
3339 #define IPROP_SPLIT2 0x0200 ///< into 2 bytes
3340 #define IPROP_SPLIT4 0x0300 ///< into 4 bytes
3341 #define IPROP_SPLIT8 0x0400 ///< into 8 bytes
3342 #define IPROP_COMBINED 0x0800 ///< insn has been modified because of a partial reference
3343 #define IPROP_EXTSTX 0x1000 ///< this is m_ext propagated into m_stx
3344 #define IPROP_IGNLOWSRC 0x2000 ///< low part of the instruction source operand
3345  ///< has been created artificially
3346  ///< (this bit is used only for 'and x, 80...')
3347 #define IPROP_INV_JX 0x4000 ///< inverted conditional jump
3348 #define IPROP_WAS_NORET 0x8000 ///< was noret icall
3349 #define IPROP_MULTI_MOV 0x10000 ///< the minsn was generated as part of insn that moves multiple registers
3350  ///< (example: STM on ARM may transfer multiple registers)
3351 
3352  // bits that can be set by plugins:
3353 #define IPROP_DONT_PROP 0x20000 ///< may not propagate
3354 #define IPROP_DONT_COMB 0x40000 ///< may not combine this instruction with others
3355 #define IPROP_MBARRIER 0x80000 ///< this instruction acts as a memory barrier
3356  ///< (instructions accessing memory may not be reordered past it)
3357  //@}
3358 
 // Property getters: each one tests the corresponding IPROP_ bit.
 // is_propagatable() and is_combinable() are true when the corresponding
 // IPROP_DONT_ bit is clear.
3359  bool is_optional(void) const { return (iprops & IPROP_OPTIONAL) != 0; }
3360  bool is_combined(void) const { return (iprops & IPROP_COMBINED) != 0; }
3361  bool is_farcall(void) const { return (iprops & IPROP_FARCALL) != 0; }
3362  bool is_cleaning_pop(void) const { return (iprops & IPROP_CLNPOP) != 0; }
3363  bool is_extstx(void) const { return (iprops & IPROP_EXTSTX) != 0; }
3364  bool is_tailcall(void) const { return (iprops & IPROP_TAILCALL) != 0; }
3365  bool is_fpinsn(void) const { return (iprops & IPROP_FPINSN) != 0; }
3366  bool is_assert(void) const { return (iprops & IPROP_ASSERT) != 0; }
3367  bool is_persistent(void) const { return (iprops & IPROP_PERSIST) != 0; }
3368  bool is_wild_match(void) const { return (iprops & IPROP_WILDMATCH) != 0; }
3369  bool is_propagatable(void) const { return (iprops & IPROP_DONT_PROP) == 0; }
3370  bool is_ignlowsrc(void) const { return (iprops & IPROP_IGNLOWSRC) != 0; }
3371  bool is_inverted_jx(void) const { return (iprops & IPROP_INV_JX) != 0; }
3372  bool was_noret_icall(void) const { return (iprops & IPROP_WAS_NORET) != 0; }
3373  bool is_multimov(void) const { return (iprops & IPROP_MULTI_MOV) != 0; }
3374  bool is_combinable(void) const { return (iprops & IPROP_DONT_COMB) == 0; }
3375  bool was_split(void) const { return (iprops & IPROP_SPLIT) != 0; }
3376  bool is_mbarrier(void) const { return (iprops & IPROP_MBARRIER) != 0; }
3377 
 // Property setters: set_...() turns a property on, clr_...() turns it off.
 // Note that for the propagatable/combinable properties this means
 // clearing/setting the IPROP_DONT_ bits respectively.
3378  void set_optional(void) { iprops |= IPROP_OPTIONAL; }
3379  void set_combined(void);
3380  void clr_combined(void) { iprops &= ~IPROP_COMBINED; }
3381  void set_farcall(void) { iprops |= IPROP_FARCALL; }
3382  void set_cleaning_pop(void) { iprops |= IPROP_CLNPOP; }
3383  void set_extstx(void) { iprops |= IPROP_EXTSTX; }
3384  void set_tailcall(void) { iprops |= IPROP_TAILCALL; }
3385  void clr_tailcall(void) { iprops &= ~IPROP_TAILCALL; }
3386  void set_fpinsn(void) { iprops |= IPROP_FPINSN; }
3387  void clr_fpinsn(void) { iprops &= ~IPROP_FPINSN; }
3388  void set_assert(void) { iprops |= IPROP_ASSERT; }
3389  void clr_assert(void) { iprops &= ~IPROP_ASSERT; }
3390  void set_persistent(void) { iprops |= IPROP_PERSIST; }
3391  void set_wild_match(void) { iprops |= IPROP_WILDMATCH; }
3392  void clr_propagatable(void) { iprops |= IPROP_DONT_PROP; }
3393  void set_ignlowsrc(void) { iprops |= IPROP_IGNLOWSRC; }
3394  void clr_ignlowsrc(void) { iprops &= ~IPROP_IGNLOWSRC; }
3395  void set_inverted_jx(void) { iprops |= IPROP_INV_JX; }
3396  void set_noret_icall(void) { iprops |= IPROP_WAS_NORET; }
3397  void clr_noret_icall(void) { iprops &= ~IPROP_WAS_NORET; }
3398  void set_multimov(void) { iprops |= IPROP_MULTI_MOV; }
3399  void clr_multimov(void) { iprops &= ~IPROP_MULTI_MOV; }
3400  void set_combinable(void) { iprops &= ~IPROP_DONT_COMB; }
3401  void clr_combinable(void) { iprops |= IPROP_DONT_COMB; }
3402  void set_mbarrier(void) { iprops |= IPROP_MBARRIER; }
 /// Remember the size the instruction has been split into.
 /// The previous split info is always erased; sizes other than 1,2,4,8
 /// leave the instruction marked as not split.
3403  void set_split_size(int s)
3404  { // s may be only 1,2,4,8. other values are ignored
3405  iprops &= ~IPROP_SPLIT;
3406  iprops |= (s == 1 ? IPROP_SPLIT1
3407  : s == 2 ? IPROP_SPLIT2
3408  : s == 4 ? IPROP_SPLIT4
3409  : s == 8 ? IPROP_SPLIT8 : 0);
3410  }
 /// Get the split size in bytes, 0 if the instruction has not been split.
 /// The IPROP_SPLIT field holds 1,2,3,4 for the sizes 1,2,4,8, hence the shift.
3411  int get_split_size(void) const
3412  {
3413  int cnt = (iprops & IPROP_SPLIT) >> 8;
3414  return cnt == 0 ? 0 : 1 << (cnt-1);
3415  }
3416 
3417  /// Constructor
3418  minsn_t(ea_t _ea) { init(_ea); }
 /// Copy constructor. The prev/next fields of the new instruction are NULL.
3419  minsn_t(const minsn_t &m) { next = prev = NULL; copy(m); }
3420  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3421 
3422  /// Assignment operator. It does not copy prev/next fields.
3423  minsn_t &operator=(const minsn_t &m) { copy(m); return *this; }
3424 
3425  /// Swap two instructions.
3426  /// The prev/next fields are not modified by this function
3427  /// because it would corrupt the doubly linked list.
3428  void hexapi swap(minsn_t &m);
3429 
3430  /// Generate insn text into the buffer
3431  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
3432 
3433  /// Get displayable text without tags in a static buffer
3434  const char *hexapi dstr(void) const;
3435 
3436  /// Change the instruction address.
3437  /// This function modifies subinstructions as well.
3438  void hexapi setaddr(ea_t new_ea);
3439 
3440  /// Optimize one instruction without context.
3441  /// This function does not have access to the instruction context (the
3442  /// previous and next instructions in the list, the block number, etc).
3443  /// It performs only basic optimizations that are available without this info.
 /// It calls optimize_subtree() with all context pointers set to NULL.
3444  /// \param optflags combination of \ref OPTI_ bits
3445  /// \return number of changes, 0-unchanged
3446  /// See also mblock_t::optimize_insn()
3447  int optimize_solo(int optflags=0) { return optimize_subtree(NULL, NULL, NULL, NULL, optflags); }
3448  /// \defgroup OPTI_ optimization flags
3449  //@{
3450 #define OPTI_ADDREXPRS 0x0001 ///< optimize all address expressions (&x+N; &x-&y)
3451 #define OPTI_MINSTKREF 0x0002 ///< may update minstkref
3452 #define OPTI_COMBINSNS 0x0004 ///< may combine insns (only for optimize_insn)
3453 #define OPTI_NO_LDXOPT 0x0008 ///< do not optimize low/high(ldx)
3454  //@}
3455 
3456  /// Optimize instruction in its context.
3457  /// Do not use this function, use mblock_t::optimize()
3458  int hexapi optimize_subtree(
3459  mblock_t *blk,
3460  minsn_t *top,
3461  minsn_t *parent,
3462  minsn_t **converted_call,
3463  int optflags=OPTI_MINSTKREF);
3464 
3465  /// Visit all instruction operands.
3466  /// This function visits subinstruction operands as well.
3467  /// \param mv operand visitor
3468  /// \return non-zero value returned by mv.visit_mop() or zero
3469  int hexapi for_all_ops(mop_visitor_t &mv);
3470 
3471  /// Visit all instructions.
3472  /// This function visits the instruction itself and all its subinstructions.
3473  /// \param mv instruction visitor
3474  /// \return non-zero value returned by the instruction visitor or zero
3475  int hexapi for_all_insns(minsn_visitor_t &mv);
3476 
3477  /// Convert instruction to nop.
3478  /// This function erases all info but the prev/next fields.
3479  /// In most cases it is better to use mblock_t::make_nop(), which also
3480  /// marks the block lists as dirty.
3481  void hexapi _make_nop(void);
3482 
3483  /// Compare instructions.
3484  /// This is the main comparison function for instructions.
3485  /// \param m instruction to compare with
3486  /// \param eqflags combination of \ref EQ_ bits
3487  bool hexapi equal_insns(const minsn_t &m, int eqflags) const; // intelligent comparison
3488  /// \defgroup EQ_ comparison bits
3489  //@{
3490 #define EQ_IGNSIZE 0x0001 ///< ignore operand sizes
3491 #define EQ_IGNCODE 0x0002 ///< ignore instruction opcodes
3492 #define EQ_CMPDEST 0x0004 ///< compare instruction destinations
3493 #define EQ_OPTINSN 0x0008 ///< optimize mop_d operands
3494  //@}
3495 
3496  /// Lexicographical comparison
3497  /// It can be used to store minsn_t in various containers, like std::set
3498  bool operator <(const minsn_t &ri) const { return lexcompare(ri) < 0; }
3499  int hexapi lexcompare(const minsn_t &ri) const;
3500 
3501  //-----------------------------------------------------------------------
3502  // Call instructions
3503  //-----------------------------------------------------------------------
3504  /// Is a non-returning call?
3505  /// \param ignore_noret_icall if set, indirect calls to noret functions will
3506  /// return false
3507  bool hexapi is_noret_call(bool ignore_noret_icall=false);
3508 
3509  /// Is an unknown call?
3510  /// Unknown calls are resolved by mbl_array_t::analyze_calls()
3511  /// They exist until the MMAT_CALLS maturity level.
 /// A call is unknown when its opcode is a call opcode and 'd' is empty.
3512  /// See also \ref mblock_t::is_call_block
3513  bool is_unknown_call(void) const { return is_mcode_call(opcode) && d.empty(); }
3514 
3515  /// Is a helper call with the specified name?
3516  /// Helper calls usually have well-known function names (see \ref FUNC_NAME_)
3517  /// but they may have any other name. The decompiler does not assume any
3518  /// special meaning for non-well-known names.
3519  bool hexapi is_helper(const char *name) const;
3520 
3521  /// Find a call instruction.
3522  /// Check for the current instruction and its subinstructions.
3523  /// \param with_helpers consider helper calls as well?
3524  minsn_t *hexapi find_call(bool with_helpers=false) const;
3525 
3526  /// Does the instruction contain a call?
3527  bool contains_call(bool with_helpers=false) const { return find_call(with_helpers) != NULL; }
3528 
3529  /// Does the instruction have a side effect?
3530  /// \param include_ldx_and_divs consider ldx/div/mod as having side effects?
3531  /// stx is always considered as having side effects.
3532  /// Apart from ldx/stx only call may have side effects.
3533  bool hexapi has_side_effects(bool include_ldx_and_divs=false) const;
3534 
3535  /// Get the function role of a call
 /// The role is taken from the call info in 'd'; ROLE_UNK is returned
 /// if 'd' is not an argument list.
3536  funcrole_t get_role(void) const { return d.is_arglist() ? d.f->role : ROLE_UNK; }
3537  bool is_memcpy(void) const { return get_role() == ROLE_MEMCPY; }
3538  bool is_memset(void) const { return get_role() == ROLE_MEMSET; }
3539  bool is_alloca(void) const { return get_role() == ROLE_ALLOCA; }
3540  bool is_bswap (void) const { return get_role() == ROLE_BSWAP; }
3541  bool is_readflags (void) const { return get_role() == ROLE_READFLAGS; }
3542 
3543  //-----------------------------------------------------------------------
3544  // Misc
3545  //-----------------------------------------------------------------------
3546  /// Does the instruction have the specified opcode?
3547  /// This function searches subinstructions as well.
3548  /// \param mcode opcode to search for.
3549  bool contains_opcode(mcode_t mcode) const { return find_opcode(mcode) != NULL; }
3550 
3551  /// Find a (sub)instruction with the specified opcode.
3552  /// \param mcode opcode to search for.
3553  const minsn_t *find_opcode(mcode_t mcode) const { return (CONST_CAST(minsn_t*)(this))->find_opcode(mcode); }
3554  minsn_t *hexapi find_opcode(mcode_t mcode);
3555 
3556  /// Find an operand that is a subinstruction with the specified opcode.
3557  /// This function checks only the 'l' and 'r' operands of the current insn.
3558  /// \param[out] other pointer to the other operand
3559  /// (&r if we return &l and vice versa)
3560  /// \param op opcode to search for
3561  /// \return &l or &r or NULL
 /// NOTE(review): the return type is minsn_t*, not mop_t*, so the result is
 /// presumably the subinstruction held by that operand - confirm.
3562  const minsn_t *hexapi find_ins_op(const mop_t **other, mcode_t op=m_nop) const;
3563  minsn_t *find_ins_op(mop_t **other, mcode_t op=m_nop) { return CONST_CAST(minsn_t*)((CONST_CAST(const minsn_t*)(this))->find_ins_op((const mop_t**)other, op)); }
3564 
3565  /// Find a numeric operand of the current instruction.
3566  /// This function checks only the 'l' and 'r' operands of the current insn.
3567  /// \param[out] other pointer to the other operand
3568  /// (&r if we return &l and vice versa)
3569  /// \return &l or &r or NULL
3570  const mop_t *hexapi find_num_op(const mop_t **other) const;
3571  mop_t *find_num_op(mop_t **other) { return CONST_CAST(mop_t*)((CONST_CAST(const minsn_t*)(this))->find_num_op((const mop_t**)other)); }
3572 
 /// Is a move? Either m_mov or m_f2f with equal source and destination sizes.
3573  bool is_mov(void) const { return opcode == m_mov || (opcode == m_f2f && l.size == d.size); }
 /// Is a move, an opcode accepted by is_mcode_xdsu(), or m_low?
3574  bool is_like_move(void) const { return is_mov() || is_mcode_xdsu(opcode) || opcode == m_low; }
3575 
3576  /// Does the instruction modify its 'd' operand?
3577  /// Some instructions (e.g. m_stx) do not modify the 'd' operand.
3578  bool hexapi modifes_d(void) const;
 /// Does the instruction modify its 'd' operand and is 'd' of the mop_p type?
3579  bool modifies_pair_mop(void) const { return d.t == mop_p && modifes_d(); }
3580 
3581  /// Is the instruction in the specified range of instructions?
3582  /// \param m1 beginning of the range in the doubly linked list
3583  /// \param m2 end of the range in the doubly linked list (excluded, may be NULL)
3584  /// This function assumes that m1 and m2 belong to the same basic block
3585  /// and they are top level instructions.
3586  bool hexapi is_between(const minsn_t *m1, const minsn_t *m2) const;
3587 
3588  /// Is the instruction after the specified one?
3589  /// \param m the instruction to compare against in the list
 /// Returns false if m is NULL.
3590  bool is_after(const minsn_t *m) const { return m != NULL && is_between(m->next, NULL); }
3591 
3592  /// Is it possible for the instruction to use aliased memory?
3593  bool hexapi may_use_aliased_memory(void) const;
3594 };
3595 
3596 /// Skip assertions forward
3597 const minsn_t *hexapi getf_reginsn(const minsn_t *ins);
3598 /// Skip assertions backward
3599 const minsn_t *hexapi getb_reginsn(const minsn_t *ins);
3600 inline minsn_t *getf_reginsn(minsn_t *ins) { return CONST_CAST(minsn_t*)(getf_reginsn(CONST_CAST(const minsn_t *)(ins))); }
3601 inline minsn_t *getb_reginsn(minsn_t *ins) { return CONST_CAST(minsn_t*)(getb_reginsn(CONST_CAST(const minsn_t *)(ins))); }
3602 
3603 //-------------------------------------------------------------------------
/// Basic block types
/// The type tells how a block passes the execution to other blocks.
enum mblock_type_t
{
  BLT_NONE = 0, ///< unknown block type
  BLT_STOP = 1, ///< stops execution regularly (must be the last block)
  BLT_0WAY = 2, ///< does not have successors (tail is a noret function)
  BLT_1WAY = 3, ///< passes execution to one block (regular or goto block)
  BLT_2WAY = 4, ///< passes execution to two blocks (conditional jump)
  BLT_NWAY = 5, ///< passes execution to many blocks (switch idiom)
  BLT_XTRN = 6, ///< external block (out of function address)
};
3615 
3616 // Maximal bit range: a bitrange_t built from the arguments (0, USHRT_MAX)
3617 #define MAXRANGE bitrange_t(0, USHRT_MAX)
3618 
3619 //-------------------------------------------------------------------------
3620 /// Microcode of one basic block.
3621 /// All blocks are part of a doubly linked list. They can also be addressed
3622 /// by indexing the mba->natural array. A block contains a doubly linked list
3623 /// of instructions, various location lists that are used for data flow
3624 /// analysis, and other attributes.
3626 {
3627  friend class codegen_t;
3628  DECLARE_UNCOPYABLE(mblock_t)
3629  void hexapi init(void);
3630 public:
3631  mblock_t *nextb; ///< next block in the doubly linked list
3632  mblock_t *prevb; ///< previous block in the doubly linked list
3633  uint32 flags; ///< combination of \ref MBL_ bits
3634  /// \defgroup MBL_ Basic block properties
3635  //@{
3636 #define MBL_PRIV 0x0001 ///< private block - no instructions except
3637  ///< the specified are accepted (used in patterns)
3638 #define MBL_NONFAKE 0x0000 ///< regular block (zero: no bit is set)
3639 #define MBL_FAKE 0x0002 ///< fake block (after a tail call)
3640 #define MBL_GOTO 0x0004 ///< this block is a goto target
3641 #define MBL_TCAL 0x0008 ///< artificial call block for tail calls
3642 #define MBL_PUSH 0x0010 ///< needs "convert push/pop instructions"
3643 #define MBL_DMT64 0x0020 ///< needs "demote 64bits"; set by request_demote64()
3644 #define MBL_COMB 0x0040 ///< needs "combine" pass
3645 #define MBL_PROP 0x0080 ///< needs 'propagation' pass; set by request_propagation()
3646 #define MBL_DEAD 0x0100 ///< needs "eliminate deads" pass
3647 #define MBL_LIST 0x0200 ///< use/def lists are ready (not dirty); cleared by mark_lists_dirty()
3648 #define MBL_INCONST 0x0400 ///< inconsistent lists: we are building them
3649 #define MBL_CALL 0x0800 ///< call information has been built
3650 #define MBL_BACKPROP 0x1000 ///< performed backprop_cc
3651 #define MBL_NORET 0x2000 ///< dead end block: doesn't return execution control
3652 #define MBL_DSLOT 0x4000 ///< block for delay slot
3653 #define MBL_VALRANGES 0x8000 ///< should optimize using value ranges
3654  //@}
3655  ea_t start; ///< start address
3656  ea_t end; ///< end address
3657  ///< note: we cannot rely on start/end addresses
3658  ///< very much because instructions are
3659  ///< propagated between blocks
3660  minsn_t *head; ///< pointer to the first instruction of the block (NULL if the block is empty, see empty())
3661  minsn_t *tail; ///< pointer to the last instruction of the block
3662  mbl_array_t *mba; ///< the parent micro block array
3663  int serial; ///< block number
3664  mblock_type_t type; ///< block type (BLT_NONE - not computed yet)
3665 
3666  mlist_t dead_at_start; ///< data that is dead at the block entry
3667  mlist_t mustbuse; ///< data that must be used by the block
3668  mlist_t maybuse; ///< data that may be used by the block
3669  mlist_t mustbdef; ///< data that must be defined by the block
3670  mlist_t maybdef; ///< data that may be defined by the block
3671  mlist_t dnu; ///< data that is defined but not used in the block
3672 
3673  sval_t maxbsp; ///< maximal sp value in the block (0...stacksize)
3674  sval_t minbstkref; ///< lowest stack location accessible with indirect
3675  ///< addressing (offset from the stack bottom)
3676  ///< initially it is 0 (not computed)
3677  sval_t minbargref; ///< the same for arguments
3678 
3679  intvec_t predset; ///< control flow graph: list of our predecessors
3680  ///< use npred() and pred() to access it
3681  intvec_t succset; ///< control flow graph: list of our successors
3682  ///< use nsucc() and succ() to access it
3683 
3684  // the exact size of this class is not documented, there may be more fields
3685  char reserved[];
3686 
3687  void mark_lists_dirty(void) { flags &= ~MBL_LIST; request_propagation(); } ///< clear MBL_LIST and request a propagation pass
3688  void request_propagation(void) { flags |= MBL_PROP; } ///< set MBL_PROP
3689  bool needs_propagation(void) const { return (flags & MBL_PROP) != 0; } ///< is MBL_PROP set?
3690  void request_demote64(void) { flags |= MBL_DMT64; } ///< set MBL_DMT64
3691  bool lists_dirty(void) const { return (flags & MBL_LIST) == 0; } ///< is MBL_LIST clear?
3692  bool lists_ready(void) const { return (flags & (MBL_LIST|MBL_INCONST)) == MBL_LIST; } ///< is MBL_LIST set while MBL_INCONST is clear?
3693  int make_lists_ready(void) // build the use/def lists unless lists_ready(); returns 0 or the result of build_lists(false)
3694  {
3695  if ( lists_ready() )
3696  return 0;
3697  return build_lists(false);
3698  }
3699 
3700  /// Get number of block predecessors (size of predset)
3701  int npred(void) const { return predset.size(); } // number of xrefs to the block
3702  /// Get number of block successors (size of succset)
3703  int nsucc(void) const { return succset.size(); } // number of xrefs from the block
3704  // Get predecessor number N: returns predset[n] (presumably a block number - confirm against callers)
3705  int pred(int n) const { return predset[n]; }
3706  // Get successor number N: returns succset[n] (presumably a block number - confirm against callers)
3707  int succ(int n) const { return succset[n]; }
3708 
3709  mblock_t(void) { init(); } ///< all initialization is delegated to init()
3710  virtual ~mblock_t(void);
3711  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3712  bool empty(void) const { return head == NULL; } ///< true if the block has no first instruction (head is NULL)
3713 
3714  /// Print block contents.
3715  /// \param vp print helpers class. it can be used to direct the printed
3716  /// info to any destination
3717  void hexapi print(vd_printer_t &vp) const;
3718 
3719  /// Dump block info.
3720  /// This function is useful for debugging, see mbl_array_t::dump for info
3721  void hexapi dump(void) const;
3722  AS_PRINTF(2, 0) void hexapi vdump_block(const char *title, va_list va) const; ///< va_list flavor of dump_block()
3723  AS_PRINTF(2, 3) void dump_block(const char *title, ...) const ///< forwards TITLE and its variadic arguments to vdump_block()
3724  {
3725  va_list va;
3726  va_start(va, title);
3727  vdump_block(title, va);
3728  va_end(va);
3729  }
3730 
3731  //-----------------------------------------------------------------------
3732  // Functions to insert/remove insns during the microcode optimization phase.
3733  // See codegen_t, microcode_filter_t, udcall_t classes for the initial
3734  // microcode generation.
3735  //-----------------------------------------------------------------------
3736  /// Insert instruction into the doubly linked list
3737  /// \param nm new instruction
3738  /// \param om existing instruction, part of the doubly linked list.
3739  /// if NULL, then the instruction will be inserted at the beginning
3740  /// of the list.
3741  /// otherwise NM will be inserted immediately after OM
3742  /// \return pointer to NM
3743  minsn_t *hexapi insert_into_block(minsn_t *nm, minsn_t *om);
3744 
3745  /// Remove instruction from the doubly linked list
3746  /// \param m instruction to remove
3747  /// The removed instruction is not deleted; the caller takes ownership of it
3748  /// \return pointer to the next instruction
3749  minsn_t *hexapi remove_from_block(minsn_t *m);
3750 
3751  //-----------------------------------------------------------------------
3752  // Iterator over instructions and operands
3753  //-----------------------------------------------------------------------
3754  /// Visit all instructions.
3755  /// This function visits subinstructions too.
3756  /// \param mv instruction visitor
3757  /// \return zero or the value returned by mv.visit_insn()
3758  /// See also mbl_array_t::for_all_topinsns()
3759  int hexapi for_all_insns(minsn_visitor_t &mv);
3760 
3761  /// Visit all operands.
3762  /// This function visits subinstruction operands too.
3763  /// \param mv operand visitor
3764  /// \return zero or the value returned by mv.visit_mop()
3765  int hexapi for_all_ops(mop_visitor_t &mv);
3766 
3767  /// Visit all operands that use LIST.
3768  /// \param list ptr to the list of locations. it may be modified:
3769  /// parts that get redefined by the instructions in [i1,i2)
3770  /// will be deleted.
3771  /// \param i1 starting instruction. must be a top level insn.
3772  /// \param i2 ending instruction (excluded). must be a top level insn.
3773  /// \param mmv operand visitor
3774  /// \return zero or the value returned by mmv.visit_mop()
3775  int hexapi for_all_uses(
3776  mlist_t *list,
3777  minsn_t *i1,
3778  minsn_t *i2,
3779  mlist_mop_visitor_t &mmv);
3780 
3781  //-----------------------------------------------------------------------
3782  // Optimization functions
3783  //-----------------------------------------------------------------------
3784  /// Optimize one instruction in the context of the block.
3785  /// \param m pointer to a top level instruction
3786  /// \param optflags combination of \ref OPTI_ bits
3787  /// \return number of changes made to the block
3788  /// This function may change other instructions in the block too.
3789  /// However, it will not destroy top level instructions (it may convert them
3790  /// to nop's). This function performs only intrablock modifications.
3791  /// See also minsn_t::optimize_solo()
3792  int hexapi optimize_insn(minsn_t *m, int optflags=OPTI_MINSTKREF|OPTI_COMBINSNS);
3793 
3794  /// Optimize a basic block.
3795  /// Usually there is no need to call this function explicitly because the
3796  /// decompiler will call it itself if optinsn_t::func or optblock_t::func
3797  /// return non-zero.
3798  /// \return number of changes made to the block
3799  int hexapi optimize_block(void);
3800 
3801  /// Build def-use lists and eliminate deads.
3802  /// \param kill_deads do delete dead instructions?
3803  /// \return the number of eliminated instructions
3804  /// Better call mblock_t::make_lists_ready() rather than this function.
3805  int hexapi build_lists(bool kill_deads);
3806 
3807  /// Remove a jump at the end of the block if it is useless.
3808  /// This function preserves any side effects when removing a useless jump.
3809  /// Both conditional and unconditional jumps are handled (and jtbl too).
3810  /// This function deletes useless jumps, not only replaces them with a nop.
3811  /// (please note that optimize_insn() does not handle useless jumps).
3812  /// \return number of changes made to the block
3813  int hexapi optimize_useless_jump(void);
3814 
3815  //-----------------------------------------------------------------------
3816  // Functions that work with use/def lists. These lists are used to
3817  // represent lists of registers and stack locations that are either modified
3818  // or accessed by microinstructions.
3819  //-----------------------------------------------------------------------
3820  /// Append use-list of an operand.
3821  /// This function calculates list of locations that may or must be used
3822  /// by the operand and appends it to LIST.
3823  /// \param list ptr to the output buffer. we will append to it.
3824  /// \param op operand to calculate the use list of
3825  /// \param maymust should we calculate 'may-use' or 'must-use' list?
3826  /// see \ref maymust_t for more details.
3827  /// \param mask if only part of the operand should be considered,
3828  /// a bitmask can be used to specify which part.
3829  /// example: op=AX,mask=0xFF means that we will consider only AL.
3830  /// the default, MAXRANGE, is the maximal bit range.
3831  mlist_t *list,
3832  const mop_t &op,
3833  maymust_t maymust,
3834  bitrange_t mask=MAXRANGE) const;
3835 
3836  /// Append def-list of an operand.
3837  /// This function calculates list of locations that may or must be modified
3838  /// by the operand and appends it to LIST.
3839  /// \param list ptr to the output buffer. we will append to it.
3840  /// \param op operand to calculate the def list of
3841  /// \param maymust should we calculate 'may-def' or 'must-def' list?
3842  /// see \ref maymust_t for more details.
3843  void hexapi append_def_list(
3844  mlist_t *list,
3845  const mop_t &op,
3846  maymust_t maymust) const;
3847 
3848  /// Build use-list of an instruction.
3849  /// This function calculates list of locations that may or must be used
3850  /// by the instruction. Examples:
3851  /// "ldx ds.2, eax.4, ebx.4", may-list: all aliasable memory
3852  /// "ldx ds.2, eax.4, ebx.4", must-list: empty
3853  /// Since LDX uses EAX for indirect access, it may access any aliasable
3854  /// memory. On the other hand, we cannot tell for sure which memory cells
3855  /// will be accessed, this is why the must-list is empty.
3856  /// \param ins instruction to calculate the use list of
3857  /// \param maymust should we calculate 'may-use' or 'must-use' list?
3858  /// see \ref maymust_t for more details.
3859  /// \return the calculated use-list
3860  mlist_t hexapi build_use_list(const minsn_t &ins, maymust_t maymust) const;
3861 
3862  /// Build def-list of an instruction.
3863  /// This function calculates list of locations that may or must be modified
3864  /// by the instruction. Examples:
3865  /// "stx ebx.4, ds.2, eax.4", may-list: all aliasable memory
3866  /// "stx ebx.4, ds.2, eax.4", must-list: empty
3867  /// Since STX uses EAX for indirect access, it may modify any aliasable
3868  /// memory. On the other hand, we cannot tell for sure which memory cells
3869  /// will be modified, this is why the must-list is empty.
3870  /// \param ins instruction to calculate the def list of
3871  /// \param maymust should we calculate 'may-def' or 'must-def' list?
3872  /// see \ref maymust_t for more details.
3873  /// \return the calculated def-list
3874  mlist_t hexapi build_def_list(const minsn_t &ins, maymust_t maymust) const;
3875 
3876  //-----------------------------------------------------------------------
3877  // The use/def lists can be used to search for interesting instructions
3878  //-----------------------------------------------------------------------
3879  /// Is the list used by the specified instruction range?
3880  /// \param list list of locations. LIST may be modified by the function:
3881  /// redefined locations will be removed from it.
3882  /// \param i1 starting instruction of the range (must be a top level insn)
3883  /// \param i2 end instruction of the range (must be a top level insn)
3884  /// i2 is excluded from the range. it can be specified as NULL.
3885  /// i1 and i2 must belong to the same block.
3886  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3887  bool is_used(mlist_t *list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const
3888  { return find_first_use(list, i1, i2, maymust) != NULL; } // true if find_first_use() finds an instruction
3889 
3890  /// Find the first insn that uses the specified list in the insn range.
3891  /// \param list list of locations. LIST may be modified by the function:
3892  /// redefined locations will be removed from it.
3893  /// \param i1 starting instruction of the range (must be a top level insn)
3894  /// \param i2 end instruction of the range (must be a top level insn)
3895  /// i2 is excluded from the range. it can be specified as NULL.
3896  /// i1 and i2 must belong to the same block.
3897  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3898  /// \return pointer to such instruction or NULL.
3899  /// Upon return LIST will contain only locations not redefined
3900  /// by insns [i1..result]
3901  const minsn_t *hexapi find_first_use(mlist_t *list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const;
3902  minsn_t *find_first_use(mlist_t *list, minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const // overload for a non-const I1: same search, non-const result
3903  {
3904  return CONST_CAST(minsn_t*)(find_first_use(list,
3905  CONST_CAST(const minsn_t*)(i1), // the cast selects the const overload declared above
3906  i2,
3907  maymust));
3908  }
3909 
3910  /// Is the list redefined by the specified instructions?
3911  /// \param list list of locations to check.
3912  /// \param i1 starting instruction of the range (must be a top level insn)
3913  /// \param i2 end instruction of the range (must be a top level insn)
3914  /// i2 is excluded from the range. it can be specified as NULL.
3915  /// i1 and i2 must belong to the same block.
3916  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3918  const mlist_t &list,
3919  const minsn_t *i1,
3920  const minsn_t *i2,
3921  maymust_t maymust=MAY_ACCESS) const
3922  {
3923  return find_redefinition(list, i1, i2, maymust) != NULL; // true if find_redefinition() finds an instruction
3924  }
3925 
3926  /// Find the first insn that redefines any part of the list in the insn range.
3927  /// \param list list of locations to check.
3928  /// \param i1 starting instruction of the range (must be a top level insn)
3929  /// \param i2 end instruction of the range (must be a top level insn)
3930  /// i2 is excluded from the range. it can be specified as NULL.
3931  /// i1 and i2 must belong to the same block.
3932  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3933  /// \return pointer to such instruction or NULL.
3934  const minsn_t *hexapi find_redefinition(
3935  const mlist_t &list,
3936  const minsn_t *i1,
3937  const minsn_t *i2,
3938  maymust_t maymust=MAY_ACCESS) const;
3939  minsn_t *find_redefinition( // overload for a non-const I1: same search, non-const result
3940  const mlist_t &list,
3941  minsn_t *i1,
3942  const minsn_t *i2,
3943  maymust_t maymust=MAY_ACCESS) const
3944  {
3945  return CONST_CAST(minsn_t*)(find_redefinition(list,
3946  CONST_CAST(const minsn_t*)(i1), // the cast selects the const overload declared above
3947  i2,
3948  maymust));
3949  }
3950 
3951  /// Is the right hand side of the instruction redefined in the insn range?
3952  /// "right hand side" corresponds to the source operands of the instruction.
3953  /// \param ins instruction to consider
3954  /// \param i1 starting instruction of the range (must be a top level insn)
3955  /// \param i2 end instruction of the range (must be a top level insn)
3956  /// i2 is excluded from the range. it can be specified as NULL.
3957  /// i1 and i2 must belong to the same block.
3958  bool hexapi is_rhs_redefined(minsn_t *ins, minsn_t *i1, minsn_t *i2);
3959 
3960  /// Find the instruction that accesses the specified operand.
3961  /// This function searches inside one block.
3962  /// \param op operand to search for
3963  /// \param parent ptr to ptr to a top level instruction.
3964  /// denotes the beginning of the search range.
3965  /// \param mend end instruction of the range (must be a top level insn)
3966  /// mend is excluded from the range. it can be specified as NULL.
3967  /// *parent and mend must belong to the same block.
3968  /// \param fdflags combination of \ref FD_ bits
3969  /// \return the instruction that accesses the operand. this instruction
3970  /// may be a sub-instruction. to find out the top level
3971  /// instruction, check out *parent.
3972  /// NULL means 'not found'.
3973  minsn_t *hexapi find_access(
3974  const mop_t &op,
3975  minsn_t **parent,
3976  const minsn_t *mend,
3977  int fdflags) const;
3978  /// \defgroup FD_ bits for mblock_t::find_access
3979  //@{
3980 #define FD_BACKWARD 0x0000 ///< search direction (zero: selected when FD_FORWARD is not set)
3981 #define FD_FORWARD 0x0001 ///< search direction
3982 #define FD_USE 0x0000 ///< look for use (zero: selected when FD_DEF is not set)
3983 #define FD_DEF 0x0002 ///< look for definition
3984 #define FD_DIRTY 0x0004 ///< ignore possible implicit definitions
3985  ///< by function calls and indirect memory access
3986  //@}
3987 
3988  // Convenience functions: thin wrappers around find_access(); P_I1 and I2 are passed as its PARENT and MEND
3989  minsn_t *find_def( // find_access() with FD_DEF added to FDFLAGS
3990  const mop_t &op,
3991  minsn_t **p_i1,
3992  const minsn_t *i2,
3993  int fdflags)
3994  {
3995  return find_access(op, p_i1, i2, fdflags|FD_DEF);
3996  }
3997  minsn_t *find_use( // find_access() with FD_USE; FD_USE is 0, so FDFLAGS is passed through unchanged
3998  const mop_t &op,
3999  minsn_t **p_i1,
4000  const minsn_t *i2,
4001  int fdflags)
4002  {
4003  return find_access(op, p_i1, i2, fdflags|FD_USE);
4004  }
4005 
4006  /// Find possible values for a block.
4007  /// \param res set of value ranges (output)
4008  /// \param vivl what to search for
4009  /// \param vrflags combination of \ref VR_ bits
4010  bool hexapi get_valranges(valrng_t *res, const vivl_t &vivl, int vrflags) const;
4011 
4012  /// Find possible values for an instruction.
4013  /// \param res set of value ranges (output)
4014  /// \param vivl what to search for
4015  /// \param m insn to search value ranges at. \sa VR_ bits
4016  /// \param vrflags combination of \ref VR_ bits
4017  bool hexapi get_valranges(
4018  valrng_t *res,
4019  const vivl_t &vivl,
4020  const minsn_t *m,
4021  int vrflags) const;
4022 
4023  /// \defgroup VR_ bits for get_valranges
4024  //@{
4025 #define VR_AT_START 0x0000 ///< get value ranges before the instruction or
4026  ///< at the block start (if M is NULL).
4026  ///< zero: selected when VR_AT_END is not set
4027 #define VR_AT_END 0x0001 ///< get value ranges after the instruction or
4028  ///< at the block end, just after the last
4029  ///< instruction (if M is NULL)
4030 #define VR_EXACT 0x0002 ///< find exact match. if not set, the returned
4031  ///< valrng size will be >= vivl.size
4032  //@}
4033 
4034  /// Erase the instruction (convert it to nop) and mark the lists dirty.
4035  /// This is the recommended function to use because it also marks the block
4036  /// use-def lists dirty (and, through mark_lists_dirty(), requests propagation).
4037  void make_nop(minsn_t *m) { m->_make_nop(); mark_lists_dirty(); }
4038 
4039  /// Calculate number of regular (non-assertion) instructions in the block.
4040  /// Assertions are skipped by this function.
4041  /// \return Number of non-assertion instructions in the block.
4042  size_t hexapi get_reginsn_qty(void) const;
4043 
4044  bool is_call_block(void) const { return tail != NULL && is_mcode_call(tail->opcode); } ///< does the block end with an opcode accepted by is_mcode_call()?
4045  bool is_unknown_call(void) const { return tail != NULL && tail->is_unknown_call(); } ///< does the block end with an insn for which minsn_t::is_unknown_call() is true?
4046  bool is_nway(void) const { return type == BLT_NWAY; } ///< is the block type BLT_NWAY?
4047  bool is_branch(void) const { return type == BLT_2WAY && tail->d.t == mop_b; } ///< BLT_2WAY block whose tail->d is a mop_b; tail is read without a NULL check
4048  bool is_simple_goto_block(void) const ///< get_reginsn_qty() is 1 and the tail is an m_goto whose tail->l is a mop_b
4049  {
4050  return get_reginsn_qty() == 1
4051  && tail->opcode == m_goto
4052  && tail->l.t == mop_b;
4053  }
4054  bool is_simple_jcnd_block() const ///< is_branch() block with one predecessor and one counted insn, whose tail opcode passes is_mcode_convertible_to_set()
4055  {
4056  return is_branch()
4057  && npred() == 1
4058  && get_reginsn_qty() == 1
4059  && is_mcode_convertible_to_set(tail->opcode);
4060  }
4061 };
4062 //-------------------------------------------------------------------------
4063 /// Warning ids.
4063 /// Each comment below gives the numeric value of the id followed by the warning text.
4065 {
4066  WARN_VARARG_REGS, ///< 0 cannot handle register arguments in vararg function, discarded them
4067  WARN_ILL_PURGED, ///< 1 odd caller purged bytes %d, correcting
4068  WARN_ILL_FUNCTYPE, ///< 2 invalid function type has been ignored
4069  WARN_VARARG_TCAL, ///< 3 cannot handle tail call to vararg
4070  WARN_VARARG_NOSTK, ///< 4 call vararg without local stack
4071  WARN_VARARG_MANY, ///< 5 too many varargs, some ignored
4072  WARN_ADDR_OUTARGS, ///< 6 cannot handle address arithmetics in outgoing argument area of stack frame -- unused
4073  WARN_DEP_UNK_CALLS, ///< 7 found interdependent unknown calls
4074  WARN_ILL_ELLIPSIS, ///< 8 erroneously detected ellipsis type has been ignored
4075  WARN_GUESSED_TYPE, ///< 9 using guessed type %s;
4076  WARN_EXP_LINVAR, ///< 10 failed to expand a linear variable
4077  WARN_WIDEN_CHAINS, ///< 11 failed to widen chains
4078  WARN_BAD_PURGED, ///< 12 inconsistent function type and number of purged bytes
4079  WARN_CBUILD_LOOPS, ///< 13 too many cbuild loops
4080  WARN_NO_SAVE_REST, ///< 14 could not find valid save-restore pair for %s
4081  WARN_ODD_INPUT_REG, ///< 15 odd input register %s
4082  WARN_ODD_ADDR_USE, ///< 16 odd use of a variable address
4083  WARN_MUST_RET_FP, ///< 17 function return type is incorrect (must be floating point)
4084  WARN_ILL_FPU_STACK, ///< 18 inconsistent fpu stack
4085  WARN_SELFREF_PROP, ///< 19 self-referencing variable has been detected
4086  WARN_WOULD_OVERLAP, ///< 20 variables would overlap: %s
4087  WARN_ARRAY_INARG, ///< 21 array has been used for an input argument
4088  WARN_MAX_ARGS, ///< 22 too many input arguments, some ignored
4089  WARN_BAD_FIELD_TYPE,///< 23 incorrect structure member type for %s::%s, ignored
4090  WARN_WRITE_CONST, ///< 24 write access to const memory at %a has been detected
4091  WARN_BAD_RETVAR, ///< 25 wrong return variable
4092  WARN_FRAG_LVAR, ///< 26 fragmented variable at %s may be wrong
4093  WARN_HUGE_STKOFF, ///< 27 exceedingly huge offset into the stack frame
4094  WARN_UNINITED_REG, ///< 28 reference to an uninitialized register has been removed: %s
4095  WARN_FIXED_MACRO, ///< 29 fixed broken macro-insn
4096  WARN_WRONG_VA_OFF, ///< 30 wrong offset of va_list variable
4097  WARN_CR_NOFIELD, ///< 31 CONTAINING_RECORD: no field '%s' in struct '%s' at %d
4098  WARN_CR_BADOFF, ///< 32 CONTAINING_RECORD: too small offset %d for struct '%s'
4099  WARN_BAD_STROFF, ///< 33 user specified stroff has not been processed: %s
4100  WARN_BAD_VARSIZE, ///< 34 inconsistent variable size for '%s'
4101  WARN_UNSUPP_REG, ///< 35 unsupported processor register '%s'
4102  WARN_UNALIGNED_ARG, ///< 36 unaligned function argument '%s'
4103  WARN_BAD_STD_TYPE, ///< 37 corrupted or unexisting local type '%s'
4104  WARN_BAD_CALL_SP, ///< 38 bad sp value at call
4105  WARN_MISSED_SWITCH, ///< 39 wrong markup of switch jump, skipped it
4106  WARN_BAD_SP, ///< 40 positive sp value %a has been found
4107  WARN_BAD_STKPNT, ///< 41 wrong sp change point
4108  WARN_UNDEF_LVAR, ///< 42 variable '%s' is possibly undefined
4109  WARN_JUMPOUT, ///< 43 control flows out of bounds
4110  WARN_BAD_VALRNG, ///< 44 values range analysis failed
4111  WARN_BAD_SHADOW, ///< 45 ignored the value written to the shadow area of the succeeding call
4112  WARN_OPT_VALRNG, ///< 46 conditional instruction was optimized away because of '%s'
4113  WARN_RET_LOCREF, ///< 47 returning address of temporary local variable '%s'
4114 
4115  WARN_MAX, ///< may be used in notes as a placeholder when the
4116  ///< warning id is not available.
4116  ///< it follows the last real warning id
4117 };
4118 
4119 /// Warning instances
4121 {
4122  ea_t ea; ///< Address where the warning occurred
4123  warnid_t id; ///< Warning id
4124  qstring text; ///< Fully formatted text of the warning
4125  DECLARE_COMPARISONS(hexwarn_t) // the body below orders warnings by ea, then by id, then by text
4126  {
4127  if ( ea < r.ea ) // R is the other operand (presumably declared by the macro - confirm)
4128  return -1;
4129  if ( ea > r.ea )
4130  return 1;
4131  if ( id < r.id )
4132  return -1;
4133  if ( id > r.id )
4134  return 1;
4135  return strcmp(text.c_str(), r.text.c_str()); // tie-break on the text; strcmp's result is returned as is
4136  }
4137 };
4138 DECLARE_TYPE_AS_MOVABLE(hexwarn_t);
4139 typedef qvector<hexwarn_t> hexwarns_t; ///< vector of warnings
4140 
4141 //-------------------------------------------------------------------------
4142 /// Microcode maturity levels, listed in increasing order of their values
4144 {
4145  MMAT_ZERO, ///< microcode does not exist
4146  MMAT_GENERATED, ///< generated microcode
4147  MMAT_PREOPTIMIZED, ///< preoptimization pass is complete
4148  MMAT_LOCOPT, ///< local optimization of each basic block is complete.
4149  ///< control flow graph is ready too.
4150  MMAT_CALLS, ///< detected call arguments
4151  MMAT_GLBOPT1, ///< performed the first pass of global optimization
4152  MMAT_GLBOPT2, ///< most global optimization passes are done
4153  MMAT_GLBOPT3, ///< completed all global optimization. microcode is fixed now.
4154  MMAT_LVARS, ///< allocated local variables
4155 };
4156 
4157 //-------------------------------------------------------------------------
4158 enum memreg_index_t ///< memory region types; the enumerators take the values 0..5 in the order listed
4159 {
4160  MMIDX_GLBLOW, ///< global memory: low part
4161  MMIDX_LVARS, ///< stack: local variables
4162  MMIDX_RETADDR, ///< stack: return address
4163  MMIDX_SHADOW, ///< stack: shadow arguments
4164  MMIDX_ARGS, ///< stack: regular stack arguments
4165  MMIDX_GLBHIGH, ///< global memory: high part
4166 };
4167 
4168 //-------------------------------------------------------------------------
4169 /// Ranges to decompile. Either a function or an explicit vector of ranges.
4171 {
4172  func_t *pfn; ///< function to decompile
4173  rangevec_t ranges; ///< empty ? function_mode : snippet mode
4174  mba_ranges_t(func_t *_pfn=NULL) : pfn(_pfn) {} ///< function mode: RANGES stays empty
4175  mba_ranges_t(const rangevec_t &r) : pfn(NULL), ranges(r) {} ///< snippet mode: copies R, PFN is NULL
4176  ea_t start(void) const { return (ranges.empty() ? *pfn : ranges[0]).start_ea; } ///< start_ea of the first range, or of *pfn if RANGES is empty (PFN must not be NULL then)
4177  bool empty(void) const { return pfn == NULL && ranges.empty(); } ///< neither a function nor ranges?
4178  void clear(void) { pfn = NULL; ranges.clear(); } ///< reset to the empty state
4179  bool is_snippet(void) const { return !ranges.empty(); } ///< snippet mode means RANGES is not empty
4180  bool range_contains(ea_t ea) const;
4181  bool is_fragmented(void) const { return ranges.empty() ? pfn->tailqty > 0 : ranges.size() > 1; } ///< function mode: pfn->tailqty > 0 (PFN must not be NULL); snippet mode: more than one range
4182 };
4183 
4184 /// Item iterator of arbitrary rangevec items
4186 {
4187  const rangevec_t *ranges; // NULL until a range vector is attached (presumably by set() - its body is not visible here)
4188  const range_t *rptr; // pointer into ranges
4189  ea_t cur; // current address
4190  range_item_iterator_t(void) : ranges(NULL), rptr(NULL), cur(BADADDR) {} // starts detached: no ranges, current() is BADADDR
4191  bool set(const rangevec_t &r);
4192  bool next_code(void);
4193  ea_t current(void) const { return cur; } // returns the current address
4194 };
4195 
4196 /// Item iterator for mba_ranges_t.
4196 /// Every call is dispatched to RII in snippet mode and to FII otherwise.
4198 {
4200  func_item_iterator_t fii; // this is used if rii.ranges==NULL
4201  bool is_snippet(void) const { return rii.ranges != NULL; } // snippet mode is recognized by a non-NULL rii.ranges
4202  bool set(const mba_ranges_t &mbr) // attach to MBR: its ranges in snippet mode, its function otherwise
4203  {
4204  if ( mbr.is_snippet() )
4205  return rii.set(mbr.ranges);
4206  else
4207  return fii.set(mbr.pfn);
4208  }
4209  bool next_code(void) // forwards to next_code() of the active iterator
4210  {
4211  if ( is_snippet() )
4212  return rii.next_code();
4213  else
4214  return fii.next_code();
4215  }
4216  ea_t current(void) const // current address of the active iterator
4217  {
4218  return is_snippet() ? rii.current() : fii.current();
4219  }
4220 };
4221 
4222 /// Chunk iterator of arbitrary rangevec items
4224 {
4225  const range_t *rptr; // pointer into ranges
4226  const range_t *rend; // end of the iteration (r.end() of the vector passed to set())
4227  range_chunk_iterator_t(void) : rptr(NULL), rend(NULL) {} // starts detached: both pointers are NULL
4228  bool set(const rangevec_t &r) { rptr = r.begin(); rend = r.end(); return rptr != rend; } // keeps pointers into R; returns false if R is empty
4229  bool next(void) { return ++rptr != rend; } // advance first, then report whether a chunk is still available
4230  const range_t &chunk(void) const { return *rptr; } // current chunk; dereferences rptr without any check
4231 };
4232 
4233 /// Chunk iterator for mba_ranges_t.
4233 /// Every call is dispatched to RII in snippet mode and to FII otherwise.
4235 {
4237  func_tail_iterator_t fii; // this is used if rii.rptr==NULL
4238  bool is_snippet(void) const { return rii.rptr != NULL; } // snippet mode is recognized by a non-NULL rii.rptr
4239  bool set(const mba_ranges_t &mbr) // attach to MBR: its ranges in snippet mode, its function otherwise
4240  {
4241  if ( mbr.is_snippet() )
4242  return rii.set(mbr.ranges);
4243  else
4244  return fii.set(mbr.pfn);
4245  }
4246  bool next(void) // forwards to next() of the active iterator
4247  {
4248  if ( is_snippet() )
4249  return rii.next();
4250  else
4251  return fii.next();
4252  }
4253  const range_t &chunk(void) const // current chunk of the active iterator
4254  {
4255  return is_snippet() ? rii.chunk() : fii.chunk();
4256  }
4257 };
4258 
4259 //-------------------------------------------------------------------------
4260 /// Array of micro blocks representing microcode for a decompiled function.
4261 /// The first micro block is the entry point, the last one is the exit point.
4262 /// The entry and exit blocks are always empty. The exit block is generated
4263 /// at MMAT_LOCOPT maturity level.
4264 class mbl_array_t
4265 {
4266  DECLARE_UNCOPYABLE(mbl_array_t)
4267  uint32 flags;
4268  uint32 flags2;
4269 
4270 public:
4271  // bits to describe the microcode, set by the decompiler
4272 #define MBA_PRCDEFS 0x00000001 ///< use precise defeas for chain-allocated lvars
4273 #define MBA_NOFUNC 0x00000002 ///< function is not present, addresses might be wrong
4274 #define MBA_PATTERN 0x00000004 ///< microcode pattern, callinfo is present
4275 #define MBA_LOADED 0x00000008 ///< loaded gdl, no instructions (debugging)
4276 #define MBA_RETFP 0x00000010 ///< function returns floating point value
4277 #define MBA_SPLINFO 0x00000020 ///< (final_type ? idb_spoiled : spoiled_regs) is valid
4278 #define MBA_PASSREGS 0x00000040 ///< has mcallinfo_t::pass_regs
4279 #define MBA_THUNK 0x00000080 ///< thunk function
4280 #define MBA_CMNSTK 0x00000100 ///< stkvars+stkargs should be considered as one area
4281 
4282  // bits to describe analysis stages and requests
4283 #define MBA_PREOPT 0x00000200 ///< preoptimization stage complete
4284 #define MBA_CMBBLK 0x00000400 ///< request to combine blocks
4285 #define MBA_ASRTOK 0x00000800 ///< assertions have been generated
4286 #define MBA_CALLS 0x00001000 ///< callinfo has been built
4287 #define MBA_ASRPROP 0x00002000 ///< assertion have been propagated
4288 #define MBA_SAVRST 0x00004000 ///< save-restore analysis has been performed
4289 #define MBA_RETREF 0x00008000 ///< return type has been refined
4290 #define MBA_GLBOPT 0x00010000 ///< microcode has been optimized globally
4291 #define MBA_LVARS0 0x00040000 ///< lvar pre-allocation has been performed
4292 #define MBA_LVARS1 0x00080000 ///< lvar real allocation has been performed
4293 #define MBA_DELPAIRS 0x00100000 ///< pairs have been deleted once
4294 #define MBA_CHVARS 0x00200000 ///< can verify chain varnums
4295 
4296  // bits that can be set by the caller:
4297 #define MBA_SHORT 0x00400000 ///< use short display
4298 #define MBA_COLGDL 0x00800000 ///< display graph after each reduction
4299 #define MBA_INSGDL 0x01000000 ///< display instruction in graphs
4300 #define MBA_NICE 0x02000000 ///< apply transformations to c code
4301 #define MBA_REFINE 0x04000000 ///< may refine return value size
4302 #define MBA_WINGR32 0x10000000 ///< use wingraph32
4303 #define MBA_NUMADDR 0x20000000 ///< display definition addresses for numbers
4304 #define MBA_VALNUM 0x40000000 ///< display value numbers
4305 
4306 #define MBA_INITIAL_FLAGS (MBA_INSGDL|MBA_NICE|MBA_CMBBLK|MBA_REFINE\
4307  |MBA_PRCDEFS|MBA_WINGR32|MBA_VALNUM)
4308 
4309 #define MBA2_LVARNAMES_OK 0x00000001 // may verify lvar_names?
4310 #define MBA2_LVARS_RENAMED 0x00000002 // accept empty names now?
4311 #define MBA2_OVER_CHAINS 0x00000004 // has overlapped chains?
4312 #define MBA2_VALRNG_DONE 0x00000008 // calculated valranges?
4313 #define MBA2_IS_CTR 0x00000010 // is constructor?
4314 #define MBA2_IS_DTR 0x00000020 // is destructor?
4315 #define MBA2_ARGIDX_OK 0x00000040 // may verify input argument list?
4316 #define MBA2_NO_DUP_CALLS 0x00000080 // forbid multiple calls with the same ea
4317 #define MBA2_NO_DUP_LVARS 0x00000100 // forbid multiple lvars with the same ea
4318 #define MBA2_UNDEF_RETVAR 0x00000200 // return value is undefined
4319 
4320 #define MBA2_INITIAL_FLAGS (MBA2_LVARNAMES_OK|MBA2_LVARS_RENAMED)
4321 
4322 #define MBA2_ALL_FLAGS 0x000003FF
4323 
4324  bool precise_defeas(void) const { return (flags & MBA_PRCDEFS) != 0; } // this and the following predicates test one MBA_ bit of 'flags'
4325  bool optimized(void) const { return (flags & MBA_GLBOPT) != 0; }
4326  bool short_display(void) const { return (flags & MBA_SHORT ) != 0; } ///< use short display?
4327  bool show_reduction(void) const { return (flags & MBA_COLGDL) != 0; } ///< display graph after each reduction?
4328  bool graph_insns(void) const { return (flags & MBA_INSGDL) != 0; } ///< display instructions in graphs?
4329  bool loaded_gdl(void) const { return (flags & MBA_LOADED) != 0; }
4330  bool should_beautify(void)const { return (flags & MBA_NICE ) != 0; } ///< apply transformations to C code?
4331  bool rtype_refined(void) const { return (flags & MBA_RETREF) != 0; }
4332  bool may_refine_rettype(void) const { return (flags & MBA_REFINE) != 0; } ///< may refine return value size?
4333  bool use_wingraph32(void) const { return (flags & MBA_WINGR32) != 0; } ///< use wingraph32?
4334  bool display_numaddrs(void) const { return (flags & MBA_NUMADDR) != 0; } ///< display definition addresses for numbers?
4335  bool display_valnums(void) const { return (flags & MBA_VALNUM) != 0; } ///< display value numbers?
4336  bool is_pattern(void) const { return (flags & MBA_PATTERN) != 0; }
4337  bool is_thunk(void) const { return (flags & MBA_THUNK) != 0; }
4338  bool saverest_done(void) const { return (flags & MBA_SAVRST) != 0; }
4339  bool callinfo_built(void) const { return (flags & MBA_CALLS) != 0; }
4340  bool really_alloc(void) const { return (flags & MBA_LVARS0) != 0; }
4341  bool lvars_allocated(void)const { return (flags & MBA_LVARS1) != 0; }
4342  bool chain_varnums_ok(void)const { return (flags & MBA_CHVARS) != 0; }
4343  bool returns_fpval(void) const { return (flags & MBA_RETFP) != 0; }
4344  bool has_passregs(void) const { return (flags & MBA_PASSREGS) != 0; }
4345  bool generated_asserts(void) const { return (flags & MBA_ASRTOK) != 0; }
4346  bool propagated_asserts(void) const { return (flags & MBA_ASRPROP) != 0; }
4347  bool deleted_pairs(void) const { return (flags & MBA_DELPAIRS) != 0; }
4348  bool common_stkvars_stkargs(void) const { return (flags & MBA_CMNSTK) != 0; }
4349  bool lvar_names_ok(void) const { return (flags2 & MBA2_LVARNAMES_OK) != 0; } // from here on the predicates test MBA2_ bits of 'flags2'; may verify lvar_names?
4350  bool lvars_renamed(void) const { return (flags2 & MBA2_LVARS_RENAMED) != 0; } ///< accept empty names now?
4351  bool has_over_chains(void) const { return (flags2 & MBA2_OVER_CHAINS) != 0; } ///< has overlapped chains?
4352  bool valranges_done(void) const { return (flags2 & MBA2_VALRNG_DONE) != 0; } ///< calculated valranges?
4353  bool argidx_ok(void) const { return (flags2 & MBA2_ARGIDX_OK) != 0; } ///< may verify input argument list?
4354  bool is_ctr(void) const { return (flags2 & MBA2_IS_CTR) != 0; } ///< is constructor?
4355  bool is_dtr(void) const { return (flags2 & MBA2_IS_DTR) != 0; } ///< is destructor?
4356  bool is_cdtr(void) const { return (flags2 & (MBA2_IS_CTR|MBA2_IS_DTR)) != 0; } ///< is constructor or destructor?
4357  int get_mba_flags(void) const { return flags; } ///< get all MBA_ bits
4358  int get_mba_flags2(void) const { return flags2; } ///< get all MBA2_ bits
4359  void set_mba_flags(int f) { flags |= f; } ///< turn on the MBA_ bits given in F (other bits are kept)
4360  void clr_mba_flags(int f) { flags &= ~f; } ///< turn off the MBA_ bits given in F
4361  void set_mba_flags2(int f) { flags2 |= f; } ///< turn on the MBA2_ bits given in F (other bits are kept)
4362  void clr_mba_flags2(int f) { flags2 &= ~f; } ///< turn off the MBA2_ bits given in F
4363  void clr_cdtr(void) { flags2 &= ~(MBA2_IS_CTR|MBA2_IS_DTR); } ///< clear both the constructor and the destructor bits
4364  int calc_shins_flags(void) const // build the SHINS_ bitmask that corresponds to the display flags
4365  {
4366  int shins_flags = 0;
4367  if ( short_display() ) // MBA_SHORT -> SHINS_SHORT
4368  shins_flags |= SHINS_SHORT;
4369  if ( display_valnums() ) // MBA_VALNUM -> SHINS_VALNUM
4370  shins_flags |= SHINS_VALNUM;
4371  if ( display_numaddrs() ) // MBA_NUMADDR -> SHINS_NUMADDR
4372  shins_flags |= SHINS_NUMADDR;
4373  return shins_flags; // 0 if none of the three display flags is set
4374  }
4375 
4376 /*
4377  +-----------+ <- inargtop
4378  | prmN |
4379  | ... | <- minargref
4380  | prm0 |
4381  +-----------+ <- inargoff
4382  |shadow_args|
4383  +-----------+
4384  | retaddr |
4385  frsize+frregs +-----------+ <- initial esp |
4386  | frregs | |
4387  +frsize +-----------+ <- typical ebp |
4388  | | | |
4389  | | | fpd |
4390  | | | |
4391  | frsize | <- current ebp |
4392  | | |
4393  | | |
4394  | | | stacksize
4395  | | |
4396  | | |
4397  | | <- minstkref |
4398  stkvar base off 0 +---.. | | | current
4399  | | | | stack
4400  | | | | pointer
4401  | | | | range
4402  |tmpstk_size| | | (what getspd() returns)
4403  | | | |
4404  | | | |
4405  +-----------+ <- minimal sp | | offset 0 for the decompiler (vd)
4406 
4407  There is a detail that may add confusion when working with stack variables.
4408  The decompiler does not use the same stack offsets as IDA.
4409  The picture above should explain the difference:
4410  - IDA stkoffs are displayed on the left, decompiler stkoffs - on the right
4411  - Decompiler stkoffs are always >= 0
4412  - IDA stkoff==0 corresponds to stkoff==tmpstk_size in the decompiler
4413  - See stkoff_vd2ida and stkoff_ida2vd below to convert between vd stkoffs and IDA stkoffs
4414 
4415 */
4416 
4417  // convert a stack offset used in vd to a stack offset used in the ida stack frame
      // (subtracts tmpstk_size, so the result is negative for offsets below tmpstk_size)
4418  sval_t stkoff_vd2ida(sval_t off) const
4419  {
4420  return off - tmpstk_size;
4421  }
4422  // convert an ida stack frame offset to a stack offset used in vd
      // (adds tmpstk_size; inverse of stkoff_vd2ida)
4423  sval_t stkoff_ida2vd(sval_t off) const
4424  {
4425  return off + tmpstk_size;
4426  }
4427  sval_t argbase() const // retsize + stacksize; passed as SPD by the member idaloc2vd()/vd2idaloc() overloads
4428  {
4429  return retsize + stacksize;
4430  }
4431  static vdloc_t hexapi idaloc2vd(const argloc_t &loc, int width, sval_t spd); // argloc_t -> vdloc_t conversion with an explicit SPD
4432  vdloc_t idaloc2vd(const argloc_t &loc, int width) const // same conversion, using argbase() of this mba as SPD
4433  {
4434  return idaloc2vd(loc, width, argbase());
4435  }
4436 
4437  static argloc_t hexapi vd2idaloc(const vdloc_t &loc, int width, sval_t spd); // vdloc_t -> argloc_t conversion with an explicit SPD
4438  argloc_t vd2idaloc(const vdloc_t &loc, int width) const // same conversion, using argbase() of this mba as SPD
4439  {
4440  return vd2idaloc(loc, width, argbase());
4441  }
4442 
4443  bool is_stkarg(const lvar_t &v) const // is V a stack variable located at or above inargoff?
4444  {
4445  return v.is_stk_var() && v.get_stkoff() >= inargoff;
4446  }
4447  member_t *get_stkvar(sval_t vd_stkoff, uval_t *poff) const; // NOTE(review): takes a vd stack offset; the meaning of POFF is not visible here - confirm
4448  // get lvar location in ida format (converts v.location with v.width by means of vd2idaloc)
4449  argloc_t get_ida_argloc(const lvar_t &v) const
4450  {
4451  return vd2idaloc(v.location, v.width);
4452  }
4453  mba_ranges_t mbr; ///< provides pfn (see get_curfunc, use_frame), range_contains() and is_snippet()
4454  ea_t entry_ea;
4455  ea_t last_prolog_ea;
4456  ea_t first_epilog_ea;
4457  int qty; ///< number of basic blocks
4458  int npurged; ///< -1 - unknown
4459  cm_t cc; ///< calling convention
4460  sval_t tmpstk_size; ///< size of the temporary stack part
4461  ///< (which dynamically changes with push/pops)
      ///< vd stkoff == ida stkoff + tmpstk_size, see stkoff_ida2vd()
4462  sval_t frsize; ///< size of local stkvars range in the stack frame
4463  sval_t frregs; ///< size of saved registers range in the stack frame
4464  sval_t fpd; ///< frame pointer delta
4465  int pfn_flags; ///< copy of func_t::flags
4466  int retsize; ///< size of return address in the stack frame
4467  int shadow_args; ///< size of shadow argument area
4468  sval_t fullsize; ///< Full stack size including incoming args
4469  sval_t stacksize; ///< The maximal size of the function stack including
4470  ///< bytes allocated for outgoing call arguments
4471  ///< (up to retaddr)
4472  sval_t inargoff; ///< offset of the first stack argument;
4473  ///< after fix_scattered_movs() INARGOFF may
4474  ///< be less than STACKSIZE
4475  sval_t minstkref; ///< The lowest stack location whose address was taken
4476  ea_t minstkref_ea; ///< address with lowest minstkref (for debugging)
4477  sval_t minargref; ///< The lowest stack argument location whose address was taken
4478  ///< This location and locations above it can be aliased
4479  ///< It controls locations >= inargoff-shadow_args
4480  sval_t spd_adjust; ///< If sp>0, the max positive sp value
4481  ivl_t aliased_vars; ///< Aliased stkvar locations
4482  ivl_t aliased_args; ///< Aliased stkarg locations
4483  ivlset_t gotoff_stkvars; ///< stkvars that hold .got offsets. considered to be unaliasable
4484  ivlset_t restricted_memory; ///< complement of aliased_memory (see the next field)
4485  ivlset_t aliased_memory; ///< aliased_memory+restricted_memory=ALLMEM
4486  mlist_t nodel_memory; ///< global dead elimination may not delete references to this area
4487  rlist_t consumed_argregs; ///< registers converted into stack arguments, should not be used as arguments
4488 
4489  mba_maturity_t maturity; ///< current maturity level
4490  mba_maturity_t reqmat; ///< required maturity level
4491 
4492  bool final_type; ///< is the function type final? (specified by the user)
4493  tinfo_t idb_type; ///< function type as retrieved from the database
4494  reginfovec_t idb_spoiled; ///< MBA_SPLINFO && final_type: info in ida format
4495  mlist_t spoiled_list; ///< MBA_SPLINFO && !final_type: info in vd format
4496  int fti_flags; ///< FTI_... constants for the current function
4497 
4498  netnode idb_node;
4499 #define NALT_VD 2 ///< this index is not used by ida
4500 
4501  qstring label; ///< name of the function or pattern (colored)
4502  lvars_t vars; ///< local variables
4503  intvec_t argidx; ///< input arguments (indexes into 'vars')
4504  int retvaridx; ///< index of variable holding the return value
4505  ///< -1 means none
4506 
4507  ea_t error_ea; ///< during microcode generation holds ins.ea
4508  qstring error_strarg;
4509 
4510  mblock_t *blocks; ///< double linked list of blocks
4511  mblock_t **natural; ///< natural order of blocks
      ///< (indexed by the block serial number, see get_mblock())
4512 
4513  ivl_with_name_t std_ivls[6]; ///< we treat memory as consisting of 6 parts
4514  ///< see \ref memreg_index_t
4515 
4516  mutable hexwarns_t notes;
4517  mutable uchar occurred_warns[32]; ///< occurred warning messages
4518  ///< (even disabled warnings are taken into account)
      ///< a bit array indexed by WARN_... codes, tested with test_bit()
4519  bool write_to_const_detected(void) const // is the WARN_WRITE_CONST bit set in occurred_warns?
4520  {
4521  return test_bit(occurred_warns, WARN_WRITE_CONST);
4522  }
4523  bool bad_call_sp_detected(void) const // is the WARN_BAD_CALL_SP bit set in occurred_warns?
4524  {
4525  return test_bit(occurred_warns, WARN_BAD_CALL_SP);
4526  }
4527  bool regargs_is_not_aligned(void) const // is the WARN_UNALIGNED_ARG bit set in occurred_warns?
4528  {
4529  return test_bit(occurred_warns, WARN_UNALIGNED_ARG);
4530  }
4531  bool has_bad_sp(void) const // is the WARN_BAD_SP bit set in occurred_warns?
4532  {
4533  return test_bit(occurred_warns, WARN_BAD_SP);
4534  }
4535 
4536  // the exact size of this class is not documented, there may be more fields
4537  char reserved[];
4538  mbl_array_t(void);
4539  ~mbl_array_t(void) { term(); } // all cleanup is delegated to term()
4540  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
4541  void hexapi term(void);
4542  func_t *get_curfunc(void) const { return mbr.pfn; } // may be NULL, see use_frame()
4543  bool use_frame(void) const { return mbr.pfn != NULL; } // true if mbr has a function pointer
4544  bool range_contains(ea_t ea) const { return mbr.range_contains(ea); } // forwarded to mbr
4545  bool is_snippet(void) const { return mbr.is_snippet(); } // forwarded to mbr
4546  /// Optimize each basic block locally.
4547  /// This function is called by the decompiler, usually there is no need to
4548  /// call it explicitly.
4549  /// \param locopt_bits combination of \ref LOCOPT_ bits
4550  /// \return number of changes. 0 means nothing changed
4551  int hexapi optimize_local(int locopt_bits);
4552  /// \defgroup LOCOPT_ Bits for optimize_local()
4553  //@{
4554 #define LOCOPT_ALL 0x0001 ///< redo optimization for all blocks. if this bit
4555  ///< is not set, only dirty blocks will be optimized
4556 #define LOCOPT_REFINE 0x0002 ///< refine return type, ok to fail
4557 #define LOCOPT_REFINE2 0x0004 ///< refine return type, try harder
4558  //@}
4559 
4560  /// Build control flow graph.
4561  /// This function may be called only once. It calculates the type of each
4562  /// basic block and the adjacency list. optimize_local() calls this function
4563  /// if necessary. You need to call this function only before MMAT_LOCOPT.
4564  /// \return error code
4565  merror_t hexapi build_graph(void);
4566 
4567  /// Get control graph.
4568  /// Call build_graph() if you need the graph before MMAT_LOCOPT.
      /// \return pointer to the control flow graph
4569  mbl_graph_t *hexapi get_graph(void);
4570 
4571  /// Analyze calls and determine calling conventions.
4572  /// \param acflags permitted actions that are necessary for successful detection
4573  /// of calling conventions. See \ref ACFL_
4574  /// \return number of calls. -1 means error.
4575  int hexapi analyze_calls(int acflags);
4576  /// \defgroup ACFL_ Bits for analyze_calls()
4577  //@{
4578 #define ACFL_LOCOPT 0x01 ///< perform local propagation (requires ACFL_BLKOPT)
4579 #define ACFL_BLKOPT 0x02 ///< perform interblock transformations
4580 #define ACFL_GLBPROP 0x04 ///< perform global propagation
4581 #define ACFL_GLBDEL 0x08 ///< perform dead code elimination
4582 #define ACFL_GUESS 0x10 ///< may guess calling conventions
4583  //@}
4584 
4585  /// Optimize microcode globally.
4586  /// This function applies various optimization methods until we reach the
4587  /// fixed point. After that it preallocates lvars unless reqmat forbids it.
4588  /// \return error code
4589  merror_t hexapi optimize_global(void);
4590 
4591  /// Allocate local variables.
4592  /// Must be called only immediately after optimize_global(), with no
4593  /// modifications to the microcode. Converts registers,
4594  /// stack variables, and similar operands into mop_l. This call will not fail
4595  /// because all necessary checks were performed in optimize_global().
4596  /// After this call the microcode reaches its final state.
4597  void hexapi alloc_lvars(void);
4598 
4599  /// Dump microcode to a file.
4600  /// The file will be created in the directory pointed to by the IDA_DUMPDIR envvar.
4601  /// Dump will be created only if IDA is run under a debugger.
4602  void hexapi dump(void) const;
      // va_list flavor of dump_mba(); TITLE is a printf-style format string
4603  AS_PRINTF(3, 0) void hexapi vdump_mba(bool _verify, const char *title, va_list va) const;
      // variadic wrapper: packs the arguments into a va_list and calls vdump_mba()
4604  AS_PRINTF(3, 4) void dump_mba(bool _verify, const char *title, ...) const
4605  {
4606  va_list va;
4607  va_start(va, title);
4608  vdump_mba(_verify, title, va);
4609  va_end(va);
4610  }
4611 
4612  /// Print microcode to any destination.
4613  /// \param vp print sink
4614  void hexapi print(vd_printer_t &vp) const;
4615 
4616  /// Verify microcode consistency.
4617  /// \param always if false, the check will be performed only if ida runs
4618  /// under a debugger
4619  /// If any inconsistency is discovered, an internal error will be generated.
4620  /// We strongly recommend that you call this function before returning control
4621  /// to the decompiler from your callbacks if you have modified
4622  /// the microcode.
4623  void hexapi verify(bool always) const;
4624 
4625  /// Mark the microcode use-def chains dirty.
4626  /// Call this function if any inter-block data dependencies got changed
4627  /// because of your modifications to the microcode. Failing to do so may
4628  /// cause an internal error.
4629  void hexapi mark_chains_dirty(void);
4630 
4631  /// Get basic block by its serial number.
      /// N is used directly as an index into 'natural'; no range check is made here.
4632  const mblock_t *get_mblock(int n) const { return natural[n]; }
      // non-const flavor: reuses the const one and casts the constness away
4633  mblock_t *get_mblock(int n) { return CONST_CAST(mblock_t*)((CONST_CAST(const mbl_array_t *)(this))->get_mblock(n)); }
4634 
4635  /// Insert a block in the middle of the mbl array.
4636  /// The very first block of microcode must be empty, it is the entry block.
4637  /// The very last block of microcode must be BLT_STOP, it is the exit block.
4638  /// Therefore inserting a new block before the entry point or after the exit
4639  /// block is not a good idea.
4640  /// \param bblk the new block will be inserted before BBLK
4641  /// \return ptr to the new block
4642  mblock_t *hexapi insert_block(int bblk);
4643 
4644  /// Delete a block.
4645  /// \param blk block to delete
4646  /// \return true if at least one of the other blocks became empty or unreachable
4647  bool hexapi remove_block(mblock_t *blk);
4648 
4649  /// Make a copy of a block.
4650  /// This function makes a simple copy of the block. It does not fix the
4651  /// predecessor and successor lists, they must be fixed if necessary.
4652  /// \param blk block to copy
4653  /// \param new_serial position of the copied block
4654  /// \param cpblk_flags combination of \ref CPBLK_ bits
      /// (the default value 3 is CPBLK_MINREF|CPBLK_OPTJMP)
4655  /// \return pointer to the new copy
4656  mblock_t *hexapi copy_block(mblock_t *blk, int new_serial, int cpblk_flags=3);
4657 /// \defgroup CPBLK_ Bits for copy_block()
4658 //@{
4659 #define CPBLK_FAST 0x0000 ///< do not update minbstkref and minbargref
4660 #define CPBLK_MINREF 0x0001 ///< update minbstkref and minbargref
4661 #define CPBLK_OPTJMP 0x0002 ///< del the jump insn at the end of the block
4662  ///< if it becomes useless
4663 //@}
4664 
4665  /// Delete all empty blocks.
4666  bool hexapi remove_empty_blocks(void);
4667 
4668  /// Combine blocks.
4669  /// This function merges blocks constituting linear flow.
4670  /// It calls remove_empty_blocks() as well.
4671  /// \return true if changed any blocks
4672  bool hexapi combine_blocks(void);
4673 
4674  /// Visit all operands of all instructions.
4675  /// \param mv operand visitor
4676  /// \return non-zero value returned by mv.visit_mop() or zero
4677  int hexapi for_all_ops(mop_visitor_t &mv);
4678 
4679  /// Visit all instructions.
4680  /// This function visits all instructions and subinstructions.
4681  /// \param mv instruction visitor
4682  /// \return non-zero value returned by the callback of the instruction visitor or zero
4683  int hexapi for_all_insns(minsn_visitor_t &mv);
4684 
4685  /// Visit all top level instructions.
4686  /// \param mv instruction visitor
4687  /// \return non-zero value returned by the callback of the instruction visitor or zero
4688  int hexapi for_all_topinsns(minsn_visitor_t &mv);
4689 
4690  /// Find an operand in the microcode.
4691  /// This function tries to find the operand that matches LIST.
4692  /// Any operand that overlaps with LIST is considered as a match.
4693  /// \param[out] ctx context information for the result
4694  /// \param ea desired address of the operand
4695  /// \param is_dest search for destination operand? this argument may be
4696  /// ignored if the exact match could not be found
4697  /// \param list list of locations that correspond to the operand
4698  /// \return pointer to the operand or NULL.
4699  mop_t *hexapi find_mop(op_parent_info_t *ctx, ea_t ea, bool is_dest, const mlist_t &list);
4700 
4701  /// Get input argument of the decompiled function.
4702  /// \param n argument number (0..nargs-1)
4703  lvar_t &hexapi arg(int n);
      // const flavor: casts the constness away and reuses the non-const arg()
4704  const lvar_t &arg(int n) const { return CONST_CAST(mbl_array_t*)(this)->arg(n); }
4705 
4706  /// Get information about various memory regions.
4707  /// We map the stack frame to the global memory, to some unused range.
      /// The getters below are defined inline after the class; each returns an
      /// interval taken from std_ivls.
4708  const ivl_t &get_std_region(memreg_index_t idx) const;
4709  const ivl_t &get_lvars_region(void) const;
4710  const ivl_t &get_shadow_region(void) const;
4711  const ivl_t &get_args_region(void) const;
4712  ivl_t get_stack_region(void) const; // get entire stack region (returned by value)
4713 
4714  /// Serialize mbl array into a sequence of bytes.
      /// \param[out] vout receives the bytes
4715  void hexapi serialize(bytevec_t &vout) const;
4716 
4717  /// Deserialize a byte sequence into mbl array.
4718  /// \param bytes pointer to the beginning of the byte sequence.
4719  /// \param nbytes number of bytes in the byte sequence.
4720  /// \return new mbl array
4721  static mbl_array_t *hexapi deserialize(const uchar *bytes, size_t nbytes);
4722 
4723 };
4724 //-------------------------------------------------------------------------
4725 /// Convenience class to release graph chains automatically.
4726 /// Use this class instead of using graph_chains_t directly.
     /// The constructor calls gc->acquire() and the destructor calls gc->release().
     /// NOTE(review): only the copy assignment is blocked below; the implicit copy
     /// constructor stays available and a copy would call release() one more time
     /// than acquire() - confirm that objects of this class are never copied.
4728 {
4729  graph_chains_t *gc;
4730  chain_keeper_t &operator=(const chain_keeper_t &); // not defined
4731 public:
      // _GC must not be NULL (checked with QASSERT 50446)
4732  chain_keeper_t(graph_chains_t *_gc) : gc(_gc) { QASSERT(50446, gc != NULL); gc->acquire(); }
4733  ~chain_keeper_t(void)
4734  {
4735  gc->release();
4736  }
      // the members below just forward to the kept graph_chains_t
4737  block_chains_t &operator[](size_t idx) { return (*gc)[idx]; }
4738  block_chains_t &front(void) { return gc->front(); }
4739  block_chains_t &back(void) { return gc->back(); }
4740  operator graph_chains_t &(void) { return *gc; }
4741  int for_all_chains(chain_visitor_t &cv, int gca) { return gc->for_all_chains(cv, gca); }
4742  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
4743 };
4744 
4745 //-------------------------------------------------------------------------
4746 /// Kind of use-def and def-use chains
4748 {
4749  GC_REGS_AND_STKVARS, ///< registers and stkvars (restricted memory only)
4750  GC_ASR, ///< all the above and assertions
4751  GC_XDSU, ///< only registers calculated with FULL_XDSU
4752  GC_END, ///< number of chain types
4753  GC_DIRTY_ALL = (1 << (2*GC_END))-1, ///< bitmask to represent all chains
      ///< (2*GC_END low bits: one use-def bit and one def-use bit per chain type)
4754 };
4755 
4756 //-------------------------------------------------------------------------
4757 /// Control flow graph of microcode.
4759 {
4760  mbl_array_t *mba; ///< pointer to the mbl array
4761  int dirty; ///< what kinds of use-def chains are dirty?
4762  int chain_stamp; ///< we increment this counter each time chains are recalculated
4763  graph_chains_t gcs[2*GC_END]; ///< cached use-def chains
4764 
4765  /// Is LIST accessed between two instructions?
4766  /// This function can analyze all paths between the specified instructions
4767  /// and find if the specified list is used in any of them. The instructions
4768  /// may be located in different basic blocks. This function does not use
4769  /// use-def chains but uses the graph for analysis. It may be slow in some
4770  /// cases but its advantage is that it does not require building the use-def
4771  /// chains.
4772  /// \param list list to verify
4773  /// \param b1 starting block
4774  /// \param b2 ending block. may be -1, it means all possible paths from b1
4775  /// \param m1 starting instruction (in b1)
4776  /// \param m2 ending instruction (in b2). excluded. may be NULL.
4777  /// \param access_type read or write access?
4778  /// \param maymust may access or must access?
4779  /// \return true if found an access to the list
4780  bool hexapi is_accessed_globally(
4781  const mlist_t &list, // list to verify
4782  int b1, // starting block
4783  int b2, // ending block
4784  const minsn_t *m1, // starting instruction (in b1)
4785  const minsn_t *m2, // ending instruction (in b2)
4786  access_type_t access_type,
4787  maymust_t maymust) const;
      // Each chain type owns two consecutive slots: the even one (2*gctype) for
      // use-def and the odd one (2*gctype+1) for def-use. The dirty bit of a
      // slot is 1 << slot.
      // NOTE(review): presumably the slot is also the index into 'gcs' - confirm.
4788  int get_ud_gc_idx(gctype_t gctype) const { return (gctype << 1); }
4789  int get_du_gc_idx(gctype_t gctype) const { return (gctype << 1)+1; }
4790  int get_ud_dirty_bit(gctype_t gctype) { return 1 << get_ud_gc_idx(gctype); }
4791  int get_du_dirty_bit(gctype_t gctype) { return 1 << get_du_gc_idx(gctype); }
4792 
4793 public:
4794  /// Is the use-def chain of the specified kind dirty?
4796  {
4797  int bit = get_ud_dirty_bit(gctype);
4798  return (dirty & bit) != 0;
4799  }
4800 
4801  /// Is the def-use chain of the specified kind dirty?
4803  {
4804  int bit = get_du_dirty_bit(gctype);
4805  return (dirty & bit) != 0;
4806  }
      /// Get the current value of the chain recalculation counter.
4807  int get_chain_stamp(void) const { return chain_stamp; }
4808 
4809  /// Get use-def chains.
4810  graph_chains_t *hexapi get_ud(gctype_t gctype);
4811 
4812  /// Get def-use chains.
4813  graph_chains_t *hexapi get_du(gctype_t gctype);
4814 
4815  /// Is LIST redefined in the graph?
      /// Shortcut for is_accessed_globally() with WRITE_ACCESS.
4816  bool is_redefined_globally(const mlist_t &list, int b1, int b2, const minsn_t *m1, const minsn_t *m2, maymust_t maymust=MAY_ACCESS) const
4817  { return is_accessed_globally(list, b1, b2, m1, m2, WRITE_ACCESS, maymust); }
4818 
4819  /// Is LIST used in the graph?
      /// Shortcut for is_accessed_globally() with READ_ACCESS.
4820  bool is_used_globally(const mlist_t &list, int b1, int b2, const minsn_t *m1, const minsn_t *m2, maymust_t maymust=MAY_ACCESS) const
4821  { return is_accessed_globally(list, b1, b2, m1, m2, READ_ACCESS, maymust); }
4822 
      /// Get basic block by its serial number (forwarded to mba).
4823  mblock_t *get_mblock(int n) const { return mba->get_mblock(n); }
4824 };
4825 
4826 //-------------------------------------------------------------------------
4827 // helper for codegen_t. It takes into account delay slots
     // The iterator covers the address range [ea, end); see start() and ok().
4829 {
4830  ea_t ea; // next insn to decode
4831  ea_t end; // end of the block
4832  ea_t dslot; // address of the insn in the delay slot
4833  insn_t dslot_insn; // instruction in the delay slot
4834  bool is_separate_dslot; // the current insn is the separate delay slot
4835  // insn (when the delay slot starts a block)
4836  bool is_likely_dslot; // execute delay slot only when jumping
4837 
      // default state: all addresses are BADADDR and both flags are false;
      // dslot_insn is not mentioned in the initializer list
4839  : ea(BADADDR),
4840  end(BADADDR),
4841  dslot(BADADDR),
4842  is_separate_dslot(false),
4843  is_likely_dslot(false) {}
4844  cdg_insn_iterator_t(const cdg_insn_iterator_t &r) = default;
4845  cdg_insn_iterator_t &operator=(const cdg_insn_iterator_t &r) = default;
4846 
4847  bool ok() const { return ea < end; } // is there anything left before 'end'?
4848  bool has_dslot() const { return dslot != BADADDR; } // is a delay slot address recorded?
4849  bool dslot_with_xrefs() const { return dslot >= end; } // is the delay slot at or past 'end'?
4850  void start(const range_t &rng) // begin iterating over RNG (only ea and end are set)
4851  {
4852  ea = rng.start_ea;
4853  end = rng.end_ea;
4854  }
4855  merror_t hexapi next(insn_t *ins);
4856 };
4857 
4858 //-------------------------------------------------------------------------
4859 /// Helper class to generate the initial microcode
     /// analyze_prolog(), gen_micro() and load_operand() are pure virtual and
     /// must be provided by a derived class.
4861 {
4862 public:
4863  mbl_array_t *mba; // ptr to mbl array
4864  mblock_t *mb; // current basic block
4865  insn_t insn; // instruction to generate microcode for
4866  char ignore_micro; // value of get_ignore_micro() for the insn
4867  cdg_insn_iterator_t ii; // instruction iterator
4868 
      // starts with no current block and ignore_micro == IM_NONE
4869  codegen_t(mbl_array_t *m) : mba(m), mb(NULL), ignore_micro(IM_NONE) {}
4870  virtual ~codegen_t(void)
4871  {
4872  }
4873 
4874  /// Analyze prolog/epilog of the function to decompile.
4875  /// If prolog is found, allocate and fill 'mba->pi' structure.
4876  /// \param fc flow chart
4877  /// \param reachable bitmap of reachable blocks
4878  /// \return error code
4879  virtual merror_t idaapi analyze_prolog(
4880  const class qflow_chart_t &fc,
4881  const class bitset_t &reachable) = 0;
4882 
4883  /// Generate microcode for one instruction.
4884  /// The instruction is in INSN
4885  /// \return MERR_OK - all ok
4886  /// MERR_BLOCK - all ok, need to switch to new block
4887  /// MERR_BADBLK - delete current block and continue
4888  /// other error codes are fatal
4889  virtual merror_t idaapi gen_micro() = 0;
4890 
4891  /// Generate microcode to load one operand.
4892  virtual mreg_t idaapi load_operand(int opnum) = 0;
4893 
4894  /// Emit one microinstruction.
4895  /// See explanations for emit().
      /// The default implementation converts DTYPE to a size in bytes with
      /// get_dtype_size() and calls emit().
4896  virtual minsn_t *idaapi emit_micro_mvm(
4897  mcode_t code,
4898  op_dtype_t dtype,
4899  uval_t l,
4900  uval_t r,
4901  uval_t d,
4902  int offsize)
4903  {
4904  return emit(code, get_dtype_size(dtype), l, r, d, offsize);
4905  }
4906 
4907  /// Emit one microinstruction.
4908  /// The L, R, D arguments usually mean the register number. However, they depend
4909  /// on CODE. For example:
4910  /// - for m_goto and m_jcnd L is the target address
4911  /// - for m_ldc L is the constant value to load
4912  /// \param code instruction opcode
4913  /// \param width operand size in bytes
4914  /// \param l left operand
4915  /// \param r right operand
4916  /// \param d destination operand
4917  /// \param offsize for ldx/stx, the size of the offset operand
4918  /// for ldc, operand number of the constant value
4919  /// -1, set the FP instruction (e.g. for m_mov)
4920  /// \return created microinstruction. can be NULL if the instruction got
4921  /// immediately optimized away.
4922  minsn_t *hexapi emit(mcode_t code, int width, uval_t l, uval_t r, uval_t d, int offsize);
4923 
4924  /// Emit one microinstruction.
4925  /// This variant accepts pointers to operands. It is more difficult to use
4926  /// but permits creating virtually any instruction. Operands may be NULL
4927  /// when it makes sense.
4928  minsn_t *hexapi emit(mcode_t code, const mop_t *l, const mop_t *r, const mop_t *d);
4929 
4930 };
4931 
4932 //-------------------------------------------------------------------------
4933 /// Is a kernel register?
     /// \param r register to check
4934 bool hexapi is_kreg(mreg_t r);
4935 
4936 /// Get list of temporary registers.
4937 /// Tempregs are temporary registers that are used during code generation.
4938 /// They do not map to regular processor registers. They are used only to
4939 /// store temporary values during execution of one instruction.
4940 /// Tempregs may not be used to pass a value from one block to another.
4941 /// In other words, at the end of a block all tempregs must be dead.
     /// \return reference to the list of temporary registers
4942 const mlist_t &hexapi get_temp_regs(void);
4943 
4944 inline void mop_t::_make_insn(minsn_t *ins) // make this operand a mop_d operand that points to INS
4945 {
     // only the type and the pointer are overwritten; nothing else is done here
4946  t = mop_d;
4947  d = ins;
4948 }
4949 
4950 inline bool mop_t::has_side_effects(bool include_ldx_and_divs) const // false unless the operand is an instruction
4951 {
     // for an instruction operand the answer comes from the instruction itself
4952  return is_insn() && d->has_side_effects(include_ldx_and_divs);
4953 }
4954 
4955 inline bool mop_t::is_kreg(void) const // is this a mop_r operand whose register is a kernel register?
4956 {
     // '::' selects the global is_kreg(mreg_t), not this member function
4957  return t == mop_r && ::is_kreg(r);
4958 }
4959 
4960 inline minsn_t *mop_t::get_insn(mcode_t code) // the nested instruction if it has opcode CODE, otherwise NULL
4961 {
4962  return is_insn(code) ? d : NULL;
4963 }
4964 inline const minsn_t *mop_t::get_insn(mcode_t code) const // const flavor of the above
4965 {
4966  return is_insn(code) ? d : NULL;
4967 }
4968 
4969 inline bool mop_t::is_insn(mcode_t code) const // is this an instruction operand with opcode CODE?
4970 {
     // 'd' is dereferenced only after is_insn() confirmed the operand type
4971  return is_insn() && d->opcode == code;
4972 }
4973 
4974 inline bool mop_t::is_glbaddr() const // is this a mop_a operand whose target operand has type mop_v?
4975 {
4976  return t == mop_a && a->t == mop_v;
4977 }
4978 
4979 inline bool mop_t::is_glbaddr(ea_t ea) const // same as above, and the target's 'g' must be equal to EA
4980 {
4981  return is_glbaddr() && a->g == ea;
4982 }
4983 
4984 inline bool mop_t::is_stkaddr() const // is this a mop_a operand whose target operand has type mop_S?
4985 {
4986  return t == mop_a && a->t == mop_S;
4987 }
4988 
4989 inline vivl_t::vivl_t(const chain_t &ch) // build the interval described by the chain CH
     // type: taken from the chain key; offset: the register for register chains,
     // the stack offset otherwise; size: the chain width
4990  : voff_t(ch.key().type, ch.is_reg() ? ch.get_reg() : ch.get_stkoff()),
4991  size(ch.width)
4992 {
4993 }
4994 
4995 // The following memory regions exist
4996 // start length
4997 // ------------------------ ---------
4998 // lvars spbase stacksize
4999 // retaddr spbase+stacksize retsize
5000 // shadow spbase+stacksize+retsize shadow_args
5001 // args inargoff MAX_FUNC_ARGS*sp_width-shadow_args
5002 // globals data_segment sizeof_data_segment
5003 // heap everything else?
5004 
     // get the interval of the region IDX (IDX is used directly as an index into std_ivls)
5006 {
5007  return std_ivls[idx].ivl;
5008 }
5009 
5010 inline const ivl_t &mbl_array_t::get_lvars_region(void) const // the MMIDX_LVARS region
5011 {
5012  return get_std_region(MMIDX_LVARS);
5013 }
5014 
5015 inline const ivl_t &mbl_array_t::get_shadow_region(void) const // the MMIDX_SHADOW region
5016 {
5017  return get_std_region(MMIDX_SHADOW);
5018 }
5019 
5020 inline const ivl_t &mbl_array_t::get_args_region(void) const // the MMIDX_ARGS region
5021 {
5022  return get_std_region(MMIDX_ARGS);
5023 }
5024 
5025 inline ivl_t mbl_array_t::get_stack_region(void) const // starts where the lvars region starts, FULLSIZE bytes long
5026 {
5027  return ivl_t(std_ivls[MMIDX_LVARS].ivl.off, fullsize);
5028 }
5029 
5030 //-------------------------------------------------------------------------
5031 /// Get decompiler version.
5032 /// The returned string is of the form <major>.<minor>.<revision>.<build-date>
5033 /// \return pointer to version string. For example: "2.0.0.140605"
5034</