SDK Reference

hexrays.hpp
Go to the documentation of this file.
1 /*!
2  * Hex-Rays Decompiler project
3  * Copyright (c) 1990-2019 Hex-Rays
4  * ALL RIGHTS RESERVED.
5  * \mainpage
6  * There are 2 representations of the binary code in the decompiler:
7  * - microcode: processor instructions are translated into it and then
8  * the decompiler optimizes and transforms it
9  * - ctree: ctree is built from the optimized microcode and represents
10  * AST-like tree with C statements and expressions. It can
11  * be printed as C code.
12  *
13  * Microcode is represented by the following classes:
14  * - mbl_array_t keeps general info about the decompiled code and
15  * array of basic blocks. usually mbl_array_t is named 'mba'
16  * - mblock_t a basic block. includes list of instructions
17  * - minsn_t an instruction. contains 3 operands: left, right, and
18  * destination
19  * - mop_t an operand. depending on its type may hold various info
20  * like a number, register, stack variable, etc.
21  * - mlist_t list of memory or register locations; can hold vast areas
22  * of memory and multiple registers. this class is used
23  * very extensively in the decompiler. it may represent
24  * list of locations accessed by an instruction or even
25  * an entire basic block. it is also used as argument of
26  * many functions. for example, there is a function
27  * that searches for an instruction that refers to a mlist_t.
28 
29  * See http://www.hexblog.com/?p=1232 for some pictures.
30  *
31  * Ctree is represented by:
32  * - cfunc_t keeps general info about the decompiled code, including a
33  * pointer to mbl_array_t. deleting cfunc_t will delete
34  * mbl_array_t too (however, decompiler returns cfuncptr_t,
35  * which is a reference counting object and deletes the
36  * underlying function as soon as all references to it go
37  * out of scope). cfunc_t has 'body', which represents the
38  * decompiled function body as cinsn_t.
39  * - cinsn_t a C statement. can be a compound statement or any other
40  * legal C statements (like if, for, while, return,
41  * expression-statement, etc). depending on the statement
42  * type has pointers to additional info. for example, the
43  * 'if' statement has a pointer to cif_t, which holds the
44  * 'if' condition, 'then' branch, and optionally 'else'
45  * branch. Please note that despite the name cinsn_t
46  * we say "statements", not "instructions". For us
47  * instructions are part of microcode, not ctree.
48  * - cexpr_t a C expression. is used as part of a C statement, when
49  * necessary. cexpr_t has 'type' field, which keeps the
50  * expression type.
51  * - citem_t a base class for cinsn_t and cexpr_t, holds common info
52  * like the address, label, and opcode.
53  * - cnumber_t a constant 64-bit number. in addition to its value also
54  * holds information how to represent it: decimal, hex, or
55  * as a symbolic constant (enum member). please note that
56  * numbers are represented by another class (mnumber_t)
57  * in microcode.
58 
59  * See http://www.hexblog.com/?p=107 for some pictures and more details.
60  *
61  * Both microcode and ctree use the following class:
62  * - lvar_t a local variable. may represent a stack or register
63  * variable. a variable has a name, type, location, etc.
64  * the list of variables is stored in mba->vars.
65  * - lvar_locator_t holds a variable location (vdloc_t) and its definition
66  * address.
67  * - vdloc_t describes a variable location, like a register number,
68  * a stack offset, or, in complex cases, can be a mix of
69  * register and stack locations. very similar to argloc_t,
70  * which is used in ida. the differences between argloc_t
71  * and vdloc_t are:
72  * - vdloc_t never uses ARGLOC_REG2
73  * - vdloc_t uses micro register numbers instead of
74  * processor register numbers
75  * - the stack offsets are never negative in vdloc_t, while
76  * in argloc_t there can be negative offsets
77  *
78  * The above are the most important classes in this header file. There are
79  * many auxiliary classes, please see their definitions in the header file.
80  *
81  * See also the description of \ref vmpage.
82  *
83  */
84 
85 #ifndef __HEXRAYS_HPP
86 #define __HEXRAYS_HPP
87 
88 #include <pro.h>
89 #include <fpro.h>
90 #include <ida.hpp>
91 #include <idp.hpp>
92 #include <gdl.hpp>
93 #include <ieee.h>
94 #include <loader.hpp>
95 #include <kernwin.hpp>
96 #include <typeinf.hpp>
97 #include <set>
98 #include <map>
99 #include <deque>
100 #include <queue>
101 #include <algorithm>
102 
103 /*!
104  * \page vmpage Virtual Machine used by Microcode
105  *
106  * We can imagine a virtual micro machine that executes microcode.
107  * This virtual micro machine has many registers.
108  * Each register is 8 bits wide. During translation of processor
109  * instructions into microcode, multibyte processor registers are mapped
110  * to adjacent microregisters. Processor condition codes are also
111  * represented by microregisters. The microregisters are grouped
112  * into following groups:
113  * - 0..7: condition codes
114  * - 8..n: all processor registers (including fpu registers, if necessary)
115  * this range may also include temporary registers used during
116  * the initial microcode generation
117  * - n.. : so called kernel registers; they are used during optimization
118  * see is_kreg()
119  *
120  * Each micro-instruction (minsn_t) has zero to three operands.
121  * Some of the possible operands types are:
122  * - immediate value
123  * - register
124  * - memory reference
125  * - result of another micro-instruction
126  *
127  * The operands (mop_t) are l (left), r (right), d (destination).
128  * An example of a microinstruction:
129  *
130  * add r0.4, #8.4, r2.4
131  *
132  * which means 'add constant 8 to r0 and place the result into r2'.
133  * where
134  * - the left operand is 'r0', its size is 4 bytes (r0.4)
135  * - the right operand is a constant '8', its size is 4 bytes (#8.4)
136  * - the destination operand is 'r2', its size is 4 bytes (r2.4)
137  * Note that 'd' is almost always the destination but there are exceptions.
138  * See mcode_modifies_d(). For example, stx does not modify 'd'.
139  * See the opcode map below for the list of microinstructions and their
140  * operands. Most instructions are very simple and do not need
141  * detailed explanations. There are no side effects in microinstructions.
142  *
143  * Each operand has a size specifier. The following sizes can be used in
144  * practically all contexts: 1, 2, 4, 8, 16 bytes. Floating types may have
145  * other sizes. Functions may return objects of arbitrary size, as well as
146  * operations upon UDT's (user-defined types, i.e. structs and unions).
147  *
148  * Memory is considered to consist of several segments.
149  * A memory reference is made using a (selector, offset) pair.
150  * A selector is always 2 bytes long. An offset can be 4 or 8 bytes long,
151  * depending on the bitness of the target processor.
152  * Currently the selectors are not used very much. The decompiler tries to
153  * resolve (selector, offset) pairs into direct memory references at each
154  * opportunity and then operates on mop_v operands. In other words,
155  * while the decompiler can handle segmented memory models, internally
156  * it still uses simple linear addresses.
157  *
158  * The following memory regions are recognized:
159  * - GLBLOW global memory: low part, everything below the stack
160  * - LVARS stack: local variables
161  * - RETADDR stack: return address
162  * - SHADOW stack: shadow arguments
163  * - ARGS stack: regular stack arguments
164  * - GLBHIGH global memory: high part, everything above the stack
165  * Any stack region may be empty. Objects residing in one memory region
166  * are considered to be completely distinct from objects in other regions.
167  * We allocate the stack frame in some memory region, which is not
168  * allocated for any purposes in IDA. This permits us to use linear addresses
169  * for all memory references, including the stack frame.
170  *
171  * If the operand size is bigger than 1 then the register
172  * operand references a block of registers. For example:
173  *
174  * ldc #1.4, r8.4
175  *
176  * loads the constant 1 to registers 8, 9, 10, 11:
177  *
178  * #1 -> r8
179  * #0 -> r9
180  * #0 -> r10
181  * #0 -> r11
182  *
183  * This example uses little-endian byte ordering.
184  * Big-endian byte ordering is supported too. Registers are always little-
185  * endian, regardless of the memory endianness.
186  *
187  * Each instruction has 'next' and 'prev' fields that are used to form
188  * a doubly linked list. Such lists are present for each basic block (mblock_t).
189  * Basic blocks have other attributes, including:
190  * - dead_at_start: list of dead locations at the block start
191  * - maybuse: list of locations the block may use
192  * - maybdef: list of locations the block may define (or spoil)
193  * - mustbuse: list of locations the block will certainly use
194  * - mustbdef: list of locations the block will certainly define
195  * - dnu: list of locations the block will certainly define
196  * but will not use (registers or non-aliasable stack vars)
197  *
198  * These lists are represented by the mlist_t class. It consists of 2 parts:
199  * - rlist_t: list of microregisters (possibly including virtual stack locations)
200  * - ivlset_t: list of memory locations represented as intervals
201  * we use linear addresses in this list.
202  * The mlist_t class is used quite often. For example, to find what an operand
203  * can spoil, we build its 'maybe-use' list. Then we can find out if this list
204  * is accessed using the is_accessed() or is_accessed_globally() functions.
205  *
206  * All basic blocks of the decompiled function constitute an array called
207  * mbl_array_t (array of microblocks). This is a huge class that has too
208  * many fields to describe here (some of the fields are not visible in the sdk).
209  * The most important ones are:
210  * - stack frame: frregs, stacksize, etc
211  * - memory: aliased, restricted, and other ranges
212  * - type: type of the current function, its arguments (argidx) and
213  * local variables (vars)
214  * - natural: array of pointers to basic blocks. the basic blocks
215  * are also accessible as a doubly linked list starting from 'blocks'.
216  * - bg: control flow graph. the graph gives access to the use-def
217  * chains that describe data dependencies between basic blocks
218  *
219  */
220 
221 #ifdef __NT__
222 #pragma warning(push)
223 #pragma warning(disable:4062) // enumerator 'x' in switch of enum 'y' is not handled
224 #pragma warning(disable:4265) // virtual functions without virtual destructor
225 #endif
226 
227 #define hexapi ///< Public functions are marked with this keyword
228 
229 // Warning suppressions for PVS Studio:
230 //-V:2:654 The condition '2' of loop is always true.
231 //-V::719 The switch statement does not cover all values
232 //-V:verify:678
233 //-V:chain_keeper_t:690 copy ctr will be generated
234 //-V:add_block:656 call to the same function
235 //-V:add:792 The 'add' function located to the right of the operator '|' will be called regardless of the value of the left operand
236 //-V:sub:792 The 'sub' function located to the right of the operator '|' will be called regardless of the value of the left operand
237 //-V:intersect:792 The 'intersect' function located to the right of the operator '|' will be called regardless of the value of the left operand
238 // Lint suppressions:
239 //lint -sem(mop_t::_make_cases, custodial(1))
240 //lint -sem(mop_t::_make_pair, custodial(1))
241 //lint -sem(mop_t::_make_callinfo, custodial(1))
242 //lint -sem(mop_t::_make_insn, custodial(1))
243 //lint -sem(mop_t::make_insn, custodial(1))
244 
245 // Microcode level forward definitions:
246 class mop_t; // microinstruction operand
247 class mop_pair_t; // pair of operands. example, :(edx.4,eax.4).8
248 class mop_addr_t; // address of an operand. example: &global_var
249 class mcallinfo_t; // function call info. example: <cdecl:"int x" #10.4>.8
250 class mcases_t; // jump table cases. example: {0 => 12, 1 => 13}
251 class minsn_t; // microinstruction
252 class mblock_t; // basic block
253 class mbl_array_t; // array of blocks, represents microcode for a function
254 class codegen_t; // helper class to generate the initial microcode
255 class mbl_graph_t; // control graph of microcode
256 struct vdui_t; // widget representing the pseudocode window
257 struct hexrays_failure_t; // decompilation failure object, is thrown by exceptions
258 struct mba_stats_t; // statistics about decompilation of a function
259 struct mlist_t; // list of memory and register locations
260 struct voff_t; // value offset (microregister number or stack offset)
261 typedef std::set<voff_t> voff_set_t;
262 struct vivl_t; // value interval (register or stack range)
263 typedef int mreg_t; ///< Micro register
264 
265 // Ctree level forward definitions:
266 struct cfunc_t; // result of decompilation, the highest level object
267 struct citem_t; // base class for cexpr_t and cinsn_t
268 struct cexpr_t; // C expression
269 struct cinsn_t; // C statement
270 struct cblock_t; // C statement block (sequence of statements)
271 struct cswitch_t; // C switch statement
272 struct carg_t; // call argument
273 struct carglist_t; // vector of call arguments
274 
275 typedef std::set<ea_t> easet_t; // ordered set of ea_t values
276 typedef std::set<minsn_t *> minsn_ptr_set_t; // ordered set of microinstruction pointers
277 typedef std::set<qstring> strings_t; // ordered set of qstring values
278 typedef qvector<minsn_t*> minsnptrs_t; // vector of microinstruction pointers
279 typedef qvector<mop_t*> mopptrs_t; // vector of microinstruction operand pointers
280 typedef qvector<mop_t> mopvec_t; // vector of microinstruction operands (stored by value)
281 typedef qvector<uint64> uint64vec_t; // vector of uint64 values
282 typedef qvector<mreg_t> mregvec_t; // vector of micro register numbers
283 
284 // Function frames must be smaller than this value (in bytes), otherwise
285 // the decompiler will bail out with MERR_HUGESTACK
286 #define MAX_SUPPORTED_STACK_SIZE 0x100000 // 1MB (0x100000 bytes)
287 
288 //-------------------------------------------------------------------------
289 // Decompiler-specific version of macro DEFINE_MEMORY_ALLOCATION_FUNCS:
290 // the generated operator new/delete go through hexrays_alloc()/hexrays_free()
291 #if defined(SWIG)
292  #define HEXRAYS_MEMORY_ALLOCATION_FUNCS() // expands to nothing
293 #elif defined(SWIGPYTHON) // NOTE(review): presumably never taken, SWIG is expected to be defined whenever SWIGPYTHON is - confirm
294  #define HEXRAYS_MEMORY_ALLOCATION_FUNCS DEFINE_MEMORY_ALLOCATION_FUNCS
295 #else
296  #define HEXRAYS_PLACEMENT_DELETE void operator delete(void *, void *) {} // no-op delete matching the placement new below
297  #define HEXRAYS_MEMORY_ALLOCATION_FUNCS() \
298  void *operator new (size_t _s) { return hexrays_alloc(_s); } \
299  void *operator new[](size_t _s) { return hexrays_alloc(_s); } \
300  void *operator new(size_t /*size*/, void *_v) { return _v; } \
301  void operator delete (void *_blk) { hexrays_free(_blk); } \
302  void operator delete[](void *_blk) { hexrays_free(_blk); } \
303  HEXRAYS_PLACEMENT_DELETE
304 #endif
305 
306 void *hexapi hexrays_alloc(size_t size); // allocator called by the operator new overloads of HEXRAYS_MEMORY_ALLOCATION_FUNCS
307 void hexapi hexrays_free(void *ptr); // counterpart called by the operator delete overloads of HEXRAYS_MEMORY_ALLOCATION_FUNCS
308 
309 typedef uint64 uvlr_t; // unsigned value used by value ranges (valrng_t)
310 typedef int64 svlr_t; // signed counterpart of uvlr_t
311 enum { MAX_VLR_SIZE = sizeof(uvlr_t) }; // size of uvlr_t in bytes; default size of a valrng_t
312 const uvlr_t MAX_VALUE = uvlr_t(-1); // -1 converted to uvlr_t
313 const svlr_t MAX_SVALUE = svlr_t(uvlr_t(-1) >> 1); // MAX_VALUE shifted right by one bit, as a signed value
314 const svlr_t MIN_SVALUE = ~MAX_SVALUE; // bitwise complement of MAX_SVALUE
315 
316 enum cmpop_t
317 { // the order of comparisons is the same as in microcode opcodes (setnz..setle, jnz..jle)
318  CMP_NZ, // not equal
319  CMP_Z, // equal
320  CMP_AE, // above or equal
321  CMP_B, // below
322  CMP_A, // above
323  CMP_BE, // below or equal
324  CMP_GT, // greater
325  CMP_GE, // greater or equal
326  CMP_LT, // less
327  CMP_LE, // less or equal
328 };
329 
330 //-------------------------------------------------------------------------
331 // value-range class to keep possible operand value(s).
332 class valrng_t
333 {
334 protected:
335  int flags; // holds one of the VLR_... values below
336 #define VLR_TYPE 0x0F // valrng_t type
337 #define VLR_NONE 0x00 // no values
338 #define VLR_ALL 0x01 // all values
339 #define VLR_IVLS 0x02 // union of disjoint intervals
340 #define VLR_RANGE 0x03 // strided range
341 #define VLR_SRANGE 0x04 // strided range with signed bound
342 #define VLR_BITS 0x05 // known bits
343 #define VLR_SECT 0x06 // intersection of sub-ranges
344  // each sub-range should be simple or union
345 #define VLR_UNION 0x07 // union of sub-ranges
346  // each sub-range should be simple or
347  // intersection
348 #define VLR_UNK 0x08 // unknown value (like 'null' in SQL)
349  int size; // operand size: 1..8 bytes
350  // all values must fall within the size
351  union
352  {
353  struct // VLR_RANGE/VLR_SRANGE
354  { // values that are between VALUE and LIMIT
355  // and conform to: value+stride*N
356  uvlr_t value; // initial value
357  uvlr_t limit; // final value
358  // we adjust LIMIT to be on the STRIDE lattice
359  svlr_t stride; // stride between values
360  };
361  struct // VLR_BITS
362  {
363  uvlr_t zeroes; // bits known to be clear
364  uvlr_t ones; // bits known to be set
365  };
366  char reserved[sizeof(qvector<int>)];
367  // VLR_IVLS/VLR_SECT/VLR_UNION
368  };
369  void hexapi clear(void);
370  void hexapi copy(const valrng_t &r);
371  valrng_t &hexapi assign(const valrng_t &r);
372 
373 public:
374  explicit valrng_t(int size_ = MAX_VLR_SIZE) // creates an empty (VLR_NONE) range of SIZE_ bytes
375  : flags(VLR_NONE), size(size_), value(0), limit(0), stride(0) {}
376  valrng_t(const valrng_t &r) { copy(r); }
377  ~valrng_t(void) { clear(); }
378  valrng_t &operator=(const valrng_t &r) { return assign(r); }
379  void swap(valrng_t &r) { qswap(*this, r); }
380  DECLARE_COMPARISONS(valrng_t);
381  DEFINE_MEMORY_ALLOCATION_FUNCS()
382 
383  void set_none(void) { clear(); }
384  void set_all(void) { clear(); flags = VLR_ALL; }
385  void set_unk(void) { clear(); flags = VLR_UNK; }
386  void hexapi set_eq(uvlr_t v);
387  void hexapi set_cmp(cmpop_t cmp, uvlr_t _value);
388 
389  // reduce size
390  // it takes the low part of size NEW_SIZE
391  // it returns "true" if size is changed successfully.
392  // e.g.: valrng_t vr(2); vr.set_eq(0x1234);
393  // vr.reduce_size(1);
394  // uvlr_t v; vr.cvt_to_single_value(&v);
395  // assert(v == 0x34);
396  bool hexapi reduce_size(int new_size);
397 
398  // Perform intersection or union or inversion.
399  // \return did we change something in THIS?
400  bool hexapi intersect_with(const valrng_t &r);
401  bool hexapi unite_with(const valrng_t &r);
402  void hexapi inverse(); // works for VLR_IVLS only
403 
404  bool empty(void) const { return flags == VLR_NONE; }
405  bool all_values(void) const { return flags == VLR_ALL; }
406  bool is_unknown(void) const { return flags == VLR_UNK; }
407  bool hexapi has(uvlr_t v) const;
408 
409  void hexapi print(qstring *vout) const;
410  const char *hexapi dstr(void) const;
411 
412  bool hexapi cvt_to_single_value(uvlr_t *v) const;
413  bool hexapi cvt_to_cmp(cmpop_t *cmp, uvlr_t *val, bool strict) const;
414 
415  int get_size() const { return size; }
416  static uvlr_t max_value(int size_) // largest unsigned value of SIZE_ bytes; SIZE_ must be in 1..MAX_VLR_SIZE, other sizes make the shift below invalid
417  {
418  return size_ == MAX_VLR_SIZE
419  ? MAX_VALUE
420  : (uvlr_t(1) << (size_ * 8)) - 1;
421  }
422  static uvlr_t min_svalue(int size_) // only the top bit of a SIZE_-byte value set; same precondition on SIZE_ as max_value()
423  {
424  return size_ == MAX_VLR_SIZE
425  ? MIN_SVALUE
426  : (uvlr_t(1) << (size_ * 8 - 1));
427  }
428  static uvlr_t max_svalue(int size_) // all bits below the top bit of a SIZE_-byte value set; same precondition on SIZE_ as max_value()
429  {
430  return size_ == MAX_VLR_SIZE
431  ? MAX_SVALUE
432  : (uvlr_t(1) << (size_ * 8 - 1)) - 1;
433  }
434  uvlr_t max_value() const { return max_value(size); } // the same limits, for the size of this range
435  uvlr_t min_svalue() const { return min_svalue(size); }
436  uvlr_t max_svalue() const { return max_svalue(size); }
437 };
438 DECLARE_TYPE_AS_MOVABLE(valrng_t);
439 
440 //-------------------------------------------------------------------------
441 // possible memory and register access types (WRITE_ACCESS and READ_ACCESS are separate bits).
442 enum access_type_t
443 {
444  NO_ACCESS = 0,
445  WRITE_ACCESS = 1, // bit 0
446  READ_ACCESS = 2, // bit 1
447  RW_ACCESS = WRITE_ACCESS | READ_ACCESS, // both bits set
448 };
449 
450 // Are we looking for 'must access' or 'may access' information?
451 // 'must access' means that the code will always access the specified location(s)
452 // 'may access' means that the code may in some cases access the specified location(s)
453 // Example: ldx cs.2, r0.4, r1.4
454 // MUST_ACCESS: r0.4 and r1.4, usually displayed as r0.8 because r0 and r1 are adjacent
455 // MAY_ACCESS: r0.4 and r1.4, and all aliasable memory, because
456 // ldx may access any part of the aliasable memory
457 typedef int maymust_t;
458 const maymust_t
459  // One of the following two values should be specified (they occupy bit 0):
460  MUST_ACCESS = 0x00, // access information we can count on
461  MAY_ACCESS = 0x01, // access information we should take into account
462  // Optionally combined with the following bits:
463  MAYMUST_ACCESS_MASK = 0x01, // mask that extracts MUST_ACCESS/MAY_ACCESS, see is_may_access()
464 
465  ONE_ACCESS_TYPE = 0x20, // for find_first_use():
466  // use only the specified maymust access type
467  // (by default it inverts the access type for def-lists)
468  INCLUDE_SPOILED_REGS = 0x40, // for build_def_list() with MUST_ACCESS:
469  // include spoiled registers in the list
470  EXCLUDE_PASS_REGS = 0x80, // for build_def_list() with MAY_ACCESS:
471  // exclude pass_regs from the list
472  FULL_XDSU = 0x100, // for build_def_list():
473  // if xds/xdu source and targets are the same
474  // treat it as if xdsu redefines the entire destination
475  WITH_ASSERTS = 0x200, // for find_first_use():
476  // do not ignore assertions
477  EXCLUDE_VOLATILE = 0x400, // for build_def_list():
478  // exclude volatile memory from the list
479  INCLUDE_UNUSED_SRC = 0x800, // for build_use_list():
480  // do not exclude unused source bytes for m_and/m_or insns
481  INCLUDE_DEAD_RETREGS = 0x1000, // for build_def_list():
482  // include dead returned registers in the list
483  INCLUDE_RESTRICTED = 0x2000,// for MAY_ACCESS: include restricted memory
484  CALL_SPOILS_ONLY_ARGS = 0x4000;// for build_def_list() & MAY_ACCESS:
485  // do not include global memory into the
486  // spoiled list of a call
487 
488 inline THREAD_SAFE bool is_may_access(maymust_t maymust) // true if MAYMUST requests MAY_ACCESS; the other option bits are ignored
489 {
490  return (maymust & MAYMUST_ACCESS_MASK) != MUST_ACCESS;
491 }
492 
493 //-------------------------------------------------------------------------
494 /// \defgroup MERR_ Microcode error codes
495 //@{
496 enum merror_t
497 {
498  MERR_OK = 0, ///< ok
499  MERR_BLOCK = 1, ///< no error, switch to new block
500  MERR_INTERR = -1, ///< internal error
501  MERR_INSN = -2, ///< cannot convert to microcode
502  MERR_MEM = -3, ///< not enough memory
503  MERR_BADBLK = -4, ///< bad block found
504  MERR_BADSP = -5, ///< positive sp value has been found
505  MERR_PROLOG = -6, ///< prolog analysis failed
506  MERR_SWITCH = -7, ///< wrong switch idiom
507  MERR_EXCEPTION = -8, ///< exception analysis failed
508  MERR_HUGESTACK = -9, ///< stack frame is too big
509  MERR_LVARS = -10, ///< local variable allocation failed
510  MERR_BITNESS = -11, ///< only 32/16bit functions can be decompiled
511  MERR_BADCALL = -12, ///< could not determine call arguments
512  MERR_BADFRAME = -13, ///< function frame is wrong
513  MERR_UNKTYPE = -14, ///< undefined type %s (currently unused error code)
514  MERR_BADIDB = -15, ///< inconsistent database information
515  MERR_SIZEOF = -16, ///< wrong basic type sizes in compiler settings
516  MERR_REDO = -17, ///< redecompilation has been requested
517  MERR_CANCELED = -18, ///< decompilation has been cancelled
518  MERR_RECDEPTH = -19, ///< max recursion depth reached during lvar allocation
519  MERR_OVERLAP = -20, ///< variables would overlap: %s
520  MERR_PARTINIT = -21, ///< partially initialized variable %s
521  MERR_COMPLEX = -22, ///< too complex function
522  MERR_LICENSE = -23, ///< no license available
523  MERR_ONLY32 = -24, ///< only 32-bit functions can be decompiled for the current database
524  MERR_ONLY64 = -25, ///< only 64-bit functions can be decompiled for the current database
525  MERR_BUSY = -26, ///< already decompiling a function
526  MERR_FARPTR = -27, ///< far memory model is supported only for pc
527  MERR_EXTERN = -28, ///< special segments cannot be decompiled
528  MERR_FUNCSIZE = -29, ///< too big function
529  MERR_BADRANGES = -30, ///< bad input ranges
530  MERR_STOP = -31, ///< no error, stop the analysis
531  MERR_MAX_ERR = 31, ///< NOTE(review): presumably the absolute value of the last code above (MERR_STOP) - confirm
532  MERR_LOOP = -32, ///< internal code: redo last loop (never reported)
533 };
534 //@}
535 
536 /// Get textual description of an error code
537 /// \param out the output buffer for the error description
538 /// \param code error code, one of \ref MERR_
539 /// \param mba the microcode array
540 /// \return the error address
541 
542 ea_t hexapi get_merror_desc(qstring *out, merror_t code, mbl_array_t *mba);
543 
544 //-------------------------------------------------------------------------
545 // List of microinstruction opcodes.
546 // The order of setX and jX insns is important, it is used in the code.
547 
548 // Instructions marked with *F may have the FPINSN bit set and operate on fp values
549 // Instructions marked with +F must have the FPINSN bit set. They always operate on fp values
550 // Other instructions do not operate on fp values.
551 
552 enum mcode_t
553 {
554  m_nop = 0x00, // nop // no operation
555  m_stx = 0x01, // stx l, {r=sel, d=off} // store register to memory *F
556  m_ldx = 0x02, // ldx {l=sel,r=off}, d // load register from memory *F
557  m_ldc = 0x03, // ldc l=const, d // load constant
558  m_mov = 0x04, // mov l, d // move *F
559  m_neg = 0x05, // neg l, d // negate
560  m_lnot = 0x06, // lnot l, d // logical not
561  m_bnot = 0x07, // bnot l, d // bitwise not
562  m_xds = 0x08, // xds l, d // extend (signed)
563  m_xdu = 0x09, // xdu l, d // extend (unsigned)
564  m_low = 0x0A, // low l, d // take low part
565  m_high = 0x0B, // high l, d // take high part
566  m_add = 0x0C, // add l, r, d // l + r -> dst
567  m_sub = 0x0D, // sub l, r, d // l - r -> dst
568  m_mul = 0x0E, // mul l, r, d // l * r -> dst
569  m_udiv = 0x0F, // udiv l, r, d // l / r -> dst
570  m_sdiv = 0x10, // sdiv l, r, d // l / r -> dst
571  m_umod = 0x11, // umod l, r, d // l % r -> dst
572  m_smod = 0x12, // smod l, r, d // l % r -> dst
573  m_or = 0x13, // or l, r, d // bitwise or
574  m_and = 0x14, // and l, r, d // bitwise and
575  m_xor = 0x15, // xor l, r, d // bitwise xor
576  m_shl = 0x16, // shl l, r, d // shift logical left
577  m_shr = 0x17, // shr l, r, d // shift logical right
578  m_sar = 0x18, // sar l, r, d // shift arithmetic right
579  m_cfadd = 0x19, // cfadd l, r, d=carry // calculate carry bit of (l+r)
580  m_ofadd = 0x1A, // ofadd l, r, d=overf // calculate overflow bit of (l+r)
581  m_cfshl = 0x1B, // cfshl l, r, d=carry // calculate carry bit of (l<<r)
582  m_cfshr = 0x1C, // cfshr l, r, d=carry // calculate carry bit of (l>>r)
583  m_sets = 0x1D, // sets l, d=byte SF=1 Sign
584  m_seto = 0x1E, // seto l, r, d=byte OF=1 Overflow of (l-r)
585  m_setp = 0x1F, // setp l, r, d=byte PF=1 Unordered/Parity *F
586  m_setnz = 0x20, // setnz l, r, d=byte ZF=0 Not Equal *F
587  m_setz = 0x21, // setz l, r, d=byte ZF=1 Equal *F
588  m_setae = 0x22, // setae l, r, d=byte CF=0 Above or Equal *F
589  m_setb = 0x23, // setb l, r, d=byte CF=1 Below *F
590  m_seta = 0x24, // seta l, r, d=byte CF=0 & ZF=0 Above *F
591  m_setbe = 0x25, // setbe l, r, d=byte CF=1 | ZF=1 Below or Equal *F
592  m_setg = 0x26, // setg l, r, d=byte SF=OF & ZF=0 Greater
593  m_setge = 0x27, // setge l, r, d=byte SF=OF Greater or Equal
594  m_setl = 0x28, // setl l, r, d=byte SF!=OF Less
595  m_setle = 0x29, // setle l, r, d=byte SF!=OF | ZF=1 Less or Equal
596  m_jcnd = 0x2A, // jcnd l, d // d is mop_v or mop_b
597  m_jnz = 0x2B, // jnz l, r, d // ZF=0 Not Equal *F
598  m_jz = 0x2C, // jz l, r, d // ZF=1 Equal *F
599  m_jae = 0x2D, // jae l, r, d // CF=0 Above or Equal *F
600  m_jb = 0x2E, // jb l, r, d // CF=1 Below *F
601  m_ja = 0x2F, // ja l, r, d // CF=0 & ZF=0 Above *F
602  m_jbe = 0x30, // jbe l, r, d // CF=1 | ZF=1 Below or Equal *F
603  m_jg = 0x31, // jg l, r, d // SF=OF & ZF=0 Greater
604  m_jge = 0x32, // jge l, r, d // SF=OF Greater or Equal
605  m_jl = 0x33, // jl l, r, d // SF!=OF Less
606  m_jle = 0x34, // jle l, r, d // SF!=OF | ZF=1 Less or Equal
607  m_jtbl = 0x35, // jtbl l, r=mcases // Table jump
608  m_ijmp = 0x36, // ijmp {r=sel, d=off} // indirect unconditional jump
609  m_goto = 0x37, // goto l // l is mop_v or mop_b
610  m_call = 0x38, // call l d // l is mop_v or mop_b or mop_h
611  m_icall = 0x39, // icall {l=sel, r=off} d // indirect call
612  m_ret = 0x3A, // ret
613  m_push = 0x3B, // push l
614  m_pop = 0x3C, // pop d
615  m_und = 0x3D, // und d // undefine
616  m_ext = 0x3E, // ext in1, in2, out1 // external insn, not microcode *F
617  m_f2i = 0x3F, // f2i l, d int(l) => d; convert fp -> integer +F
618  m_f2u = 0x40, // f2u l, d uint(l)=> d; convert fp -> uinteger +F
619  m_i2f = 0x41, // i2f l, d fp(l) => d; convert integer -> fp +F
620  m_u2f = 0x42, // u2f l, d fp(l) => d; convert uinteger -> fp +F
621  m_f2f = 0x43, // f2f l, d l => d; change fp precision +F
622  m_fneg = 0x44, // fneg l, d -l => d; change sign +F
623  m_fadd = 0x45, // fadd l, r, d l + r => d; add +F
624  m_fsub = 0x46, // fsub l, r, d l - r => d; subtract +F
625  m_fmul = 0x47, // fmul l, r, d l * r => d; multiply +F
626  m_fdiv = 0x48, // fdiv l, r, d l / r => d; divide +F
627 #define m_max 0x49 // first unused opcode
628 };
629 
630 /// Must an instruction with the given opcode be the last one in a block?
631 /// Such opcodes are called closing opcodes.
632 /// \param mcode instruction opcode
633 /// \param including_calls should m_call/m_icall be considered as the closing opcodes?
634 /// If this function returns true, the opcode cannot appear in the middle
635 /// of a block. Calls are a special case because before MMAT_CALLS they are
636 /// closing opcodes. After MMAT_CALLS they are not considered as closing opcodes.
637 
638 THREAD_SAFE bool hexapi must_mcode_close_block(mcode_t mcode, bool including_calls);
639 
640 
641 /// May opcode be propagated?
642 /// Such opcodes can be used in sub-instructions (nested instructions).
643 /// There are a handful of non-propagatable opcodes, like jumps, ret, nop, etc.
644 /// All other regular opcodes are propagatable and may appear in a nested
645 /// instruction.
646 
647 THREAD_SAFE bool hexapi is_mcode_propagatable(mcode_t mcode);
648 
649 
650 // Is add or sub instruction? (m_add/m_sub only; m_fadd/m_fsub do not match)
651 inline THREAD_SAFE bool is_mcode_addsub(mcode_t mcode) { return mcode == m_add || mcode == m_sub; }
652 // Is xds or xdu instruction (signed or unsigned extension)? We use 'xdsu' as a shortcut for 'xds or xdu'
653 inline THREAD_SAFE bool is_mcode_xdsu(mcode_t mcode) { return mcode == m_xds || mcode == m_xdu; }
654 // Is a 'set' instruction? (an instruction that sets a condition code; opcodes m_sets..m_setle)
655 inline THREAD_SAFE bool is_mcode_set(mcode_t mcode) { return mcode >= m_sets && mcode <= m_setle; }
656 // Is a 1-operand 'set' instruction? Only 'sets' is in this group (the other setX opcodes take l and r)
657 inline THREAD_SAFE bool is_mcode_set1(mcode_t mcode) { return mcode == m_sets; }
658 // Is a 1-operand conditional jump instruction? Only 'jcnd' is in this group (the other jX opcodes take l and r)
659 inline THREAD_SAFE bool is_mcode_j1(mcode_t mcode) { return mcode == m_jcnd; }
660 // Is a conditional jump? (opcodes m_jcnd..m_jle; jtbl, ijmp and goto do not match)
661 inline THREAD_SAFE bool is_mcode_jcond(mcode_t mcode) { return mcode >= m_jcnd && mcode <= m_jle; }
662 // Is a 'set' instruction that can be converted into a conditional jump? (m_setnz..m_setle; sets, seto, setp do not match)
663 inline THREAD_SAFE bool is_mcode_convertible_to_jmp(mcode_t mcode) { return mcode >= m_setnz && mcode <= m_setle; }
664 // Is a conditional jump instruction that can be converted into a 'set'? (m_jnz..m_jle; jcnd does not match)
665 inline THREAD_SAFE bool is_mcode_convertible_to_set(mcode_t mcode) { return mcode >= m_jnz && mcode <= m_jle; }
666 // Is a call instruction? (direct m_call or indirect m_icall)
667 inline THREAD_SAFE bool is_mcode_call(mcode_t mcode) { return mcode == m_call || mcode == m_icall; }
668 // Must be an FPU instruction? (m_f2i and above, i.e. the opcodes marked +F; there is no upper bound check against m_max)
669 inline THREAD_SAFE bool is_mcode_fpu(mcode_t mcode) { return mcode >= m_f2i; }
670 // Is a commutative instruction? (only the opcodes listed below; m_fadd/m_fmul are not in the list)
671 inline THREAD_SAFE bool is_mcode_commutative(mcode_t mcode)
672 {
673  return mcode == m_add
674  || mcode == m_mul
675  || mcode == m_or
676  || mcode == m_and
677  || mcode == m_xor
678  || mcode == m_setz
679  || mcode == m_setnz
680  || mcode == m_cfadd
681  || mcode == m_ofadd;
682 }
683 // Is a shift instruction?
684 inline THREAD_SAFE bool is_mcode_shift(mcode_t mcode)
685 {
686  return mcode == m_shl
687  || mcode == m_shr
688  || mcode == m_sar;
689 }
690 // Is a kind of div or mod instruction?
691 inline THREAD_SAFE bool is_mcode_divmod(mcode_t op)
692 {
693  return op == m_udiv || op == m_sdiv || op == m_umod || op == m_smod;
694 }
695 
696 // Convert setX opcode into corresponding jX opcode
697 // This function relies on the order of setX and jX opcodes!
698 inline THREAD_SAFE mcode_t set2jcnd(mcode_t code)
699 {
700  return mcode_t(code - m_setnz + m_jnz);
701 }
702 
703 // Convert setX opcode into corresponding jX opcode
704 // This function relies on the order of setX and jX opcodes!
705 inline THREAD_SAFE mcode_t jcnd2set(mcode_t code)
706 {
707  return mcode_t(code + m_setnz - m_jnz);
708 }
709 
710 // Negate a conditional opcode.
711 // Conditional jumps can be negated, example: jle -> jg
712 // 'Set' instruction can be negated, example: seta -> setbe
713 // If the opcode cannot be negated, return m_nop
714 THREAD_SAFE mcode_t hexapi negate_mcode_relation(mcode_t code);
715 
716 
717 // Swap a conditional opcode.
718 // Only conditional jumps and set instructions can be swapped.
719 // The returned opcode is the one required for swapped operands.
720 // Example "x > y" is the same as "y < x", therefore swap(m_jg) is m_jl.
721 // If the opcode cannot be swapped, return m_nop
722 
723 THREAD_SAFE mcode_t hexapi swap_mcode_relation(mcode_t code);
724 
725 // Return the opcode that performs signed operation.
726 // Examples: jae -> jge; udiv -> sdiv
727 // If the opcode cannot be transformed into signed form, simply return it.
728 
729 THREAD_SAFE mcode_t hexapi get_signed_mcode(mcode_t code);
730 
731 
732 // Return the opcode that performs unsigned operation.
733 // Examples: jl -> jb; xds -> xdu
734 // If the opcode cannot be transformed into unsigned form, simply return it.
735 
736 THREAD_SAFE mcode_t hexapi get_unsigned_mcode(mcode_t code);
737 
738 // Does the opcode perform a signed operation?
739 inline THREAD_SAFE bool is_signed_mcode(mcode_t code) { return get_unsigned_mcode(code) != code; }
740 // Does the opcode perform a unsigned operation?
741 inline THREAD_SAFE bool is_unsigned_mcode(mcode_t code) { return get_signed_mcode(code) != code; }
742 
743 
744 // Does the 'd' operand get modified by the instruction?
745 // Example: "add l,r,d" modifies d, while instructions
746 // like jcnd, ijmp, stx do not modify it.
747 // Note: this function returns 'true' for m_ext but it may be wrong.
748 // Use minsn_t::modifes_d() if you have minsn_t.
749 
750 THREAD_SAFE bool hexapi mcode_modifies_d(mcode_t mcode);
751 
752 
753 // Processor condition codes are mapped to the first microregisters
754 // The order is important, see mop_t::is_cc()
755 const mreg_t mr_none = mreg_t(-1);
756 const mreg_t mr_cf = mreg_t(0); // carry bit
757 const mreg_t mr_zf = mreg_t(1); // zero bit
758 const mreg_t mr_sf = mreg_t(2); // sign bit
759 const mreg_t mr_of = mreg_t(3); // overflow bit
760 const mreg_t mr_pf = mreg_t(4); // parity bit
761 const int cc_count = mr_pf - mr_cf + 1; // number of condition code registers
762 const mreg_t mr_cc = mreg_t(5); // synthetic condition code, used internally
763 const mreg_t mr_first = mreg_t(8); // the first processor specific register
764 
765 //-------------------------------------------------------------------------
766 /// Operand locator.
767 /// It is used to denote a particular operand in the ctree, for example,
768 /// when the user right clicks on a constant and requests to represent it, say,
769 /// as a hexadecimal number.
771 {
772 private:
773  // forbid the default constructor, force the user to initialize objects of this class.
774  operand_locator_t(void) {}
775 public:
776  ea_t ea; ///< address of the original processor instruction
777  int opnum; ///< operand number in the instruction
778  operand_locator_t(ea_t _ea, int _opnum) : ea(_ea), opnum(_opnum) {}
779  DECLARE_COMPARISONS(operand_locator_t);
780  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
781 };
782 
783 //-------------------------------------------------------------------------
784 /// Number representation.
785 /// This structure holds information about a number format.
787 {
788  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
789  flags_t flags; ///< ida flags, which describe number radix, enum, etc
790  char opnum; ///< operand number: 0..UA_MAXOP
791  char props; ///< properties: combination of NF_ bits (\ref NF_)
792 /// \defgroup NF_ Number format property bits
793 /// Used in number_format_t::props
794 //@{
795 #define NF_FIXED 0x01 ///< number format has been defined by the user
796 #define NF_NEGDONE 0x02 ///< temporary internal bit: negation has been performed
797 #define NF_BINVDONE 0x04 ///< temporary internal bit: inverting bits is done
798 #define NF_NEGATE 0x08 ///< The user asked to negate the constant
799 #define NF_BITNOT 0x10 ///< The user asked to invert bits of the constant
800 #define NF_STROFF 0x20 ///< internal bit: used as stroff, valid iff is_stroff()
801 //@}
802  uchar serial; ///< for enums: constant serial number
803  char org_nbytes; ///< original number size in bytes
804  qstring type_name; ///< for stroffs: structure for offsetof()\n
805  ///< for enums: enum name
806  /// Contructor
807  number_format_t(int _opnum=0)
808  : flags(0), opnum(char(_opnum)), props(0), serial(0), org_nbytes(0) {}
809  /// Get number radix
810  /// \return 2,8,10, or 16
811  int get_radix(void) const { return ::get_radix(flags, opnum); }
812  /// Is number representation fixed?
813  /// Fixed representation cannot be modified by the decompiler
814  bool is_fixed(void) const { return props != 0; }
815  /// Is a hexadecimal number?
816  bool is_hex(void) const { return ::is_numop(flags, opnum) && get_radix() == 16; }
817  /// Is a decimal number?
818  bool is_dec(void) const { return ::is_numop(flags, opnum) && get_radix() == 10; }
819  /// Is a octal number?
820  bool is_oct(void) const { return ::is_numop(flags, opnum) && get_radix() == 8; }
821  /// Is a symbolic constant?
822  bool is_enum(void) const { return ::is_enum(flags, opnum); }
823  /// Is a character constant?
824  bool is_char(void) const { return ::is_char(flags, opnum); }
825  /// Is a structure field offset?
826  bool is_stroff(void) const { return ::is_stroff(flags, opnum); }
827  /// Is a number?
828  bool is_numop(void) const { return !is_enum() && !is_char() && !is_stroff(); }
829  /// Does the number need to be negated or bitwise negated?
830  /// Returns true if the user requested a negation but it is not done yet
831  bool needs_to_be_inverted(void) const
832  {
833  return (props & (NF_NEGATE|NF_BITNOT)) != 0 // the user requested it
834  && (props & (NF_NEGDONE|NF_BINVDONE)) == 0; // not done yet
835  }
836 };
837 
838 // Number formats are attached to (ea,opnum) pairs
839 typedef std::map<operand_locator_t, number_format_t> user_numforms_t;
840 
841 //-------------------------------------------------------------------------
842 /// Base helper class to convert binary data structures into text.
843 /// Other classes are derived from this class.
845 {
846  qstring tmpbuf;
847  int hdrlines; ///< number of header lines (prototype+typedef+lvars)
848  ///< valid at the end of print process
849  /// Print.
850  /// This function is called to generate a portion of the output text.
851  /// The output text may contain color codes.
852  /// \return the number of printed characters
853  /// \param indent number of spaces to generate as prefix
854  /// \param format printf-style format specifier
855  /// \return length of printed string
856  AS_PRINTF(3, 4) virtual int hexapi print(int indent, const char *format,...);
857  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
858 };
859 
860 /// Helper class to convert cfunc_t into text.
861 struct vc_printer_t : public vd_printer_t
862 {
863  const cfunc_t *func; ///< cfunc_t to generate text for
864  char lastchar; ///< internal: last printed character
865  /// Constructor
866  vc_printer_t(const cfunc_t *f) : func(f), lastchar(0) {}
867  /// Are we generating one-line text representation?
868  /// \return \c true if the output will occupy one line without line breaks
869  virtual bool idaapi oneliner(void) const { return false; }
870 };
871 
872 /// Helper class to convert binary data structures into text and put into a file.
874 {
875  FILE *fp; ///< Output file pointer
876  /// Print.
877  /// This function is called to generate a portion of the output text.
878  /// The output text may contain color codes.
879  /// \return the number of printed characters
880  /// \param indent number of spaces to generate as prefix
881  /// \param format printf-style format specifier
882  /// \return length of printed string
883  AS_PRINTF(3, 4) int hexapi print(int indent, const char *format, ...);
884  /// Constructor
885  file_printer_t(FILE *_fp) : fp(_fp) {}
886 };
887 
888 /// Helper class to convert cfunc_t into a text string
890 {
891  bool with_tags; ///< Generate output with color tags
892  qstring &s; ///< Reference to the output string
893  /// Constructor
894  qstring_printer_t(const cfunc_t *f, qstring &_s, bool tags)
895  : vc_printer_t(f), with_tags(tags), s(_s) {}
896  /// Print.
897  /// This function is called to generate a portion of the output text.
898  /// The output text may contain color codes.
899  /// \return the number of printed characters
900  /// \param indent number of spaces to generate as prefix
901  /// \param format printf-style format specifier
902  /// \return length of the printed string
903  AS_PRINTF(3, 4) int hexapi print(int indent, const char *format, ...);
904 };
905 
906 //-------------------------------------------------------------------------
907 /// \defgroup type Type string related declarations
908 /// Type related functions and class.
909 //@{
910 
911 /// Print the specified type info.
912 /// This function can be used from a debugger by typing "tif->dstr()"
913 
914 const char *hexapi dstr(const tinfo_t *tif);
915 
916 
917 /// Verify a type string.
918 /// \return true if type string is correct
919 
920 bool hexapi is_type_correct(const type_t *ptr);
921 
922 
923 /// Is a small structure or union?
924 /// \return true if the type is a small UDT (user defined type).
925 /// Small UDTs fit into a register (or pair of registers) as a rule.
926 
927 bool hexapi is_small_udt(const tinfo_t &tif);
928 
929 
930 /// Is definitely a non-boolean type?
931 /// \return true if the type is a non-boolean type (non bool and well defined)
932 
933 bool hexapi is_nonbool_type(const tinfo_t &type);
934 
935 
936 /// Is a boolean type?
937 /// \return true if the type is a boolean type
938 
939 bool hexapi is_bool_type(const tinfo_t &type);
940 
941 
942 /// Is a pointer or array type?
943 inline THREAD_SAFE bool is_ptr_or_array(type_t t)
944 {
945  return is_type_ptr(t) || is_type_array(t);
946 }
947 
948 /// Is a pointer, array, or function type?
949 inline THREAD_SAFE bool is_paf(type_t t)
950 {
951  return is_ptr_or_array(t) || is_type_func(t);
952 }
953 
954 /// Is struct/union/enum definition (not declaration)?
955 inline THREAD_SAFE bool is_inplace_def(const tinfo_t &type)
956 {
957  return type.is_decl_complex() && !type.is_typeref();
958 }
959 
960 /// Calculate number of partial subtypes.
961 /// \return number of partial subtypes. The bigger is this number, the uglier is the type.
962 
963 int hexapi partial_type_num(const tinfo_t &type);
964 
965 
966 /// Get a type of a floating point value with the specified width
967 /// \returns type info object
968 /// \param width width of the desired type
969 
970 tinfo_t hexapi get_float_type(int width);
971 
972 
973 /// Create a type info by width and sign.
974 /// Returns a simple type (examples: int, short) with the given width and sign.
975 /// \param srcwidth size of the type in bytes
976 /// \param sign sign of the type
977 
978 tinfo_t hexapi get_int_type_by_width_and_sign(int srcwidth, type_sign_t sign);
979 
980 
981 /// Create a partial type info by width.
982 /// Returns a partially defined type (examples: _DWORD, _BYTE) with the given width.
983 /// \param size size of the type in bytes
984 
985 tinfo_t hexapi get_unk_type(int size);
986 
987 
988 /// Generate a dummy pointer type
989 /// \param ptrsize size of pointed object
990 /// \param isfp is floating point object?
991 
992 tinfo_t hexapi dummy_ptrtype(int ptrsize, bool isfp);
993 
994 
995 /// Get type of a structure field.
996 /// This function performs validity checks of the field type. Wrong types are rejected.
997 /// \param mptr structure field
998 /// \param type pointer to the variable where the type is returned. This parameter can be NULL.
999 /// \return false if failed
1000 
1001 bool hexapi get_member_type(const member_t *mptr, tinfo_t *type);
1002 
1003 
1004 /// Create a pointer type.
1005 /// This function performs the following conversion: "type" -> "type*"
1006 /// \param type object type.
1007 /// \return "type*". for example, if 'char' is passed as the argument,
1008 /// the function will return 'char *'
1009 
1010 tinfo_t hexapi make_pointer(const tinfo_t &type);
1011 
1012 
1013 /// Create a reference to a named type.
1014 /// \param name type name
1015 /// \return type which refers to the specified name. For example, if name is "DWORD",
1016 /// the type info which refers to "DWORD" is created.
1017 
1018 tinfo_t hexapi create_typedef(const char *name);
1019 
1020 
1021 /// Create a reference to an ordinal type.
1022 /// \param n ordinal number of the type
1023 /// \return type which refers to the specified ordinal. For example, if n is 1,
1024 /// the type info which refers to ordinal type 1 is created.
1025 
1026 inline tinfo_t create_typedef(int n)
1027 {
1028  tinfo_t tif;
1029  tif.create_typedef(NULL, n);
1030  return tif;
1031 }
1032 
/// Type source (where the type information comes from)
enum type_source_t
{
  GUESSED_NONE,  // not guessed, specified by the user
  GUESSED_WEAK,  // not guessed, comes from idb
  GUESSED_FUNC,  // guessed as a function
  GUESSED_DATA,  // guessed as a data item
  TS_NOELL  = 0x8000000, // can be used in set_type() to avoid merging into ellipsis
  TS_SHRINK = 0x4000000, // can be used in set_type() to prefer smaller arguments
  TS_DONTREF = 0x2000000, // do not mark type as referenced (referenced_types)
  TS_MASK   = 0xE000000, // all high bits (TS_NOELL|TS_SHRINK|TS_DONTREF)
};
1045 
1046 
1047 /// Get a global type.
1048 /// Global types are types of addressable objects and struct/union/enum types
1049 /// \param id address or id of the object
1050 /// \param tif buffer for the answer
1051 /// \param guess what kind of types to consider
1052 /// \return success
1053 
1054 bool hexapi get_type(uval_t id, tinfo_t *tif, type_source_t guess);
1055 
1056 
1057 /// Set a global type.
1058 /// \param id address or id of the object
1059 /// \param tif new type info
1060 /// \param source where the type comes from
1061 /// \param force true means to set the type as is, false means to merge the
1062 /// new type with the possibly existing old type info.
1063 /// \return success
1064 
1065 bool hexapi set_type(uval_t id, const tinfo_t &tif, type_source_t source, bool force=false);
1066 
1067 //@}
1068 
1069 //-------------------------------------------------------------------------
1070 // We use our own class to store argument and variable locations.
1071 // It is called vdloc_t that stands for 'vd location'.
1072 // 'vd' is the internal name of the decompiler, it stands for 'visual decompiler'.
1073 // The main differences between vdloc and argloc_t:
1074 // ALOC_REG1: the offset is always 0, so it is not used. the register number
1075 // uses the whole ~VLOC_MASK field.
1076 // ALOCK_STKOFF: stack offsets are always positive because they are based on
1077 // the lowest value of sp in the function.
1078 class vdloc_t : public argloc_t
1079 {
1080  int regoff(void); // inaccessible & undefined: regoff() should not be used
1081 public:
1082  // Get the register number.
1083  // This function works only for ALOC_REG1 and ALOC_REG2 location types.
1084  // It uses all available bits for register number for ALOC_REG1
1085  int reg1(void) const { return atype() == ALOC_REG2 ? argloc_t::reg1() : get_reginfo(); }
1086 
1087  // Set vdloc to point to the specified register without cleaning it up.
1088  // This is a dangerous function, use set_reg1() instead unless you understand
1089  // what it means to cleanup an argloc.
1090  void _set_reg1(int r1) { argloc_t::_set_reg1(r1, r1>>16); }
1091 
1092  // Set vdloc to point to the specified register.
1093  void set_reg1(int r1) { cleanup_argloc(this); _set_reg1(r1); }
1094 
1095  // Use member functions of argloc_t for other location types.
1096 
1097  // Return textual representation.
1098  // Note: this and all other dstr() functions can be used from a debugger.
1099  // It is much easier than to inspect the memory contents byte by byte.
1100  const char *hexapi dstr(int width=0) const;
1101  DECLARE_COMPARISONS(vdloc_t);
1102  bool hexapi is_aliasable(const mbl_array_t *mb, int size) const;
1103 };
1104 
1105 /// Print vdloc.
1106 /// Since vdloc does not always carry the size info, we pass it as NBYTES.
1107 void hexapi print_vdloc(qstring *vout, const vdloc_t &loc, int nbytes);
1108 
1109 //-------------------------------------------------------------------------
1110 /// Do two arglocs overlap?
1111 bool hexapi arglocs_overlap(const vdloc_t &loc1, size_t w1, const vdloc_t &loc2, size_t w2);
1112 
1113 /// Local variable locator.
1114 /// Local variables are located using definition ea and location.
1115 /// Each variable must have a unique locator, this is how we tell them apart.
1117 {
1118  vdloc_t location; ///< Variable location.
1119  ea_t defea; ///< Definition address. The address of an instruction
1120  ///< that initializes the variable. This value is
1121  ///< assigned to each lvar by lvar allocator.
1122  ///< BADADDR for function arguments
1123  lvar_locator_t(void) : defea(BADADDR) {}
1124  lvar_locator_t(const vdloc_t &loc, ea_t ea) : location(loc), defea(ea) {}
1125  /// Get offset of the varialbe in the stack frame.
1126  /// \return a non-negative value for stack variables. The value is
1127  /// an offset from the bottom of the stack frame in terms of
1128  /// vd-offsets.
1129  /// negative values mean error (not a stack variable)
1130  sval_t get_stkoff(void) const
1131  {
1132  return location.is_stkoff() ? location.stkoff() : -1;
1133  }
1134  /// Is variable located on one register?
1135  bool is_reg1(void) const { return location.is_reg1(); }
1136  /// Is variable located on two registers?
1137  bool is_reg2(void) const { return location.is_reg2(); }
1138  /// Is variable located on register(s)?
1139  bool is_reg_var(void) const { return location.is_reg(); }
1140  /// Is variable located on the stack?
1141  bool is_stk_var(void) const { return location.is_stkoff(); }
1142  /// Is variable scattered?
1143  bool is_scattered(void) const { return location.is_scattered(); }
1144  /// Get the register number of the variable
1145  mreg_t get_reg1(void) const { return location.reg1(); }
1146  /// Get the number of the second register (works only for ALOC_REG2 lvars)
1147  mreg_t get_reg2(void) const { return location.reg2(); }
1148  /// Get information about scattered variable
1149  const scattered_aloc_t &get_scattered(void) const { return location.scattered(); }
1150  scattered_aloc_t &get_scattered(void) { return location.scattered(); }
1151  DECLARE_COMPARISONS(lvar_locator_t);
1152  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
1153  // Debugging: get textual representation of a lvar locator.
1154  const char *hexapi dstr(void) const;
1155 };
1156 
1157 /// Definition of a local variable (register or stack) #var #lvar
1158 class lvar_t : public lvar_locator_t
1159 {
1160  friend class mbl_array_t;
1161  int flags; ///< \ref CVAR_
1162 /// \defgroup CVAR_ Local variable property bits
1163 /// Used in lvar_t::flags
1164 //@{
1165 #define CVAR_USED 0x00000001 ///< is used in the code?
1166 #define CVAR_TYPE 0x00000002 ///< the type is defined?
1167 #define CVAR_NAME 0x00000004 ///< has nice name?
1168 #define CVAR_MREG 0x00000008 ///< corresponding mregs were replaced?
1169 #define CVAR_NOWD 0x00000010 ///< width is unknown
1170 #define CVAR_UNAME 0x00000020 ///< user-defined name
1171 #define CVAR_UTYPE 0x00000040 ///< user-defined type
1172 #define CVAR_RESULT 0x00000080 ///< function result variable
1173 #define CVAR_ARG 0x00000100 ///< function argument
1174 #define CVAR_FAKE 0x00000200 ///< fake variable (return var or va_list)
1175 #define CVAR_OVER 0x00000400 ///< overlapping variable
1176 #define CVAR_FLOAT 0x00000800 ///< used in a fpu insn
1177 #define CVAR_SPOILED 0x00001000 ///< internal flag, do not use: spoiled var
1178 #define CVAR_MAPDST 0x00002000 ///< other variables are mapped to this var
1179 #define CVAR_PARTIAL 0x00004000 ///< variable type is partialy defined
1180 #define CVAR_THISARG 0x00008000 ///< 'this' argument of c++ member functions
1181 #define CVAR_FORCED 0x00010000 ///< variable was created by an explicit request
1182  ///< otherwise we could reuse an existing var
1183 #define CVAR_REGNAME 0x00020000 ///< has a register name (like _RAX)
1184 #define CVAR_NOPTR 0x00040000 ///< variable cannot be a pointer (user choice)
1185 #define CVAR_DUMMY 0x00080000 ///< dummy argument (added to fill a hole in
1186  ///< the argument list)
1187 #define CVAR_NOTARG 0x00100000 ///< variable cannot be an input argument
1188 #define CVAR_AUTOMAP 0x00200000 ///< variable was automatically mapped
1189 //@}
1190 
1191 public:
1192  qstring name; ///< variable name.
1193  ///< use mbl_array_t::set_nice_lvar_name() and
1194  ///< mbl_array_t::set_user_lvar_name() to modify it
1195  qstring cmt; ///< variable comment string
1196  tinfo_t tif; ///< variable type
1197  int width; ///< variable size in bytes
1198  int defblk; ///< first block defining the variable.
1199  ///< 0 for args, -1 if unknown
1200  uint64 divisor; ///< max known divisor of the variable
1201 
1202  lvar_t(void) : flags(CVAR_USED), width(0), defblk(-1), divisor(0) {}
1203  lvar_t(const qstring &n, const vdloc_t &l, ea_t e, const tinfo_t &t, int w, int db)
1204  : lvar_locator_t(l, e), flags(CVAR_USED), name(n), tif(t), width(w),
1205  defblk(db), divisor(0) {}
1206  lvar_t(mreg_t reg, int width, const tinfo_t &type, int nblock, ea_t defea);
1207  // Debugging: get textual representation of a local variable.
1208  const char *hexapi dstr(void) const;
1209 
1210  /// Is the variable used in the code?
1211  bool used(void) const { return (flags & CVAR_USED) != 0; }
1212  /// Has the variable a type?
1213  bool typed(void) const { return (flags & CVAR_TYPE) != 0; }
1214  /// Have corresponding microregs been replaced by references to this variable?
1215  bool mreg_done(void) const { return (flags & CVAR_MREG) != 0; }
1216  /// Does the variable have a nice name?
1217  bool has_nice_name(void) const { return (flags & CVAR_NAME) != 0; }
1218  /// Do we know the width of the variable?
1219  bool is_unknown_width(void) const { return (flags & CVAR_NOWD) != 0; }
1220  /// Has any user-defined information?
1221  bool has_user_info(void) const { return (flags & (CVAR_UNAME|CVAR_UTYPE|CVAR_NOPTR)) != 0 || !cmt.empty(); }
1222  /// Has user-defined name?
1223  bool has_user_name(void) const { return (flags & CVAR_UNAME) != 0; }
1224  /// Has user-defined type?
1225  bool has_user_type(void) const { return (flags & CVAR_UTYPE) != 0; }
1226  /// Is the function result?
1227  bool is_result_var(void) const { return (flags & CVAR_RESULT) != 0; }
1228  /// Is the function argument?
1229  bool is_arg_var(void) const { return (flags & CVAR_ARG) != 0; }
1230  /// Is the promoted function argument?
1231  bool hexapi is_promoted_arg(void) const;
1232  /// Is fake return variable?
1233  bool is_fake_var(void) const { return (flags & CVAR_FAKE) != 0; }
1234  /// Is overlapped variable?
1235  bool is_overlapped_var(void) const { return (flags & CVAR_OVER) != 0; }
1236  /// Used by a fpu insn?
1237  bool is_floating_var(void) const { return (flags & CVAR_FLOAT) != 0; }
1238  /// Is spoiled var? (meaningful only during lvar allocation)
1239  bool is_spoiled_var(void) const { return (flags & CVAR_SPOILED) != 0; }
1240  /// Variable type should be handled as a partial one
1241  bool is_partialy_typed(void) const { return (flags & CVAR_PARTIAL) != 0; }
1242  /// Variable type should not be a pointer
1243  bool is_noptr_var(void) const { return (flags & CVAR_NOPTR) != 0; }
1244  /// Other variable(s) map to this var?
1245  bool is_mapdst_var(void) const { return (flags & CVAR_MAPDST) != 0; }
1246  /// Is 'this' argument of a C++ member function?
1247  bool is_thisarg(void) const { return (flags & CVAR_THISARG) != 0; }
1248  /// Is a forced variable?
1249  bool is_forced_var(void) const { return (flags & CVAR_FORCED) != 0; }
1250  /// Has a register name? (like _RAX)
1251  bool has_regname(void) const { return (flags & CVAR_REGNAME) != 0; }
1252  /// Is a dummy argument (added to fill a hole in the argument list)
1253  bool is_dummy_arg(void) const { return (flags & CVAR_DUMMY) != 0; }
1254  /// Is a local variable? (local variable cannot be an input argument)
1255  bool is_notarg(void) const { return (flags & CVAR_NOTARG) != 0; }
1256  /// Was a local variable automatically mapped to another variable?
1257  bool is_automapped(void) const { return (flags & CVAR_AUTOMAP) != 0; }
1258  void set_used(void) { flags |= CVAR_USED; }
1259  void clear_used(void) { flags &= ~CVAR_USED; }
1260  void set_typed(void) { flags |= CVAR_TYPE; clr_noptr_var(); }
1261  void set_non_typed(void) { flags &= ~CVAR_TYPE; }
1262  void clr_user_info(void) { flags &= ~(CVAR_UNAME|CVAR_UTYPE|CVAR_NOPTR); }
1263  void set_user_name(void) { flags |= CVAR_NAME|CVAR_UNAME; }
1264  void set_user_type(void) { flags |= CVAR_TYPE|CVAR_UTYPE; }
1265  void clr_user_type(void) { flags &= ~CVAR_UTYPE; }
1266  void clr_user_name(void) { flags &= ~CVAR_UNAME; }
1267  void set_mreg_done(void) { flags |= CVAR_MREG; }
1268  void clr_mreg_done(void) { flags &= ~CVAR_MREG; }
1269  void set_unknown_width(void) { flags |= CVAR_NOWD; }
1270  void clr_unknown_width(void) { flags &= ~CVAR_NOWD; }
1271  void set_arg_var(void) { flags |= CVAR_ARG; }
1272  void clr_arg_var(void) { flags &= ~(CVAR_ARG|CVAR_THISARG); }
1273  void set_fake_var(void) { flags |= CVAR_FAKE; }
1274  void clr_fake_var(void) { flags &= ~CVAR_FAKE; }
1275  void set_overlapped_var(void) { flags |= CVAR_OVER; }
1276  void clr_overlapped_var(void) { flags &= ~CVAR_OVER; }
1277  void set_floating_var(void) { flags |= CVAR_FLOAT; }
1278  void clr_floating_var(void) { flags &= ~CVAR_FLOAT; }
1279  void set_spoiled_var(void) { flags |= CVAR_SPOILED; }
1280  void clr_spoiled_var(void) { flags &= ~CVAR_SPOILED; }
1281  void set_mapdst_var(void) { flags |= CVAR_MAPDST; }
1282  void clr_mapdst_var(void) { flags &= ~CVAR_MAPDST; }
1283  void set_partialy_typed(void) { flags |= CVAR_PARTIAL; }
1284  void clr_partialy_typed(void) { flags &= ~CVAR_PARTIAL; }
1285  void set_noptr_var(void) { flags |= CVAR_NOPTR; }
1286  void clr_noptr_var(void) { flags &= ~CVAR_NOPTR; }
1287  void set_thisarg(void) { flags |= CVAR_THISARG; }
1288  void clr_thisarg(void) { flags &= ~CVAR_THISARG; }
1289  void set_forced_var(void) { flags |= CVAR_FORCED; }
1290  void clr_forced_var(void) { flags &= ~CVAR_FORCED; }
1291  void set_dummy_arg(void) { flags |= CVAR_DUMMY; }
1292  void clr_dummy_arg(void) { flags &= ~CVAR_DUMMY; }
1293  void set_notarg(void) { clr_arg_var(); flags |= CVAR_NOTARG; }
1294  void clr_notarg(void) { flags &= ~CVAR_NOTARG; }
1295  void set_automapped(void) { flags |= CVAR_AUTOMAP; }
1296  void clr_automapped(void) { flags &= ~CVAR_AUTOMAP; }
1297 
1298  /// Do variables overlap?
1299  bool has_common(const lvar_t &v) const
1300  {
1301  return arglocs_overlap(location, width, v.location, v.width);
1302  }
1303  /// Does the variable overlap with the specified location?
1304  bool has_common_bit(const vdloc_t &loc, asize_t width2) const
1305  {
1306  return arglocs_overlap(location, width, loc, width2);
1307  }
1308  /// Get variable type
1309  const tinfo_t &type(void) const { return tif; }
1310  tinfo_t &type(void) { return tif; }
1311 
1312  /// Check if the variable accept the specified type.
1313  /// Some types are forbidden (void, function types, wrong arrays, etc)
1314  bool hexapi accepts_type(const tinfo_t &t, bool may_change_thisarg=false);
1315  /// Set variable type
1316  /// Note: this function does not modify the idb, only the lvar instance
1317  /// in the memory. For permanent changes see modify_user_lvars()
1318  /// Also, the variable type is not considered as final by the decompiler
1319  /// and may be modified later by the type derivation.
1320  /// In some cases set_final_var_type() may work better, but it does not
1321  /// do persistent changes to the database neither.
1322  /// \param t new type
1323  /// \param may_fail if false and type is bad, interr
1324  /// \return success
1325  bool hexapi set_lvar_type(const tinfo_t &t, bool may_fail=false);
1326 
1327  /// Set final variable type.
1328  void set_final_lvar_type(const tinfo_t &t)
1329  {
1330  set_lvar_type(t);
1331  set_typed();
1332  }
1333 
1334  /// Change the variable width.
1335  /// We call the variable size 'width', it is represents the number of bytes.
1336  /// This function may change the variable type using set_lvar_type().
1337  /// \param w new width
1338  /// \param svw_flags combination of SVW_... bits
1339  /// \return success
1340  bool hexapi set_width(int w, int svw_flags=0);
1341 #define SVW_INT 0x00 // integer value
1342 #define SVW_FLOAT 0x01 // floating point value
1343 #define SVW_SOFT 0x02 // may fail and return false;
1344  // if this bit is not set and the type is bad, interr
1345 
1346  /// Append local variable to mlist.
1347  /// \param lst list to append to
1348  /// \param if true, append padding bytes in case of scattered lvar
1349  void hexapi append_list(mlist_t *lst, bool pad_if_scattered=false) const;
1350 
1351  /// Is the variable aliasable?
1352  /// \param mba ptr to the current mbl_array_t
1353  /// Aliasable variables may be modified indirectly (through a pointer)
1354  bool is_aliasable(const mbl_array_t *mba) const
1355  {
1356  return location.is_aliasable(mba, width);
1357  }
1358 
1359 };
1360 DECLARE_TYPE_AS_MOVABLE(lvar_t);
1361 
1362 /// Vector of local variables
1363 struct lvars_t : public qvector<lvar_t>
1364 {
1365  /// Find input variable at the specified location.
1366  /// \param argloc variable location
1367  /// \param _size variable size
1368  /// \return -1 if failed, otherwise the index into the variables vector.
1369  int find_input_lvar(const vdloc_t &argloc, int _size) { return find_lvar(argloc, _size, 0); }
1370 
1371 
1372  /// Find stack variable at the specified location.
1373  /// \param spoff offset from the minimal sp
1374  /// \param width variable size
1375  /// \return -1 if failed, otherwise the index into the variables vector.
1376  int hexapi find_stkvar(int32 spoff, int width);
1377 
1378 
1379  /// Find variable at the specified location.
1380  /// \param ll variable location
1381  /// \return pointer to variable or NULL
1382  lvar_t *hexapi find(const lvar_locator_t &ll);
1383 
1384 
1385  /// Find variable at the specified location.
1386  /// \param location variable location
1387  /// \param width variable size
1388  /// \param defblk definition block of the lvar. -1 means any block
1389  /// \return -1 if failed, otherwise the index into the variables vector.
1390  int hexapi find_lvar(const vdloc_t &location, int width, int defblk=-1);
1391 };
1392 
1393 /// Saved user settings for local variables: name, type, comment.
1395 {
1396  lvar_locator_t ll; ///< Variable locator
1397  qstring name; ///< Name
1398  tinfo_t type; ///< Type
1399  qstring cmt; ///< Comment
1400  ssize_t size; ///< Type size (if not initialized then -1)
1401  int flags; ///< \ref LVINF_
1402 /// \defgroup LVINF_ saved user lvar info property bits
1403 /// Used in lvar_saved_info_t::flags
1404 //@{
1405 #define LVINF_KEEP 0x0001 ///< preserve saved user settings regardless of vars
1406  ///< for example, if a var loses all its
1407  ///< user-defined attributes or even gets
1408  ///< destroyed, keep its lvar_saved_info_t.
1409  ///< this is used for ephemeral variables that
1410  ///< get destroyed by macro recognition.
1411 #define LVINF_FORCE 0x0002 ///< force allocation of a new variable.
1412  ///< forces the decompiler to create a new
1413  ///< variable at ll.defea
1414 #define LVINF_NOPTR 0x0004 ///< variable type should not be a pointer
1415 #define LVINF_NOMAP 0x0008 ///< forbid automatic mapping of the variable
1416 //@}
1417  lvar_saved_info_t(void) : size(BADSIZE), flags(0) {} // size starts as BADSIZE, no flag bits set
1418  bool has_info(void) const // is there anything worth saving? (LVINF_KEEP alone does not count)
1419  {
1420  return !name.empty()
1421  || !type.empty()
1422  || !cmt.empty()
1423  || is_forced_lvar()
1424  || is_noptr_lvar()
1425  || is_nomap_lvar();
1426  }
1427  bool operator==(const lvar_saved_info_t &r) const // compares name, cmt, ll and type; 'size' and 'flags' are not compared
1428  {
1429  return name == r.name
1430  && cmt == r.cmt
1431  && ll == r.ll
1432  && type == r.type;
1433  }
1434  bool operator!=(const lvar_saved_info_t &r) const { return !(*this == r); }
1435  bool is_kept(void) const { return (flags & LVINF_KEEP) != 0; } // test LVINF_KEEP
1436  void clear_keep(void) { flags &= ~LVINF_KEEP; } // reset LVINF_KEEP
1437  void set_keep(void) { flags |= LVINF_KEEP; } // set LVINF_KEEP
1438  bool is_forced_lvar(void) const { return (flags & LVINF_FORCE) != 0; } // test LVINF_FORCE
1439  void set_forced_lvar(void) { flags |= LVINF_FORCE; } // set LVINF_FORCE
1440  void clr_forced_lvar(void) { flags &= ~LVINF_FORCE; } // reset LVINF_FORCE
1441  bool is_noptr_lvar(void) const { return (flags & LVINF_NOPTR) != 0; } // test LVINF_NOPTR
1442  void set_noptr_lvar(void) { flags |= LVINF_NOPTR; } // set LVINF_NOPTR
1443  void clr_noptr_lvar(void) { flags &= ~LVINF_NOPTR; } // reset LVINF_NOPTR
1444  bool is_nomap_lvar(void) const { return (flags & LVINF_NOMAP) != 0; } // test LVINF_NOMAP
1445  void set_nomap_lvar(void) { flags |= LVINF_NOMAP; } // set LVINF_NOMAP
1446  void clr_nomap_lvar(void) { flags &= ~LVINF_NOMAP; } // reset LVINF_NOMAP
1447 };
1448 DECLARE_TYPE_AS_MOVABLE(lvar_saved_info_t);
1449 typedef qvector<lvar_saved_info_t> lvar_saved_infos_t;
1450 
1451 /// Local variable mapping (is used to merge variables)
1452 typedef std::map<lvar_locator_t, lvar_locator_t> lvar_mapping_t;
1453 
1454 /// All user-defined information about local variables
1456 {
1457  /// User-specified names, types, comments for lvars. Variables without
1458  /// user-specified info are not present in this vector.
1459  lvar_saved_infos_t lvvec;
1460 
1461  /// Local variable mapping (used for merging variables)
1463 
1464  /// Delta to add to IDA stack offset to calculate Hex-Rays stack offsets.
1465  /// Should be set by the caller before calling save_user_lvar_settings();
1467 
1468  /// Various flags. Possible values are from \ref ULV_
1470 /// \defgroup ULV_ lvar_uservec_t property bits
1471 /// Used in lvar_uservec_t::ulv_flags
1472 //@{
1473 #define ULV_PRECISE_DEFEA 0x0001 ///< Use precise defea's for lvar locations
1474 //@}
1475  // Initial state: zero stack delta, ULV_PRECISE_DEFEA set
1476  lvar_uservec_t(void) : stkoff_delta(0), ulv_flags(ULV_PRECISE_DEFEA) {}
1477  void swap(lvar_uservec_t &r) // exchange all four members with 'r'
1478  {
1479  lvvec.swap(r.lvvec);
1480  lmaps.swap(r.lmaps);
1481  std::swap(stkoff_delta, r.stkoff_delta);
1482  std::swap(ulv_flags, r.ulv_flags);
1483  }
1484  void clear() // back to the state produced by the constructor
1485  {
1486  lvvec.clear();
1487  lmaps.clear();
1488  stkoff_delta = 0;
1489  ulv_flags = ULV_PRECISE_DEFEA;
1490  }
1491  // Linear search of lvvec: returns the first entry whose 'll' equals vloc, NULL if none.
1492  /// find saved user settings for given var
1494  {
1495  for ( lvar_saved_infos_t::iterator p=lvvec.begin(); p != lvvec.end(); ++p )
1496  {
1497  if ( p->ll == vloc )
1498  return p;
1499  }
1500  return NULL;
1501  }
1502 
1503  /// Preserve user settings for given var
1504  void keep_info(const lvar_t &v) // sets LVINF_KEEP on the saved entry of 'v'; no-op if 'v' has no saved entry
1505  {
1506  lvar_saved_info_t *p = find_info(v);
1507  if ( p != NULL )
1508  p->set_keep();
1509  }
1510 };
1511 
1512 /// Restore user defined local variable settings from the database.
1513 /// \param lvinf ptr to output buffer
1514 /// \param func_ea entry address of the function
1515 /// \return success
1516 
1517 bool hexapi restore_user_lvar_settings(lvar_uservec_t *lvinf, ea_t func_ea);
1518 
1519 
1520 /// Save user defined local variable settings into the database.
1521 /// \param func_ea entry address of the function
1522 /// \param lvinf user-specified info about local variables
1523 
1524 void hexapi save_user_lvar_settings(ea_t func_ea, const lvar_uservec_t &lvinf);
1525 
1526 
1527 /// Helper class to modify saved local variable settings (see modify_user_lvars).
1529 {
1530  /// Modify lvar settings. Pure virtual: the derived class provides the modification.
1531  /// \return true if the settings in 'lvinf' were modified
1532  virtual bool idaapi modify_lvars(lvar_uservec_t *lvinf) = 0;
1533 };
1534 
1535 /// Modify saved local variable settings.
1536 /// \param entry_ea function start address
1537 /// \param mlv local variable modifier
1538 /// \return true if modified variables
1539 
1540 bool hexapi modify_user_lvars(ea_t entry_ea, user_lvar_modifier_t &mlv);
1541 
1542 
1543 /// Modify saved local variable settings of one variable.
1544 /// \param func_ea function start address
1545 /// \param mli_flags bits that specify which attrs defined by INFO are to be set
1546 /// \param info local variable info attrs
1547 /// \return true if modified, false if invalid MLI_FLAGS passed
1548 
1549 bool hexapi modify_user_lvar_info(ea_t func_ea, uint mli_flags, const lvar_saved_info_t &info);
1550 /// \defgroup MLI_ user info bits
1551 //@{
1552 #define MLI_NAME 0x01 ///< apply lvar name
1553 #define MLI_TYPE 0x02 ///< apply lvar type
1554 #define MLI_CMT 0x04 ///< apply lvar comment
1555 #define MLI_SET_FLAGS 0x08 ///< set LVINF_... bits
1556 #define MLI_CLR_FLAGS 0x10 ///< clear LVINF_... bits
1557 //@}
1558 
1559 //-------------------------------------------------------------------------
1560 /// User-defined function call: a function name together with its prototype
1561 struct udcall_t
1562 {
1563  qstring name; ///< name of the function
1564  tinfo_t tif; ///< function prototype
1565  DECLARE_COMPARISONS(udcall_t) // ordering: by name first, then by prototype
1566  {
1567  int code = ::compare(name, r.name);
1568  if ( code == 0 ) // names are equal: the prototypes decide
1569  code = ::compare(tif, r.tif);
1570  return code; // propagate the comparison result (a constant 0 would make all objects equal)
1571  }
1572 };
1573 
1574 // All user-defined function calls (map address -> udcall)
1575 typedef std::map<ea_t, udcall_t> udcall_map_t;
1576 
1577 /// Restore user defined function calls from the database.
1578 /// \param udcalls ptr to output buffer
1579 /// \param func_ea entry address of the function
1580 /// \return success
1581 
1582 bool hexapi restore_user_defined_calls(udcall_map_t *udcalls, ea_t func_ea);
1583 
1584 
1585 /// Save user defined local function calls into the database.
1586 /// \param func_ea entry address of the function
1587 /// \param udcalls user-specified info about user defined function calls
1588 
1589 void hexapi save_user_defined_calls(ea_t func_ea, const udcall_map_t &udcalls);
1590 
1591 
1592 /// Convert function type declaration into internal structure
1593 /// \param udc - pointer to output structure
1594 /// \param decl - function type declaration
1595 /// \param silent - if TRUE: do not show warning in case of incorrect type
1596 /// \return success
1597 
1598 bool hexapi parse_user_call(udcall_t *udc, const char *decl, bool silent);
1599 
1600 
1601 /// try to generate user-defined call for an instruction
1602 /// \return \ref MERR_ code:
1603 /// MERR_OK - user-defined call generated
1604 /// else - error (MERR_INSN == unacceptable udc.tif)
1605 
1607 
1608 
1609 //-------------------------------------------------------------------------
1610 /// Generic microcode generator class.
1611 /// An instance of a derived class can be registered to be used for
1612 /// non-standard microcode generation. Before microcode generation for an
1613 /// instruction all registered objects are visited in the following way:
1614 /// if ( filter->match(cdg) )
1615 /// code = filter->apply(cdg);
1616 /// if ( code == MERR_OK )
1617 /// continue; // filter generated microcode, go to the next instruction
1619 {
1620  /// check if the filter object is to be applied
1621  /// \return true if apply() should be called for the current instruction
1622  virtual bool match(codegen_t &cdg) = 0;
1623 
1624  /// generate microcode for an instruction
1625  /// \return MERR_... code:
1626  /// MERR_OK - user-defined call generated, go to the next instruction
1627  /// MERR_INSN - not generated - the caller should try the standard way
1628  /// else - error
1629  virtual merror_t apply(codegen_t &cdg) = 0;
1630 };
1631 
1632 /// register/unregister non-standard microcode generator
1633 /// \param filter - microcode generator object
1634 /// \param install - TRUE - register the object, FALSE - unregister
1635 void hexapi install_microcode_filter(microcode_filter_t *filter, bool install=true);
1636 
1637 //-------------------------------------------------------------------------
1638 /// Abstract class: User-defined call generator
1639 /// derived classes should implement method 'match'
1641 {
1642  udcall_t udc; // the user-defined call (function name and prototype)
1643 
1644 public:
1645  /// return true if the filter object should be applied to given instruction
1646  virtual bool match(codegen_t &cdg) = 0;
1647  // NOTE(review): init() presumably parses 'decl' into 'udc' (cf. parse_user_call) - confirm
1648  bool hexapi init(const char *decl);
1649  virtual merror_t hexapi apply(codegen_t &cdg); // not pure: an implementation is provided
1650 };
1651 
1652 //-------------------------------------------------------------------------
1653 typedef size_t mbitmap_t; // storage word of a bitmap
1654 const size_t bitset_width = sizeof(mbitmap_t) * CHAR_BIT; // number of bits in one mbitmap_t
1655 const size_t bitset_align = bitset_width - 1; // NOTE(review): looks like the mask of a bit index within a word - confirm
1656 const size_t bitset_shift = 6; // 1<<6 == 64; NOTE(review): equals log2(bitset_width) only if mbitmap_t is 64 bits wide - confirm
1657 
1658 /// Bit set class. See https://en.wikipedia.org/wiki/Bit_array
1660 {
1661  mbitmap_t *bitmap; ///< pointer to bitmap
1662  size_t high; ///< highest bit+1 (multiple of bitset_width)
1663 
1664 public:
1665  bitset_t(void) : bitmap(NULL), high(0) {} // empty set, no bitmap attached
1666  hexapi bitset_t(const bitset_t &m); // copy constructor
1667  ~bitset_t(void) // releases the bitmap with qfree()
1668  {
1669  qfree(bitmap);
1670  bitmap = NULL;
1671  }
1672  void swap(bitset_t &r) // exchange contents with 'r': only the pointer and 'high' are swapped
1673  {
1674  std::swap(bitmap, r.bitmap);
1675  std::swap(high, r.high);
1676  }
1677  bitset_t &operator=(const bitset_t &m) { return copy(m); } // assignment is delegated to copy()
1678  bitset_t &hexapi copy(const bitset_t &m); // assignment operator
1679  bool hexapi add(int bit); // add a bit
1680  bool hexapi add(int bit, int width); // add bits
1681  bool hexapi add(const bitset_t &ml); // add another bitset
1682  bool hexapi sub(int bit); // delete a bit
1683  bool hexapi sub(int bit, int width); // delete bits
1684  bool hexapi sub(const bitset_t &ml); // delete another bitset
1685  bool hexapi cut_at(int maxbit); // delete bits >= maxbit
1686  void hexapi shift_down(int shift); // shift bits down
1687  bool hexapi has(int bit) const; // test presence of a bit
1688  bool hexapi has_all(int bit, int width) const; // test presence of bits
1689  bool hexapi has_any(int bit, int width) const; // test presence of bits
1690  void print(
1691  qstring *vout,
1692  int (*get_bit_name)(qstring *out, int bit, int width, void *ud)=NULL,
1693  void *ud=NULL) const;
1694  const char *hexapi dstr(void) const;
1695  bool hexapi empty(void) const; // is empty?
1696  int hexapi count(void) const; // number of set bits
1697  int hexapi count(int bit) const; // get number set bits starting from 'bit'
1698  int hexapi last(void) const; // get the number of the last bit (-1-no bits)
1699  void clear(void) { high = 0; } // make empty (only 'high' is reset; 'bitmap' is not freed here)
1700  void hexapi fill_with_ones(int maxbit);
1701  bool fill_gaps(int total_nbits);
1702  bool hexapi has_common(const bitset_t &ml) const; // has common elements?
1703  bool hexapi intersect(const bitset_t &ml); // intersect sets. returns true if changed
1704  bool hexapi is_subset_of(const bitset_t &ml) const; // is subset of?
1705  bool includes(const bitset_t &ml) const { return ml.is_subset_of(*this); } // is 'ml' a subset of *this?
1706  void extract(intvec_t &out) const;
1707  DECLARE_COMPARISONS(bitset_t);
1708  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
1709  class iterator // wraps a bit number; advance it with bitset_t::inc()
1710  {
1711  friend class bitset_t;
1712  int i; // current bit number
1713  public:
1714  iterator(int n=-1) : i(n) {}
1715  bool operator==(const iterator &n) const { return i == n.i; }
1716  bool operator!=(const iterator &n) const { return i != n.i; }
1717  int operator*(void) const { return i; } // the bit number itself
1718  };
1719  typedef iterator const_iterator;
1720  iterator itat(int n) const { return iterator(goup(n)); } // iterator positioned at goup(n)
1721  iterator begin(void) const { return itat(0); }
1722  iterator end(void) const { return iterator(high); } // the end marker is 'high'
1723  int front(void) const { return *begin(); }
1724  int back(void) const { return *end(); } // NOTE(review): yields 'high' (the end marker), not the last set bit - confirm intent, cf. last()
1725  void inc(iterator &p, int n=1) const { p.i = goup(p.i+n); } // advance 'p' to goup(p.i+n)
1726 private:
1727  int hexapi goup(int reg) const; // NOTE(review): presumably the first set bit >= reg - confirm
1728 };
1729 DECLARE_TYPE_AS_MOVABLE(bitset_t);
1730 typedef qvector<bitset_t> array_of_bitsets;
1731 
1732 //-------------------------------------------------------------------------
1733 template <class T>
1734 struct ivl_tpl // an interval: 'size' items starting at 'off'
1735 {
1736 protected:
1737  // the default constructor is reserved for derived classes; it leaves off/size unset
1738  ivl_tpl(void) {}
1739 public:
1740  T off; // start of the interval
1741  T size; // length of the interval
1742  ivl_tpl(T _off, T _size) : off(_off), size(_size) {}
1743  bool valid() const { return off <= last(); } // the last item may not precede the start
1744  T end() const { return off + size; } // one past the last item
1745  T last() const { return off + size - 1; } // the last item
1746 
1747  DEFINE_MEMORY_ALLOCATION_FUNCS()
1748 };
1749 
1750 //-------------------------------------------------------------------------
1751 typedef ivl_tpl<uval_t> uval_ivl_t;
1752 struct ivl_t : public uval_ivl_t
1753 {
1754 private:
1755  typedef ivl_tpl<uval_t> inherited;
1756  // forbid the default constructor
1757  ivl_t(void) {}
1758  // ...except for use in a vector
1759  friend class qvector<ivl_t>;
1760 
1761 public:
1762  ivl_t(uval_t _off, uval_t _size) : inherited(_off,_size) {}
1763  bool empty(void) const { return size == 0; }
1764  void clear(void) { size = 0; }
1765  void print(qstring *vout) const;
1766  const char *hexapi dstr(void) const;
1767 
1768  bool extend_to_cover(const ivl_t &r) // extend interval to cover 'r'
1769  {
1770  uval_t new_end = end();
1771  bool changed = false;
1772  if ( off > r.off )
1773  {
1774  off = r.off;
1775  changed = true;
1776  }
1777  if ( new_end < r.end() )
1778  {
1779  new_end = r.end();
1780  changed = true;
1781  }
1782  if ( changed )
1783  size = new_end - off;
1784  return changed;
1785  }
1786  void intersect(const ivl_t &r)
1787  {
1788  uval_t new_off = qmax(off, r.off);
1789  uval_t new_end = end();
1790  if ( new_end > r.end() )
1791  new_end = r.end();
1792  if ( new_off < new_end )
1793  {
1794  off = new_off;
1795  size = new_end - off;
1796  }
1797  else
1798  {
1799  size = 0;
1800  }
1801  }
1802 
1803  // do *this and ivl overlap?
1804  bool overlap(const ivl_t &ivl) const
1805  {
1806  return interval::overlap(off, size, ivl.off, ivl.size);
1807  }
1808  // does *this include ivl?
1809  bool includes(const ivl_t &ivl) const
1810  {
1811  return interval::includes(off, size, ivl.off, ivl.size);
1812  }
1813  // does *this contain off2?
1814  bool contains(uval_t off2) const
1815  {
1816  return interval::contains(off, size, off2);
1817  }
1818 
1819  DECLARE_COMPARISONS(ivl_t);
1820  static const ivl_t allmem;
1821 #define ALLMEM ivl_t::allmem
1822 };
1823 DECLARE_TYPE_AS_MOVABLE(ivl_t);
1824 
1825 //-------------------------------------------------------------------------
1827 {
1828  ivl_t ivl; // the interval itself
1829  const char *whole; // name of the whole interval
1830  const char *part; // prefix to use for parts of the interval (e.g. sp+4)
1831  ivl_with_name_t(): ivl(0, BADADDR), whole("<unnamed inteval>"), part(NULL) {} // default: interval (0, BADADDR), placeholder name, no part prefix
1832  DEFINE_MEMORY_ALLOCATION_FUNCS()
1833 };
1834 
1835 //-------------------------------------------------------------------------
1836 template <class Ivl, class T>
1837 class ivlset_tpl // set of intervals
1838 {
1839 public:
1840  typedef qvector<Ivl> bag_t;
1841 
1842 protected:
1843  bag_t bag; // the stored intervals
1844  bool verify(void) const;
1845  // empty intervals are never stored in bag, so off == 0 with size == 0 is free to
1846  // denote MAX_VALUE<T>+1 values, e.g. 0x100000000 for uint32
1847  static bool ivl_all_values(const Ivl &ivl) { return !(ivl.off != 0 || ivl.size != 0); }
1848 
1849 public:
1850  ivlset_tpl(void) {}
1851  ivlset_tpl(const Ivl &ivl) { if ( !ivl.valid() ) return; bag.push_back(ivl); } // an invalid interval yields an empty set
1852  DEFINE_MEMORY_ALLOCATION_FUNCS()
1853 
1854  void swap(ivlset_tpl &r) { bag.swap(r.bag); } // exchange contents with 'r'
1855  const Ivl &getivl(int idx) const { return bag[idx]; } // interval number 'idx'
1856  const Ivl &lastivl(void) const { return bag.back(); } // the last stored interval
1857  size_t nivls(void) const { return bag.size(); } // number of stored intervals
1858  bool empty(void) const { return bag.empty(); } // no intervals at all?
1859  void clear(void) { bag.clear(); }
1860  void qclear(void) { bag.qclear(); }
1861  bool all_values() const { if ( bag.size() != 1 ) return false; return ivl_all_values(bag[0]); }
1862  void set_all_values() { bag.clear(); bag.push_back(Ivl(0, 0)); } // replace the contents with the (0, 0) "all values" marker
1863  bool single_value(T v) const { if ( bag.size() != 1 ) return false; const Ivl &only = bag[0]; return only.off == v && only.size == 1; }
1864 
1865  bool operator==(const Ivl &v) const { if ( bag.size() != 1 ) return false; return bag[0] == v; } // exactly one interval, equal to 'v'
1866  bool operator!=(const Ivl &v) const { return !operator==(v); }
1867 
1868  typedef typename bag_t::iterator iterator;
1869  typedef typename bag_t::const_iterator const_iterator;
1870  const_iterator begin(void) const { return bag.begin(); }
1871  const_iterator end(void) const { return bag.end(); }
1872  iterator begin(void) { return bag.begin(); }
1873  iterator end(void) { return bag.end(); }
1874 };
1875 
1876 //-------------------------------------------------------------------------
1877 /// Set of address intervals.
1878 /// Bit arrays are efficient only for small sets. Potentially huge
1879 /// sets, like memory ranges, require another representation.
1880 /// ivlset_t is used for a list of memory locations in our decompiler.
1883 {
1885  ivlset_t() {} // empty set
1886  ivlset_t(const ivl_t &ivl) : inherited(ivl) {} // set made of one interval; construction is left to the base class
1887  bool hexapi add(const ivl_t &ivl);
1888  bool add(ea_t ea, asize_t size) { return add(ivl_t(ea, size)); } // convenience overload: wraps (ea, size) into an ivl_t
1889  bool hexapi add(const ivlset_t &ivs);
1890  bool hexapi addmasked(const ivlset_t &ivs, const ivl_t &mask);
1891  bool hexapi sub(const ivl_t &ivl);
1892  bool sub(ea_t ea, asize_t size) { return sub(ivl_t(ea, size)); } // convenience overload: wraps (ea, size) into an ivl_t
1893  bool hexapi sub(const ivlset_t &ivs);
1894  bool hexapi has_common(const ivl_t &ivl, bool strict=false) const;
1895  void hexapi print(qstring *vout) const;
1896  const char *hexapi dstr(void) const;
1897  asize_t hexapi count(void) const;
1898  bool hexapi has_common(const ivlset_t &ivs) const;
1899  bool hexapi contains(uval_t off) const;
1900  bool hexapi includes(const ivlset_t &ivs) const;
1901  bool hexapi intersect(const ivlset_t &ivs);
1902  // NOTE(review): the bool results of add/sub/intersect presumably mean "the set changed" - confirm
1903  DECLARE_COMPARISONS(ivlset_t);
1904 
1905 };
1906 DECLARE_TYPE_AS_MOVABLE(ivlset_t);
1907 typedef qvector<ivlset_t> array_of_ivlsets;
1908 //-------------------------------------------------------------------------
1909 // A list of registers is stored as a bitset_t,
1910 // which is the most optimal storage for them.
1911 class rlist_t : public bitset_t
1912 {
1913 public:
1914  rlist_t(void) : bitset_t() {} // empty register list
1915  rlist_t(const rlist_t &m) // copy: the work is done by the bitset_t copy constructor
1916  : bitset_t(m) {
1917  }
1918  rlist_t(mreg_t reg, int width) : bitset_t() { bitset_t::add(reg, width); } // list made of 'width' bits starting at 'reg'
1919  ~rlist_t(void) {}
1920  void hexapi print(qstring *vout) const;
1921  const char *hexapi dstr(void) const;
1922 };
1923 DECLARE_TYPE_AS_MOVABLE(rlist_t);
1924 
1925 //-------------------------------------------------------------------------
1926 // Microlist: a register list paired with a set of memory intervals
1927 struct mlist_t
1928 {
1929  rlist_t reg; // register part
1930  ivlset_t mem; // memory part
1931 
1932  mlist_t(void) {}
1933  mlist_t(const ivl_t &ivl) : mem(ivl) {} // memory only
1934  mlist_t(mreg_t r, int size) : reg(r, size) {} // registers only
1935 
1936  void swap(mlist_t &r) { mem.swap(r.mem); reg.swap(r.reg); }
1937  bool hexapi addmem(ea_t ea, asize_t size);
1938  bool add(mreg_t r, int size) { const mlist_t tmp(r, size); return add(tmp); } // also see append_def_list()
1939  bool add(const rlist_t &r) { return reg.add(r); }
1940  bool add(const ivl_t &ivl) { const mlist_t tmp(ivl); return add(tmp); }
1941  bool add(const mlist_t &lst) { const bool r_chg = reg.add(lst.reg); const bool m_chg = mem.add(lst.mem); return r_chg || m_chg; } // both parts are always processed
1942  bool sub(mreg_t r, int size) { const mlist_t tmp(r, size); return sub(tmp); }
1943  bool sub(const ivl_t &ivl) { const mlist_t tmp(ivl); return sub(tmp); }
1944  bool sub(const mlist_t &lst) { const bool r_chg = reg.sub(lst.reg); const bool m_chg = mem.sub(lst.mem); return r_chg || m_chg; } // both parts are always processed
1945  asize_t count(void) const { return reg.count() + mem.count(); } // sum of the counts of both parts
1946  void hexapi print(qstring *vout) const;
1947  const char *hexapi dstr(void) const;
1948  bool empty(void) const { if ( !reg.empty() ) return false; return mem.empty(); }
1949  void clear(void) { reg.clear(); mem.clear(); }
1950  bool has(mreg_t r) const { return reg.has(r); } // register queries look at 'reg' only
1951  bool has_all(mreg_t r, int size) const { return reg.has_all(r, size); }
1952  bool has_any(mreg_t r, int size) const { return reg.has_any(r, size); }
1953  bool has_memory(void) const { return !mem.empty(); }
1954  bool has_allmem(void) const { return mem == ALLMEM; }
1955  bool has_common(const mlist_t &lst) const { if ( reg.has_common(lst.reg) ) return true; return mem.has_common(lst.mem); }
1956  bool includes(const mlist_t &lst) const { if ( !reg.includes(lst.reg) ) return false; return mem.includes(lst.mem); }
1957  bool intersect(const mlist_t &lst) { const bool r_chg = reg.intersect(lst.reg); const bool m_chg = mem.intersect(lst.mem); return r_chg || m_chg; } // both parts are always processed
1958  bool is_subset_of(const mlist_t &lst) const { return lst.includes(*this); }
1959 
1960  DECLARE_COMPARISONS(mlist_t);
1961  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
1962 };
1963 DECLARE_TYPE_AS_MOVABLE(mlist_t);
1964 typedef qvector<mlist_t> mlistvec_t;
1965 DECLARE_TYPE_AS_MOVABLE(mlistvec_t);
1966 
1967 ///------------------------------------------------------------------------
1968 /// Map a processor register to a microregister.
1969 /// \param reg processor register number
1970 /// \return microregister register id or mr_none
1971 
1972 mreg_t hexapi reg2mreg(int reg);
1973 
1974 
1975 /// Map a microregister to a processor register.
1976 /// \param reg microregister number
1977 /// \param width size of microregister in bytes
1978 /// \return processor register id or -1
1979 
1980 int hexapi mreg2reg(mreg_t reg, int width);
1981 
1982 
1983 /// Get the microregister name
1984 /// \param out output buffer, may be nullptr
1985 /// \param reg microregister number
1986 /// \param width size of microregister in bytes. may be bigger than the real
1987 /// register size.
1988 /// \param ud reserved, must be nullptr
1989 /// \return width of the printed register. this value may be less than
1990 /// the WIDTH argument.
1991 
1992 int hexapi get_mreg_name(qstring *out, mreg_t reg, int width, void *ud=nullptr);
1993 
1994 //-------------------------------------------------------------------------
1995 /// User defined callback to optimize individual microcode instructions.
1997 {
1998  /// Optimize an instruction. Pure virtual: the callback body is supplied by the derived class.
1999  /// \param blk current basic block. maybe NULL, which means that
2000  /// the instruction must be optimized without context
2001  /// \param ins instruction to optimize; it is always a top-level instruction.
2002  /// the callback may not delete the instruction but may
2003  /// convert it into nop (see mblock_t::make_nop). to optimize
2004  /// sub-instructions, visit them using minsn_visitor_t.
2005  /// sub-instructions may not be converted into nop but
2006  /// can be converted to "mov x,x". for example:
2007  /// add x,0,x => mov x,x
2008  /// \return number of changes made to the instruction.
2009  /// if after this call the instruction's use/def lists have changed,
2010  /// you must mark the block level lists as dirty (see mark_lists_dirty)
2011  virtual int idaapi func(mblock_t *blk, minsn_t *ins) = 0;
2012 };
2013 
2014 /// Install an instruction level custom optimizer
2015 /// \param opt an instance of optinsn_t. cannot be destroyed before calling
2016 /// remove_optinsn_handler().
2018 
2019 /// Remove an instruction level custom optimizer
2021 
2022 /// User defined callback to optimize microcode blocks.
2024 {
2025  /// Optimize a block. Pure virtual: the callback body is supplied by the derived class.
2026  /// This function usually performs the optimizations that require analyzing
2027  /// the entire block and/or its neighbors. For example it can recognize
2028  /// patterns and perform conversions like:
2029  /// b0: b0:
2030  /// ... ...
2031  /// jnz x, 0, @b2 => jnz x, 0, @b2
2032  /// b1: b1:
2033  /// add x, 0, y mov x, y
2034  /// ... ...
2035  /// \param blk Basic block to optimize as a whole.
2036  /// \return number of changes made to the block. See also mark_lists_dirty.
2037  virtual int idaapi func(mblock_t *blk) = 0;
2038 };
2039 
2040 /// Install a block level custom optimizer.
2041 /// \param opt an instance of optblock_t. cannot be destroyed before calling
2042 /// remove_optblock_handler().
2044 
2045 /// Remove a block level custom optimizer
2047 
2048 
2049 //-------------------------------------------------------------------------
2050 // abstract graph interface (built on top of gdl_graph_t)
2051 class simple_graph_t : public gdl_graph_t
2052 {
2053 public:
2054  qstring title; // graph title
2055  bool colored_gdl_edges; // NOTE(review): presumably selects colored edges in the GDL output - confirm
2056 private:
2057  friend class iterator;
2058  virtual int goup(int node) const; // private hook, reachable by the befriended 'iterator' class
2059 };
2060 
2061 //-------------------------------------------------------------------------
2062 // Since our data structures are quite complex, we use the visitor pattern
2063 // in many of our algorithms. This functionality is available for plugins too.
2064 // https://en.wikipedia.org/wiki/Visitor_pattern
2065 
2066 // All our visitor callbacks return an integer value.
2067 // Visiting is interrupted as soon as the return value is non-zero.
2068 // This non-zero value is returned as the result of the for_all_... function.
2069 // If for_all_... returns 0, it means that it successfully visited all items.
2070 
2071 /// The context info used by visitors
2073 {
2074  mbl_array_t *mba; // current block array
2075  mblock_t *blk; // current block
2076  minsn_t *topins; // top level instruction (parent of curins or curins itself)
2077  minsn_t *curins; // currently visited instruction
2079  mbl_array_t *_mba=NULL,
2080  mblock_t *_blk=NULL,
2081  minsn_t *_topins=NULL)
2082  : mba(_mba), blk(_blk), topins(_topins), curins(NULL) {} // all arguments are optional; curins always starts as NULL
2083  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2084  bool really_alloc(void) const;
2085 };
2086 
2087 /// Micro instruction visitor.
2088 /// See mbl_array_t::for_all_topinsns, minsn_t::for_all_insns,
2089 /// mblock_t::for_all_insns, mbl_array_t::for_all_insns
2091 {
2093  mbl_array_t *_mba=NULL,
2094  mblock_t *_blk=NULL,
2095  minsn_t *_topins=NULL)
2096  : op_parent_info_t(_mba, _blk, _topins) {} // the context is simply forwarded to op_parent_info_t
2097  virtual int idaapi visit_minsn(void) = 0; // callback, takes no arguments; implemented by the derived class
2098 };
2099 
2100 /// Micro operand visitor.
2101 /// See mop_t::for_all_ops, minsn_t::for_all_ops, mblock_t::for_all_insns,
2102 /// mbl_array_t::for_all_insns
2104 {
2105  mop_visitor_t(
2106  mbl_array_t *_mba=NULL,
2107  mblock_t *_blk=NULL,
2108  minsn_t *_topins=NULL)
2109  : op_parent_info_t(_mba, _blk, _topins), prune(false) {} // 'prune' starts as false
2110  /// Should skip sub-operands of the current operand?
2111  /// visit_mop() may set 'prune=true' for that.
2112  bool prune;
2113  virtual int idaapi visit_mop(mop_t *op, const tinfo_t *type, bool is_target) = 0; // callback; implemented by the derived class
2114 };
2115 
2116 /// Scattered mop: visit each of the scattered locations as a separate mop.
2117 /// See mop_t::for_all_scattered_submops
2119 {
2120  virtual int idaapi visit_scif_mop(const mop_t &r, int off) = 0; // callback; NOTE(review): 'off' is presumably the offset of 'r' inside the scattered operand - confirm
2121 };
2122 
2123 // Used operand visitor.
2124 // See mblock_t::for_all_uses
2126 {
2127  minsn_t *topins; // NOTE(review): presumably the top level instruction, as in op_parent_info_t - confirm
2128  minsn_t *curins; // NOTE(review): presumably the currently visited instruction - confirm
2129  bool changed;
2130  mlist_t *list;
2131  mlist_mop_visitor_t(void): topins(NULL), curins(NULL), changed(false), list(NULL) {} // everything starts as NULL/false
2132  virtual int idaapi visit_mop(mop_t *op) = 0; // callback; implemented by the derived class
2133 };
2134 
2135 //-------------------------------------------------------------------------
2136 /// Instruction operand types
2137 
2138 typedef uint8 mopt_t;
2139 const mopt_t
2140  mop_z = 0, ///< none
2141  mop_r = 1, ///< register (they exist until MMAT_LVARS)
2142  mop_n = 2, ///< immediate number constant
2143  mop_str = 3, ///< immediate string constant
2144  mop_d = 4, ///< result of another instruction
2145  mop_S = 5, ///< local stack variable (they exist until MMAT_LVARS)
2146  mop_v = 6, ///< global variable
2147  mop_b = 7, ///< micro basic block (mblock_t)
2148  mop_f = 8, ///< list of arguments
2149  mop_l = 9, ///< local variable
2150  mop_a = 10, ///< mop_addr_t: address of operand (mop_l, mop_v, mop_S, mop_r)
2151  mop_h = 11, ///< helper function
2152  mop_c = 12, ///< mcases
2153  mop_fn = 13, ///< floating point constant
2154  mop_p = 14, ///< operand pair
2155  mop_sc = 15; ///< scattered
2156 
2157 const int NOSIZE = -1; ///< wrong or nonexistent operand size
2158 
2159 //-------------------------------------------------------------------------
2160 /// Reference to a local variable. Used by mop_l
2162 {
2163  /// Pointer to the parent mbl_array_t object.
2164  /// Since we need to access the 'mba->vars' array in order to retrieve
2165  /// the referenced variable, we keep a pointer to mbl_array_t here.
2166  /// Note: this means this class and consequently mop_t, minsn_t, mblock_t
2167  /// are specific to a mbl_array_t object and cannot migrate between
2168  /// them. fortunately this is not something we need to do.
2169  /// second, lvar_ref_t's appear only after MMAT_LVARS.
2171  sval_t off; ///< offset from the beginning of the variable
2172  int idx; ///< index into mba->vars
2173  lvar_ref_t(mbl_array_t *m, int i, sval_t o=0) : mba(m), off(o), idx(i) {}
2174  lvar_ref_t(const lvar_ref_t &r) : mba(r.mba), off(r.off), idx(r.idx) {}
2175  lvar_ref_t &operator=(const lvar_ref_t &r) // copies 'off' and 'idx' only; 'mba' is not reassigned
2176  {
2177  off = r.off;
2178  idx = r.idx;
2179  return *this;
2180  }
2181  DECLARE_COMPARISONS(lvar_ref_t);
2182  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2183  void swap(lvar_ref_t &r) // exchanges 'off' and 'idx' only; 'mba' is not swapped
2184  {
2185  std::swap(off, r.off);
2186  std::swap(idx, r.idx);
2187  }
2188  lvar_t &hexapi var(void) const; ///< Retrieve the referenced variable
2189 };
2190 
2191 //-------------------------------------------------------------------------
2192 /// Reference to a stack variable. Used for mop_S
2194 {
2195  /// Pointer to the parent mbl_array_t object.
2196  /// We need it in order to retrieve the referenced stack variable.
2197  /// See notes for lvar_ref_t::mba.
2199 
2200  /// Offset to the stack variable from the bottom of the stack frame.
2201  /// It is called 'decompiler stkoff' and it is different from IDA stkoff.
2202  /// See a note and a picture about 'decompiler stkoff' below.
2203  sval_t off;
2204 
2205  stkvar_ref_t(mbl_array_t *m, sval_t o) : mba(m), off(o) {}
2206  DECLARE_COMPARISONS(stkvar_ref_t);
2207  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2208  void swap(stkvar_ref_t &r) // exchanges 'off' only; 'mba' is not swapped
2209  {
2210  std::swap(off, r.off);
2211  }
2212  /// Retrieve the referenced stack variable.
2213  /// \param p_off if specified, will hold IDA stkoff after the call.
2214  /// \return pointer to the stack variable
2215  member_t *hexapi get_stkvar(uval_t *p_off=NULL) const;
2216 };
2217 
2218 //-------------------------------------------------------------------------
2219 /// Scattered operand info. Used for mop_sc
2220 struct scif_t : public vdloc_t
2221 {
2222  /// Pointer to the parent mbl_array_t object.
2223  /// Some operations may convert a scattered operand into something simpler,
2224  /// (a stack operand, for example). We will need to create stkvar_ref_t at
2225  /// that moment, this is why we need this pointer.
2226  /// See notes for lvar_ref_t::mba.
2228 
2229  /// Usually scattered operands are created from a function prototype,
2230  /// which has the name information. We preserve it and use it to name
2231  /// the corresponding local variable.
2232  qstring name;
2233 
2234  /// Scattered operands always have type info assigned to them
2235  /// because without it we won't be able to manipulte them.
2236  tinfo_t type;
2237 
2238  scif_t(mbl_array_t *_mba, qstring *n, tinfo_t *tif) : mba(_mba)
2239  {
2240  n->swap(name);
2241  tif->swap(type);
2242  }
2243  scif_t &operator =(const vdloc_t &loc) // assign a plain location to this scattered operand info
2244  {
2245  *(vdloc_t *)this = loc; // only the vdloc_t base part is replaced; name and type are not modified
2246  return *this;
2247  }
2248 };
2249 
2250 //-------------------------------------------------------------------------
2251 /// An integer constant. Used for mop_n
2252 /// We support 64-bit values but 128-bit values can be represented with mop_p
2254 {
2255  uint64 value;
2256  uint64 org_value; // original value before changing the operand size
2257  mnumber_t(uint64 v, ea_t _ea=BADADDR, int n=0)
2258  : operand_locator_t(_ea, n), value(v), org_value(v) {}
2259  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2260  DECLARE_COMPARISONS(mnumber_t) // three-way comparison against 'r' using 'value' only (org_value is ignored)
2261  {
2262  if ( value < r.value )
2263  return -1; // this number is smaller
2264  if ( value > r.value )
2265  return 1; // this number is greater (used to return -1, which made the ordering inconsistent)
2266  return 0; // equal values
2267  }
2268  // always use this function instead of manually modifying the 'value' field:
2269  void update_value(uint64 val64) // it stores the new number in both 'value' and 'org_value'
2270  {
2271  value = val64;
2272  org_value = val64; // keep the remembered original value in sync with the new value
2273  }
2274 };
2275 
2276 //-------------------------------------------------------------------------
2277 /// Floating point constant. Used for mop_fn
2278 /// For more details, please see the ieee.h file from IDA SDK.
2280 {
2281  uint16 fnum[6]; ///< Internal representation of the number
2282  int nbytes; ///< Original size of the constant in bytes
2283  operator uint16 *(void) { return fnum; }
2284  operator const uint16 *(void) const { return fnum; }
2285  void hexapi print(qstring *vout) const;
2286  const char *hexapi dstr(void) const;
2287  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2288  DECLARE_COMPARISONS(fnumber_t) // compares the fnum arrays only; nbytes does not take part
2289  {
2290  return ecmp(fnum, r.fnum); // ecmp is declared outside this chunk; presumably a 3-way compare - TODO confirm
2291  }
2292 };
2293 
2294 //-------------------------------------------------------------------------
2295 /// \defgroup SHINS_ Bits to control how we print instructions
2296 //@{
2297 #define SHINS_NUMADDR 0x01 ///< display definition addresses for numbers
2298 #define SHINS_VALNUM 0x02 ///< display value numbers
2299 #define SHINS_SHORT 0x04 ///< do not display use-def chains and other attrs
2300 #define SHINS_LDXEA 0x08 ///< display address of ldx expressions (not used)
2301 //@}
2302 
2303 //-------------------------------------------------------------------------
2304 /// How to handle side effect of change_size()
2305 /// Sometimes we need to create a temporary operand and change its size in order
2306 /// to check some hypothesis. If we revert our changes, we do not want that the
2307 /// database (global variables, stack frame, etc) changes in any manner.
2309 {
2310  NO_SIDEFF, ///< change operand size but ignore side effects
2311  ///< if you decide to keep the changed operand,
2312  ///< handle_new_size() must be called
2313  WITH_SIDEFF, ///< change operand size and handle side effects
2314  ONLY_SIDEFF, ///< only handle side effects
2315  ANY_REGSIZE = 0x80, ///< any register size is permitted
2316 };
2317 
2318 // Max size of simple operands.
2319 // Please note there are some exceptions: udts, floating point, xmm/ymm, etc
2320 const int MAX_OPSIZE = 2 * sizeof(ea_t);
2321 const int DOUBLE_OPSIZE = 2 * MAX_OPSIZE;
2322 //-------------------------------------------------------------------------
2323 /// A microinstruction operand.
2324 /// This is the smallest building block of our microcode.
2325 /// Operands will be part of instructions, which are then grouped into basic blocks.
2326 /// The microcode consists of an array of such basic blocks + some additional info.
2327 class mop_t
2328 {
2329  void hexapi copy(const mop_t &rop);
2330 public:
2331  /// Operand type.
2333 
2334  /// Operand properties.
2335  uint8 oprops;
2336 #define OPROP_IMPDONE 0x01 ///< imported operand (a pointer) has been dereferenced
2337 #define OPROP_UDT 0x02 ///< a struct or union
2338 #define OPROP_FLOAT 0x04 ///< possibly floating value
2339 #define OPROP_CCFLAGS 0x08 ///< condition codes register value
2340 #define OPROP_UDEFVAL 0x10 ///< uses undefined value
2341 
2342  /// Value number.
2343  /// Zero means unknown.
2344  /// Operands with the same value number are equal.
2345  uint16 valnum;
2346 
2347  /// Operand size.
2348  /// Usually it is 1,2,4,8 or NOSIZE but for UDTs other sizes are permitted
2349  int size;
2350 
2351  /// The following union holds additional details about the operand.
2352  /// Depending on the operand type different kinds of info are stored.
2353  /// You should access these fields only after verifying the operand type.
2354  /// All pointers are owned by the operand and are freed by its destructor.
2355  union
2356  {
2357  mreg_t r; // mop_r register number
2358  mnumber_t *nnn; // mop_n immediate value
2359  minsn_t *d; // mop_d result (destination) of another instruction
2360  stkvar_ref_t *s; // mop_S stack variable
2361  ea_t g; // mop_v global variable (its linear address)
2362  int b; // mop_b block number (used in jmp,call instructions)
2363  mcallinfo_t *f; // mop_f function call information
2364  lvar_ref_t *l; // mop_l local variable
2365  mop_addr_t *a; // mop_a variable whose address is taken
2366  char *helper; // mop_h helper function name
2367  char *cstr; // mop_str string constant
2368  mcases_t *c; // mop_c cases
2369  fnumber_t *fpc; // mop_fn floating point constant
2370  mop_pair_t *pair; // mop_p operand pair
2371  scif_t *scif; // mop_sc scattered operand info
2372  };
2373  // -- End of data fields, member function declarations follow:
2374 
2375  void set_impptr_done(void) { oprops |= OPROP_IMPDONE; }
2376  void set_udt(void) { oprops |= OPROP_UDT; }
2377  void set_undef_val(void) { oprops |= OPROP_UDEFVAL; }
2378  bool is_impptr_done(void) const { return (oprops & OPROP_IMPDONE) != 0; }
2379  bool is_udt(void) const { return (oprops & OPROP_UDT) != 0; }
2380  bool probably_floating(void) const { return (oprops & OPROP_FLOAT) != 0; }
2381  bool is_ccflags(void) const { return (oprops & OPROP_CCFLAGS) != 0; }
2382  bool is_undef_val(void) const { return (oprops & OPROP_UDEFVAL) != 0; }
2383 
2384  mop_t(void) { zero(); }
2385  mop_t(const mop_t &rop) { copy(rop); }
2386  mop_t(mreg_t _r, int _s) : t(mop_r), oprops(0), valnum(0), size(_s), r(_r) {}
2387  mop_t &operator=(const mop_t &rop) { return assign(rop); }
2388  mop_t &hexapi assign(const mop_t &rop);
2389  ~mop_t(void)
2390  {
2391  erase();
2392  }
2393  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2394  void zero(void) { t = mop_z; oprops = 0; valnum = 0; size = NOSIZE; nnn = NULL; }
2395  void hexapi swap(mop_t &rop);
2396  void hexapi erase(void);
2397  void erase_but_keep_size(void) { int s2 = size; erase(); size = s2; }
2398 
2399  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
2400  const char *hexapi dstr(void) const; // use this function for debugging
2401 
2402  //-----------------------------------------------------------------------
2403  // Operand creation
2404  //-----------------------------------------------------------------------
2405  /// Create operand from mlist_t.
2406  /// Example: if LST contains 4 bits for R0.4, our operand will be
2407  /// (t=mop_r, r=R0, size=4)
2408  /// \param mba pointer to microcode
2409  /// \param lst list of locations
2410  /// \param fullsize mba->fullsize
2411  /// \return success
2412  bool hexapi create_from_mlist(mbl_array_t *mba, const mlist_t &lst, sval_t fullsize);
2413 
2414  /// Create operand from ivlset_t.
2415  /// Example: if IVS contains [glbvar..glbvar+4), our operand will be
2416  /// (t=mop_v, g=&glbvar, size=4)
2417  /// \param mba pointer to microcode
2418  /// \param ivs set of memory intervals
2419  /// \param fullsize mba->fullsize
2420  /// \return success
2421  bool hexapi create_from_ivlset(mbl_array_t *mba, const ivlset_t &ivs, sval_t fullsize);
2422 
2423  /// Create operand from vdloc_t.
2424  /// Example: if LOC contains (type=ALOC_REG1, r=R0), our operand will be
2425  /// (t=mop_r, r=R0, size=_SIZE)
2426  /// \param mba pointer to microcode
2427  /// \param loc location
2428  /// \param _size size of the created operand (see the example above)
2429  /// Note: this function cannot handle scattered locations.
2430  /// The function is declared void: it does not report success or failure.
2431  void hexapi create_from_vdloc(mbl_array_t *mba, const vdloc_t &loc, int _size);
2432 
2433  /// Create operand from scattered vdloc_t.
2434  /// Example: if LOC is (ALOC_DIST, {EAX.4, EDX.4}) and TYPE is _LARGE_INTEGER,
2435  /// our operand will be
2436  /// (t=mop_sc, scif={EAX.4, EDX.4})
2437  /// \param mba pointer to microcode
2438  /// \param name name of the operand, if available
2439  /// \param type type of the operand, must be present
2440  /// \param loc a scattered location
2441  /// The function is declared void: it does not report success or failure.
2442  void hexapi create_from_scattered_vdloc(
2443  mbl_array_t *mba,
2444  const char *name,
2445  tinfo_t type,
2446  const vdloc_t &loc);
2447 
2448  /// Create operand from an instruction.
2449  /// This function creates a nested instruction that can be used as an operand.
2450  /// Example: if m="add x,y,z", our operand will be (t=mop_d,d=m).
2451  /// The destination operand of 'add' (z) is lost.
2452  /// \param m instruction to embed into operand. may not be NULL.
2453  void hexapi create_from_insn(const minsn_t *m);
2454 
2455  /// Create an integer constant operand.
2456  /// \param _value value to store in the operand
2457  /// \param _size size of the value in bytes (1,2,4,8)
2458  /// \param _ea address of the processor instruction that made the value
2459  /// \param opnum operand number of the processor instruction
2460  void hexapi make_number(uint64 _value, int _size, ea_t _ea=BADADDR, int opnum=0);
2461 
2462  /// Create a floating point constant operand.
2463  /// \param bytes pointer to the floating point value as used by the current
2464  /// processor (e.g. for x86 it must be in IEEE 754)
2465  /// \param _size number of bytes occupied by the constant.
2466  /// \return success
2467  bool hexapi make_fpnum(const void *bytes, size_t _size);
2468 
2469  /// Create a register operand without erasing previous data.
2470  /// \param reg micro register number
2471  /// Note: this function does not erase the previous contents of the operand;
2472  /// call erase() if necessary
2473  void _make_reg(mreg_t reg)
2474  {
2475  t = mop_r;
2476  r = reg;
2477  }
2478  void _make_reg(mreg_t reg, int _size)
2479  {
2480  t = mop_r;
2481  r = reg;
2482  size = _size;
2483  }
2484  /// Create a register operand.
2485  void make_reg(mreg_t reg) { erase(); _make_reg(reg); }
2486  void make_reg(mreg_t reg, int _size) { erase(); _make_reg(reg, _size); }
2487 
2488  /// Create a local variable operand.
2489  /// \param mba pointer to microcode
2490  /// \param idx index into mba->vars
2491  /// \param off offset from the beginning of the variable
2492  /// Note: this function does not erase the previous contents of the operand;
2493  /// call erase() if necessary
2494  void _make_lvar(mbl_array_t *mba, int idx, sval_t off=0)
2495  {
2496  t = mop_l;
2497  l = new lvar_ref_t(mba, idx, off);
2498  }
2499 
2500  /// Create a global variable operand without erasing previous data.
2501  /// \param ea address of the variable
2502  /// Note: this function does not erase the previous contents of the operand;
2503  /// call erase() if necessary
2504  void _make_gvar(ea_t ea)
2505  {
2506  t = mop_v;
2507  g = ea;
2508  }
2509  /// Create a global variable operand.
2510  void make_gvar(ea_t ea) { erase(); _make_gvar(ea); }
2511 
2512  /// Create a stack variable operand.
2513  /// \param mba pointer to microcode
2514  /// \param off decompiler stkoff
2515  /// Note: this function does not erase the previous contents of the operand;
2516  /// call erase() if necessary
2517  void _make_stkvar(mbl_array_t *mba, sval_t off)
2518  {
2519  t = mop_S;
2520  s = new stkvar_ref_t(mba, off);
2521  }
2522 
2523  /// Create pair of registers.
2524  /// \param loreg register holding the low part of the value
2525  /// \param hireg register holding the high part of the value
2526  /// \param halfsize the size of each of loreg/hireg
2527  void hexapi make_reg_pair(int loreg, int hireg, int halfsize);
2528 
2529  /// Create a nested instruction without erasing previous data.
2530  /// \param ins the instruction to nest into this operand
2531  /// Note: this function does not erase the previous contents of the operand;
2532  /// call erase() if necessary
2533  /// See also create_from_insn, which is higher level
2534  void _make_insn(minsn_t *ins);
2535  /// Create a nested instruction.
2536  void make_insn(minsn_t *ins) { erase(); _make_insn(ins); }
2537 
2538  /// Create a block reference operand without erasing previous data.
2539  /// \param blknum block number
2540  /// Note: this function does not erase the previous contents of the operand;
2541  /// call erase() if necessary
2542  void _make_blkref(int blknum)
2543  {
2544  t = mop_b;
2545  b = blknum;
2546  }
2547  /// Create a block reference operand (erases the previous contents first).
2548  void make_blkref(int blknum) { erase(); _make_blkref(blknum); }
2549 
2550  /// Create a helper operand.
2551  /// A helper operand usually keeps a built-in function name like "va_start"
2552  /// It is essentially just an arbitrary identifier without any additional info.
2553  void hexapi make_helper(const char *name);
2554 
2555  /// Create a constant string operand. Neither overload calls erase() first.
2556  void _make_strlit(const char *str) // stores the result of qstrdup(str) - presumably a heap copy; TODO confirm
2557  {
2558  t = mop_str;
2559  cstr = ::qstrdup(str);
2560  }
2561  void _make_strlit(qstring *str) // str is consumed: the operand keeps what str->extract() returns
2562  {
2563  t = mop_str;
2564  cstr = str->extract();
2565  }
2566 
2567  /// Create a call info operand without erasing previous data.
2568  /// \param fi callinfo
2569  /// Note: this function does not erase the previous contents of the operand;
2570  /// call erase() if necessary
2572  {
2573  t = mop_f;
2574  f = fi;
2575  }
2576 
2577  /// Create a 'switch cases' operand without erasing previous data.
2578  /// Note: this function does not erase the previous contents of the operand;
2579  /// call erase() if necessary
2580  void _make_cases(mcases_t *_cases)
2581  {
2582  t = mop_c;
2583  c = _cases;
2584  }
2585 
2586  /// Create a pair operand without erasing previous data.
2587  /// Note: this function does not erase the previous contents of the operand;
2588  /// call erase() if necessary
2589  void _make_pair(mop_pair_t *_pair)
2590  {
2591  t = mop_p;
2592  pair = _pair;
2593  }
2594 
2595  //-----------------------------------------------------------------------
2596  // Various operand tests
2597  //-----------------------------------------------------------------------
2598  bool empty(void) const { return t == mop_z; }
2599  /// Is a register operand?
2600  /// See also get_mreg_name()
2601  bool is_reg(void) const { return t == mop_r; }
2602  /// Is the specified register?
2603  bool is_reg(mreg_t _r) const { return t == mop_r && r == _r; }
2604  /// Is the specified register of the specified size?
2605  bool is_reg(mreg_t _r, int _size) const { return t == mop_r && r == _r && size == _size; }
2606  /// Is a list of arguments?
2607  bool is_arglist(void) const { return t == mop_f; }
2608  /// Is a condition code?
2609  bool is_cc(void) const { return is_reg() && r >= mr_cf && r < mr_first; }
2610  /// Is a bit register?
2611  /// This includes condition codes and eventually other bit registers
2612  static bool hexapi is_bit_reg(mreg_t reg);
2613  bool is_bit_reg(void) const { return is_reg() && is_bit_reg(r); }
2614  /// Is a kernel register?
2615  bool is_kreg(void) const;
2616  /// Is a block reference to the specified block?
2617  bool is_mob(int serial) const { return t == mop_b && b == serial; }
2618  /// Is a scattered operand?
2619  bool is_scattered(void) const { return t == mop_sc; }
2620  /// Is address of a global memory cell?
2621  bool is_glbaddr() const;
2622  /// Is address of the specified global memory cell?
2623  bool is_glbaddr(ea_t ea) const;
2624  /// Is address of a stack variable?
2625  bool is_stkaddr() const;
2626  /// Is a sub-instruction?
2627  bool is_insn(void) const { return t == mop_d; }
2628  /// Is a sub-instruction with the specified opcode?
2629  bool is_insn(mcode_t code) const;
2630  /// Has any side effects?
2631  /// \param include_ldx_and_divs consider ldx/div/mod as having side effects?
2632  bool has_side_effects(bool include_ldx_and_divs=false) const;
2633  /// Is it possible for the operand to use aliased memory?
2634  bool hexapi may_use_aliased_memory(void) const;
2635 
2636  /// Are the possible values of the operand only 0 and 1?
2637  /// This function returns true for 0/1 constants, bit registers,
2638  /// the result of 'set' insns, etc.
2639  bool hexapi is01(void) const;
2640 
2641  /// Does the high part of the operand consist of the sign bytes?
2642  /// \param nbytes number of bytes that were sign extended.
2643  /// the remaining size-nbytes high bytes must be sign bytes
2644  /// Example: is_sign_extended_from(xds.4(op.1), 1) -> true
2645  /// because the high 3 bytes are certainly sign bits
2646  bool hexapi is_sign_extended_from(int nbytes) const;
2647 
2648  /// Does the high part of the operand consist of zero bytes?
2649  /// \param nbytes number of bytes that were zero extended.
2650  /// the remaining size-nbytes high bytes must be zero
2651  /// Example: is_zero_extended_from(xdu.8(op.1), 2) -> true
2652  /// because the high 6 bytes are certainly zero
2653  bool hexapi is_zero_extended_from(int nbytes) const;
2654 
2655  /// Does the high part of the operand consist of zero or sign bytes?
2656  bool is_extended_from(int nbytes, bool is_signed) const
2657  {
2658  if ( is_signed )
2659  return is_sign_extended_from(nbytes);
2660  else
2661  return is_zero_extended_from(nbytes);
2662  }
2663 
2664  //-----------------------------------------------------------------------
2665  // Comparisons
2666  //-----------------------------------------------------------------------
2667  /// Compare operands.
2668  /// This is the main comparison function for operands.
2669  /// \param rop operand to compare with
2670  /// \param eqflags combination of \ref EQ_ bits
2671  bool hexapi equal_mops(const mop_t &rop, int eqflags) const;
2672  bool operator==(const mop_t &rop) const { return equal_mops(rop, 0); }
2673  bool operator!=(const mop_t &rop) const { return !equal_mops(rop, 0); }
2674 
2675  /// Lexicographical operand comparison.
2676  /// It can be used to store mop_t in various containers, like std::set
2677  bool operator <(const mop_t &rop) const { return lexcompare(rop) < 0; }
2678  friend int lexcompare(const mop_t &a, const mop_t &b) { return a.lexcompare(b); }
2679  int hexapi lexcompare(const mop_t &rop) const;
2680 
2681  //-----------------------------------------------------------------------
2682  // Visiting operand parts
2683  //-----------------------------------------------------------------------
2684  /// Visit the operand and all its sub-operands.
2685  /// This function visits the current operand as well.
2686  /// \param mv visitor object
2687  /// \param type operand type
2688  /// \param is_target is a destination operand?
2689  int hexapi for_all_ops(
2690  mop_visitor_t &mv,
2691  const tinfo_t *type=NULL,
2692  bool is_target=false);
2693 
2694  /// Visit all sub-operands of a scattered operand.
2695  /// This function does not visit the current operand, only its sub-operands.
2696  /// All sub-operands are synthetic and are destroyed after the visitor.
2697  /// This function works only with scattered operands.
2698  /// \param sv visitor object
2699  int hexapi for_all_scattered_submops(scif_visitor_t &sv) const;
2700 
2701  //-----------------------------------------------------------------------
2702  // Working with mop_n operands
2703  //-----------------------------------------------------------------------
2704  /// Retrieve value of a constant integer operand.
2705  /// These functions can be called only for mop_n operands.
2706  /// See is_constant() that can be called on any operand.
2707  uint64 value(bool is_signed) const { return extend_sign(nnn->value, size, is_signed); }
2708  int64 signed_value(void) const { return value(true); }
2709  uint64 unsigned_value(void) const { return value(false); }
2710 
2711  /// Retrieve value of a constant integer operand.
2712  /// \param out pointer to the output buffer
2713  /// \param is_signed should treat the value as signed
2714  /// \return true if the operand is mop_n
2715  bool hexapi is_constant(uint64 *out=NULL, bool is_signed=true) const;
2716 
2717  bool is_equal_to(uint64 n, bool is_signed=true) const
2718  {
2719  uint64 v;
2720  return is_constant(&v, is_signed) && v == n;
2721  }
2722  bool is_zero(void) const { return is_equal_to(0, false); }
2723  bool is_one(void) const { return is_equal_to(1, false); }
2724  bool is_positive_constant(void) const
2725  {
2726  uint64 v;
2727  return is_constant(&v, true) && int64(v) > 0;
2728  }
2729  bool is_negative_constant(void) const
2730  {
2731  uint64 v;
2732  return is_constant(&v, true) && int64(v) < 0;
2733  }
2734 
2735  //-----------------------------------------------------------------------
2736  // Working with mop_S operands
2737  //-----------------------------------------------------------------------
2738  /// Retrieve the referenced stack variable.
2739  /// \param p_off if specified, will hold IDA stkoff after the call.
2740  /// \return pointer to the stack variable
2741  member_t *get_stkvar(uval_t *p_off) const { return s->get_stkvar(p_off); }
2742 
2743  /// Get the referenced stack offset.
2744  /// This function can also handle mop_sc if it is entirely mapped into
2745  /// a continuous stack region.
2746  /// \param p_off the output buffer
2747  /// \return success
2748  bool hexapi get_stkoff(sval_t *p_off) const;
2749 
2750  //-----------------------------------------------------------------------
2751  // Working with mop_d operands
2752  //-----------------------------------------------------------------------
2753  /// Get subinstruction of the operand.
2754  /// If the operand has a subinstruction with the specified opcode, return it.
2755  /// \param code desired opcode
2756  /// \return pointer to the instruction or NULL
2757  const minsn_t *get_insn(mcode_t code) const;
2758  minsn_t *get_insn(mcode_t code);
2759 
2760  //-----------------------------------------------------------------------
2761  // Transforming operands
2762  //-----------------------------------------------------------------------
2763  /// Make the low part of the operand.
2764  /// This function takes into account the memory endianness (byte sex)
2765  /// \param width the desired size of the operand part in bytes
2766  /// \return success
2767  bool hexapi make_low_half(int width);
2768 
2769  /// Make the high part of the operand.
2770  /// This function takes into account the memory endianness (byte sex)
2771  /// \param width the desired size of the operand part in bytes
2772  /// \return success
2773  bool hexapi make_high_half(int width);
2774 
2775  /// Make the first part of the operand.
2776  /// This function does not care about the memory endianness
2777  /// \param width the desired size of the operand part in bytes
2778  /// \return success
2779  bool hexapi make_first_half(int width);
2780 
2781  /// Make the second part of the operand.
2782  /// This function does not care about the memory endianness
2783  /// \param width the desired size of the operand part in bytes
2784  /// \return success
2785  bool hexapi make_second_half(int width);
2786 
2787  /// Shift the operand.
2788  /// This function shifts only the beginning of the operand.
2789  /// The operand size will be changed.
2790  /// Examples: shift_mop(AH.1, -1) -> AX.2
2791  /// shift_mop(qword_00000008.8, 4) -> dword_0000000C.4
2792  /// shift_mop(xdu.8(op.4), 4) -> #0.4
2793  /// shift_mop(#0x12345678.4, 3) -> #12.1
2794  /// \param offset shift count (the number of bytes to shift)
2795  /// \return success
2796  bool hexapi shift_mop(int offset);
2797 
2798  /// Change the operand size.
2799  /// Examples: change_size(AL.1, 2) -> AX.2
2800  /// change_size(qword_00000008.8, 4) -> dword_00000008.4
2801  /// change_size(xdu.8(op.4), 4) -> op.4
2802  /// change_size(#0x12345678.4, 1) -> #0x78.1
2803  /// \param nsize new operand size
2804  /// \param sideff may modify the database because of the size change?
2805  /// \return success
2806  bool hexapi change_size(int nsize, side_effect_t sideff=WITH_SIDEFF);
2807  bool double_size(side_effect_t sideff=WITH_SIDEFF) { return change_size(size*2, sideff); }
2808 
2809  /// Move subinstructions with side effects out of the operand.
2810  /// If we decide to delete an instruction operand, it is a good idea to
2811  /// call this function. Alternatively we should skip such operands
2812  /// by calling mop_t::has_side_effects()
2813  /// For example, if we transform: jnz x, x, @blk => goto @blk
2814  /// then we must call this function before deleting the X operands.
2815  /// \param blk current block
2816  /// \param top top level instruction that contains our operand
2817  /// \param moved_calls pointer to the boolean that will track if all side
2818  /// effects get handled correctly. must be false initially.
2819  /// \return false failed to preserve a side effect, it is not safe to
2820  /// delete the operand
2821  /// true no side effects or successfully preserved them
2822  bool hexapi preserve_side_effects(
2823  mblock_t *blk,
2824  minsn_t *top,
2825  bool *moved_calls=NULL);
2826 
2827  /// Apply a unary opcode to the operand.
2828  /// \param mcode opcode to apply. it must accept 'l' and 'd' operands
2829  /// but not 'r'. examples: m_low/m_high/m_xds/m_xdu
2830  /// \param ea value of minsn_t::ea for the newly created instruction
2831  /// \param newsize new operand size
2832  /// Example: apply_ld_mcode(m_low) will convert op => low(op)
2833  void hexapi apply_ld_mcode(mcode_t mcode, ea_t ea, int newsize);
2834  void apply_xdu(ea_t ea, int newsize) { apply_ld_mcode(m_xdu, ea, newsize); }
2835  void apply_xds(ea_t ea, int newsize) { apply_ld_mcode(m_xds, ea, newsize); }
2836 };
2837 DECLARE_TYPE_AS_MOVABLE(mop_t);
2838 
2839 /// Pair of operands
2841 {
2842 public:
2843  mop_t lop; ///< low operand
2844  mop_t hop; ///< high operand
2845  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
2846 };
2847 
2848 /// Address of an operand (mop_l, mop_v, mop_S, mop_r)
2849 class mop_addr_t : public mop_t
2850 {
2851 public:
2852  int insize; // how many bytes of the pointed operand can be read
2853  int outsize; // how many bytes of the pointed operand can be written
2854 
2855  mop_addr_t(): insize(NOSIZE), outsize(NOSIZE) {}
2856  mop_addr_t(const mop_addr_t &ra)
2857  : mop_t(ra), insize(ra.insize), outsize(ra.outsize) {}
2858  mop_addr_t(const mop_t &ra, int isz, int osz)
2859  : mop_t(ra), insize(isz), outsize(osz) {}
2860 
2861  mop_addr_t &operator=(const mop_addr_t &rop) // copy the operand part and both access sizes
2862  {
2863  *(mop_t *)this = mop_t(rop); // the mop_t base is assigned from a temporary copy of rop's base part
2864  insize = rop.insize;
2865  outsize = rop.outsize;
2866  return *this;
2867  }
2868  int lexcompare(const mop_addr_t &ra) const // order by the mop_t part first, then insize, then outsize
2869  {
2870  int code = mop_t::lexcompare(ra);
2871  return code != 0 ? code
2872  : insize != ra.insize ? (insize-ra.insize)
2873  : outsize != ra.outsize ? (outsize-ra.outsize)
2874  : 0; // non-zero results from the size fields are plain differences, so rely on the sign only
2875  }
2876 };
2877 
2878 /// A call argument
2879 class mcallarg_t : public mop_t // #callarg
2880 {
2881 public:
2882  ea_t ea; ///< address where the argument was initialized.
2883  ///< BADADDR means unknown.
2884  tinfo_t type; ///< formal argument type
2885  qstring name; ///< formal argument name
2886  argloc_t argloc; ///< ida argloc
2887 
2888  mcallarg_t(void) : ea(BADADDR) {}
2889  mcallarg_t(const mop_t &rarg) : mop_t(rarg), ea(BADADDR) {}
2890  void copy_mop(const mop_t &op) { *(mop_t *)this = op; }
2891  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
2892  const char *hexapi dstr(void) const;
2893  void hexapi set_regarg(mreg_t mr, int sz, const tinfo_t &tif);
2894  void set_regarg(mreg_t mr, const tinfo_t &tif)
2895  {
2896  set_regarg(mr, tif.get_size(), tif);
2897  }
2898  void set_regarg(mreg_t mr, char dt, type_sign_t sign = type_unsigned)
2899  {
2900  int sz = get_dtype_size(dt);
2901  set_regarg(mr, sz, get_int_type_by_width_and_sign(sz, sign));
2902  }
2903  void make_int(int val, ea_t val_ea, int opno = 0) // make this argument a numeric constant typed BTF_INT
2904  {
2905  type = tinfo_t(BTF_INT);
2906  make_number(val, inf_get_cc_size_i(), val_ea, opno); // the operand size comes from inf_get_cc_size_i()
2907  }
2908  void make_uint(int val, ea_t val_ea, int opno = 0) // make this argument a numeric constant typed BTF_UINT
2909  {
2910  type = tinfo_t(BTF_UINT);
2911  make_number(val, inf_get_cc_size_i(), val_ea, opno); // NOTE(review): val is a signed int passed as make_number's uint64 value - confirm negative inputs are intended
2912  }
2913 };
2914 DECLARE_TYPE_AS_MOVABLE(mcallarg_t);
2915 typedef qvector<mcallarg_t> mcallargs_t;
2916 
2917 /// Function roles.
2918 /// They are used to calculate use/def lists and to recognize functions
2919 /// without using string comparisons.
2921 {
2922  ROLE_UNK, ///< unknown function role
2923  ROLE_EMPTY, ///< empty, does not do anything (maybe spoils regs)
2924  ROLE_MEMSET, ///< memset(void *dst, uchar value, size_t count);
2925  ROLE_MEMSET32, ///< memset32(void *dst, uint32 value, size_t count);
2926  ROLE_MEMSET64, ///< memset64(void *dst, uint64 value, size_t count);
2927  ROLE_MEMCPY, ///< memcpy(void *dst, const void *src, size_t count);
2928  ROLE_STRCPY, ///< strcpy(char *dst, const char *src);
2929  ROLE_STRLEN, ///< strlen(const char *src);
2930  ROLE_STRCAT, ///< strcat(char *dst, const char *src);
2931  ROLE_TAIL, ///< char *tail(const char *str);
2932  ROLE_BUG, ///< BUG() helper macro: never returns, causes exception
2933  ROLE_ALLOCA, ///< alloca() function
2934  ROLE_BSWAP, ///< bswap() function (any size)
2935  ROLE_PRESENT, ///< present() function (used in patterns)
2936  ROLE_CONTAINING_RECORD, ///< CONTAINING_RECORD() macro
2937  ROLE_FASTFAIL, ///< __fastfail()
2938  ROLE_READFLAGS, ///< __readeflags, __readcallersflags
2939  ROLE_IS_MUL_OK, ///< is_mul_ok
2940  ROLE_SATURATED_MUL, ///< saturated_mul
2941  ROLE_BITTEST, ///< [lock] bt
2942  ROLE_BITTESTANDSET, ///< [lock] bts
2943  ROLE_BITTESTANDRESET, ///< [lock] btr
2944  ROLE_BITTESTANDCOMPLEMENT, ///< [lock] btc
2945  ROLE_VA_ARG, ///< va_arg() macro
2946  ROLE_VA_COPY, ///< va_copy() function
2947  ROLE_VA_START, ///< va_start() function
2948  ROLE_VA_END, ///< va_end() function
2949  ROLE_ROL, ///< rotate left
2950  ROLE_ROR, ///< rotate right
2951  ROLE_CFSUB3, ///< carry flag after subtract with carry
2952  ROLE_OFSUB3, ///< overflow flag after subtract with carry
2953  ROLE_ABS, ///< integer absolute value
2954 };
2955 
2956 /// \defgroup FUNC_NAME_ Well known function names
2957 //@{
2958 #define FUNC_NAME_MEMCPY "memcpy"
2959 #define FUNC_NAME_MEMSET "memset"
2960 #define FUNC_NAME_MEMSET32 "memset32"
2961 #define FUNC_NAME_MEMSET64 "memset64"
2962 #define FUNC_NAME_STRCPY "strcpy"
2963 #define FUNC_NAME_STRLEN "strlen"
2964 #define FUNC_NAME_STRCAT "strcat"
2965 #define FUNC_NAME_TAIL "tail"
2966 #define FUNC_NAME_VA_ARG "va_arg"
2967 #define FUNC_NAME_EMPTY "$empty"
2968 #define FUNC_NAME_PRESENT "$present"
2969 #define FUNC_NAME_CONTAINING_RECORD "CONTAINING_RECORD"
2970 //@}
2971 
2972 
2973 // the default 256 function arguments is too big, we use a lower value
2974 #undef MAX_FUNC_ARGS
2975 #define MAX_FUNC_ARGS 64
2976 
2977 /// Information about a call
2978 class mcallinfo_t // #callinfo
2979 {
2980 public:
2981  ea_t callee; ///< address of the called function, if known
2982  int solid_args; ///< number of solid args.
2983  ///< there may be variadic args in addition
2984  int call_spd; ///< sp value at call insn
2985  int stkargs_top; ///< first offset past stack arguments
2986  cm_t cc; ///< calling convention
2987  mcallargs_t args; ///< call arguments
2988  mopvec_t retregs; ///< return register(s) (e.g., AX, AX:DX, etc.)
2989  ///< this vector is built from return_regs
2990  tinfo_t return_type; ///< type of the returned value
2991  argloc_t return_argloc; ///< location of the returned value
2992 
2993  mlist_t return_regs; ///< list of values returned by the function
2994  mlist_t spoiled; ///< list of spoiled locations (includes return_regs)
2995  mlist_t pass_regs; ///< passthrough registers: registers that depend on input
2996  ///< values (subset of spoiled)
2997  ivlset_t visible_memory; ///< what memory is visible to the call?
2998  mlist_t dead_regs; ///< registers defined by the function but never used.
2999  ///< upon propagation we do the following:
3000  ///< - dead_regs += return_regs
3001  ///< - retregs.clear() since the call is propagated
3002  int flags; ///< combination of \ref FCI_... bits
3003 /// \defgroup FCI_ Call properties
3004 //@{
3005 #define FCI_PROP 0x001 ///< call has been propagated
3006 #define FCI_DEAD 0x002 ///< some return registers were determined dead
3007 #define FCI_FINAL 0x004 ///< call type is final, should not be changed
3008 #define FCI_NORET 0x008 ///< call does not return
3009 #define FCI_PURE 0x010 ///< pure function
3010 #define FCI_NOSIDE 0x020 ///< call does not have side effects
3011 #define FCI_SPLOK 0x040 ///< spoiled/visible_memory lists have been
3012  ///< optimized. for some functions we can reduce them
3013  ///< as soon as information about the arguments becomes
3014  ///< available. in order not to try optimize them again
3015  ///< we use this bit.
3016 #define FCI_HASCALL 0x080 ///< A function is an synthetic helper combined
3017  ///< from several instructions and at least one
3018  ///< of them was a call to a real functions
3019 #define FCI_HASFMT 0x100 ///< A variadic function with recognized
3020  ///< printf- or scanf-style format string
3021 //@}
3022  funcrole_t role; ///< function role
3023  type_attrs_t fti_attrs; ///< extended function attributes
3024 
3025  mcallinfo_t(ea_t _callee=BADADDR, int _sargs=0)
3026  : callee(_callee), solid_args(_sargs), call_spd(0), stkargs_top(0),
3027  cc(CM_CC_INVALID), flags(0), role(ROLE_UNK) {}
3028  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3029  int hexapi lexcompare(const mcallinfo_t &f) const;
3030  bool hexapi set_type(const tinfo_t &type);
3031  tinfo_t hexapi get_type(void) const;
3032  bool is_vararg(void) const { return is_vararg_cc(cc); }
3033  void hexapi print(qstring *vout, int size=-1, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
3034  const char *hexapi dstr(void) const;
3035 };
3036 
3037 /// List of switch cases and targets.
3038 class mcases_t // #cases
3039 {
3040 public:
3041  casevec_t values; ///< expression values for each target
3042  intvec_t targets; ///< target block numbers
3043
3044  void swap(mcases_t &r) { values.swap(r.values); targets.swap(r.targets); } ///< exchange both vectors with 'r'
3045  DECLARE_COMPARISONS(mcases_t);
3046  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3047  bool empty(void) const { return targets.empty(); } ///< are there no targets?
3048  size_t size(void) const { return targets.size(); } ///< number of targets
3049  void resize(int s) { values.resize(s); targets.resize(s); } ///< resize both vectors to 's' elements
3050  void hexapi print(qstring *vout) const; ///< NOTE(review): presumably prints the cases into 'vout' - confirm
3051  const char *hexapi dstr(void) const; ///< NOTE(review): presumably returns displayable text for debugging - confirm
3052 };
3053 
3054 //-------------------------------------------------------------------------
3055 /// Value offset (microregister number or stack offset)
3056 struct voff_t
3057 {
3058  sval_t off; ///< register number or stack offset
3059  mopt_t type; ///< mop_r - register, mop_S - stack, mop_z - undefined
3060
3061  voff_t() : off(-1), type(mop_z) {} ///< create an undefined value offset
3062  voff_t(mopt_t _type, sval_t _off) : off(_off), type(_type) {} ///< create from an explicit type and offset
3063  voff_t(const mop_t &op) : off(-1), type(mop_z) ///< create from an operand; stays undefined unless 'op' is a register or mop_S
3064  {
3065  if ( op.is_reg() || op.t == mop_S )
3066  set(op.t, op.is_reg() ? op.r : op.s->off);
3067  }
3068
3069  void set(mopt_t _type, sval_t _off) { type = _type; off = _off; } ///< overwrite both fields
3070  void set_stkoff(sval_t stkoff) { set(mop_S, stkoff); } ///< make it a stack offset
3071  void set_reg (mreg_t mreg) { set(mop_r, mreg); } ///< make it a register
3072  void undef() { set(mop_z, -1); } ///< reset to the undefined state (same as the default constructor)
3073
3074  bool defined() const { return type != mop_z; } ///< is it a register or a stack offset?
3075  bool is_reg() const { return type == mop_r; }
3076  bool is_stkoff() const { return type == mop_S; }
3077  mreg_t get_reg() const { QASSERT(51892, is_reg()); return off; } ///< register number; QASSERT-checked to be a register
3078  sval_t get_stkoff() const { QASSERT(51893, is_stkoff()); return off; } ///< stack offset; QASSERT-checked to be a stack offset
3079
3080  void inc(sval_t delta) { off += delta; } ///< advance the offset in place
3081  voff_t add(int width) const { return voff_t(type, off+width); } ///< new value offset of the same type, 'width' further
3082  sval_t diff(const voff_t &r) const { QASSERT(51894, type == r.type); return off - r.off; } ///< distance to 'r'; QASSERT-checked to have the same type
3083
3084
3085  DECLARE_COMPARISONS(voff_t) // ordering: by 'type' first, then by 'off'
3086  {
3087  int code = ::compare(type, r.type);
3088  return code != 0 ? code : ::compare(off, r.off);
3089  }
3090 };
3091 
3092 //-------------------------------------------------------------------------
3093 /// Value interval (register or stack range)
3094 struct vivl_t : voff_t
3095 {
3096  int size; ///< Interval size in bytes
3097
3098  vivl_t(mopt_t _type = mop_z, sval_t _off = -1, int _size = 0) ///< by default: undefined, empty interval
3099  : voff_t(_type, _off), size(_size) {}
3100  vivl_t(const class chain_t &ch); ///< create from a chain (defined out of line)
3101  vivl_t(const mop_t &op) : voff_t(op), size(op.size) {} ///< create from an operand, see voff_t(const mop_t &)
3102
3103  // Make a value interval
3104  void set(mopt_t _type, sval_t _off, int _size = 0)
3105  { voff_t::set(_type, _off); size = _size; }
3106  void set(const voff_t &voff, int _size)
3107  { set(voff.type, voff.off, _size); }
3108  void set_stkoff(sval_t stkoff, int sz = 0) { set(mop_S, stkoff, sz); }
3109  void set_reg (mreg_t mreg, int sz = 0) { set(mop_r, mreg, sz); }
3110
3111  /// Extend a value interval using another value interval of the same type
3112  /// \return success
3113  bool hexapi extend_to_cover(const vivl_t &r);
3114
3115  /// Intersect value intervals of the same type
3116  /// \return size of the resulting intersection
3117  uval_t hexapi intersect(const vivl_t &r);
3118
3119  /// Do two value intervals overlap?
3120  /// Intervals of different types never overlap.
3121  bool overlap(const vivl_t &r) const
3122  {
3123  return type == r.type
3124  && interval::overlap(off, size, r.off, r.size);
3125  }
3126  /// Does our value interval include another?
3127  /// Intervals of different types never include each other.
3128  bool includes(const vivl_t &r) const
3129  {
3130  return type == r.type
3131  && interval::includes(off, size, r.off, r.size);
3132  }
3133
3134  /// Does our value interval contain the specified value offset?
3135  /// The value offset must have the same type as the interval.
3136  bool contains(const voff_t &voff2) const
3137  {
3138  return type == voff2.type
3139  && interval::contains(off, size, voff2.off);
3140  }
3141
3142  // Comparisons: only the value offset (type, off) is compared
3143  DECLARE_COMPARISONS(vivl_t)
3144  {
3145  int code = voff_t::compare(r);
3146  return code; // 'size' does not take part; was: return code != 0 ? code : ::compare(size, r.size);
3147  }
3148  bool operator==(const mop_t &mop) const // same type and same register number / stack offset; 'size' is not compared
3149  {
3150  return type == mop.t && off == (mop.is_reg() ? mop.r : mop.s->off); // NOTE(review): mop.s is read for any non-register type - confirm callers pass only mop_r/mop_S
3151  }
3152  void hexapi print(qstring *vout) const;
3153  const char *hexapi dstr(void) const;
3154 };
3152 
3153 //-------------------------------------------------------------------------
3154 /// ud (use->def) and du (def->use) chain.
3155 /// We store in chains only the block numbers, not individual instructions.
3156 /// See https://en.wikipedia.org/wiki/Use-define_chain
3157 class chain_t : public intvec_t // sequence of block numbers
3158 {
3159  voff_t k; ///< Value offset of the chain.
3160  ///< (what variable is this chain about)
3161
3162 public:
3163  int width; ///< size of the value in bytes
3164  int varnum; ///< allocated variable index (-1 - not allocated yet)
3165  uchar flags; ///< combination of \ref CHF_ bits
3166 /// \defgroup CHF_ Chain properties
3167 //@{
3168 #define CHF_INITED 0x01 ///< is chain initialized? (valid only after lvar allocation)
3169 #define CHF_REPLACED 0x02 ///< chain operands have been replaced?
3170 #define CHF_OVER 0x04 ///< overlapped chain
3171 #define CHF_FAKE 0x08 ///< fake chain created by widen_chains()
3172 #define CHF_PASSTHRU 0x10 ///< pass-thru chain, must use the input variable to the block
3173 #define CHF_TERM 0x20 ///< terminating chain; the variable does not survive across the block
3174 //@}
3175  chain_t() : width(0), varnum(-1), flags(CHF_INITED) {} ///< empty chain with a default (undefined) key
3176  chain_t(mopt_t t, sval_t off, int w=1, int v=-1) ///< chain for the value of type 't' at offset 'off'
3177  : k(t, off), width(w), varnum(v), flags(CHF_INITED) {}
3178  chain_t(const voff_t &_k, int w=1) ///< chain for the value offset '_k'
3179  : k(_k), width(w), varnum(-1), flags(CHF_INITED) {}
3180  void set_value(const chain_t &r) ///< copy width, varnum, flags and the block list from 'r'; the key is not changed
3181  { width = r.width; varnum = r.varnum; flags = r.flags; *(intvec_t *)this = (intvec_t &)r; }
3182  const voff_t &key() const { return k; } ///< value offset this chain is about
3183  bool is_inited(void) const { return (flags & CHF_INITED) != 0; }
3184  bool is_reg(void) const { return k.is_reg(); }
3185  bool is_stkoff(void) const { return k.is_stkoff(); }
3186  bool is_replaced(void) const { return (flags & CHF_REPLACED) != 0; }
3187  bool is_overlapped(void) const { return (flags & CHF_OVER) != 0; }
3188  bool is_fake(void) const { return (flags & CHF_FAKE) != 0; }
3189  bool is_passreg(void) const { return (flags & CHF_PASSTHRU) != 0; }
3190  bool is_term(void) const { return (flags & CHF_TERM) != 0; }
3191  void set_inited(bool b) { setflag(flags, CHF_INITED, b); }
3192  void set_replaced(bool b) { setflag(flags, CHF_REPLACED, b); }
3193  void set_overlapped(bool b) { setflag(flags, CHF_OVER, b); }
3194  void set_term(bool b) { setflag(flags, CHF_TERM, b); }
3195  mreg_t get_reg() const { return k.get_reg(); }
3196  sval_t get_stkoff() const { return k.get_stkoff(); }
3197  bool overlap(const chain_t &r) const ///< same key type and the [off, off+width) ranges overlap
3198  { return k.type == r.k.type && interval::overlap(k.off, width, r.k.off, r.width); }
3199  bool includes(const chain_t &r) const ///< same key type and our range includes the range of 'r'
3200  { return k.type == r.k.type && interval::includes(k.off, width, r.k.off, r.width); }
3201  const voff_t endoff() const { return k.add(width); } ///< value offset just past the end of the value
3202
3203  bool operator<(const chain_t &r) const { return key() < r.key(); } ///< chains are ordered by their keys only
3204
3205  void hexapi print(qstring *vout) const;
3206  const char *hexapi dstr(void) const;
3207  /// Append the contents of the chain to the specified list of locations.
3208  void hexapi append_list(mlist_t *list) const;
3209  void clear_varnum(void) { varnum = -1; set_replaced(false); } ///< forget the allocated variable and clear CHF_REPLACED
3210 };
3211 
3212 //-------------------------------------------------------------------------
3213 #if defined(__NT__)
3214 #define SIZEOF_BLOCK_CHAINS 24
3215 #elif defined(__MAC__)
3216 #define SIZEOF_BLOCK_CHAINS 32
3217 #else
3218 #define SIZEOF_BLOCK_CHAINS 56
3219 #endif
3220 /// Chains of one block.
3221 /// Please note that this class is based on std::map and it must be accessed
3222 /// using the block_chains_begin(), block_chains_find() and similar functions.
3223 /// This is required because different compilers use different implementations
3224 /// of std::map. However, since the size of std::map depends on the compilation
3225 /// options, we replace it with a byte array.
3226 class block_chains_t
3227 {
3228  size_t body[SIZEOF_BLOCK_CHAINS/sizeof(size_t)]; // opaque std::set, uncopyable; sized by the platform-specific SIZEOF_BLOCK_CHAINS
3229 public:
3230
3231  /// Get chain for the specified register
3232  /// \param reg register number
3233  /// \param width size of register in bytes
3234  const chain_t *get_reg_chain(mreg_t reg, int width=1) const
3235  { return get_chain((chain_t(mop_r, reg, width))); }
3236  chain_t *get_reg_chain(mreg_t reg, int width=1)
3237  { return get_chain((chain_t(mop_r, reg, width))); }
3238
3239  /// Get chain for the specified stack offset
3240  /// \param off stack offset
3241  /// \param width size of stack value in bytes
3242  const chain_t *get_stk_chain(sval_t off, int width=1) const
3243  { return get_chain(chain_t(mop_S, off, width)); }
3244  chain_t *get_stk_chain(sval_t off, int width=1)
3245  { return get_chain(chain_t(mop_S, off, width)); }
3246
3247  /// Get chain for the specified value offset.
3248  /// \param k value offset (register number or stack offset)
3249  /// \param width size of value in bytes
3250  const chain_t *get_chain(const voff_t &k, int width=1) const
3251  { return get_chain(chain_t(k, width)); }
3252  chain_t *get_chain(const voff_t &k, int width=1) // non-const overload: forwards to the const version
3253  { return (chain_t*)((const block_chains_t *)this)->get_chain(k, width); }
3254
3255  /// Get chain similar to the specified chain
3256  /// \param ch chain to search for. only its 'k' and 'width' are used.
3257  const chain_t *hexapi get_chain(const chain_t &ch) const;
3258  chain_t *get_chain(const chain_t &ch) // non-const overload: forwards to the const version
3259  { return (chain_t*)((const block_chains_t *)this)->get_chain(ch); }
3260
3261  void hexapi print(qstring *vout) const;
3262  const char *hexapi dstr(void) const;
3263  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3264 };
3265 //-------------------------------------------------------------------------
3266 /// Chain visitor class
3267 struct chain_visitor_t
3268 {
3269  block_chains_t *parent; ///< parent of the current chain
3270  chain_visitor_t(void) : parent(NULL) {} ///< 'parent' starts out as NULL
3271  virtual int idaapi visit_chain(int nblock, chain_t &ch) = 0; ///< callback to implement; NOTE(review): presumably called for each chain 'ch' of block 'nblock' - confirm
3272 };
3273 
3274 //-------------------------------------------------------------------------
3275 /// Graph chains.
3276 /// This class represents all ud and du chains of the decompiled function
3277 typedef qvector<block_chains_t> block_chains_vec_t;
3278 class graph_chains_t : public block_chains_vec_t
3279 {
3280  int lock; ///< are chains locked? (in-use); a counter incremented by acquire()
3281 public:
3282  graph_chains_t(void) : lock(0) {}
3283  ~graph_chains_t(void) { QASSERT(50444, !lock); } // the chains must be unlocked when they are destroyed
3284  /// Visit all chains
3285  /// \param cv chain visitor
3286  /// \param gca_flags combination of GCA_ bits
3287  int hexapi for_all_chains(chain_visitor_t &cv, int gca_flags);
3288  /// \defgroup GCA_ chain visitor flags
3289  //@{
3290 #define GCA_EMPTY 0x01 ///< include empty chains
3291 #define GCA_SPEC 0x02 ///< include chains for special registers
3292 #define GCA_ALLOC 0x04 ///< enumerate only allocated chains
3293 #define GCA_NALLOC 0x08 ///< enumerate only non-allocated chains
3294 #define GCA_OFIRST 0x10 ///< consider only chains of the first block
3295 #define GCA_OLAST 0x20 ///< consider only chains of the last block
3296  //@}
3297  /// Are the chains locked?
3298  /// It is a good idea to lock the chains before using them. This ensures
3299  /// that they won't be recalculated and reallocated during the use.
3300  /// See the \ref chain_keeper_t class for that.
3301  bool is_locked(void) const { return lock != 0; }
3302  /// Lock the chains
3303  void acquire(void) { lock++; }
3304  /// Unlock the chains
3305  void hexapi release(void);
3306  void swap(graph_chains_t &r) ///< exchange both the per-block chains and the lock counter with 'r'
3307  {
3308  qvector<block_chains_t>::swap(r);
3309  std::swap(lock, r.lock);
3310  }
3311 };
3312 //-------------------------------------------------------------------------
3313 /// Microinstruction class #insn
3314 class minsn_t
3315 {
3316  void hexapi init(ea_t _ea); // used by the minsn_t(ea_t) constructor
3317  void hexapi copy(const minsn_t &m); // used by the copy constructor and by operator=
3318 public:
3319  mcode_t opcode; ///< instruction opcode
3320  int iprops; ///< combination of \ref IPROP_ bits
3321  minsn_t *next; ///< next insn in doubly linked list. check also nexti()
3322  minsn_t *prev; ///< prev insn in doubly linked list. check also previ()
3323  ea_t ea; ///< instruction address
3324  mop_t l; ///< left operand
3325  mop_t r; ///< right operand
3326  mop_t d; ///< destination operand
3327
3328  /// \defgroup IPROP_ instruction property bits
3329  //@{
3330  // bits to be used in patterns:
3331 #define IPROP_OPTIONAL 0x0001 ///< optional instruction
3332 #define IPROP_PERSIST 0x0002 ///< persistent insn; they are not destroyed
3333 #define IPROP_WILDMATCH 0x0004 ///< match multiple insns
3334
3335  // instruction attributes:
3336 #define IPROP_CLNPOP 0x0008 ///< the purpose of the instruction is to clean stack
3337  ///< (e.g. "pop ecx" is often used for that)
3338 #define IPROP_FPINSN 0x0010 ///< floating point insn
3339 #define IPROP_FARCALL 0x0020 ///< call of a far function using push cs/call sequence
3340 #define IPROP_TAILCALL 0x0040 ///< tail call
3341 #define IPROP_ASSERT 0x0080 ///< assertion: usually mov #val, op.
3342  ///< assertions are used to help the optimizer.
3343  ///< assertions are ignored when generating ctree
3344
3345  // instruction history:
3346 #define IPROP_SPLIT 0x0700 ///< the instruction has been split:
3347 #define IPROP_SPLIT1 0x0100 ///< into 1 byte
3348 #define IPROP_SPLIT2 0x0200 ///< into 2 bytes
3349 #define IPROP_SPLIT4 0x0300 ///< into 4 bytes
3350 #define IPROP_SPLIT8 0x0400 ///< into 8 bytes
3351 #define IPROP_COMBINED 0x0800 ///< insn has been modified because of a partial reference
3352 #define IPROP_EXTSTX 0x1000 ///< this is m_ext propagated into m_stx
3353 #define IPROP_IGNLOWSRC 0x2000 ///< low part of the instruction source operand
3354  ///< has been created artificially
3355  ///< (this bit is used only for 'and x, 80...')
3356 #define IPROP_INV_JX 0x4000 ///< inverted conditional jump
3357 #define IPROP_WAS_NORET 0x8000 ///< was noret icall
3358 #define IPROP_MULTI_MOV 0x10000 ///< the minsn was generated as part of insn that moves multiple registers
3359  ///< (example: STM on ARM may transfer multiple registers)
3360
3361  // bits that can be set by plugins:
3362 #define IPROP_DONT_PROP 0x20000 ///< may not propagate
3363 #define IPROP_DONT_COMB 0x40000 ///< may not combine this instruction with others
3364 #define IPROP_MBARRIER 0x80000 ///< this instruction acts as a memory barrier
3365  ///< (instructions accessing memory may not be reordered past it)
3366  //@}
3367
3368  bool is_optional(void) const { return (iprops & IPROP_OPTIONAL) != 0; }
3369  bool is_combined(void) const { return (iprops & IPROP_COMBINED) != 0; }
3370  bool is_farcall(void) const { return (iprops & IPROP_FARCALL) != 0; }
3371  bool is_cleaning_pop(void) const { return (iprops & IPROP_CLNPOP) != 0; }
3372  bool is_extstx(void) const { return (iprops & IPROP_EXTSTX) != 0; }
3373  bool is_tailcall(void) const { return (iprops & IPROP_TAILCALL) != 0; }
3374  bool is_fpinsn(void) const { return (iprops & IPROP_FPINSN) != 0; }
3375  bool is_assert(void) const { return (iprops & IPROP_ASSERT) != 0; }
3376  bool is_persistent(void) const { return (iprops & IPROP_PERSIST) != 0; }
3377  bool is_wild_match(void) const { return (iprops & IPROP_WILDMATCH) != 0; }
3378  bool is_propagatable(void) const { return (iprops & IPROP_DONT_PROP) == 0; } // true when IPROP_DONT_PROP is NOT set
3379  bool is_ignlowsrc(void) const { return (iprops & IPROP_IGNLOWSRC) != 0; }
3380  bool is_inverted_jx(void) const { return (iprops & IPROP_INV_JX) != 0; }
3381  bool was_noret_icall(void) const { return (iprops & IPROP_WAS_NORET) != 0; }
3382  bool is_multimov(void) const { return (iprops & IPROP_MULTI_MOV) != 0; }
3383  bool is_combinable(void) const { return (iprops & IPROP_DONT_COMB) == 0; } // true when IPROP_DONT_COMB is NOT set
3384  bool was_split(void) const { return (iprops & IPROP_SPLIT) != 0; } // any of the IPROP_SPLITn values
3385  bool is_mbarrier(void) const { return (iprops & IPROP_MBARRIER) != 0; }
3386
3387  void set_optional(void) { iprops |= IPROP_OPTIONAL; }
3388  void set_combined(void);
3389  void clr_combined(void) { iprops &= ~IPROP_COMBINED; }
3390  void set_farcall(void) { iprops |= IPROP_FARCALL; }
3391  void set_cleaning_pop(void) { iprops |= IPROP_CLNPOP; }
3392  void set_extstx(void) { iprops |= IPROP_EXTSTX; }
3393  void set_tailcall(void) { iprops |= IPROP_TAILCALL; }
3394  void clr_tailcall(void) { iprops &= ~IPROP_TAILCALL; }
3395  void set_fpinsn(void) { iprops |= IPROP_FPINSN; }
3396  void clr_fpinsn(void) { iprops &= ~IPROP_FPINSN; }
3397  void set_assert(void) { iprops |= IPROP_ASSERT; }
3398  void clr_assert(void) { iprops &= ~IPROP_ASSERT; }
3399  void set_persistent(void) { iprops |= IPROP_PERSIST; }
3400  void set_wild_match(void) { iprops |= IPROP_WILDMATCH; }
3401  void clr_propagatable(void) { iprops |= IPROP_DONT_PROP; } // "clear" here means setting the DONT_PROP bit
3402  void set_ignlowsrc(void) { iprops |= IPROP_IGNLOWSRC; }
3403  void clr_ignlowsrc(void) { iprops &= ~IPROP_IGNLOWSRC; }
3404  void set_inverted_jx(void) { iprops |= IPROP_INV_JX; }
3405  void set_noret_icall(void) { iprops |= IPROP_WAS_NORET; }
3406  void clr_noret_icall(void) { iprops &= ~IPROP_WAS_NORET; }
3407  void set_multimov(void) { iprops |= IPROP_MULTI_MOV; }
3408  void clr_multimov(void) { iprops &= ~IPROP_MULTI_MOV; }
3409  void set_combinable(void) { iprops &= ~IPROP_DONT_COMB; } // clears the DONT_COMB bit
3410  void clr_combinable(void) { iprops |= IPROP_DONT_COMB; } // sets the DONT_COMB bit
3411  void set_mbarrier(void) { iprops |= IPROP_MBARRIER; }
3412  void set_split_size(int s)
3413  { // s may be only 1,2,4,8. other values clear the split info
3414  iprops &= ~IPROP_SPLIT;
3415  iprops |= (s == 1 ? IPROP_SPLIT1
3416  : s == 2 ? IPROP_SPLIT2
3417  : s == 4 ? IPROP_SPLIT4
3418  : s == 8 ? IPROP_SPLIT8 : 0);
3419  }
3420  int get_split_size(void) const
3421  {
3422  int cnt = (iprops & IPROP_SPLIT) >> 8; // 0 - not split; 1..4 correspond to IPROP_SPLIT1..IPROP_SPLIT8
3423  return cnt == 0 ? 0 : 1 << (cnt-1); // 1,2,3,4 -> 1,2,4,8 bytes
3424  }
3425
3426  /// Constructor
3427  minsn_t(ea_t _ea) { init(_ea); }
3428  minsn_t(const minsn_t &m) { next = prev = NULL; copy(m); } // the copy is created unlinked: prev/next are NULL
3429  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3430
3431  /// Assignment operator. It does not copy prev/next fields.
3432  minsn_t &operator=(const minsn_t &m) { copy(m); return *this; }
3433
3434  /// Swap two instructions.
3435  /// The prev/next fields are not modified by this function
3436  /// because it would corrupt the doubly linked list.
3437  void hexapi swap(minsn_t &m);
3438
3439  /// Generate insn text into the buffer
3440  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
3441
3442  /// Get displayable text without tags in a static buffer
3443  const char *hexapi dstr(void) const;
3444
3445  /// Change the instruction address.
3446  /// This function modifies subinstructions as well.
3447  void hexapi setaddr(ea_t new_ea);
3448
3449  /// Optimize one instruction without context.
3450  /// This function does not have access to the instruction context (the
3451  /// previous and next instructions in the list, the block number, etc).
3452  /// It performs only basic optimizations that are available without this info.
3453  /// \param optflags combination of \ref OPTI_ bits
3454  /// \return number of changes, 0-unchanged
3455  /// See also mblock_t::optimize_insn()
3456  int optimize_solo(int optflags=0) { return optimize_subtree(NULL, NULL, NULL, NULL, optflags); }
3457  /// \defgroup OPTI_ optimization flags
3458  //@{
3459 #define OPTI_ADDREXPRS 0x0001 ///< optimize all address expressions (&x+N; &x-&y)
3460 #define OPTI_MINSTKREF 0x0002 ///< may update minstkref
3461 #define OPTI_COMBINSNS 0x0004 ///< may combine insns (only for optimize_insn)
3462 #define OPTI_NO_LDXOPT 0x0008 ///< do not optimize low/high(ldx)
3463  //@}
3464
3465  /// Optimize instruction in its context.
3466  /// Do not use this function, use mblock_t::optimize()
3467  int hexapi optimize_subtree(
3468  mblock_t *blk,
3469  minsn_t *top,
3470  minsn_t *parent,
3471  minsn_t **converted_call,
3472  int optflags=OPTI_MINSTKREF);
3473
3474  /// Visit all instruction operands.
3475  /// This function visits subinstruction operands as well.
3476  /// \param mv operand visitor
3477  /// \return non-zero value returned by mv.visit_mop() or zero
3478  int hexapi for_all_ops(mop_visitor_t &mv);
3479
3480  /// Visit all instructions.
3481  /// This function visits the instruction itself and all its subinstructions.
3482  /// \param mv instruction visitor
3483  /// \return non-zero value returned by mv.visit_mop() or zero
3484  int hexapi for_all_insns(minsn_visitor_t &mv);
3485
3486  /// Convert instruction to nop.
3487  /// This function erases all info but the prev/next fields.
3488  /// In most cases it is better to use mblock_t::make_nop(), which also
3489  /// marks the block lists as dirty.
3490  void hexapi _make_nop(void);
3491
3492  /// Compare instructions.
3493  /// This is the main comparison function for instructions.
3494  /// \param m instruction to compare with
3495  /// \param eqflags combination of \ref EQ_ bits
3496  bool hexapi equal_insns(const minsn_t &m, int eqflags) const; // intelligent comparison
3497  /// \defgroup EQ_ comparison bits
3498  //@{
3499 #define EQ_IGNSIZE 0x0001 ///< ignore operand sizes
3500 #define EQ_IGNCODE 0x0002 ///< ignore instruction opcodes
3501 #define EQ_CMPDEST 0x0004 ///< compare instruction destinations
3502 #define EQ_OPTINSN 0x0008 ///< optimize mop_d operands
3503  //@}
3504
3505  /// Lexicographical comparison.
3506  /// It can be used to store minsn_t in various containers, like std::set
3507  bool operator <(const minsn_t &ri) const { return lexcompare(ri) < 0; }
3508  int hexapi lexcompare(const minsn_t &ri) const;
3509
3510  //-----------------------------------------------------------------------
3511  // Call instructions
3512  //-----------------------------------------------------------------------
3513  /// Is a non-returning call?
3514  /// \param ignore_noret_icall if set, indirect calls to noret functions will
3515  /// return false
3516  bool hexapi is_noret_call(bool ignore_noret_icall=false);
3517
3518  /// Is an unknown call?
3519  /// Unknown calls are resolved by mbl_array_t::analyze_calls()
3520  /// They exist until the MMAT_CALLS maturity level.
3521  /// See also \ref mblock_t::is_call_block
3522  bool is_unknown_call(void) const { return is_mcode_call(opcode) && d.empty(); }
3523
3524  /// Is a helper call with the specified name?
3525  /// Helper calls usually have well-known function names (see \ref FUNC_NAME_)
3526  /// but they may have any other name. The decompiler does not assume any
3527  /// special meaning for non-well-known names.
3528  bool hexapi is_helper(const char *name) const;
3529
3530  /// Find a call instruction.
3531  /// Check for the current instruction and its subinstructions.
3532  /// \param with_helpers consider helper calls as well?
3533  minsn_t *hexapi find_call(bool with_helpers=false) const;
3534
3535  /// Does the instruction contain a call?
3536  bool contains_call(bool with_helpers=false) const { return find_call(with_helpers) != NULL; }
3537
3538  /// Does the instruction have a side effect?
3539  /// \param include_ldx_and_divs consider ldx/div/mod as having side effects?
3540  /// stx is always considered as having side effects.
3541  /// Apart from ldx/std only call may have side effects.
3542  bool hexapi has_side_effects(bool include_ldx_and_divs=false) const;
3543
3544  /// Get the function role of a call.
3545  /// Returns ROLE_UNK if the 'd' operand is not an argument list.
3546  funcrole_t get_role(void) const { return d.is_arglist() ? d.f->role : ROLE_UNK; }
3547  bool is_memcpy(void) const { return get_role() == ROLE_MEMCPY; }
3548  bool is_memset(void) const { return get_role() == ROLE_MEMSET; }
3549  bool is_alloca(void) const { return get_role() == ROLE_ALLOCA; }
3550  bool is_bswap (void) const { return get_role() == ROLE_BSWAP; }
3551  bool is_readflags (void) const { return get_role() == ROLE_READFLAGS; }
3552
3553  //-----------------------------------------------------------------------
3554  // Misc
3555  //-----------------------------------------------------------------------
3556  /// Does the instruction have the specified opcode?
3557  /// This function searches subinstructions as well.
3558  /// \param mcode opcode to search for.
3559  bool contains_opcode(mcode_t mcode) const { return find_opcode(mcode) != NULL; }
3560
3561  /// Find a (sub)instruction with the specified opcode.
3562  /// \param mcode opcode to search for.
3563  const minsn_t *find_opcode(mcode_t mcode) const { return (CONST_CAST(minsn_t*)(this))->find_opcode(mcode); }
3564  minsn_t *hexapi find_opcode(mcode_t mcode);
3565
3566  /// Find an operand that is a subinstruction with the specified opcode.
3567  /// This function checks only the 'l' and 'r' operands of the current insn.
3568  /// \param[out] other pointer to the other operand
3569  /// (&r if we return &l and vice versa)
3570  /// \param op opcode to search for
3571  /// \return &l or &r or NULL
3572  const minsn_t *hexapi find_ins_op(const mop_t **other, mcode_t op=m_nop) const;
3573  minsn_t *find_ins_op(mop_t **other, mcode_t op=m_nop) { return CONST_CAST(minsn_t*)((CONST_CAST(const minsn_t*)(this))->find_ins_op((const mop_t**)other, op)); }
3574
3575  /// Find a numeric operand of the current instruction.
3576  /// This function checks only the 'l' and 'r' operands of the current insn.
3577  /// \param[out] other pointer to the other operand
3578  /// (&r if we return &l and vice versa)
3579  /// \return &l or &r or NULL
3580  const mop_t *hexapi find_num_op(const mop_t **other) const;
3581  mop_t *find_num_op(mop_t **other) { return CONST_CAST(mop_t*)((CONST_CAST(const minsn_t*)(this))->find_num_op((const mop_t**)other)); }
3582
3583  bool is_mov(void) const { return opcode == m_mov || (opcode == m_f2f && l.size == d.size); } // m_f2f counts as a move only if the operand sizes are equal
3584  bool is_like_move(void) const { return is_mov() || is_mcode_xdsu(opcode) || opcode == m_low; }
3585
3586  /// Does the instruction modify its 'd' operand?
3587  /// Some instructions (e.g. m_stx) do not modify the 'd' operand.
3588  bool hexapi modifes_d(void) const;
3589  bool modifies_pair_mop(void) const { return d.t == mop_p && modifes_d(); }
3590
3591  /// Is the instruction in the specified range of instructions?
3592  /// \param m1 beginning of the range in the doubly linked list
3593  /// \param m2 end of the range in the doubly linked list (excluded, may be NULL)
3594  /// This function assumes that m1 and m2 belong to the same basic block
3595  /// and they are top level instructions.
3596  bool hexapi is_between(const minsn_t *m1, const minsn_t *m2) const;
3597
3598  /// Is the instruction after the specified one?
3599  /// \param m the instruction to compare against in the list
3600  /// (if it is NULL, the function returns false)
3601  bool is_after(const minsn_t *m) const { return m != NULL && is_between(m->next, NULL); }
3602
3603  /// Is it possible for the instruction to use aliased memory?
3604  bool hexapi may_use_aliased_memory(void) const;
3605 };
3604 
3605 /// Skip assertions forward
3606 const minsn_t *hexapi getf_reginsn(const minsn_t *ins);
3607 /// Skip assertions backward
3608 const minsn_t *hexapi getb_reginsn(const minsn_t *ins);
3609 inline minsn_t *getf_reginsn(minsn_t *ins) { return CONST_CAST(minsn_t*)(getf_reginsn(CONST_CAST(const minsn_t *)(ins))); } // non-const overload: forwards to the const version
3610 inline minsn_t *getb_reginsn(minsn_t *ins) { return CONST_CAST(minsn_t*)(getb_reginsn(CONST_CAST(const minsn_t *)(ins))); } // non-const overload: forwards to the const version
3611 
3612 //-------------------------------------------------------------------------
3613 /// Basic block types
3614 enum mblock_type_t
3615 {
3616  BLT_NONE = 0, ///< unknown block type
3617  BLT_STOP = 1, ///< stops execution regularly (must be the last block)
3618  BLT_0WAY = 2, ///< does not have successors (tail is a noret function)
3619  BLT_1WAY = 3, ///< passes execution to one block (regular or goto block)
3620  BLT_2WAY = 4, ///< passes execution to two blocks (conditional jump)
3621  BLT_NWAY = 5, ///< passes execution to many blocks (switch idiom)
3622  BLT_XTRN = 6, ///< external block (out of function address)
3623 };
3624 
3625 // Maximal bit range, constructed as bitrange_t(0, USHRT_MAX)
3626 #define MAXRANGE bitrange_t(0, USHRT_MAX)
3627 
3628 //-------------------------------------------------------------------------
3629 /// Microcode of one basic block.
3630 /// All blocks are part of a doubly linked list. They can also be addressed
3631 /// by indexing the mba->natural array. A block contains a doubly linked list
3632 /// of instructions, various location lists that are used for data flow
3633 /// analysis, and other attributes.
3635 {
3636  friend class codegen_t;
3637  DECLARE_UNCOPYABLE(mblock_t)
3638  void hexapi init(void);
3639 public:
3640  mblock_t *nextb; ///< next block in the doubly linked list
3641  mblock_t *prevb; ///< previous block in the doubly linked list
3642  uint32 flags; ///< combination of \ref MBL_ bits
3643  /// \defgroup MBL_ Basic block properties
3644  //@{
3645 #define MBL_PRIV 0x0001 ///< private block - no instructions except
3646  ///< the specified are accepted (used in patterns)
3647 #define MBL_NONFAKE 0x0000 ///< regular block
3648 #define MBL_FAKE 0x0002 ///< fake block (after a tail call)
3649 #define MBL_GOTO 0x0004 ///< this block is a goto target
3650 #define MBL_TCAL 0x0008 ///< aritifical call block for tail calls
3651 #define MBL_PUSH 0x0010 ///< needs "convert push/pop instructions"
3652 #define MBL_DMT64 0x0020 ///< needs "demote 64bits"
3653 #define MBL_COMB 0x0040 ///< needs "combine" pass
3654 #define MBL_PROP 0x0080 ///< needs 'propagation' pass
3655 #define MBL_DEAD 0x0100 ///< needs "eliminate deads" pass
3656 #define MBL_LIST 0x0200 ///< use/def lists are ready (not dirty)
3657 #define MBL_INCONST 0x0400 ///< inconsistent lists: we are building them
3658 #define MBL_CALL 0x0800 ///< call information has been built
3659 #define MBL_BACKPROP 0x1000 ///< performed backprop_cc
3660 #define MBL_NORET 0x2000 ///< dead end block: doesn't return execution control
3661 #define MBL_DSLOT 0x4000 ///< block for delay slot
3662 #define MBL_VALRANGES 0x8000 ///< should optimize using value ranges
3663  //@}
3664  ea_t start; ///< start address
3665  ea_t end; ///< end address
3666  ///< note: we cannot rely on start/end addresses
3667  ///< very much because instructions are
3668  ///< propagated between blocks
3669  minsn_t *head; ///< pointer to the first instruction of the block
3670  minsn_t *tail; ///< pointer to the last instruction of the block
3671  mbl_array_t *mba; ///< the parent micro block array
3672  int serial; ///< block number
3673  mblock_type_t type; ///< block type (BLT_NONE - not computed yet)
3674 
3675  mlist_t dead_at_start; ///< data that is dead at the block entry
3676  mlist_t mustbuse; ///< data that must be used by the block
3677  mlist_t maybuse; ///< data that may be used by the block
3678  mlist_t mustbdef; ///< data that must be defined by the block
3679  mlist_t maybdef; ///< data that may be defined by the block
3680  mlist_t dnu; ///< data that is defined but not used in the block
3681 
3682  sval_t maxbsp; ///< maximal sp value in the block (0...stacksize)
3683  sval_t minbstkref; ///< lowest stack location accessible with indirect
3684  ///< addressing (offset from the stack bottom)
3685  ///< initially it is 0 (not computed)
3686  sval_t minbargref; ///< the same for arguments
3687 
3688  intvec_t predset; ///< control flow graph: list of our predecessors
3689  ///< use npred() and pred() to access it
3690  intvec_t succset; ///< control flow graph: list of our successors
3691  ///< use nsucc() and succ() to access it
3692 
3693  // the exact size of this class is not documented, there may be more fields
3694  char reserved[];
3695 
3696  void mark_lists_dirty(void) { flags &= ~MBL_LIST; request_propagation(); }
3697  void request_propagation(void) { flags |= MBL_PROP; }
3698  bool needs_propagation(void) const { return (flags & MBL_PROP) != 0; }
3699  void request_demote64(void) { flags |= MBL_DMT64; }
3700  bool lists_dirty(void) const { return (flags & MBL_LIST) == 0; }
3701  bool lists_ready(void) const { return (flags & (MBL_LIST|MBL_INCONST)) == MBL_LIST; }
3702  int make_lists_ready(void) // returns number of changes
3703  {
3704  if ( lists_ready() )
3705  return 0;
3706  return build_lists(false);
3707  }
3708 
3709  /// Get number of block predecessors
3710  int npred(void) const { return predset.size(); } // number of xrefs to the block
3711  /// Get number of block successors
3712  int nsucc(void) const { return succset.size(); } // number of xrefs from the block
3713  /// Get predecessor number N (element N of predset)
3714  int pred(int n) const { return predset[n]; }
3715  /// Get successor number N (element N of succset)
3716  int succ(int n) const { return succset[n]; }
3717 
3718  mblock_t(void) { init(); }
3719  virtual ~mblock_t(void);
3720  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
3721  bool empty(void) const { return head == NULL; }
3722 
3723  /// Print block contents.
3724  /// \param vp print helpers class. it can be used to direct the printed
3725  /// info to any destination
3726  void hexapi print(vd_printer_t &vp) const;
3727 
3728  /// Dump block info.
3729  /// This function is useful for debugging, see mbl_array_t::dump for info
3730  void hexapi dump(void) const;
3731  AS_PRINTF(2, 0) void hexapi vdump_block(const char *title, va_list va) const;
3732  AS_PRINTF(2, 3) void dump_block(const char *title, ...) const
3733  {
3734  va_list va;
3735  va_start(va, title);
3736  vdump_block(title, va);
3737  va_end(va);
3738  }
3739 
3740  //-----------------------------------------------------------------------
3741  // Functions to insert/remove insns during the microcode optimization phase.
3742  // See codegen_t, microcode_filter_t, udcall_t classes for the initial
3743  // microcode generation.
3744  //-----------------------------------------------------------------------
3745  /// Insert instruction into the doubly linked list
3746  /// \param nm new instruction
3747  /// \param om existing instruction, part of the doubly linked list
3748  /// if NULL, then the instruction will be inserted at the beginning
3749  /// of the list
3750  /// NM will be inserted immediately after OM
3751  /// \return pointer to NM
3752  minsn_t *hexapi insert_into_block(minsn_t *nm, minsn_t *om);
3753 
3754  /// Remove instruction from the doubly linked list
3755  /// \param m instruction to remove
3756  /// The removed instruction is not deleted, the caller gets its ownership
3757  /// \return pointer to the next instruction
3758  minsn_t *hexapi remove_from_block(minsn_t *m);
3759 
3760  //-----------------------------------------------------------------------
3761  // Iterator over instructions and operands
3762  //-----------------------------------------------------------------------
3763  /// Visit all instructions.
3764  /// This function visits subinstructions too.
3765  /// \param mv instruction visitor
3766  /// \return zero or the value returned by mv.visit_insn()
3767  /// See also mbl_array_t::for_all_topinsns()
3768  int hexapi for_all_insns(minsn_visitor_t &mv);
3769 
3770  /// Visit all operands.
3771  /// This function visits subinstruction operands too.
3772  /// \param mv operand visitor
3773  /// \return zero or the value returned by mv.visit_mop()
3774  int hexapi for_all_ops(mop_visitor_t &mv);
3775 
3776  /// Visit all operands that use LIST.
3777  /// \param list ptr to the list of locations. it may be modified:
3778  /// parts that get redefined by the instructions in [i1,i2)
3779  /// will be deleted.
3780  /// \param i1 starting instruction. must be a top level insn.
3781  /// \param i2 ending instruction (excluded). must be a top level insn.
3782  /// \param mmv operand visitor
3783  /// \return zero or the value returned by mmv.visit_mop()
3784  int hexapi for_all_uses(
3785  mlist_t *list,
3786  minsn_t *i1,
3787  minsn_t *i2,
3788  mlist_mop_visitor_t &mmv);
3789 
3790  //-----------------------------------------------------------------------
3791  // Optimization functions
3792  //-----------------------------------------------------------------------
3793  /// Optimize one instruction in the context of the block.
3794  /// \param m pointer to a top level instruction
3795  /// \param optflags combination of \ref OPTI_ bits
3796  /// \return number of changes made to the block
3797  /// This function may change other instructions in the block too.
3798  /// However, it will not destroy top level instructions (it may convert them
3799  /// to nop's). This function performs only intrablock modifications.
3800  /// See also minsn_t::optimize_solo()
3801  int hexapi optimize_insn(minsn_t *m, int optflags=OPTI_MINSTKREF|OPTI_COMBINSNS);
3802 
3803  /// Optimize a basic block.
3804  /// Usually there is no need to call this function explicitly because the
3805  /// decompiler will call it itself if optinsn_t::func or optblock_t::func
3806  /// return non-zero.
3807  /// \return number of changes made to the block
3808  int hexapi optimize_block(void);
3809 
3810  /// Build def-use lists and eliminate deads.
3811  /// \param kill_deads do delete dead instructions?
3812  /// \return the number of eliminated instructions
3813  /// Better call mblock_t::make_lists_ready() rather than this function.
3814  int hexapi build_lists(bool kill_deads);
3815 
3816  /// Remove a jump at the end of the block if it is useless.
3817  /// This function preserves any side effects when removing a useless jump.
3818  /// Both conditional and unconditional jumps are handled (and jtbl too).
3819  /// This function deletes useless jumps, not only replaces them with a nop.
3820  /// (please note that optimize_insn() does not handle useless jumps).
3821  /// \return number of changes made to the block
3822  int hexapi optimize_useless_jump(void);
3823 
3824  //-----------------------------------------------------------------------
3825  // Functions that build with use/def lists. These lists are used to
3826  // represent lists of registers and stack locations that are either modified
3827  // or accessed by microinstructions.
3828  //-----------------------------------------------------------------------
3829  /// Append use-list of an operand.
3830  /// This function calculates list of locations that may or must be used
3831  /// by the operand and appends it to LIST.
3832  /// \param list ptr to the output buffer. we will append to it.
3833  /// \param op operand to calculate the use list of
3834  /// \param maymust should we calculate 'may-use' or 'must-use' list?
3835  /// see \ref maymust_t for more details.
3836  /// \param mask if only part of the operand should be considered,
3837  /// a bitmask can be used to specify which part.
3838  /// example: op=AX,mask=0xFF means that we will consider only AL.
3839  void hexapi append_use_list(
3840  mlist_t *list,
3841  const mop_t &op,
3842  maymust_t maymust,
3843  bitrange_t mask=MAXRANGE) const;
3844 
3845  /// Append def-list of an operand.
3846  /// This function calculates list of locations that may or must be modified
3847  /// by the operand and appends it to LIST.
3848  /// \param list ptr to the output buffer. we will append to it.
3849  /// \param op operand to calculate the def list of
3850  /// \param maymust should we calculate 'may-def' or 'must-def' list?
3851  /// see \ref maymust_t for more details.
3852  void hexapi append_def_list(
3853  mlist_t *list,
3854  const mop_t &op,
3855  maymust_t maymust) const;
3856 
3857  /// Build use-list of an instruction.
3858  /// This function calculates list of locations that may or must be used
3859  /// by the instruction. Examples:
3860  /// "ldx ds.2, eax.4, ebx.4", may-list: all aliasable memory
3861  /// "ldx ds.2, eax.4, ebx.4", must-list: empty
3862  /// Since LDX uses EAX for indirect access, it may access any aliasable
3863  /// memory. On the other hand, we cannot tell for sure which memory cells
3864  /// will be accessed, this is why the must-list is empty.
3865  /// \param ins instruction to calculate the use list of
3866  /// \param maymust should we calculate 'may-use' or 'must-use' list?
3867  /// see \ref maymust_t for more details.
3868  /// \return the calculated use-list
3869  mlist_t hexapi build_use_list(const minsn_t &ins, maymust_t maymust) const;
3870 
3871  /// Build def-list of an instruction.
3872  /// This function calculates list of locations that may or must be modified
3873  /// by the instruction. Examples:
3874  /// "stx ebx.4, ds.2, eax.4", may-list: all aliasable memory
3875  /// "stx ebx.4, ds.2, eax.4", must-list: empty
3876  /// Since STX uses EAX for indirect access, it may modify any aliasable
3877  /// memory. On the other hand, we cannot tell for sure which memory cells
3878  /// will be modified, this is why the must-list is empty.
3879  /// \param ins instruction to calculate the def list of
3880  /// \param maymust should we calculate 'may-def' or 'must-def' list?
3881  /// see \ref maymust_t for more details.
3882  /// \return the calculated def-list
3883  mlist_t hexapi build_def_list(const minsn_t &ins, maymust_t maymust) const;
3884 
3885  //-----------------------------------------------------------------------
3886  // The use/def lists can be used to search for interesting instructions
3887  //-----------------------------------------------------------------------
3888  /// Is the list used by the specified instruction range?
3889  /// \param list list of locations. LIST may be modified by the function:
3890  /// redefined locations will be removed from it.
3891  /// \param i1 starting instruction of the range (must be a top level insn)
3892  /// \param i2 end instruction of the range (must be a top level insn)
3893  /// i2 is excluded from the range. it can be specified as NULL.
3894  /// i1 and i2 must belong to the same block.
3895  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3896  bool is_used(mlist_t *list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const
3897  { return find_first_use(list, i1, i2, maymust) != NULL; }
3898 
3899  /// Find the first insn that uses the specified list in the insn range.
3900  /// \param list list of locations. LIST may be modified by the function:
3901  /// redefined locations will be removed from it.
3902  /// \param i1 starting instruction of the range (must be a top level insn)
3903  /// \param i2 end instruction of the range (must be a top level insn)
3904  /// i2 is excluded from the range. it can be specified as NULL.
3905  /// i1 and i2 must belong to the same block.
3906  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3907  /// \return pointer to such instruction or NULL.
3908  /// Upon return LIST will contain only locations not redefined
3909  /// by insns [i1..result]
3910  const minsn_t *hexapi find_first_use(mlist_t *list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const;
3911  minsn_t *find_first_use(mlist_t *list, minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const
3912  {
3913  return CONST_CAST(minsn_t*)(find_first_use(list,
3914  CONST_CAST(const minsn_t*)(i1),
3915  i2,
3916  maymust));
3917  }
3918 
3919  /// Is the list redefined by the specified instructions?
3920  /// \param list list of locations to check.
3921  /// \param i1 starting instruction of the range (must be a top level insn)
3922  /// \param i2 end instruction of the range (must be a top level insn)
3923  /// i2 is excluded from the range. it can be specified as NULL.
3924  /// i1 and i2 must belong to the same block.
3925  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3927  const mlist_t &list,
3928  const minsn_t *i1,
3929  const minsn_t *i2,
3930  maymust_t maymust=MAY_ACCESS) const
3931  {
3932  return find_redefinition(list, i1, i2, maymust) != NULL;
3933  }
3934 
3935  /// Find the first insn that redefines any part of the list in the insn range.
3936  /// \param list list of locations to check.
3937  /// \param i1 starting instruction of the range (must be a top level insn)
3938  /// \param i2 end instruction of the range (must be a top level insn)
3939  /// i2 is excluded from the range. it can be specified as NULL.
3940  /// i1 and i2 must belong to the same block.
3941  /// \param maymust should we search in 'may-access' or 'must-access' mode?
3942  /// \return pointer to such instruction or NULL.
3943  const minsn_t *hexapi find_redefinition(
3944  const mlist_t &list,
3945  const minsn_t *i1,
3946  const minsn_t *i2,
3947  maymust_t maymust=MAY_ACCESS) const;
3948  minsn_t *find_redefinition(
3949  const mlist_t &list,
3950  minsn_t *i1,
3951  const minsn_t *i2,
3952  maymust_t maymust=MAY_ACCESS) const
3953  {
3954  return CONST_CAST(minsn_t*)(find_redefinition(list,
3955  CONST_CAST(const minsn_t*)(i1),
3956  i2,
3957  maymust));
3958  }
3959 
3960  /// Is the right hand side of the instruction redefined in the insn range?
3961  /// "right hand side" corresponds to the source operands of the instruction.
3962  /// \param ins instruction to consider
3963  /// \param i1 starting instruction of the range (must be a top level insn)
3964  /// \param i2 end instruction of the range (must be a top level insn)
3965  /// i2 is excluded from the range. it can be specified as NULL.
3966  /// i1 and i2 must belong to the same block.
3967  bool hexapi is_rhs_redefined(minsn_t *ins, minsn_t *i1, minsn_t *i2);
3968 
3969  /// Find the instruction that accesses the specified operand.
3970  /// This function searches inside one block.
3971  /// \param op operand to search for
3972  /// \param parent ptr to ptr to a top level instruction.
3973  /// denotes the beginning of the search range.
3974  /// \param mend end instruction of the range (must be a top level insn)
3975  /// mend is excluded from the range. it can be specified as NULL.
3976  /// *parent and mend must belong to the same block.
3977  /// \param fdflags combination of \ref FD_ bits
3978  /// \return the instruction that accesses the operand. this instruction
3979  /// may be a sub-instruction. to find out the top level
3980  /// instruction, check out *parent.
3981  /// NULL means 'not found'.
3982  minsn_t *hexapi find_access(
3983  const mop_t &op,
3984  minsn_t **parent,
3985  const minsn_t *mend,
3986  int fdflags) const;
3987  /// \defgroup FD_ bits for mblock_t::find_access
3988  //@{
3989 #define FD_BACKWARD 0x0000 ///< search direction
3990 #define FD_FORWARD 0x0001 ///< search direction
3991 #define FD_USE 0x0000 ///< look for use
3992 #define FD_DEF 0x0002 ///< look for definition
3993 #define FD_DIRTY 0x0004 ///< ignore possible implicit definitions
3994  ///< by function calls and indirect memory access
3995  //@}
3996 
3997  // Convenience functions:
3998  minsn_t *find_def(
3999  const mop_t &op,
4000  minsn_t **p_i1,
4001  const minsn_t *i2,
4002  int fdflags)
4003  {
4004  return find_access(op, p_i1, i2, fdflags|FD_DEF);
4005  }
4006  minsn_t *find_use(
4007  const mop_t &op,
4008  minsn_t **p_i1,
4009  const minsn_t *i2,
4010  int fdflags)
4011  {
4012  return find_access(op, p_i1, i2, fdflags|FD_USE);
4013  }
4014 
4015  /// Find possible values for a block.
4016  /// \param res set of value ranges
4017  /// \param vivl what to search for
4018  /// \param vrflags combination of \ref VR_ bits
4019  bool hexapi get_valranges(valrng_t *res, const vivl_t &vivl, int vrflags) const;
4020 
4021  /// Find possible values for an instruction.
4022  /// \param res set of value ranges
4023  /// \param vivl what to search for
4024  /// \param m insn to search value ranges at. \sa VR_ bits
4025  /// \param vrflags combination of \ref VR_ bits
4026  bool hexapi get_valranges(
4027  valrng_t *res,
4028  const vivl_t &vivl,
4029  const minsn_t *m,
4030  int vrflags) const;
4031 
4032  /// \defgroup VR_ bits for get_valranges
4033  //@{
4034 #define VR_AT_START 0x0000 ///< get value ranges before the instruction or
4035  ///< at the block start (if M is NULL)
4036 #define VR_AT_END 0x0001 ///< get value ranges after the instruction or
4037  ///< at the block end, just after the last
4038  ///< instruction (if M is NULL)
4039 #define VR_EXACT 0x0002 ///< find exact match. if not set, the returned
4040  ///< valrng size will be >= vivl.size
4041  //@}
4042 
4043  /// Erase the instruction (convert it to nop) and mark the lists dirty.
4044  /// This is the recommended function to use because it also marks the block
4045  /// use-def lists dirty.
4046  void make_nop(minsn_t *m) { m->_make_nop(); mark_lists_dirty(); }
4047 
4048  /// Calculate number of regular instructions in the block.
4049  /// Assertions are skipped by this function.
4050  /// \return Number of non-assertion instructions in the block.
4051  size_t hexapi get_reginsn_qty(void) const;
4052 
4053  bool is_call_block(void) const { return tail != NULL && is_mcode_call(tail->opcode); }
4054  bool is_unknown_call(void) const { return tail != NULL && tail->is_unknown_call(); }
4055  bool is_nway(void) const { return type == BLT_NWAY; }
4056  bool is_branch(void) const { return type == BLT_2WAY && tail->d.t == mop_b; }
4057  bool is_simple_goto_block(void) const
4058  {
4059  return get_reginsn_qty() == 1
4060  && tail->opcode == m_goto
4061  && tail->l.t == mop_b;
4062  }
4063  bool is_simple_jcnd_block() const
4064  {
4065  return is_branch()
4066  && npred() == 1
4067  && get_reginsn_qty() == 1
4068  && is_mcode_convertible_to_set(tail->opcode);
4069  }
4070 };
4071 //-------------------------------------------------------------------------
4072 /// Warning ids
4074 {
4075  WARN_VARARG_REGS, ///< 0 cannot handle register arguments in vararg function, discarded them
4076  WARN_ILL_PURGED, ///< 1 odd caller purged bytes %d, correcting
4077  WARN_ILL_FUNCTYPE, ///< 2 invalid function type has been ignored
4078  WARN_VARARG_TCAL, ///< 3 cannot handle tail call to vararg
4079  WARN_VARARG_NOSTK, ///< 4 call vararg without local stack
4080  WARN_VARARG_MANY, ///< 5 too many varargs, some ignored
4081  WARN_ADDR_OUTARGS, ///< 6 cannot handle address arithmetics in outgoing argument area of stack frame -- unused
4082  WARN_DEP_UNK_CALLS, ///< 7 found interdependent unknown calls
4083  WARN_ILL_ELLIPSIS, ///< 8 erroneously detected ellipsis type has been ignored
4084  WARN_GUESSED_TYPE, ///< 9 using guessed type %s;
4085  WARN_EXP_LINVAR, ///< 10 failed to expand a linear variable
4086  WARN_WIDEN_CHAINS, ///< 11 failed to widen chains
4087  WARN_BAD_PURGED, ///< 12 inconsistent function type and number of purged bytes
4088  WARN_CBUILD_LOOPS, ///< 13 too many cbuild loops
4089  WARN_NO_SAVE_REST, ///< 14 could not find valid save-restore pair for %s
4090  WARN_ODD_INPUT_REG, ///< 15 odd input register %s
4091  WARN_ODD_ADDR_USE, ///< 16 odd use of a variable address
4092  WARN_MUST_RET_FP, ///< 17 function return type is incorrect (must be floating point)
4093  WARN_ILL_FPU_STACK, ///< 18 inconsistent fpu stack
4094  WARN_SELFREF_PROP, ///< 19 self-referencing variable has been detected
4095  WARN_WOULD_OVERLAP, ///< 20 variables would overlap: %s
4096  WARN_ARRAY_INARG, ///< 21 array has been used for an input argument
4097  WARN_MAX_ARGS, ///< 22 too many input arguments, some ignored
4098  WARN_BAD_FIELD_TYPE,///< 23 incorrect structure member type for %s::%s, ignored
4099  WARN_WRITE_CONST, ///< 24 write access to const memory at %a has been detected
4100  WARN_BAD_RETVAR, ///< 25 wrong return variable
4101  WARN_FRAG_LVAR, ///< 26 fragmented variable at %s may be wrong
4102  WARN_HUGE_STKOFF, ///< 27 exceedingly huge offset into the stack frame
4103  WARN_UNINITED_REG, ///< 28 reference to an uninitialized register has been removed: %s
4104  WARN_FIXED_MACRO, ///< 29 fixed broken macro-insn
4105  WARN_WRONG_VA_OFF, ///< 30 wrong offset of va_list variable
4106  WARN_CR_NOFIELD, ///< 31 CONTAINING_RECORD: no field '%s' in struct '%s' at %d
4107  WARN_CR_BADOFF, ///< 32 CONTAINING_RECORD: too small offset %d for struct '%s'
4108  WARN_BAD_STROFF, ///< 33 user specified stroff has not been processed: %s
4109  WARN_BAD_VARSIZE, ///< 34 inconsistent variable size for '%s'
4110  WARN_UNSUPP_REG, ///< 35 unsupported processor register '%s'
4111  WARN_UNALIGNED_ARG, ///< 36 unaligned function argument '%s'
4112  WARN_BAD_STD_TYPE, ///< 37 corrupted or unexisting local type '%s'
4113  WARN_BAD_CALL_SP, ///< 38 bad sp value at call
4114  WARN_MISSED_SWITCH, ///< 39 wrong markup of switch jump, skipped it
4115  WARN_BAD_SP, ///< 40 positive sp value %a has been found
4116  WARN_BAD_STKPNT, ///< 41 wrong sp change point
4117  WARN_UNDEF_LVAR, ///< 42 variable '%s' is possibly undefined
4118  WARN_JUMPOUT, ///< 43 control flows out of bounds
4119  WARN_BAD_VALRNG, ///< 44 values range analysis failed
4120  WARN_BAD_SHADOW, ///< 45 ignored the value written to the shadow area of the succeeding call
4121  WARN_OPT_VALRNG, ///< 46 conditional instruction was optimized away because of '%s'
4122  WARN_RET_LOCREF, ///< 47 returning address of temporary local variable '%s'
4123 
4124  WARN_MAX, ///< may be used in notes as a placeholder when the
4125  ///< warning id is not available
4126 };
4127 
4128 /// Warning instances
4130 {
4131  ea_t ea; ///< Address where the warning occurred
4132  warnid_t id; ///< Warning id
4133  qstring text; ///< Fully formatted text of the warning
4134  DECLARE_COMPARISONS(hexwarn_t)
4135  {
4136  if ( ea < r.ea )
4137  return -1;
4138  if ( ea > r.ea )
4139  return 1;
4140  if ( id < r.id )
4141  return -1;
4142  if ( id > r.id )
4143  return 1;
4144  return strcmp(text.c_str(), r.text.c_str());
4145  }
4146 };
4147 DECLARE_TYPE_AS_MOVABLE(hexwarn_t);
4148 typedef qvector<hexwarn_t> hexwarns_t;
4149 
4150 //-------------------------------------------------------------------------
4151 /// Microcode maturity levels
4153 {
4154  MMAT_ZERO, ///< microcode does not exist
4155  MMAT_GENERATED, ///< generated microcode
4156  MMAT_PREOPTIMIZED, ///< preoptimized pass is complete
4157  MMAT_LOCOPT, ///< local optimization of each basic block is complete.
4158  ///< control flow graph is ready too.
4159  MMAT_CALLS, ///< detected call arguments
4160  MMAT_GLBOPT1, ///< performed the first pass of global optimization
4161  MMAT_GLBOPT2, ///< most global optimization passes are done
4162  MMAT_GLBOPT3, ///< completed all global optimization. microcode is fixed now.
4163  MMAT_LVARS, ///< allocated local variables
4164 };
4165 
4166 //-------------------------------------------------------------------------
4167 enum memreg_index_t ///< memory region types
4168 {
4169  MMIDX_GLBLOW, ///< global memory: low part
4170  MMIDX_LVARS, ///< stack: local variables
4171  MMIDX_RETADDR, ///< stack: return address
4172  MMIDX_SHADOW, ///< stack: shadow arguments
4173  MMIDX_ARGS, ///< stack: regular stack arguments
4174  MMIDX_GLBHIGH, ///< global memory: high part
4175 };
4176 
4177 //-------------------------------------------------------------------------
4178 /// Ranges to decompile. Either a function or an explicit vector of ranges.
4180 {
4181  func_t *pfn; ///< function to decompile
4182  rangevec_t ranges; ///< empty ? function_mode : snippet mode
4183  mba_ranges_t(func_t *_pfn=NULL) : pfn(_pfn) {}
4184  mba_ranges_t(const rangevec_t &r) : pfn(NULL), ranges(r) {}
4185  ea_t start(void) const { return (ranges.empty() ? *pfn : ranges[0]).start_ea; }
4186  bool empty(void) const { return pfn == NULL && ranges.empty(); }
4187  void clear(void) { pfn = NULL; ranges.clear(); }
4188  bool is_snippet(void) const { return !ranges.empty(); }
4189  bool hexapi range_contains(ea_t ea) const;
4190  bool is_fragmented(void) const { return ranges.empty() ? pfn->tailqty > 0 : ranges.size() > 1; }
4191 };
4192 
4193 /// Item iterator of arbitrary rangevec items
4195 {
4196  const rangevec_t *ranges;
4197  const range_t *rptr; // pointer into ranges
4198  ea_t cur; // current address
4199  range_item_iterator_t(void) : ranges(NULL), rptr(NULL), cur(BADADDR) {}
4200  bool set(const rangevec_t &r);
4201  bool next_code(void);
4202  ea_t current(void) const { return cur; }
4203 };
4204 
4205 /// Item iterator for mba_ranges_t
4207 {
4209  func_item_iterator_t fii; // this is used if rii.ranges==NULL
4210  bool is_snippet(void) const { return rii.ranges != NULL; }
4211  bool set(const mba_ranges_t &mbr)
4212  {
4213  if ( mbr.is_snippet() )
4214  return rii.set(mbr.ranges);
4215  else
4216  return fii.set(mbr.pfn);
4217  }
4218  bool next_code(void)
4219  {
4220  if ( is_snippet() )
4221  return rii.next_code();
4222  else
4223  return fii.next_code();
4224  }
4225  ea_t current(void) const
4226  {
4227  return is_snippet() ? rii.current() : fii.current();
4228  }
4229 };
4230 
4231 /// Chunk iterator of arbitrary rangevec items
4233 {
4234  const range_t *rptr; // pointer into ranges
4235  const range_t *rend;
4236  range_chunk_iterator_t(void) : rptr(NULL), rend(NULL) {}
4237  bool set(const rangevec_t &r) { rptr = r.begin(); rend = r.end(); return rptr != rend; }
4238  bool next(void) { return ++rptr != rend; }
4239  const range_t &chunk(void) const { return *rptr; }
4240 };
4241 
4242 /// Chunk iterator for mba_ranges_t
4244 {
4246  func_tail_iterator_t fii; // this is used if rii.rptr==NULL
4247  bool is_snippet(void) const { return rii.rptr != NULL; }
4248  bool set(const mba_ranges_t &mbr)
4249  {
4250  if ( mbr.is_snippet() )
4251  return rii.set(mbr.ranges);
4252  else
4253  return fii.set(mbr.pfn);
4254  }
4255  bool next(void)
4256  {
4257  if ( is_snippet() )
4258  return rii.next();
4259  else
4260  return fii.next();
4261  }
4262  const range_t &chunk(void) const
4263  {
4264  return is_snippet() ? rii.chunk() : fii.chunk();
4265  }
4266 };
4267 
4268 //-------------------------------------------------------------------------
4269 /// Array of micro blocks representing microcode for a decompiled function.
4270 /// The first micro block is the entry point, the last one is the exit point.
4271 /// The entry and exit blocks are always empty. The exit block is generated
4272 /// at MMAT_LOCOPT maturity level.
4273 class mbl_array_t
4274 {
4275  DECLARE_UNCOPYABLE(mbl_array_t)
4276  uint32 flags;
4277  uint32 flags2;
4278 
4279 public:
4280  // bits to describe the microcode, set by the decompiler
4281 #define MBA_PRCDEFS 0x00000001 ///< use precise defeas for chain-allocated lvars
4282 #define MBA_NOFUNC 0x00000002 ///< function is not present, addresses might be wrong
4283 #define MBA_PATTERN 0x00000004 ///< microcode pattern, callinfo is present
4284 #define MBA_LOADED 0x00000008 ///< loaded gdl, no instructions (debugging)
4285 #define MBA_RETFP 0x00000010 ///< function returns floating point value
4286 #define MBA_SPLINFO 0x00000020 ///< (final_type ? idb_spoiled : spoiled_regs) is valid
4287 #define MBA_PASSREGS 0x00000040 ///< has mcallinfo_t::pass_regs
4288 #define MBA_THUNK 0x00000080 ///< thunk function
4289 #define MBA_CMNSTK 0x00000100 ///< stkvars+stkargs should be considered as one area
4290 
4291  // bits to describe analysis stages and requests
4292 #define MBA_PREOPT 0x00000200 ///< preoptimization stage complete
4293 #define MBA_CMBBLK 0x00000400 ///< request to combine blocks
4294 #define MBA_ASRTOK 0x00000800 ///< assertions have been generated
4295 #define MBA_CALLS 0x00001000 ///< callinfo has been built
4296 #define MBA_ASRPROP 0x00002000 ///< assertions have been propagated
4297 #define MBA_SAVRST 0x00004000 ///< save-restore analysis has been performed
4298 #define MBA_RETREF 0x00008000 ///< return type has been refined
4299 #define MBA_GLBOPT 0x00010000 ///< microcode has been optimized globally
4300 #define MBA_LVARS0 0x00040000 ///< lvar pre-allocation has been performed
4301 #define MBA_LVARS1 0x00080000 ///< lvar real allocation has been performed
4302 #define MBA_DELPAIRS 0x00100000 ///< pairs have been deleted once
4303 #define MBA_CHVARS 0x00200000 ///< can verify chain varnums
4304 
4305  // bits that can be set by the caller:
4306 #define MBA_SHORT 0x00400000 ///< use short display
4307 #define MBA_COLGDL 0x00800000 ///< display graph after each reduction
4308 #define MBA_INSGDL 0x01000000 ///< display instruction in graphs
4309 #define MBA_NICE 0x02000000 ///< apply transformations to c code
4310 #define MBA_REFINE 0x04000000 ///< may refine return value size
4311 #define MBA_WINGR32 0x10000000 ///< use wingraph32
4312 #define MBA_NUMADDR 0x20000000 ///< display definition addresses for numbers
4313 #define MBA_VALNUM 0x40000000 ///< display value numbers
4314 
4315 #define MBA_INITIAL_FLAGS (MBA_INSGDL|MBA_NICE|MBA_CMBBLK|MBA_REFINE\
4316  |MBA_PRCDEFS|MBA_WINGR32|MBA_VALNUM)
4317 
4318 #define MBA2_LVARNAMES_OK 0x00000001 // may verify lvar_names?
4319 #define MBA2_LVARS_RENAMED 0x00000002 // accept empty names now?
4320 #define MBA2_OVER_CHAINS 0x00000004 // has overlapped chains?
4321 #define MBA2_VALRNG_DONE 0x00000008 // calculated valranges?
4322 #define MBA2_IS_CTR 0x00000010 // is constructor?
4323 #define MBA2_IS_DTR 0x00000020 // is destructor?
4324 #define MBA2_ARGIDX_OK 0x00000040 // may verify input argument list?
4325 #define MBA2_NO_DUP_CALLS 0x00000080 // forbid multiple calls with the same ea
4326 #define MBA2_NO_DUP_LVARS 0x00000100 // forbid multiple lvars with the same ea
4327 #define MBA2_UNDEF_RETVAR 0x00000200 // return value is undefined
4328 #define MBA2_ARGIDX_SORTED 0x00000400 // args finally sorted according to ABI
4329  // (e.g. reverse stkarg order in Borland)
4330 
4331 #define MBA2_INITIAL_FLAGS (MBA2_LVARNAMES_OK|MBA2_LVARS_RENAMED)
4332 
4333 #define MBA2_ALL_FLAGS 0x000007FF
4334 
4335  bool precise_defeas(void) const { return (flags & MBA_PRCDEFS) != 0; }
4336  bool optimized(void) const { return (flags & MBA_GLBOPT) != 0; }
4337  bool short_display(void) const { return (flags & MBA_SHORT ) != 0; }
4338  bool show_reduction(void) const { return (flags & MBA_COLGDL) != 0; }
4339  bool graph_insns(void) const { return (flags & MBA_INSGDL) != 0; }
4340  bool loaded_gdl(void) const { return (flags & MBA_LOADED) != 0; }
4341  bool should_beautify(void)const { return (flags & MBA_NICE ) != 0; }
4342  bool rtype_refined(void) const { return (flags & MBA_RETREF) != 0; }
4343  bool may_refine_rettype(void) const { return (flags & MBA_REFINE) != 0; }
4344  bool use_wingraph32(void) const { return (flags & MBA_WINGR32) != 0; }
4345  bool display_numaddrs(void) const { return (flags & MBA_NUMADDR) != 0; }
4346  bool display_valnums(void) const { return (flags & MBA_VALNUM) != 0; }
4347  bool is_pattern(void) const { return (flags & MBA_PATTERN) != 0; }
4348  bool is_thunk(void) const { return (flags & MBA_THUNK) != 0; }
4349  bool saverest_done(void) const { return (flags & MBA_SAVRST) != 0; }
4350  bool callinfo_built(void) const { return (flags & MBA_CALLS) != 0; }
4351  bool really_alloc(void) const { return (flags & MBA_LVARS0) != 0; }
4352  bool lvars_allocated(void)const { return (flags & MBA_LVARS1) != 0; }
4353  bool chain_varnums_ok(void)const { return (flags & MBA_CHVARS) != 0; }
4354  bool returns_fpval(void) const { return (flags & MBA_RETFP) != 0; }
4355  bool has_passregs(void) const { return (flags & MBA_PASSREGS) != 0; }
4356  bool generated_asserts(void) const { return (flags & MBA_ASRTOK) != 0; }
4357  bool propagated_asserts(void) const { return (flags & MBA_ASRPROP) != 0; }
4358  bool deleted_pairs(void) const { return (flags & MBA_DELPAIRS) != 0; }
4359  bool common_stkvars_stkargs(void) const { return (flags & MBA_CMNSTK) != 0; }
4360  bool lvar_names_ok(void) const { return (flags2 & MBA2_LVARNAMES_OK) != 0; }
4361  bool lvars_renamed(void) const { return (flags2 & MBA2_LVARS_RENAMED) != 0; }
4362  bool has_over_chains(void) const { return (flags2 & MBA2_OVER_CHAINS) != 0; }
4363  bool valranges_done(void) const { return (flags2 & MBA2_VALRNG_DONE) != 0; }
4364  bool argidx_ok(void) const { return (flags2 & MBA2_ARGIDX_OK) != 0; }
4365  bool argidx_sorted(void) const { return (flags2 & MBA2_ARGIDX_SORTED) != 0; }
4366  bool is_ctr(void) const { return (flags2 & MBA2_IS_CTR) != 0; }
4367  bool is_dtr(void) const { return (flags2 & MBA2_IS_DTR) != 0; }
4368  bool is_cdtr(void) const { return (flags2 & (MBA2_IS_CTR|MBA2_IS_DTR)) != 0; }
4369  int get_mba_flags(void) const { return flags; }
4370  int get_mba_flags2(void) const { return flags2; }
4371  void set_mba_flags(int f) { flags |= f; }
4372  void clr_mba_flags(int f) { flags &= ~f; }
4373  void set_mba_flags2(int f) { flags2 |= f; }
4374  void clr_mba_flags2(int f) { flags2 &= ~f; }
4375  void clr_cdtr(void) { flags2 &= ~(MBA2_IS_CTR|MBA2_IS_DTR); }
4376  int calc_shins_flags(void) const
4377  {
4378  // Translate the display-related MBA_... bits of this mba
4379  // into the corresponding SHINS_... bits.
4380  int bits = short_display() ? SHINS_SHORT : 0;
4381  if ( display_valnums() )
4382  bits |= SHINS_VALNUM;
4383  if ( display_numaddrs() )
4384  bits |= SHINS_NUMADDR;
4385  return bits;
4386  }
4387 
4388 /*
4389  +-----------+ <- inargtop
4390  | prmN |
4391  | ... | <- minargref
4392  | prm0 |
4393  +-----------+ <- inargoff
4394  |shadow_args|
4395  +-----------+
4396  | retaddr |
4397  frsize+frregs +-----------+ <- initial esp |
4398  | frregs | |
4399  +frsize +-----------+ <- typical ebp |
4400  | | | |
4401  | | | fpd |
4402  | | | |
4403  | frsize | <- current ebp |
4404  | | |
4405  | | |
4406  | | | stacksize
4407  | | |
4408  | | |
4409  | | <- minstkref |
4410  stkvar base off 0 +---.. | | | current
4411  | | | | stack
4412  | | | | pointer
4413  | | | | range
4414  |tmpstk_size| | | (what getspd() returns)
4415  | | | |
4416  | | | |
4417  +-----------+ <- minimal sp | | offset 0 for the decompiler (vd)
4418 
4419  There is a detail that may add confusion when working with stack variables.
4420  The decompiler does not use the same stack offsets as IDA.
4421  The picture above should explain the difference:
4422  - IDA stkoffs are displayed on the left, decompiler stkoffs - on the right
4423  - Decompiler stkoffs are always >= 0
4424  - IDA stkoff==0 corresponds to stkoff==tmpstk_size in the decompiler
4425  - See stkoff_vd2ida and stkoff_ida2vd below to convert between IDA stkoffs and vd stkoffs
4426 
4427 */
4428 
4429  // convert a stack offset used in vd to a stack offset used in the ida stack frame
4430  sval_t stkoff_vd2ida(sval_t off) const
4431  {
4432  return off - tmpstk_size; // ida stkoff 0 corresponds to vd stkoff tmpstk_size
4433  }
4434  // convert an ida stack frame offset to a stack offset used in vd
4435  sval_t stkoff_ida2vd(sval_t off) const
4436  {
4437  return off + tmpstk_size; // ida stkoff 0 corresponds to vd stkoff tmpstk_size
4438  }
4439  sval_t argbase() const // value passed as 'spd' by the non-static idaloc2vd()/vd2idaloc()
4440  {
4441  return retsize + stacksize; // size of the return address + maximal stack size
4442  }
4443  static vdloc_t hexapi idaloc2vd(const argloc_t &loc, int width, sval_t spd);
4444  vdloc_t idaloc2vd(const argloc_t &loc, int width) const // convenience overload of the static idaloc2vd()
4445  {
4446  return idaloc2vd(loc, width, argbase()); // uses argbase() of this mba as 'spd'
4447  }
4448 
4449  static argloc_t hexapi vd2idaloc(const vdloc_t &loc, int width, sval_t spd);
4450  argloc_t vd2idaloc(const vdloc_t &loc, int width) const // convenience overload of the static vd2idaloc()
4451  {
4452  return vd2idaloc(loc, width, argbase()); // uses argbase() of this mba as 'spd'
4453  }
4454 
4455  bool is_stkarg(const lvar_t &v) const // is V a stack variable located at or above inargoff?
4456  {
4457  return v.is_stk_var() && v.get_stkoff() >= inargoff; // inargoff: offset of the first stack argument
4458  }
4459  member_t *get_stkvar(sval_t vd_stkoff, uval_t *poff) const;
4460  // get lvar location as an argloc_t (converted from v.location with vd2idaloc())
4461  argloc_t get_ida_argloc(const lvar_t &v) const
4462  {
4463  return vd2idaloc(v.location, v.width);
4464  }
4465  mba_ranges_t mbr;
4466  ea_t entry_ea;
4467  ea_t last_prolog_ea;
4468  ea_t first_epilog_ea;
4469  int qty; ///< number of basic blocks
4470  int npurged; ///< -1 - unknown
4471  cm_t cc; ///< calling convention
4472  sval_t tmpstk_size; ///< size of the temporary stack part
4473  ///< (which dynamically changes with push/pops)
4474  sval_t frsize; ///< size of local stkvars range in the stack frame
4475  sval_t frregs; ///< size of saved registers range in the stack frame
4476  sval_t fpd; ///< frame pointer delta
4477  int pfn_flags; ///< copy of func_t::flags
4478  int retsize; ///< size of return address in the stack frame
4479  int shadow_args; ///< size of shadow argument area
4480  sval_t fullsize; ///< Full stack size including incoming args
4481  sval_t stacksize; ///< The maximal size of the function stack including
4482  ///< bytes allocated for outgoing call arguments
4483  ///< (up to retaddr)
4484  sval_t inargoff; ///< offset of the first stack argument;
4485  ///< after fix_scattered_movs() INARGOFF may
4486  ///< be less than STACKSIZE
4487  sval_t minstkref; ///< The lowest stack location whose address was taken
4488  ea_t minstkref_ea; ///< address with lowest minstkref (for debugging)
4489  sval_t minargref; ///< The lowest stack argument location whose address was taken
4490  ///< This location and locations above it can be aliased
4491  ///< It controls locations >= inargoff-shadow_args
4492  sval_t spd_adjust; ///< If sp>0, the max positive sp value
4493  ivl_t aliased_vars; ///< Aliased stkvar locations
4494  ivl_t aliased_args; ///< Aliased stkarg locations
4495  ivlset_t gotoff_stkvars; ///< stkvars that hold .got offsets. considered to be unaliasable
4496  ivlset_t restricted_memory;
4497  ivlset_t aliased_memory; ///< aliased_memory+restricted_memory=ALLMEM
4498  mlist_t nodel_memory; ///< global dead elimination may not delete references to this area
4499  rlist_t consumed_argregs; ///< registers converted into stack arguments, should not be used as arguments
4500 
4501  mba_maturity_t maturity; ///< current maturity level
4502  mba_maturity_t reqmat; ///< required maturity level
4503 
4504  bool final_type; ///< is the function type final? (specified by the user)
4505  tinfo_t idb_type; ///< function type as retrieved from the database
4506  reginfovec_t idb_spoiled; ///< MBA_SPLINFO && final_type: info in ida format
4507  mlist_t spoiled_list; ///< MBA_SPLINFO && !final_type: info in vd format
4508  int fti_flags; ///< FTI_... constants for the current function
4509 
4510  netnode idb_node;
4511 #define NALT_VD 2 ///< this index is not used by ida
4512 
4513  qstring label; ///< name of the function or pattern (colored)
4514  lvars_t vars; ///< local variables
4515  intvec_t argidx; ///< input arguments (indexes into 'vars')
4516  int retvaridx; ///< index of variable holding the return value
4517  ///< -1 means none
4518 
4519  ea_t error_ea; ///< during microcode generation holds ins.ea
4520  qstring error_strarg;
4521 
4522  mblock_t *blocks; ///< double linked list of blocks
4523  mblock_t **natural; ///< natural order of blocks
4524 
4525  ivl_with_name_t std_ivls[6]; ///< we treat memory as consisting of 6 parts
4526  ///< see \ref memreg_index_t
4527 
4528  mutable hexwarns_t notes;
4529  mutable uchar occurred_warns[32]; // occurred warning messages
4530  // (even disabled warnings are taken into account)
4531  bool write_to_const_detected(void) const
4532  {
4533  return test_bit(occurred_warns, WARN_WRITE_CONST);
4534  }
4535  bool bad_call_sp_detected(void) const
4536  {
4537  return test_bit(occurred_warns, WARN_BAD_CALL_SP);
4538  }
4539  bool regargs_is_not_aligned(void) const
4540  {
4541  return test_bit(occurred_warns, WARN_UNALIGNED_ARG);
4542  }
4543  bool has_bad_sp(void) const
4544  {
4545  return test_bit(occurred_warns, WARN_BAD_SP);
4546  }
4547 
4548  // the exact size of this class is not documented, there may be more fields
4549  char reserved[];
4550  mbl_array_t(void);
4551  ~mbl_array_t(void) { term(); }
4552  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
4553  void hexapi term(void);
4554  func_t *get_curfunc(void) const { return mbr.pfn; }
4555  bool use_frame(void) const { return mbr.pfn != NULL; }
4556  bool range_contains(ea_t ea) const { return mbr.range_contains(ea); }
4557  bool is_snippet(void) const { return mbr.is_snippet(); }
4558  /// Optimize each basic block locally
4559  /// \param locopt_bits combination of \ref LOCOPT_ bits
4560  /// \return number of changes. 0 means nothing changed
4561  /// This function is called by the decompiler, usually there is no need to
4562  /// call it explicitly.
4563  int hexapi optimize_local(int locopt_bits);
4564  /// \defgroup LOCOPT_ Bits for optimize_local()
4565  //@{
4566 #define LOCOPT_ALL 0x0001 ///< redo optimization for all blocks. if this bit
4567  ///< is not set, only dirty blocks will be optimized
4568 #define LOCOPT_REFINE 0x0002 ///< refine return type, ok to fail
4569 #define LOCOPT_REFINE2 0x0004 ///< refine return type, try harder
4570  //@}
4571 
4572  /// Build control flow graph.
4573  /// This function may be called only once. It calculates the type of each
4574  /// basic block and the adjacency list. optimize_local() calls this function
4575  /// if necessary. You need to call this function only before MMAT_LOCOPT.
4576  /// \return error code
4577  merror_t hexapi build_graph(void);
4578 
4579  /// Get control graph.
4580  /// Call build_graph() if you need the graph before MMAT_LOCOPT.
4581  mbl_graph_t *hexapi get_graph(void);
4582 
4583  /// Analyze calls and determine calling conventions.
4584  /// \param acflags permitted actions that are necessary for successful detection
4585  /// of calling conventions. See \ref ACFL_
4586  /// \return number of calls. -1 means error.
4587  int hexapi analyze_calls(int acflags);
4588  /// \defgroup ACFL_ Bits for analyze_calls()
4589  //@{
4590 #define ACFL_LOCOPT 0x01 ///< perform local propagation (requires ACFL_BLKOPT)
4591 #define ACFL_BLKOPT 0x02 ///< perform interblock transformations
4592 #define ACFL_GLBPROP 0x04 ///< perform global propagation
4593 #define ACFL_GLBDEL 0x08 ///< perform dead code elimination
4594 #define ACFL_GUESS 0x10 ///< may guess calling conventions
4595  //@}
4596 
4597  /// Optimize microcode globally.
4598  /// This function applies various optimization methods until we reach the
4599  /// fixed point. After that it preallocates lvars unless reqmat forbids it.
4600  /// \return error code
4601  merror_t hexapi optimize_global(void);
4602 
4603  /// Allocate local variables.
4604  /// Must be called only immediately after optimize_global(), with no
4605  /// modifications to the microcode. Converts registers,
4606  /// stack variables, and similar operands into mop_l. This call will not fail
4607  /// because all necessary checks were performed in optimize_global().
4608  /// After this call the microcode reaches its final state.
4609  void hexapi alloc_lvars(void);
4610 
4611  /// Dump microcode to a file.
4612  /// The file will be created in the directory pointed to by the IDA_DUMPDIR envvar.
4613  /// Dump will be created only if IDA is run under debugger.
4614  void hexapi dump(void) const;
4615  AS_PRINTF(3, 0) void hexapi vdump_mba(bool _verify, const char *title, va_list va) const;
4616  AS_PRINTF(3, 4) void dump_mba(bool _verify, const char *title, ...) const
4617  {
4618  va_list va;
4619  va_start(va, title);
4620  vdump_mba(_verify, title, va);
4621  va_end(va);
4622  }
4623 
4624  /// Print microcode to any destination.
4625  /// \param vp print sink
4626  void hexapi print(vd_printer_t &vp) const;
4627 
4628  /// Verify microcode consistency.
4629  /// \param always if false, the check will be performed only if ida runs
4630  /// under debugger
4631  /// If any inconsistency is discovered, an internal error will be generated.
4632  /// We strongly recommend that you call this function before returning control
4633  /// to the decompiler from your callbacks if you have modified
4634  /// the microcode.
4635  void hexapi verify(bool always) const;
4636 
4637  /// Mark the microcode use-def chains dirty.
4638  /// Call this function if any inter-block data dependencies got changed
4639  /// because of your modifications to the microcode. Failing to do so may
4640  /// cause an internal error.
4641  void hexapi mark_chains_dirty(void);
4642 
4643  /// Get basic block by its serial number.
4644  const mblock_t *get_mblock(int n) const { return natural[n]; }
4645  mblock_t *get_mblock(int n) { return CONST_CAST(mblock_t*)((CONST_CAST(const mbl_array_t *)(this))->get_mblock(n)); }
4646 
4647  /// Insert a block in the middle of the mbl array.
4648  /// The very first block of microcode must be empty, it is the entry block.
4649  /// The very last block of microcode must be BLT_STOP, it is the exit block.
4650  /// Therefore inserting a new block before the entry point or after the exit
4651  /// block is not a good idea.
4652  /// \param bblk the new block will be inserted before BBLK
4653  /// \return ptr to the new block
4654  mblock_t *hexapi insert_block(int bblk);
4655 
4656  /// Delete a block.
4657  /// \param blk block to delete
4658  /// \return true if at least one of the other blocks became empty or unreachable
4659  bool hexapi remove_block(mblock_t *blk);
4660 
4661  /// Make a copy of a block.
4662  /// This function makes a simple copy of the block. It does not fix the
4663  /// predecessor and successor lists, they must be fixed if necessary.
4664  /// \param blk block to copy
4665  /// \param new_serial position of the copied block
4666  /// \param cpblk_flags combination of \ref CPBLK_... bits
4667  /// \return pointer to the new copy
4668  mblock_t *hexapi copy_block(mblock_t *blk, int new_serial, int cpblk_flags=3);
4669 /// \defgroup CPBLK_ Bits for copy_block()
4670 //@{
4671 #define CPBLK_FAST 0x0000 ///< do not update minbstkref and minbargref
4672 #define CPBLK_MINREF 0x0001 ///< update minbstkref and minbargref
4673 #define CPBLK_OPTJMP 0x0002 ///< del the jump insn at the end of the block
4674  ///< if it becomes useless
4675 //@}
4676 
4677  /// Delete all empty blocks.
4678  bool hexapi remove_empty_blocks(void);
4679 
4680  /// Combine blocks.
4681  /// This function merges blocks constituting linear flow.
4682  /// It calls remove_empty_blocks() as well.
4683  /// \return true if changed any blocks
4684  bool hexapi combine_blocks(void);
4685 
4686  /// Visit all operands of all instructions.
4687  /// \param mv operand visitor
4688  /// \return non-zero value returned by mv.visit_mop() or zero
4689  int hexapi for_all_ops(mop_visitor_t &mv);
4690 
4691  /// Visit all instructions.
4692  /// This function visits all instruction and subinstructions.
4693  /// \param mv instruction visitor
4694  /// \return non-zero value returned by mv.visit_minsn() or zero
4695  int hexapi for_all_insns(minsn_visitor_t &mv);
4696 
4697  /// Visit all top level instructions.
4698  /// \param mv instruction visitor
4699  /// \return non-zero value returned by mv.visit_minsn() or zero
4700  int hexapi for_all_topinsns(minsn_visitor_t &mv);
4701 
4702  /// Find an operand in the microcode.
4703  /// This function tries to find the operand that matches LIST.
4704  /// Any operand that overlaps with LIST is considered as a match.
4705  /// \param[out] ctx context information for the result
4706  /// \param ea desired address of the operand
4707  /// \param is_dest search for destination operand? this argument may be
4708  /// ignored if the exact match could not be found
4709  /// \param list list of locations that correspond to the operand
4710  /// \return pointer to the operand or NULL.
4711  mop_t *hexapi find_mop(op_parent_info_t *ctx, ea_t ea, bool is_dest, const mlist_t &list);
4712 
4713  /// Get input argument of the decompiled function.
4714  /// \param n argument number (0..nargs-1)
4715  lvar_t &hexapi arg(int n);
4716  const lvar_t &arg(int n) const { return CONST_CAST(mbl_array_t*)(this)->arg(n); }
4717 
4718  /// Get information about various memory regions.
4719  /// We map the stack frame to the global memory, to some unused range.
4720  const ivl_t &get_std_region(memreg_index_t idx) const;
4721  const ivl_t &get_lvars_region(void) const;
4722  const ivl_t &get_shadow_region(void) const;
4723  const ivl_t &get_args_region(void) const;
4724  ivl_t get_stack_region(void) const; // get entire stack region
4725 
4726  /// Serialize mbl array into a sequence of bytes.
4727  void hexapi serialize(bytevec_t &vout) const;
4728 
4729  /// Deserialize a byte sequence into mbl array.
4730  /// \param bytes pointer to the beginning of the byte sequence.
4731  /// \param nbytes number of bytes in the byte sequence.
4732  /// \return new mbl array
4733  static mbl_array_t *hexapi deserialize(const uchar *bytes, size_t nbytes);
4734 
4735 };
4736 //-------------------------------------------------------------------------
4737 /// Convenience class to release graph chains automatically:
4738 /// the constructor calls acquire() on the chains and the destructor calls release().
4740 {
4741  graph_chains_t *gc; ///< the kept chains; the constructor asserts that it is not NULL
4742  chain_keeper_t &operator=(const chain_keeper_t &); // not defined (assignment is disabled). NOTE(review): the implicit copy constructor is not disabled - confirm that objects are never copied
4743 public:
4744  chain_keeper_t(graph_chains_t *_gc) : gc(_gc) { QASSERT(50446, gc != NULL); gc->acquire(); }
4745  ~chain_keeper_t(void) // pairs with the acquire() call made by the constructor
4746  {
4747  gc->release();
4748  }
4749  block_chains_t &operator[](size_t idx) { return (*gc)[idx]; } // forwarded to the kept chains
4750  block_chains_t &front(void) { return gc->front(); } // forwarded to the kept chains
4751  block_chains_t &back(void) { return gc->back(); } // forwarded to the kept chains
4752  operator graph_chains_t &(void) { return *gc; } // access to the kept chains object itself
4753  int for_all_chains(chain_visitor_t &cv, int gca) { return gc->for_all_chains(cv, gca); } // forwarded to the kept chains
4754  HEXRAYS_MEMORY_ALLOCATION_FUNCS()
4755 };
4756 
4757 //-------------------------------------------------------------------------
4758 /// Kind of use-def and def-use chains
4760 {
4761  GC_REGS_AND_STKVARS, ///< registers and stkvars (restricted memory only)
4762  GC_ASR, ///< all the above and assertions
4763  GC_XDSU, ///< only registers calculated with FULL_XDSU
4764  GC_END, ///< number of chain types
4765  GC_DIRTY_ALL = (1 << (2*GC_END))-1, ///< bitmask to represent all chains
4766 };
4767 
4768 //-------------------------------------------------------------------------
4769 /// Control flow graph of microcode.
4771 {
4772  mbl_array_t *mba; ///< pointer to the mbl array
4773  int dirty; ///< what kinds of use-def chains are dirty?
4774  int chain_stamp; ///< we increment this counter each time chains are recalculated
4775  graph_chains_t gcs[2*GC_END]; ///< cached use-def chains
4776 
4777  /// Is LIST accessed between two instructions?
4778  /// This function can analyze all paths between the specified instructions
4779  /// and find if the specified list is used in any of them. The instructions
4780  /// may be located in different basic blocks. This function does not use
4781  /// use-def chains but uses the graph for analysis. It may be slow in some
4782  /// cases but its advantage is that it does not require building the use-def
4783  /// chains.
4784  /// \param list list to verify
4785  /// \param b1 starting block
4786  /// \param b2 ending block. may be -1, it means all possible paths from b1
4787  /// \param m1 starting instruction (in b1)
4788  /// \param m2 ending instruction (in b2). excluded. may be NULL.
4789  /// \param access_type read or write access?
4790  /// \param maymust may access or must access?
4791  /// \return true if found an access to the list
4792  bool hexapi is_accessed_globally(
4793  const mlist_t &list, // list to verify
4794  int b1, // starting block
4795  int b2, // ending block
4796  const minsn_t *m1, // starting instruction (in b1)
4797  const minsn_t *m2, // ending instruction (in b2)
4798  access_type_t access_type,
4799  maymust_t maymust) const;
4800  int get_ud_gc_idx(gctype_t gctype) const { return (gctype << 1); }
4801  int get_du_gc_idx(gctype_t gctype) const { return (gctype << 1)+1; }
4802  int get_ud_dirty_bit(gctype_t gctype) const { return 1 << get_ud_gc_idx(gctype); } // mask of the use-def bit of GCTYPE in 'dirty'; const because it reads no state, like get_ud_gc_idx()
4803  int get_du_dirty_bit(gctype_t gctype) const { return 1 << get_du_gc_idx(gctype); } // mask of the def-use bit of GCTYPE in 'dirty'; const because it reads no state, like get_du_gc_idx()
4804 
4805 public:
4806  /// Is the use-def chain of the specified kind dirty?
4808  {
4809  int bit = get_ud_dirty_bit(gctype);
4810  return (dirty & bit) != 0;
4811  }
4812 
4813  /// Is the def-use chain of the specified kind dirty?
4815  {
4816  int bit = get_du_dirty_bit(gctype);
4817  return (dirty & bit) != 0;
4818  }
4819  int get_chain_stamp(void) const { return chain_stamp; }
4820 
4821  /// Get use-def chains.
4822  graph_chains_t *hexapi get_ud(gctype_t gctype);
4823 
4824  /// Get def-use chains.
4825  graph_chains_t *hexapi get_du(gctype_t gctype);
4826 
4827  /// Is LIST redefined in the graph?
4828  bool is_redefined_globally(const mlist_t &list, int b1, int b2, const minsn_t *m1, const minsn_t *m2, maymust_t maymust=MAY_ACCESS) const
4829  { return is_accessed_globally(list, b1, b2, m1, m2, WRITE_ACCESS, maymust); }
4830 
4831  /// Is LIST used in the graph?
4832  bool is_used_globally(const mlist_t &list, int b1, int b2, const minsn_t *m1, const minsn_t *m2, maymust_t maymust=MAY_ACCESS) const
4833  { return is_accessed_globally(list, b1, b2, m1, m2, READ_ACCESS, maymust); }
4834 
4835  mblock_t *get_mblock(int n) const { return mba->get_mblock(n); }
4836 };
4837 
4838 //-------------------------------------------------------------------------
4839 // helper for codegen_t. It takes into account delay slots
4841 {
4842  ea_t ea; // next insn to decode
4843  ea_t end; // end of the block
4844  ea_t dslot; // address of the insn in the delay slot
4845  insn_t dslot_insn; // instruction in the delay slot
4846  bool is_separate_dslot; // the current insn is the separate delay slot
4847  // insn (when the delay slot starts a block)
4848  bool is_likely_dslot; // execute delay slot only when jumping
4849 
4851  : ea(BADADDR),
4852  end(BADADDR),
4853  dslot(BADADDR),
4854  is_separate_dslot(false),
4855  is_likely_dslot(false) {}
4856  cdg_insn_iterator_t(const cdg_insn_iterator_t &r) = default;
4857  cdg_insn_iterator_t &operator=(const cdg_insn_iterator_t &r) = default;
4858 
4859  bool ok() const { return ea < end; } // is the next insn to decode still before the end of the block?
4860  bool has_dslot() const { return dslot != BADADDR; } // is a delay slot address recorded?
4861  bool dslot_with_xrefs() const { return dslot >= end; } // is the delay slot address at or beyond 'end'?
4862  void start(const range_t &rng) // begin iterating over RNG
4863  {
4864  ea = rng.start_ea; // first insn to decode
4865  end = rng.end_ea; // iteration stops here (see ok())
4866  }
4867  merror_t hexapi next(insn_t *ins); // defined elsewhere; presumably decodes the next insn into INS - confirm
4868 };
4869 
4870 //-------------------------------------------------------------------------
4871 /// Helper class to generate the initial microcode
4873 {
4874 public:
4875  mbl_array_t *mba; // ptr to mbl array
4876  mblock_t *mb; // current basic block
4877  insn_t insn; // instruction to generate microcode for
4878  char ignore_micro; // value of get_ignore_micro() for the insn
4879  cdg_insn_iterator_t ii; // instruction iterator
4880 
4881  codegen_t(mbl_array_t *m) : mba(m), mb(NULL), ignore_micro(IM_NONE) {}
4882  virtual ~codegen_t(void)
4883  {
4884  }
4885 
4886  /// Analyze prolog/epilog of the function to decompile.
4887  /// If prolog is found, allocate and fill 'mba->pi' structure.
4888  /// \param fc flow chart
4889  /// \param reachable bitmap of reachable blocks
4890  /// \return error code
4891  virtual merror_t idaapi analyze_prolog(
4892  const class qflow_chart_t &fc,
4893  const class bitset_t &reachable) = 0;
4894 
4895  /// Generate microcode for one instruction.
4896  /// The instruction is in INSN
4897  /// \return MERR_OK - all ok
4898  /// MERR_BLOCK - all ok, need to switch to new block
4899  /// MERR_BADBLK - delete current block and continue
4900  /// other error codes are fatal
4901  virtual merror_t idaapi gen_micro() = 0;
4902 
4903  /// Generate microcode to load one operand.
4904  virtual mreg_t idaapi load_operand(int opnum) = 0;
4905 
4906  /// Emit one microinstruction; the operand size is given as an op_dtype_t.
4907  /// All other arguments are forwarded to emit(), see its explanations.
4908  virtual minsn_t *idaapi emit_micro_mvm(
4909  mcode_t code,
4910  op_dtype_t dtype,
4911  uval_t l, uval_t r, uval_t d,
4912  int offsize)
4913  {
4914  // convert the operand type into the byte width expected by emit()
4915  const int width = get_dtype_size(dtype);
4916  return emit(code, width, l, r, d, offsize);
4917  }
4918 
4919  /// Emit one microinstruction.
4920  /// The L, R, D arguments usually mean the register number. However, they depend
4921  /// on CODE. For example:
4922  /// - for m_goto and m_jcnd L is the target address
4923  /// - for m_ldc L is the constant value to load
4924  /// \param code instruction opcode
4925  /// \param width operand size in bytes
4926  /// \param l left operand
4927  /// \param r right operand
4928  /// \param d destination operand
4929  /// \param offsize for ldx/stx, the size of the offset operand
4930  /// for ldc, operand number of the constant value
4931  /// -1, set the FP instruction (e.g. for m_mov)
4932  /// \return created microinstruction. can be NULL if the instruction got
4933  /// immediately optimized away.
4934  minsn_t *hexapi emit(mcode_t code, int width, uval_t l, uval_t r, uval_t d, int offsize);
4935 
4936  /// Emit one microinstruction.
4937  /// This variant accepts pointers to operands. It is more difficult to use
4938  /// but permits to create virtually any instruction. Operands may be NULL
4939  /// when it makes sense.
4940  minsn_t *