hexrays.hpp
Go to the documentation of this file.
1 /*
2  * Hex-Rays Decompiler project
3  * Copyright (c) 1990-2015 Hex-Rays
4  * ALL RIGHTS RESERVED.
5  */
6 
7 #ifndef __HEXRAYS_HPP
8 #define __HEXRAYS_HPP
9 
10 #define CHECKED_BUILD
11 #include <pro.h>
12 #include <fpro.h>
13 #include <ida.hpp>
14 #include <idp.hpp>
15 #include <gdl.hpp>
16 #include <ieee.h>
17 #include <loader.hpp>
18 #include <kernwin.hpp>
19 #include <typeinf.hpp>
20 #include <set>
21 #include <map>
22 #include <deque>
23 #include <queue>
24 #include <algorithm>
25 /*-
26  * Decompiler project (internal name: vd)
27  * Copyright (c) 2005-2018 Hex-Rays SA <[email protected]>
28  * ALL RIGHTS RESERVED.
29  *
30  * The virtual micro machine has many registers.
31  * Each register is 8 bits wide. Multibyte processor registers are mapped
32  * to adjacent microregisters. Processor condition codes are also
33  * represented by microregisters. The unaliasable part of the stack
34  * frame is mapped to microregisters too. The microregisters are grouped
35  * into the following groups:
36  * 0..7: condition codes
37  * 8..n: all processor registers (including fpu registers, if necessary)
38  * this range may also include temporary registers used during
39  * the initial microcode generation
40  * n..n+256: so called kernel registers; they are used during optimization
41  * see is_kreg()
42  * n+256..: unaliasable part of the stack frame, so called
43  * virtual stack registers. see get_first_stack_reg()
44  *
45  * Each micro-instruction (minsn_t) has zero to three operands.
46  * Some of the possible operands types are:
47  * - immediate value
48  * - register
49  * - memory reference
50  * - result of another micro-instruction
51  *
52  * The operands (mop_t) are l (left), r (right), d (destination).
53  * 'd' is almost always the destination but there are exceptions.
54  * See is_dest_target_mcode(). For example, stx does not modify 'd'.
55  * See the opcode map below for the list of microinstructions and their
56  * operands. Most instructions are very simple and do not need
57  * detailed explanations. There are no side effects.
58  *
59  * Each operand has a size specifier. The following sizes can be used in
60  * practically all contexts: 1, 2, 4, 8, 16 bytes. Floating types may have
61  * other sizes. Functions may return objects of arbitrary size, as well as
62  * operations upon UDT's (user-defined types, which are structs or unions).
63  *
64  * Memory is considered to consist of several segments.
65  * We don't make any assumptions about the segments: whether they overlap
66  * or not, etc. A memory reference is made using a (selector, offset) pair.
67  * A selector is always 2 bytes long. An offset can be 2 or 4 bytes long.
68  * Currently the selectors are not used very much. The decompiler tries to
69  * resolve (selector, offset) pairs into direct memory references at each
70  * opportunity and then operates on mop_v operands. In other words,
71  * while the decompiler can handle segmented memory models, internally
72  * it still uses simple linear addresses.
73  *
74  * The following memory regions are recognized:
75  * - GLBLOW global memory: low part, everything below the stack
76  * - LVARS stack: local variables
77  * - RETADDR stack: return address
78  * - SHADOW stack: shadow arguments
79  * - ARGS stack: regular stack arguments
80  * - GLBHIGH global memory: high part, everything above the stack
81  * Any stack region may be empty. Objects residing in one memory region
82  * are considered to be completely distinct from objects in other regions.
83  * We allocate the stack frame in some memory region, which is not
84  * allocated for any purposes in IDA. This permits us to use linear addresses
85  * for all memory references, including the stack frame.
86  *
87  * If the operand size is bigger than a byte then the register
88  * operand references a block of registers. For example:
89  *
90  * ldc #1.4, r8.4
91  *
92  * loads a constant 1 to registers 8, 9, 10, 11:
93  *
94  * #1 -> r8
95  * #0 -> r9
96  * #0 -> r10
97  * #0 -> r11
98  *
99  * This example uses little-endian byte ordering.
100  * Big-endian byte ordering is supported too.
101  *
102  * Each instruction has 'next' and 'prev' fields that are used to form
103  * a doubly linked list. Such lists are present for each basic block (mblock_t).
104  * Basic blocks have other attributes, including:
105  * - dead_at_start: list of dead locations at the block start
106  * - maybuse: list of locations the block may use
107  * - maybdef: list of locations the block may define (or spoil)
108  * - mustbuse: list of locations the block will certainly use
109  * - mustbdef: list of locations the block will certainly define
110  * - dnu: list of locations the block will certainly define
111  * but will not use
112  *
113  * These lists are represented by the mlist_t class. It consists of 2 parts:
114  * - rlist_t: list of microregisters (possibly including virtual stack locations)
115  * - ivlset_t: list of memory locations represented as intervals
116  * we use linear addresses in this list.
117  * The mlist_t class is used quite often. For example, to find what an operand
118  * can spoil, we build its 'maybe-use' list. Then we can find out if this list
119  * is accessed using the is_accessed() or is_accessed_globally() functions.
120  *
121  * All basic blocks of the decompiled function constitute an array called
122  * mbl_array_t (array of microblocks). This is a huge class that has too
123  * many fields to describe here (some of the fields are not visible in the sdk)
124  * The most important ones are:
125  * - stack frame: frregs, stacksize, etc
126  * - memory: aliased, restricted, and other ranges
127  * - type: type of the current function, its arguments (argidx) and
128  * local variables (vars)
129  * - natural: array of basic blocks. they are also accessible as a
130  * doubly linked list starting from 'blocks'.
131  * - bg: control flow graph. the graph gives access to the use-def
132  * chains, which describe data dependencies between basic blocks
133  *
134  */
135 
136 #ifdef __VC__
137 #pragma warning(push)
138 #pragma warning(disable:4062) // enumerator 'x' in switch of enum 'y' is not handled
139 #pragma warning(disable:4265) // virtual functions without virtual destructor
140 #endif
141 
142 #define hexapi ///< Public functions are marked with this keyword
143 
144 //-V:2:654 The condition '2' of loop is always true.
145 //-V::719 The switch statement does not cover all values
146 //-V:verify:678
147 //-V:chain_keeper_t:690 copy ctr will be generated
148 //-V:ivlset_t:690 lacks the '=' operator
149 //-V:add_block:656 call to the same function
150 //-V:add:792 The 'add' function located to the right of the operator '|' will be called regardless of the value of the left operand
151 //-V:sub:792 The 'sub' function located to the right of the operator '|' will be called regardless of the value of the left operand
152 //-V:intersect:792 The 'intersect' function located to the right of the operator '|' will be called regardless of the value of the left operand
153 class mop_t;
154 class mop_pair_t;
155 class mop_addr_t;
156 class mfuncinfo_t;
157 class mcases_t;
158 class minsn_t;
159 class mblock_t;
160 class mbl_array_t;
161 class codegen_t;
162 class mbl_graph_t;
163 struct vdui_t;
164 struct hexrays_failure_t;
165 struct mba_stats_t;
166 struct mlist_t;
167 typedef int mreg_t; ///< Micro register
168 
169 struct cfunc_t;
170 struct citem_t;
171 struct cexpr_t;
172 struct cinsn_t;
173 struct cblock_t;
174 struct cswitch_t;
175 struct carg_t;
176 struct carglist_t;
177 
178 typedef std::set<ea_t> easet_t;
179 typedef std::set<minsn_t *> minsn_ptr_set_t;
180 typedef std::set<qstring> strings_t;
181 typedef qvector<minsn_t*> minsnptrs_t;
182 typedef qvector<mop_t*> mopptrs_t;
183 typedef qvector<mop_t> mopvec_t;
184 typedef qvector<uint64> uint64vec_t;
185 typedef qvector<mreg_t> mregvec_t;
186 #define MAX_SUPPORTED_STACK_SIZE 0x100000 // 1MByte
187 
188 //-------------------------------------------------------------------------
/// Kind of access to a location.
/// The values are bit flags: RW_ACCESS is the combination of the read and
/// write bits.
enum access_type_t
{
  NO_ACCESS    = 0,                           ///< neither read nor written
  WRITE_ACCESS = 1,                           ///< write bit
  READ_ACCESS  = 2,                           ///< read bit
  RW_ACCESS    = READ_ACCESS | WRITE_ACCESS,  ///< both bits set
};
196 
/// Access reliability (may/must) combined with optional modifier bits.
/// The low bit (MAYMUST_ACCESS_MASK) selects MUST_ACCESS or MAY_ACCESS;
/// the remaining bits tune the functions named in the comments below.
typedef int maymust_t;

const maymust_t MUST_ACCESS          = 0x00;  // access information we can count on
const maymust_t MAY_ACCESS           = 0x01;  // access information we should take into account
const maymust_t MAYMUST_ACCESS_MASK  = 0x01;

const maymust_t ONE_ACCESS_TYPE      = 0x20;  // for find_first_use:
                                              // use only the specified maymust access type
                                              // (by default it inverts the access type for def-lists)
const maymust_t INCLUDE_SPOILED_REGS = 0x40;  // for build_def_list with MUST_ACCESS:
                                              // include spoiled registers in the list
const maymust_t EXCLUDE_PASS_REGS    = 0x80;  // for build_def_list with MAY_ACCESS:
                                              // exclude pass_regs from the list
const maymust_t FULL_XDSU            = 0x100; // for build_def_list:
                                              // if xdsu source and targets are the same
                                              // treat it as if xdsu redefines the whole destination
const maymust_t WITH_ASSERTS         = 0x200; // for find_first_use:
                                              // do not ignore assertions
const maymust_t EXCLUDE_VOLATILE     = 0x400; // for build_def_list:
                                              // exclude volatile memory from the list
const maymust_t INCLUDE_UNUSED_SRC   = 0x800; // for build_use_list:
                                              // do not exclude unused source bytes for m_and/m_or insns
218  // do not exclude unused source bytes for m_and/m_or insns
219 
220 inline bool is_may_access(maymust_t maymust)
221 {
222  return (maymust & MAYMUST_ACCESS_MASK) != MUST_ACCESS;
223 }
224 
225 //-------------------------------------------------------------------------
/// \defgroup MERR_ Microcode error codes
/// Errors are negative; MERR_OK and MERR_BLOCK are the non-error codes.
//@{
enum merror_t
{
  MERR_OK        = 0,       ///< ok
  MERR_BLOCK     = 1,       ///< no error, switch to new block
  MERR_INTERR    = -1,      ///< internal error
  MERR_INSN      = -2,      ///< can not convert to microcode
  MERR_MEM       = -3,      ///< not enough memory
  MERR_BADBLK    = -4,      ///< bad block found
  MERR_BADSP     = -5,      ///< positive sp value has been found
  MERR_PROLOG    = -6,      ///< prolog analysis failed
  MERR_SWITCH    = -7,      ///< wrong switch idiom
  MERR_EXCEPTION = -8,      ///< exception analysis failed
  MERR_HUGESTACK = -9,      ///< stack frame is too big
  MERR_LVARS     = -10,     ///< local variable allocation failed
  MERR_BITNESS   = -11,     ///< only 32/16bit functions can be decompiled
  MERR_BADCALL   = -12,     ///< could not determine call arguments
  MERR_BADFRAME  = -13,     ///< function frame is wrong
  MERR_UNKTYPE   = -14,     ///< undefined type %s (currently unused error code)
  MERR_BADIDB    = -15,     ///< inconsistent database information
  MERR_SIZEOF    = -16,     ///< wrong basic type sizes in compiler settings
  MERR_REDO      = -17,     ///< redecompilation has been requested
  MERR_CANCELED  = -18,     ///< decompilation has been cancelled
  MERR_RECDEPTH  = -19,     ///< max recursion depth reached during lvar allocation
  MERR_OVERLAP   = -20,     ///< variables would overlap: %s
  MERR_PARTINIT  = -21,     ///< partially initialized variable %s
  MERR_COMPLEX   = -22,     ///< too complex function
  MERR_LICENSE   = -23,     ///< no license available
  MERR_ONLY32    = -24,     ///< only 32-bit functions can be decompiled for the current database
  MERR_ONLY64    = -25,     ///< only 64-bit functions can be decompiled for the current database
  MERR_BUSY      = -26,     ///< already decompiling a function
  MERR_FARPTR    = -27,     ///< far memory model is supported only for pc
  MERR_EXTERN    = -28,     ///< special segments can not be decompiled
  MERR_FUNCSIZE  = -29,     ///< too big function
  MERR_BADRANGES = -30,     ///< bad input ranges
  MERR_STOP      = -31,     ///< no error, stop the analysis
  MERR_MAX_ERR   = 31,      ///< absolute value of the last reportable code above
  MERR_LOOP      = -32,     ///< internal code: redo last loop (never reported)
};
//@}
267 
268 /// Get textual description of an error code
269 /// \param out the output buffer for the error description
270 /// \param code \ref MERR_
271 /// \param mba the microcode array
272 /// \return the error address
273 
274 ea_t hexapi get_merror_desc(qstring *out, merror_t code, mbl_array_t *mba);
275 
276 ///------------------------------------------------------------------------
277 /// Map a processor register to microregister.
278 /// \param reg processor register number
279 /// \return microregister register id or mr_none
280 
281 mreg_t hexapi reg2mreg(int reg);
282 
283 
284 /// Map a microregister to processor register.
285 /// \param reg microregister number
286 /// \param width size of microregister in bytes
287 /// \return processor register id or -1
288 
289 int hexapi mreg2reg(mreg_t reg, int width);
290 
291 
292 //-------------------------------------------------------------------------
293 /// User defined callbacks to optimize individual microcode instructions
294 struct optinsn_t
295 {
296  /// Optimize an instruction.
297  /// \param blk current basic block. maybe NULL, which means that
298  /// the instruction must be optimized without context
299  /// \param ins instruction to optimize; it may be a sub-instruction of some
300  /// other instruction. such sub-instructions may be optimized up
301  /// to "mov x,x". for example: add x,0,x => mov x,x.
302  /// top level instruction may be optimized up to "nop".
303  /// \return number of changes made to the instruction.
304  virtual int idaapi func(mblock_t *blk, minsn_t *ins) = 0;
305 };
306 
307 void hexapi install_optinsn_handler(optinsn_t *opt);
308 bool hexapi remove_optinsn_handler(optinsn_t *opt);
309 
310 /// User defined callbacks to optimize microcode blocks
312 {
313  /// Optimize a block.
314  /// This function usually performs the optimizations that require analyzing
315  /// the entire block and/or its neighbors. For example it can recognize
316  /// patterns and perform conversions like:
317  /// b0: b0:
318  /// ... ...
319  /// jnz x, 0, @b2 => jnz x, 0, @b2
320  /// b1: b1:
321  /// add x, 0, y mov x, y
322  /// ... ...
323  /// \param blk Basic block to optimize as a whole.
324  /// \return number of changes made to the block.
325  virtual int idaapi func(mblock_t *blk) = 0;
326 };
327 
328 void hexapi install_optblock_handler(optblock_t *opt);
329 bool hexapi remove_optblock_handler(optblock_t *opt);
330 
331 
332 //-------------------------------------------------------------------------
333 // The order of setX and jX insns is important, it is used in the code.
334 
335 // Instructions marked with *F may have FPINSN bit set and operate on fp values
336 // Instructions marked with +F must have FPINSN bit set. They always operate on fp values
337 // Other instructions do not operate on fp values.
338 
/// Microcode opcodes.
/// The numeric order of the setX and jX groups is relied upon by
/// set2jcnd()/jcnd2set() and the range checks in the is_mcode_...() helpers,
/// so entries must not be reordered.
enum mcode_t
{
  m_nop    = 0x00, // nop                       // no operation
  m_stx    = 0x01, // stx  l,    {r=sel, d=off} // store register to memory     *F
  m_ldx    = 0x02, // ldx  {l=sel,r=off}, d     // load register from memory    *F
  m_ldc    = 0x03, // ldc  l=const,     d       // load constant
  m_mov    = 0x04, // mov  l,           d       // move                         *F
  m_neg    = 0x05, // neg  l,           d       // negate
  m_lnot   = 0x06, // lnot l,           d       // logical not
  m_bnot   = 0x07, // bnot l,           d       // bitwise not
  m_xds    = 0x08, // xds  l,           d       // extend (signed)
  m_xdu    = 0x09, // xdu  l,           d       // extend (unsigned)
  m_low    = 0x0A, // low  l,           d       // take low part
  m_high   = 0x0B, // high l,           d       // take high part
  m_add    = 0x0C, // add  l,   r,      d       // l + r -> dst
  m_sub    = 0x0D, // sub  l,   r,      d       // l - r -> dst
  m_mul    = 0x0E, // mul  l,   r,      d       // l * r -> dst
  m_udiv   = 0x0F, // udiv l,   r,      d       // l / r -> dst
  m_sdiv   = 0x10, // sdiv l,   r,      d       // l / r -> dst
  m_umod   = 0x11, // umod l,   r,      d       // l % r -> dst
  m_smod   = 0x12, // smod l,   r,      d       // l % r -> dst
  m_or     = 0x13, // or   l,   r,      d       // bitwise or
  m_and    = 0x14, // and  l,   r,      d       // bitwise and
  m_xor    = 0x15, // xor  l,   r,      d       // bitwise xor
  m_shl    = 0x16, // shl  l,   r,      d       // shift logical left
  m_shr    = 0x17, // shr  l,   r,      d       // shift logical right
  m_sar    = 0x18, // sar  l,   r,      d       // shift arithmetic right
  m_cfadd  = 0x19, // cfadd l,  r,    d=carry   // calculate carry    bit of (l+r)
  m_ofadd  = 0x1A, // ofadd l,  r,    d=overf   // calculate overflow bit of (l+r)
  m_cfshl  = 0x1B, // cfshl l,  r,    d=carry   // calculate carry    bit of (l<<r)
  m_cfshr  = 0x1C, // cfshr l,  r,    d=carry   // calculate carry    bit of (l>>r)
  m_sets   = 0x1D, // sets  l,        d=byte  SF=1          Sign
  m_seto   = 0x1E, // seto  l,  r,    d=byte  OF=1          Overflow of (l-r)
  m_setp   = 0x1F, // setp  l,  r,    d=byte  PF=1          Unordered/Parity  *F
  m_setnz  = 0x20, // setnz l,  r,    d=byte  ZF=0          Not Equal         *F
  m_setz   = 0x21, // setz  l,  r,    d=byte  ZF=1          Equal             *F
  m_setae  = 0x22, // setae l,  r,    d=byte  CF=0          Above or Equal    *F
  m_setb   = 0x23, // setb  l,  r,    d=byte  CF=1          Below             *F
  m_seta   = 0x24, // seta  l,  r,    d=byte  CF=0 & ZF=0   Above             *F
  m_setbe  = 0x25, // setbe l,  r,    d=byte  CF=1 | ZF=1   Below or Equal    *F
  m_setg   = 0x26, // setg  l,  r,    d=byte  SF=OF & ZF=0  Greater
  m_setge  = 0x27, // setge l,  r,    d=byte  SF=OF         Greater or Equal
  m_setl   = 0x28, // setl  l,  r,    d=byte  SF!=OF        Less
  m_setle  = 0x29, // setle l,  r,    d=byte  SF!=OF | ZF=1 Less or Equal
  m_jcnd   = 0x2A, // jcnd   l,    d            // d is mop_v or mop_b
  m_jnz    = 0x2B, // jnz    l, r, d            // ZF=0          Not Equal      *F
  m_jz     = 0x2C, // jz     l, r, d            // ZF=1          Equal          *F
  m_jae    = 0x2D, // jae    l, r, d            // CF=0          Above or Equal *F
  m_jb     = 0x2E, // jb     l, r, d            // CF=1          Below          *F
  m_ja     = 0x2F, // ja     l, r, d            // CF=0 & ZF=0   Above          *F
  m_jbe    = 0x30, // jbe    l, r, d            // CF=1 | ZF=1   Below or Equal *F
  m_jg     = 0x31, // jg     l, r, d            // SF=OF & ZF=0  Greater
  m_jge    = 0x32, // jge    l, r, d            // SF=OF         Greater or Equal
  m_jl     = 0x33, // jl     l, r, d            // SF!=OF        Less
  m_jle    = 0x34, // jle    l, r, d            // SF!=OF | ZF=1 Less or Equal
  m_jtbl   = 0x35, // jtbl   l, r=mcases        // Table jump
  m_ijmp   = 0x36, // ijmp      {r=sel, d=off}  // indirect unconditional jump
  m_goto   = 0x37, // goto   l                  // l is mop_v or mop_b
  m_call   = 0x38, // call   l          d       // l is mop_v or mop_b or mop_h
  m_icall  = 0x39, // icall  {l=sel, r=off} d   // indirect call
  m_ret    = 0x3A, // ret
  m_push   = 0x3B, // push   l
  m_pop    = 0x3C, // pop               d
  m_und    = 0x3D, // und               d       // undefine
  m_ext    = 0x3E, // ext  in1, in2,  out1      // external insn, not microcode *F
  m_f2i    = 0x3F, // f2i    l,    d      int(l) => d; convert fp -> integer   +F
  m_f2u    = 0x40, // f2u    l,    d      uint(l)=> d; convert fp -> uinteger  +F
  m_i2f    = 0x41, // i2f    l,    d      fp(l)  => d; convert integer -> fp   +F
  m_u2f    = 0x42, // u2f    l,    d      fp(l)  => d; convert uinteger -> fp  +F
  m_f2f    = 0x43, // f2f    l,    d      l      => d; change fp precision     +F
  m_fneg   = 0x44, // fneg   l,    d      -l     => d; change sign             +F
  m_fadd   = 0x45, // fadd   l, r, d      l + r  => d; add                     +F
  m_fsub   = 0x46, // fsub   l, r, d      l - r  => d; subtract                +F
  m_fmul   = 0x47, // fmul   l, r, d      l * r  => d; multiply                +F
  m_fdiv   = 0x48, // fdiv   l, r, d      l / r  => d; divide                  +F
#define m_max 0x49 // first unused opcode
};
416 
417 bool must_mcode_close_block(mcode_t opcode, bool including_calls);
418 bool is_mcode_propagatable(mcode_t mcode);
419 inline bool is_mcode_addsub(mcode_t mcode) { return mcode == m_add || mcode == m_sub; }
420 inline bool is_mcode_xdsu(mcode_t mcode) { return mcode == m_xds || mcode == m_xdu; }
421 inline bool is_mcode_set(mcode_t mcode) { return mcode >= m_sets && mcode <= m_setle; }
422 inline bool is_mcode_set1(mcode_t mcode) { return mcode == m_sets; }
423 inline bool is_mcode_j1(mcode_t mcode) { return mcode == m_jcnd; }
424 inline bool is_mcode_jcond(mcode_t mcode) { return mcode >= m_jcnd && mcode <= m_jle; }
425 inline bool is_mcode_convertible_to_jmp(mcode_t mcode) { return mcode >= m_setnz && mcode <= m_setle; }
426 inline bool is_mcode_convertible_to_set(mcode_t mcode) { return mcode >= m_jnz && mcode <= m_jle; }
427 inline bool is_mcode_call(mcode_t mcode) { return mcode == m_call || mcode == m_icall; }
428 inline bool is_mcode_fpu(mcode_t mcode) { return mcode >= m_f2i; } // must be fpu
429 inline bool is_mcode_commutative(mcode_t mcode)
430 {
431  return mcode == m_add
432  || mcode == m_mul
433  || mcode == m_or
434  || mcode == m_and
435  || mcode == m_xor
436  || mcode == m_setz
437  || mcode == m_setnz
438  || mcode == m_cfadd
439  || mcode == m_ofadd;
440 }
441 inline bool is_mcode_shift(mcode_t mcode)
442 {
443  return mcode == m_shl
444  || mcode == m_shr
445  || mcode == m_sar;
446 }
447 
448 // Convert setX opcode into corresponding jX opcode
449 // This function relies on the order of setX and jX opcodes!
450 inline mcode_t set2jcnd(mcode_t code)
451 {
452  return mcode_t(code - m_setnz + m_jnz);
453 }
454 
455 // Convert setX opcode into corresponding jX opcode
456 // This function relies on the order of setX and jX opcodes!
457 inline mcode_t jcnd2set(mcode_t code)
458 {
459  return mcode_t(code + m_setnz - m_jnz);
460 }
461 
462 mcode_t hexapi negate_mcode_relation(mcode_t code);
463 mcode_t hexapi swap_mcode_relation(mcode_t code);
464 mcode_t hexapi get_signed_mcode(mcode_t code);
465 mcode_t hexapi get_unsigned_mcode(mcode_t code);
466 inline bool is_signed_mcode(mcode_t code) { return get_unsigned_mcode(code) != code; }
467 inline bool is_unsigned_mcode(mcode_t code) { return get_signed_mcode(code) != code; }
468 
469 // is the 'd' field a destination microreg?
470 // insns like jcnd, ijmp, stx etc. reuse 'd' as a source operand
471 // NB: use minsn_t::is_dest_target() if you have minsn_t (it returns proper value for m_ext)
472 bool hexapi is_dest_target_mcode(mcode_t mcode);
473 
474 // Map the processor condition codes to the first virtual registers:
475 const mreg_t mr_none = mreg_t(-1);
476 const mreg_t mr_cf = mreg_t(0); // the order is important, see mop_t::is_cc()
477 const mreg_t mr_zf = mreg_t(1);
478 const mreg_t mr_sf = mreg_t(2);
479 const mreg_t mr_of = mreg_t(3);
480 const mreg_t mr_pf = mreg_t(4);
481 const int cc_count = mr_pf - mr_cf + 1; // number of condition code registers
482 const mreg_t mr_cc = mreg_t(5); // synthetic condition code, used internally
483 const mreg_t mr_first = mreg_t(8); // the first processor specific register
484 
485 // It is ok to use the gap between mr_pf and mr_first for bit registers
486 
487 //------------------------------------------------------------------------
/// Macro to declare the six standard inline comparison operators.
/// Every operator is implemented through the compare() member of the class
/// in which the macro is expanded.
#define DECLARE_COMPARISON_OPERATORS(type)                            \
  bool operator< (const type &r) const { return compare(r) <  0; }    \
  bool operator<=(const type &r) const { return compare(r) <= 0; }    \
  bool operator> (const type &r) const { return compare(r) >  0; }    \
  bool operator>=(const type &r) const { return compare(r) >= 0; }    \
  bool operator==(const type &r) const { return compare(r) == 0; }    \
  bool operator!=(const type &r) const { return compare(r) != 0; }
496 
/// Macro to declare comparisons for our classes.
/// It provides a free compare(a, b) friend and the comparison operators, and
/// ends with the declaration of the compare() member, which returns -1/0/1;
/// the user of the macro supplies the terminating ';' or the function body.
#define DECLARE_COMPARISONS(type)                                           \
  friend int compare(const type &a, const type &b) { return a.compare(b); } \
  DECLARE_COMPARISON_OPERATORS(type)                                        \
  int compare(const type &r) const
503 
504 /// Operand locator.
506 {
507 private:
508  //forbid the default constructor
509  operand_locator_t(void) {}
510 public:
511  ea_t ea; ///< address of the original instruction
512  int opnum; ///< operand number in the instruction
513  operand_locator_t(ea_t _ea, int _opnum) : ea(_ea), opnum(_opnum) {}
514  DECLARE_COMPARISONS(operand_locator_t);
515  DEFINE_MEMORY_ALLOCATION_FUNCS()
516 };
517 
518 //-------------------------------------------------------------------------
519 /// Number represenation.
520 /// This structure holds information about number format.
522 {
523  DEFINE_MEMORY_ALLOCATION_FUNCS()
524  flags_t flags; ///< ida flags, which describe number radix, enum, etc
525  char opnum; ///< operand number: 0..UA_MAXOP
526  char props; ///< properties: combination of NF_ bits (\ref NF_)
527 /// \defgroup NF_ Number format property bits
528 /// Used in number_format_t::props
529 //@{
530 #define NF_FIXED 0x01 ///< number format has been defined by the user
531 #define NF_NEGDONE 0x02 ///< temporary internal bit: negation has been performed
532 #define NF_BINVDONE 0x04 ///< temporary internal bit: inverting bits is done
533 #define NF_NEGATE 0x08 ///< The user asked to negate the constant
534 #define NF_BITNOT 0x10 ///< The user asked to invert bits of the constant
535 #define NF_STROFF 0x20 ///< internal bit: used as stroff, valid iff is_stroff()
536 //@}
537  uchar serial; ///< for enums: constant serial number
538  char org_nbytes; ///< original number size in bytes
539  qstring type_name; ///< for stroffs: structure for offsetof()\n
540  ///< for enums: enum name
541  /// Contructor
542  number_format_t(int _opnum=0)
543  : flags(0), opnum(char(_opnum)), props(0), serial(0), org_nbytes(0) {}
544  /// Get number radix
545  /// \return 2,8,10, or 16
546  int get_radix(void) const { return ::get_radix(flags, opnum); }
547  /// Is number representation fixed?
548  /// Fixed representation can not be modified by the decompiler
549  bool is_fixed(void) const { return props != 0; }
550  /// Is a hexadecimal number?
551  bool is_hex(void) const { return ::is_numop(flags, opnum) && get_radix() == 16; }
552  /// Is a decimal number?
553  bool is_dec(void) const { return ::is_numop(flags, opnum) && get_radix() == 10; }
554  /// Is a octal number?
555  bool is_oct(void) const { return ::is_numop(flags, opnum) && get_radix() == 8; }
556  /// Is a symbolic constant?
557  bool is_enum(void) const { return ::is_enum(flags, opnum); }
558  /// Is a character constant?
559  bool is_char(void) const { return ::is_char(flags, opnum); }
560  /// Is a structure field offset?
561  bool is_stroff(void) const { return ::is_stroff(flags, opnum); }
562  /// Is a number?
563  bool is_numop(void) const { return !is_enum() && !is_char() && !is_stroff(); }
564  /// Does the number need to be negated or bitwise negated?
565  /// Returns true if the user requested a negation but it is not done yet
566  bool needs_to_be_inverted(void) const
567  {
568  return (props & (NF_NEGATE|NF_BITNOT)) != 0 // the user requested it
569  && (props & (NF_NEGDONE|NF_BINVDONE)) == 0; // not done yet
570  }
571 };
572 
573 // Number formats are attached to (ea,opnum) pairs
574 typedef std::map<operand_locator_t, number_format_t> user_numforms_t;
575 
576 //-------------------------------------------------------------------------
577 /// Base helper class to convert binary data structures into text.
578 /// Other classes are derived from this class.
580 {
581  qstring tmpbuf;
582  int hdrlines; ///< number of header lines (prototype+typedef+lvars)
583  ///< valid at the end of print process
584  /// Print.
585  /// This function is called to generate a portion of the output text.
586  /// The output text may contain color codes.
587  /// \return the number of printed characters
588  /// \param indent number of spaces to generate as prefix
589  /// \param format printf-style format specifier
590  /// \return length of printed string
591  AS_PRINTF(3, 4) virtual int hexapi print(int indent, const char *format,...);
592  DEFINE_MEMORY_ALLOCATION_FUNCS()
593 };
594 
595 /// Helper class to convert cfunc_t into text.
596 struct vc_printer_t : public vd_printer_t
597 {
598  const cfunc_t *func; ///< cfunc_t to generate text for
599  char lastchar; ///< internal: last printed character
600  /// Constructor
601  vc_printer_t(const cfunc_t *f) : func(f), lastchar(0) {}
602  /// Are we generating one-line text representation?
603  /// \return \c true if the output will occupy one line without line breaks
604  virtual bool idaapi oneliner(void) const { return false; }
605 };
606 
607 /// Helper class to convert binary data structures into text and put into a file.
609 {
610  FILE *fp; ///< Output file pointer
611  /// Print.
612  /// This function is called to generate a portion of the output text.
613  /// The output text may contain color codes.
614  /// \return the number of printed characters
615  /// \param indent number of spaces to generate as prefix
616  /// \param format printf-style format specifier
617  /// \return length of printed string
618  AS_PRINTF(3, 4) int hexapi print(int indent, const char *format, ...);
619  /// Constructor
620  file_printer_t(FILE *_fp) : fp(_fp) {}
621 };
622 
623 /// Helper class to convert cfunc_t into a text string
625 {
626  bool with_tags; ///< Generate output with color tags
627  qstring &s; ///< Reference to the output string
628  /// Constructor
629  qstring_printer_t(const cfunc_t *f, qstring &_s, bool tags)
630  : vc_printer_t(f), with_tags(tags), s(_s) {}
631  /// Print.
632  /// This function is called to generate a portion of the output text.
633  /// The output text may contain color codes.
634  /// \return the number of printed characters
635  /// \param indent number of spaces to generate as prefix
636  /// \param format printf-style format specifier
637  /// \return length of printed string
638  AS_PRINTF(3, 4) int hexapi print(int indent, const char *format, ...);
639 };
640 
641 //-------------------------------------------------------------------------
642 /// \defgroup type Type string related declarations
643 /// Type related functions and class.
644 //@{
645 
646 /// Verify a type string.
647 /// \return true if type string is correct
648 
649 bool hexapi is_type_correct(const type_t *ptr);
650 
651 
652 /// Is a small structure or union?
653 /// \return true if the type is a small UDT (user defined type).
654 /// Small UDTs fit into a register (or pair or registers) as a rule.
655 
656 bool hexapi is_small_struni(const tinfo_t &tif);
657 
658 
659 /// Is definitely a non-boolean type?
660 /// \return true if the type is a non-boolean type (non bool and well defined)
661 
662 bool hexapi is_nonbool_type(const tinfo_t &type);
663 
664 
665 /// Is a boolean type?
666 /// \return true if the type is a boolean type
667 
668 bool hexapi is_bool_type(const tinfo_t &type);
669 
670 
671 /// Is a pointer or array type?
672 inline bool is_ptr_or_array(type_t t)
673 {
674  return is_type_ptr(t) || is_type_array(t);
675 }
676 
677 /// Is a pointer, array, or function type?
678 inline bool is_paf(type_t t)
679 {
680  return is_ptr_or_array(t) || is_type_func(t);
681 }
682 
683 /// Is struct/union/enum definition (not declaration)?
684 inline bool is_inplace_def(const tinfo_t &type)
685 {
686  return type.is_decl_complex() && !type.is_typeref();
687 }
688 
689 /// Calculate number of partial subtypes.
690 /// \return number of partial subtypes. The bigger is this number, the uglier is the type.
691 
692 int hexapi partial_type_num(const tinfo_t &type);
693 
694 
695 /// Get a type of a floating point value with the specified width
696 /// \returns type info object
697 /// \param width width of the desired type
698 
699 tinfo_t hexapi get_float_type(int width);
700 
701 
702 /// Create a type info by width and sign.
703 /// Returns a simple type (examples: int, short) with the given width and sign.
704 /// \param srcwidth size of the type in bytes
705 /// \param sign sign of the type
706 
707 tinfo_t hexapi get_int_type_by_width_and_sign(int srcwidth, type_sign_t sign);
708 
709 
710 /// Create a partial type info by width.
711 /// Returns a partially defined type (examples: _DWORD, _BYTE) with the given width.
712 /// \param size size of the type in bytes
713 
714 tinfo_t hexapi get_unk_type(int size);
715 
716 
717 /// Generate a dummy pointer type
718 /// \param ptrsize size of pointed object
719 /// \param isfp is floating point object?
720 
721 tinfo_t hexapi dummy_ptrtype(int ptrsize, bool isfp);
722 
723 
724 /// Get type of a structure field.
725 /// This function performs validity checks of the field type. Wrong types are rejected.
726 /// \param mptr structure field
727 /// \param type pointer to the variable where the type is returned. This parameter can be NULL.
728 /// \return false if failed
729 
730 bool hexapi get_member_type(const member_t *mptr, tinfo_t *type);
731 
732 
733 /// Create a pointer type.
734 /// This function performs the following conversion: "type" -> "type*"
735 /// \param type object type.
736 /// \return "type*". for example, if 'char' is passed as the argument,
737 /// the function will return 'char *'
738 
739 tinfo_t hexapi make_pointer(const tinfo_t &type);
740 
741 
742 /// Create a reference to a named type.
743 /// \param name type name
744 /// \return type which refers to the specified name. For example, if name is "DWORD",
745 /// the type info which refers to "DWORD" is created.
746 
747 tinfo_t hexapi create_typedef(const char *name);
748 
749 
750 /// Create a reference to an ordinal type.
751 /// \param n ordinal number of the type
752 /// \return type which refers to the specified ordianl. For example, if n is 1,
753 /// the type info which refers to ordinal type 1 is created.
754 
755 inline tinfo_t create_typedef(int n)
756 {
757  tinfo_t tif;
758  tif.create_typedef(NULL, n);
759  return tif;
760 }
761 
762 /// Type source (where the type information comes from)
764 { // NOTE(review): the line naming this enumeration is absent from this listing; compare_typsrc() takes these values as type_source_t
765  GUESSED_NONE, ///< not guessed, specified by the user
766  GUESSED_WEAK, ///< not guessed, comes from idb
767  GUESSED_FUNC, ///< guessed as a function
768  GUESSED_DATA, ///< guessed as a data item
769  TS_NOELL = 0x8000000, ///< can be used in set_type() to avoid merging into ellipsis
770  TS_SHRINK = 0x4000000, ///< can be used in set_type() to prefer smaller arguments
771  TS_MASK = 0xC000000, ///< all high bits (TS_NOELL|TS_SHRINK)
772 };
773 
774 inline int compare_typsrc(type_source_t s1, type_source_t s2)
775 {
776  if ( s1 > GUESSED_WEAK && s2 > GUESSED_WEAK )
777  return 0; // both guessed, consider equal
778  return compare(s1, s2);
779 }
780 
781 
782 /// Get a global type.
783 /// Global types are types of addressable objects and struct/union/enum types
784 /// \param id address or id of the object
785 /// \param tif buffer for the answer
786 /// \param guess what kind of types to consider
787 /// \return success
788 
789 bool hexapi get_type(uval_t id, tinfo_t *tif, type_source_t guess);
790 
791 
792 /// Set a global type.
793 /// \param id address or id of the object
794 /// \param tif new type info
795 /// \param source where the type comes from
796 /// \param force true means to set the type as is, false means to merge the
797 /// new type with the possibly existing old type info.
798 /// \return success
799 
800 bool hexapi set_type(uval_t id, const tinfo_t &tif, type_source_t source, bool force=false);
801 
802 //@}
803 
804 //-------------------------------------------------------------------------
805 // We use our own class to store argument and variable locations.
806 // The main differences between vdloc and argloc_t:
807 // VLOC_REG1: the offset is always 0, so it is not used. the register number
808 // uses the whole ~VLOC_MASK field.
809 // VLOC_STKOFF: stack offsets are always positive because they are based on
810 // the lowest value of sp in the function.
811 class vdloc_t : public argloc_t
812 {
813  int regoff(void); // inaccessible & undefined: regoff() should not be used
814 public:
815  // use all available bits for register number for VLOC_REG1
816  int reg1(void) const { return atype() == ALOC_REG2 ? argloc_t::reg1() : get_reginfo(); }
817  void _set_reg1(int r1) { argloc_t::_set_reg1(r1, r1>>16); } // it works fine
818  void set_reg1(int r1) { cleanup_argloc(this); _set_reg1(r1); }
819  inline bool is_fpu_mreg() const;
820  const char *hexapi dstr(int width=0) const;
821 };
822 
823 void hexapi print_vdloc(qstring *vout, const vdloc_t &loc, int w);
824 //-------------------------------------------------------------------------
825 /// Do two arglocs overlap?
826 bool hexapi arglocs_overlap(const vdloc_t &loc1, size_t w1, const vdloc_t &loc2, size_t w2);
827 
828 /// Local variable locator. A local variable is located by its location and its definition address.
830 {
831  vdloc_t location; ///< Variable location.
832  ea_t defea; ///< Definition address. The address of an instruction
833  ///< that initializes the variable. This value is
834  ///< assigned to each lvar by lvar allocator.
835  ///< BADADDR for function arguments
836  lvar_locator_t(void) : defea(BADADDR) {} // default: default-constructed location, no definition address
837  lvar_locator_t(const vdloc_t &loc, ea_t ea) : location(loc), defea(ea) {} // copy the given location and definition address
838  /// Calculate the variable location (only for continuous variables)
839  /// \return if the variable is register-hosted, the register number;
840  /// otherwise, the virtual stack register number that
841  /// corresponds to the stack location
842  sval_t hexapi get_regnum(void) const;
843  /// Is variable located on one register?
844  bool is_reg1(void) const { return location.is_reg1(); }
845  /// Is variable located on two registers?
846  bool is_reg2(void) const { return location.is_reg2(); }
847  /// Is variable located on register(s)?
848  bool is_reg_var(void) const { return location.is_reg(); }
849  /// Is variable located on the stack?
850  bool is_stk_var(void) const { return location.is_stkoff(); }
851  /// Is variable scattered?
852  bool is_scattered(void) const { return location.is_scattered(); }
853  /// Get number of the register for the variable
854  mreg_t get_reg1(void) const { return location.reg1(); }
855  /// Get number of the second register (only for tworeg lvars)
856  mreg_t get_reg2(void) const { return location.reg2(); }
857  /// Get information about scattered variable (const and non-const access)
858  const scattered_aloc_t &get_scattered(void) const { return location.scattered(); }
859  scattered_aloc_t &get_scattered(void) { return location.scattered(); }
861  DEFINE_MEMORY_ALLOCATION_FUNCS()
862  const char *hexapi dstr(void) const;
863 };
864 
865 /// Definition of a local variable (register or stack) #var #lvar
866 class lvar_t : public lvar_locator_t
867 {
868  friend class mbl_array_t;
869  int flags; ///< \ref CVAR_
870 /// \defgroup CVAR_ Local variable property bits
871 /// Used in lvar_t::flags
872 //@{
873 #define CVAR_USED 0x0001 ///< is used in the code?
874 #define CVAR_TYPE 0x0002 ///< the type is defined?
875 #define CVAR_NAME 0x0004 ///< has nice name?
876 #define CVAR_MREG 0x0008 ///< corresponding mregs were replaced?
877 #define CVAR_NOWD 0x0010 ///< width is unknown
878 #define CVAR_UNAME 0x0020 ///< user-defined name
879 #define CVAR_UTYPE 0x0040 ///< user-defined type
880 #define CVAR_RESULT 0x0080 ///< function result variable
881 #define CVAR_ARG 0x0100 ///< function argument
882 #define CVAR_FAKE 0x0200 ///< fake return variable
883 #define CVAR_OVER 0x0400 ///< overlapping variable
884 #define CVAR_FLOAT 0x0800 ///< used in a fpu insn
885 #define CVAR_SPOILED 0x1000 ///< internal flag, do not use: spoiled var
886 #define CVAR_MAPDST 0x2000 ///< other variables are mapped to this var
887 #define CVAR_PARTIAL 0x4000 ///< variable type is partially defined
888 //@}
889 
890 public:
891  qstring name; ///< variable name.
892  ///< use mbl_array_t::set_nice_lvar_name() and
893  ///< mbl_array_t::set_user_lvar_name() to modify it
894  qstring cmt; ///< variable comment string
895  tinfo_t tif; ///< variable type
896  int width; ///< variable size in bytes
897  int defblk; ///< first block defining the variable.
898  ///< 0 for args, -1 if unknown
899  uint64 divisor; ///< max known divisor of the variable
900 
901  lvar_t(void) : flags(CVAR_USED), width(0), defblk(-1), divisor(0) {}
902  lvar_t(const qstring &n, const vdloc_t &l, ea_t e, const tinfo_t &t, int w, int db)
903  : lvar_locator_t(l, e), flags(CVAR_USED), name(n), tif(t), width(w),
904  defblk(db), divisor(0) {}
905  lvar_t(mreg_t reg, int width, const tinfo_t &type, int nblock, ea_t defea);
906  const char *hexapi dstr(void) const;
907 
908  /// Is the variable used in the code?
909  bool used(void) const { return (flags & CVAR_USED) != 0; }
910  /// Has the variable a type?
911  bool typed(void) const { return (flags & CVAR_TYPE) != 0; }
912  /// Have corresponding microregs been replaced by references to this variable?
913  bool mreg_done(void) const { return (flags & CVAR_MREG) != 0; }
914  /// Does the variable have a nice name?
915  bool has_nice_name(void) const { return (flags & CVAR_NAME) != 0; }
916  /// Is the width of the variable unknown?
917  bool is_unknown_width(void) const { return (flags & CVAR_NOWD) != 0; }
918  /// Has any user-defined information (name, type, or a non-empty comment)?
919  bool has_user_info(void) const { return (flags & (CVAR_UNAME|CVAR_UTYPE)) != 0 || !cmt.empty(); }
920  /// Has user-defined name?
921  bool has_user_name(void) const { return (flags & CVAR_UNAME) != 0; }
922  /// Has user-defined type?
923  bool has_user_type(void) const { return (flags & CVAR_UTYPE) != 0; }
924  /// Is the function result?
925  bool is_result_var(void) const { return (flags & CVAR_RESULT) != 0; }
926  /// Is the function argument?
927  bool is_arg_var(void) const { return (flags & CVAR_ARG) != 0; }
928  /// Is the promoted function argument?
929  bool is_promoted_arg(void) const;
930  /// Is fake return variable?
931  bool is_fake_var(void) const { return (flags & CVAR_FAKE) != 0; }
932  /// Is overlapped variable?
933  bool is_overlapped_var(void) const { return (flags & CVAR_OVER) != 0; }
934  /// Used by a fpu insn?
935  bool is_floating_var(void) const { return (flags & CVAR_FLOAT) != 0; }
936  /// Is spoiled var? (meaningful only during lvar allocation)
937  bool is_spoiled_var(void) const { return (flags & CVAR_SPOILED) != 0; }
938  /// Is the variable type only partially defined?
939  bool is_partialy_typed(void) const { return (flags & CVAR_PARTIAL) != 0; }
940  /// Other variable(s) map to this var?
941  bool is_mapdst_var(void) const { return (flags & CVAR_MAPDST) != 0; }
942  void set_used(void) { flags |= CVAR_USED; }
943  void clear_used(void) { flags &= ~CVAR_USED; }
944  void set_typed(void) { flags |= CVAR_TYPE; }
945  void set_non_typed(void) { flags &= ~CVAR_TYPE; }
946  void clr_user_info(void) { flags &= ~(CVAR_UNAME|CVAR_UTYPE); } // the comment string is not touched
947  void set_user_name(void) { flags |= CVAR_NAME|CVAR_UNAME; } // a user-defined name also counts as a nice name
948  void set_user_type(void) { flags |= CVAR_TYPE|CVAR_UTYPE; } // a user-defined type also marks the variable as typed
949  void clr_user_type(void) { flags &= ~CVAR_UTYPE; } // CVAR_TYPE stays set
950  void clr_user_name(void) { flags &= ~CVAR_UNAME; } // CVAR_NAME stays set
951  void set_mreg_done(void) { flags |= CVAR_MREG; }
952  void clr_mreg_done(void) { flags &= ~CVAR_MREG; }
953  void set_unknown_width(void) { flags |= CVAR_NOWD; }
954  void clr_unknown_width(void) { flags &= ~CVAR_NOWD; }
955  void set_arg_var(void) { flags |= CVAR_ARG; }
956  void clr_arg_var(void) { flags &= ~CVAR_ARG; }
957  void set_fake_var(void) { flags |= CVAR_FAKE; }
958  void clr_fake_var(void) { flags &= ~CVAR_FAKE; }
959  void set_overlapped_var(void) { flags |= CVAR_OVER; }
960  void clr_overlapped_var(void) { flags &= ~CVAR_OVER; }
961  void set_floating_var(void) { flags |= CVAR_FLOAT; }
962  void clr_floating_var(void) { flags &= ~CVAR_FLOAT; }
963  void set_spoiled_var(void) { flags |= CVAR_SPOILED; }
964  void clr_spoiled_var(void) { flags &= ~CVAR_SPOILED; }
965  void set_mapdst_var(void) { flags |= CVAR_MAPDST; }
966  void clr_mapdst_var(void) { flags &= ~CVAR_MAPDST; }
967  void set_partialy_typed(void) { flags |= CVAR_PARTIAL; }
968  void clr_partialy_typed(void) { flags &= ~CVAR_PARTIAL; }
969 
970  void set_reg_name(const char *n)
971  {
972  name = n; // do not verify uniqueness
973  flags &= ~CVAR_USED; // do not display the declaration
974  flags |= CVAR_NAME; // do not autorename
975  }
976  /// Do variables overlap?
977  bool has_common(const lvar_t &v) const
978  {
979  return arglocs_overlap(location, width, v.location, v.width);
980  }
981  /// Does the variable overlap with the specified location?
982  bool has_common_bit(const vdloc_t &loc, asize_t width2) const
983  {
984  return arglocs_overlap(location, width, loc, width2);
985  }
986  /// Get variable type
987  const tinfo_t &type(void) const { return tif; }
988  tinfo_t &type(void) { return tif; }
989 
990  /// Check if the variable accepts the specified type.
991  /// Some types are forbidden (void, function types, wrong arrays, etc)
992  bool hexapi accepts_type(const tinfo_t &t);
993 
994  /// Set variable type without any validation.
995  void force_lvar_type(const tinfo_t &t);
996 
997  /// Set variable type
998  /// Note: this function does not modify the idb, only the lvar instance
999  /// in the memory. for permanent changes see modify_user_lvars()
1000  /// \param t new type
1001  /// \param may_fail if false and type is bad, interr
1002  /// \return success
1003  bool hexapi set_lvar_type(const tinfo_t &t, bool may_fail=false);
1004 
1005  /// Set final variable type: set the type and mark the variable as typed.
1006  void set_final_lvar_type(const tinfo_t &t)
1007  {
1008  set_lvar_type(t); // default may_fail=false; the return value is not checked
1009  set_typed();
1010  }
1011 
1012  /// Change the variable width. This function also changes
1013  /// the variable type.
1014  /// \param w new width
1015  /// \param svw_flags combination of SVW_... bits
1016  /// \return success
1017  bool hexapi set_width(int w, int svw_flags=0);
1018 #define SVW_INT 0x00 // integer value
1019 #define SVW_FLOAT 0x01 // floating point value
1020 #define SVW_SOFT 0x02 // may fail and return false;
1021  // if this bit is not set and the type is bad, interr
1022 
1023 };
1024 DECLARE_TYPE_AS_MOVABLE(lvar_t);
1025 
1026 /// Set of local variables
1027 struct lvars_t : public qvector<lvar_t>
1028 {
1029  /// Find input variable at the specified location.
1030  /// \param argloc variable location
1031  /// \param _size variable size
1032  /// \return -1 if failed, otherwise the index into the variables vector.
1033  int find_input_lvar(const vdloc_t &argloc, int _size) { return find_lvar(argloc, _size, 0); }
1034 
1035  /// Find stack variable at the specified location.
1036  /// \param spoff offset from the minimal sp
1037  /// \param width variable size
1038  /// \return -1 if failed, otherwise the index into the variables vector.
1039  int hexapi find_stkvar(int32 spoff, int width);
1040 
1041  /// Find variable at the specified location.
1042  /// \param ll variable location
1043  /// \return pointer to variable or NULL
1044  lvar_t *hexapi find(const lvar_locator_t &ll);
1045 
1046 
1047  /// Find variable at the specified location.
1048  /// \param location variable location
1049  /// \param width variable size
1050  /// \param defblk definition block of the lvar. -1 means any block
1051  /// \return -1 if failed, otherwise the index into the variables vector.
1052  int hexapi find_lvar(const vdloc_t &location, int width, int defblk=-1);
1053 };
1054 
1055 /// Saved user settings for local variables: name, type, comment
1057 {
1058  lvar_locator_t ll; ///< locator of the variable the settings belong to
1059  qstring name; ///< saved name
1060  tinfo_t type; ///< saved type
1061  qstring cmt; ///< saved comment
1062  ssize_t size; ///< type size (BADSIZE if not initialized)
1063  int flags; ///< \ref LVINF_
1064 /// \defgroup LVINF_ saved user lvar info property bits
1065 /// Used in lvar_saved_info_t::flags
1066 //@{
1067 #define LVINF_KEEP 0x0001 ///< keep saved user settings regardless of vars
1068 //@}
1069  lvar_saved_info_t(void) : size(BADSIZE), flags(0) {}
1070  bool has_info(void) const { return !name.empty() || !type.empty() || !cmt.empty(); } // any of name, type, comment present?
1071  bool operator==(const lvar_saved_info_t &r) const // note: 'size' and 'flags' do not take part in the comparison
1072  {
1073  return name == r.name
1074  && cmt == r.cmt
1075  && ll == r.ll
1076  && type == r.type;
1077  }
1078  bool operator!=(const lvar_saved_info_t &r) const { return !(*this == r); }
1079  bool is_kept(void) const { return (flags & LVINF_KEEP) != 0; }
1080  void clear_keep(void) { flags &= ~LVINF_KEEP; }
1081  void set_keep(void) { flags |= LVINF_KEEP; }
1082 };
1083 DECLARE_TYPE_AS_MOVABLE(lvar_saved_info_t);
1084 typedef qvector<lvar_saved_info_t> lvar_saved_infos_t;
1085 
1086 /// Local variable mapping (is used to merge variables)
1087 typedef std::map<lvar_locator_t, lvar_locator_t> lvar_mapping_t;
1088 
1089 /// All user-defined information about local variables
1091 {
1092  /// User-specified names, types, comments for lvars. Variables without
1093  /// user-specified info are not present in this vector.
1094  lvar_saved_infos_t lvvec;
1095
1096  /// Local variable mapping (used for merging variables)
1098
1099  /// Delta to add to IDA stack offset to calculate Hex-Rays stack offsets.
1100  /// Should be set by the caller before calling save_user_lvar_settings().
1102
1103  /// Various flags. Possible values are from \ref ULV_
1105 /// \defgroup ULV_ lvar_uservec_t property bits
1106 /// Used in lvar_uservec_t::ulv_flags
1107 //@{
1108 #define ULV_PRECISE_DEFEA 0x0001 ///< Use precise defea's for lvar locations
1109 //@}
1110
1111  lvar_uservec_t(void) : stkoff_delta(0), ulv_flags(ULV_PRECISE_DEFEA) {}
1112  void swap(lvar_uservec_t &r) // exchange all four members with 'r'
1113  {
1114  lvvec.swap(r.lvvec);
1115  lmaps.swap(r.lmaps);
1116  std::swap(stkoff_delta, r.stkoff_delta);
1117  std::swap(ulv_flags, r.ulv_flags);
1118  }
1119
1120  /// Find saved user settings for the given variable: linear search of lvvec for an entry whose locator equals vloc; NULL if there is none
1122  {
1123  for ( lvar_saved_infos_t::iterator p=lvvec.begin(); p != lvvec.end(); ++p )
1124  {
1125  if ( p->ll == vloc )
1126  return p;
1127  }
1128  return NULL;
1129  }
1130
1131  /// Keep user settings for the given variable (does nothing if the variable has no saved settings)
1132  void keep_info(const lvar_t &v)
1133  {
1134  lvar_saved_info_t *p = find_info(v);
1135  if ( p != NULL )
1136  p->set_keep();
1137  }
1138 };
1139 
1140 /// Restore user defined local variable settings from the database.
1141 /// \param func_ea entry address of the function
1142 /// \param lvinf ptr to output buffer
1143 /// \return success
1144 
1145 bool hexapi restore_user_lvar_settings(lvar_uservec_t *lvinf, ea_t func_ea);
1146 
1147 
1148 /// Save user defined local variable settings into the database.
1149 /// \param func_ea entry address of the function
1150 /// \param lvinf user-specified info about local variables
1151 
1152 void hexapi save_user_lvar_settings(ea_t func_ea, const lvar_uservec_t &lvinf);
1153 
1154 
1155 /// Helper class to modify saved local variable settings.
1157 {
1158  /// Modify lvar settings. Pure virtual: must be implemented by the derived class.
1159  /// \return true if the settings were modified
1160  virtual bool idaapi modify_lvars(lvar_uservec_t *lvinf) = 0;
1161 };
1162 
1163 /// Modify saved local variable settings.
1164 /// \param entry_ea function start address
1165 /// \param mlv local variable modifier
1166 /// \return true if modified variables
1167 
1168 bool hexapi modify_user_lvars(ea_t entry_ea, user_lvar_modifier_t &mlv);
1169 
1170 
1171 //-------------------------------------------------------------------------
1172 /// User-defined function call: a function name and its prototype
1173 struct udcall_t
1174 {
1175  qstring name; ///< name of the function
1176  tinfo_t tif; ///< function prototype
1177 };
1178 
1179 // All user-defined function calls (map address -> udcall)
1180 typedef std::map<ea_t, udcall_t> udcall_map_t;
1181 
1182 /// Restore user defined function calls from the database.
1183 /// \param udcalls ptr to output buffer
1184 /// \param func_ea entry address of the function
1185 /// \return success
1186 
1187 bool hexapi restore_user_defined_calls(udcall_map_t *udcalls, ea_t func_ea);
1188 
1189 
1190 /// Save user defined local function calls into the database.
1191 /// \param func_ea entry address of the function
1192 /// \param udcalls user-specified info about user defined function calls
1193 
1194 void hexapi save_user_defined_calls(ea_t func_ea, const udcall_map_t &udcalls);
1195 
1196 
1197 /// Convert function type declaration into internal structure
1198 /// \param udc - pointer to output structure
1199 /// \param decl - function type declaration
1200 /// \param silent - if TRUE: do not show warning in case of incorrect type
1201 /// \return success
1202 
1203 bool hexapi parse_user_call(udcall_t *udc, const char *decl, bool silent);
1204 
1205 
1206 /// try to generate user-defined call for an instruction
1207 /// \return \ref MERR_ code:
1208 /// MERR_OK - user-defined call generated
1209 /// else - error (MERR_INSN == unacceptable udc.tif)
1210 
1212 
1213 
1214 //-------------------------------------------------------------------------
1215 /// Generic microcode generator class.
1216 /// An instance of a derived class can be registered to be used for
1217 /// non-standard microcode generation. Before microcode generation for an
1218 /// instruction all registered objects will be visited in the following way:
1219 /// if ( filter->match(cdg) )
1220 /// code = filter->apply(cdg);
1221 /// if ( code == MERR_OK )
1222 /// continue; // filter generated microcode, go to the next instruction
1224 {
1225  /// check if the filter object is to be applied
1226  /// \return success
1227  virtual bool match(codegen_t &cdg) = 0;
1228  /// generate microcode for an instruction
1229  /// \return MERR_... code:
1230  /// MERR_OK - user-defined call generated, go to the next instruction
1231  /// MERR_INSN - not generated - the caller should try the standard way
1232  /// else - error
1233  virtual merror_t apply(codegen_t &cdg) = 0;
1234 };
1235 
1236 /// register/unregister non-standard microcode generator
1237 /// \param filter - microcode generator object
1238 /// \param install - TRUE - register the object, FALSE - unregister
1239 void hexapi install_microcode_filter(microcode_filter_t *filter, bool install=true);
1240 
1241 //-------------------------------------------------------------------------
1242 /// Abstract class: User-defined call generator
1243 /// derived classes should implement method 'match'
1245 {
1246  udcall_t udc; // the user-defined call; NOTE(review): presumably filled in by init() from 'decl' - confirm
1247 
1248 public:
1249  /// return true if the filter object should be applied to given instruction
1250  virtual bool match(codegen_t &cdg) = 0;
1251 
1252  bool hexapi init(const char *decl);
1253  virtual merror_t hexapi apply(codegen_t &cdg);
1254 };
1255 
1256 //-------------------------------------------------------------------------
1257 typedef int mbitmap_t; // storage word of a bitmap
1258 const int bitset_width = (sizeof(mbitmap_t)*CHAR_BIT); // number of bits in one mbitmap_t word
1259 const int bitset_align = bitset_width - 1; // NOTE(review): presumably used to round bit numbers to a multiple of bitset_width - confirm
1260 
1261 /// Bit set class
1263 {
1264  mbitmap_t *bitmap; ///< pointer to bitmap
1265  int high; ///< highest bit+1 (multiple of bitset_width)
1266 
1267 public:
1268  bitset_t(void) : bitmap(NULL), high(0) {}
1269  hexapi bitset_t(const bitset_t &m); // copy constructor
1270  ~bitset_t(void)
1271  {
1272  qfree(bitmap);
1273  bitmap = NULL;
1274  }
1275  void swap(bitset_t &r) // exchange the bitmap pointers and sizes, nothing is copied
1276  {
1277  std::swap(bitmap, r.bitmap);
1278  std::swap(high, r.high);
1279  }
1280  bitset_t &operator=(const bitset_t &m); // assignment operator
1281  bool hexapi add(int bit); // add a bit
1282  bool hexapi add(int bit, int width); // add bits
1283  bool hexapi add(const bitset_t &ml); // add another bitset
1284  bool hexapi sub(int bit); // delete a bit
1285  bool hexapi sub(int bit, int width); // delete bits
1286  bool hexapi sub(const bitset_t &ml); // delete another bitset
1287  bool hexapi cut_at(int maxbit); // delete bits >= maxbit
1288  void hexapi shift_down(int shift); // shift bits down
1289  bool hexapi has(int bit) const; // test presence of a bit
1290  bool hexapi has_all(int bit, int width) const; // test presence of all bits of the range
1291  bool hexapi has_any(int bit, int width) const; // test presence of any bit of the range
1292  void print(
1293  qstring *vout,
1294  int (*get_bit_name)(qstring *out, int bit, int width, void *ud)=NULL,
1295  void *ud=NULL) const;
1296  const char *hexapi dstr(void) const;
1297  bool hexapi empty(void) const; // is empty?
1298  int hexapi count(void) const; // number of set bits
1299  int hexapi count(int bit) const; // get the number of set bits starting from 'bit'
1300  int hexapi last(void) const; // get the number of the last bit (-1-no bits)
1301  void clear(void) { high = 0; } // make empty (only 'high' is reset, the bitmap buffer is kept)
1302  void hexapi fill_with_ones(int maxbit);
1303  bool fill_gaps(int total_nbits);
1304  bool hexapi has_common(const bitset_t &ml) const; // has common elements?
1305  bool hexapi intersect(const bitset_t &ml); // intersect sets. returns true if changed
1306  bool hexapi is_subset_of(const bitset_t &ml) const; // is subset of?
1307  bool includes(const bitset_t &ml) const { return ml.is_subset_of(*this); }
1308  void extract(intvec_t &out) const;
1310  DEFINE_MEMORY_ALLOCATION_FUNCS()
1311 #ifndef SWIG
1312  class iterator // position inside the set: wraps a bit number
1313  {
1314  friend class bitset_t;
1315  int i;
1316  iterator(int n) : i(n) {}
1317  public:
1318  bool operator==(const iterator &n) const { return i == n.i; }
1319  bool operator!=(const iterator &n) const { return i != n.i; }
1320  int operator*(void) const { return i; }
1321  };
1322  typedef iterator const_iterator;
1323  iterator itat(int n) const { return iterator(goup(n)); }
1324  iterator begin(void) const { return itat(0); }
1325  iterator end(void) const { return iterator(high); }
1326  int front(void) const { return *begin(); }
1327  int back(void) const { return *end(); } // NOTE(review): yields 'high' (the end position), not the last set bit; last() reports the last bit - confirm this is intended
1328  void inc(iterator &p, int n=1) const { p.i = goup(p.i+n); }
1329 #endif
1330 private:
1331  int hexapi goup(int reg) const;
1332 };
1333 DECLARE_TYPE_AS_MOVABLE(bitset_t);
1334 typedef qvector<bitset_t> array_of_bitsets;
1335 
1336 //-------------------------------------------------------------------------
1337 struct ivl_t // an interval
1338 {
1339 private:
1340  //forbid the default constructor
1341  ivl_t(void) {}
1342  //...except for use in a vector
1343  friend class qvector<ivl_t>;
1344 public:
1345  uval_t off;
1346  asize_t size;
1347  ivl_t(uval_t _off, asize_t _size) : off(_off), size(_size) {}
1348  bool empty(void) const { return size == 0; }
1349  void clear(void) { size = 0; }
1350  uval_t end(void) const { return off+size; }
1351  void print(qstring *vout) const;
1352  const char *hexapi dstr(void) const;
1353  bool extend_to_cover(const ivl_t &r) // extend interval to cover 'r'
1354  {
1355  bool changed = false;
1356  sval_t d = off - r.off;
1357  if ( d > 0 )
1358  {
1359  off -= d;
1360  size += d;
1361  changed = true;
1362  }
1363  if ( end() < r.end() )
1364  {
1365  size = r.end() - off;
1366  changed = true;
1367  }
1368  return changed;
1369  }
1370  void intersect(const ivl_t &r)
1371  {
1372  uval_t s1 = qmax(off, r.off);
1373  uval_t s2 = qmin(end(), r.end());
1374  if ( s1 < s2 )
1375  {
1376  off = s1;
1377  size = s2 - s1;
1378  }
1379  else
1380  {
1381  size = 0;
1382  }
1383  }
1384 
1385  // do *this and ivl overlap?
1386  bool overlap(const ivl_t &ivl) const
1387  {
1388  return interval::overlap(off, size, ivl.off, ivl.size);
1389  }
1390  // does *this include ivl?
1391  bool includes(const ivl_t &ivl) const
1392  {
1393  return interval::includes(off, size, ivl.off, ivl.size);
1394  }
1395  // does *this contain off2?
1396  bool contains(uval_t off2) const
1397  {
1398  return interval::contains(off, size, off2);
1399  }
1400 
1402  DEFINE_MEMORY_ALLOCATION_FUNCS()
1403 
1404  static const ivl_t allmem;
1405 #define ALLMEM ivl_t::allmem
1406 };
1407 DECLARE_TYPE_AS_MOVABLE(ivl_t);
1408 
1409 //-------------------------------------------------------------------------
1410 class ivlset_t // set of intervals
1411 {
1412  typedef qvector<ivl_t> bag_t;
1413  bag_t bag;
1414  bool verify(void) const;
1415 public:
1416  ivlset_t(void) {}
1417  ivlset_t(const ivl_t &ivl);
1418  ivlset_t(const ivlset_t &ivs) : bag(ivs.bag)
1419  {
1420  }
1421  DEFINE_MEMORY_ALLOCATION_FUNCS()
1422 
1423  void swap(ivlset_t &r) { bag.swap(r.bag); }
1424  bool hexapi add(const ivl_t &ivl);
1425  bool add(ea_t ea, asize_t size) { return add(ivl_t(ea, size)); }
1426  bool hexapi add(const ivlset_t &ivs);
1427  bool hexapi addmasked(const ivlset_t &ivs, const ivl_t &mask);
1428  bool hexapi sub(const ivl_t &ivl);
1429  bool sub(ea_t ea, asize_t size) { return sub(ivl_t(ea, size)); }
1430  bool hexapi sub(const ivlset_t &ivs);
1431  bool hexapi has_common(const ivl_t &ivl, bool strict=false) const;
1432  void hexapi print(qstring *vout) const;
1433  const char *hexapi dstr(void) const;
1434  asize_t hexapi count(void) const; // size in bytes
1435  const ivl_t &getivl(int idx) const { return bag[idx]; }
1436  const ivl_t &lastivl(void) const { return bag.back(); }
1437  size_t nivls(void) const { return bag.size(); }
1438  bool empty(void) const { return bag.empty(); }
1439  void clear(void) { bag.clear(); }
1440  void qclear(void) { bag.qclear(); }
1441  bool hexapi has_common(const ivlset_t &ivs) const;
1442  bool hexapi contains(uval_t off) const;
1443  bool hexapi includes(const ivlset_t &ivs) const;
1444  bool hexapi intersect(const ivlset_t &ivs);
1445  bool is_subset_of(const ivlset_t &ivs) const { return ivs.includes(*this); }
1446  bool single_value(uval_t v) const { return nivls() == 1 && bag[0].off == v && bag[0].size == 1; }
1447 
1449  bool operator==(const ivl_t &v) const { return nivls() == 1 && bag[0] == v; }
1450  bool operator!=(const ivl_t &v) const { return !(*this == v); }
1451 
1452  typedef bag_t::iterator iterator;
1453  typedef bag_t::const_iterator const_iterator;
1454  const_iterator begin(void) const { return bag.begin(); }
1455  const_iterator end(void) const { return bag.end(); }
1456  iterator begin(void) { return bag.begin(); }
1457  iterator end(void) { return bag.end(); }
1458 
1459 };
1460 DECLARE_TYPE_AS_MOVABLE(ivlset_t);
1461 typedef qvector<ivlset_t> array_of_ivlsets;
1462 int hexapi get_mreg_name(qstring *out, int bit, int width, void *ud=NULL);
1463 //-------------------------------------------------------------------------
1464 class rlist_t : public bitset_t // list of registers
1465 {
1466 public:
1467  rlist_t(void) {}
1468  rlist_t(const rlist_t &m) : bitset_t(m)
1469  {
1470  }
1471  rlist_t(mreg_t reg, int width) { add(reg, width); }
1472  ~rlist_t(void) {}
1473  void hexapi print(qstring *vout) const;
1474  const char *hexapi dstr(void) const;
1475 };
1476 DECLARE_TYPE_AS_MOVABLE(rlist_t);
1477 
1478 //-------------------------------------------------------------------------
1479 // Microlist: register and memory sets
1480 struct mlist_t
1481 {
1482  rlist_t reg; // registers
1483  ivlset_t mem; // memory locations
1484 
1485  mlist_t(void) {}
1486  mlist_t(const ivl_t &ivl) : mem(ivl) {}
1487  mlist_t(mreg_t r, int size) : reg(r, size) {}
1488 
1489  void swap(mlist_t &r) { reg.swap(r.reg); mem.swap(r.mem); }
1490  bool hexapi addmem(ea_t ea, asize_t size);
1491  bool add(mreg_t r, int size) { return add(mlist_t(r, size)); }
1492  bool add(const rlist_t &r) { return reg.add(r); }
1493  bool add(const ivl_t &ivl) { return add(mlist_t(ivl)); }
1494  bool add(const mlist_t &lst) { return reg.add(lst.reg) | mem.add(lst.mem); }
1495  bool sub(mreg_t r, int size) { return sub(mlist_t(r, size)); }
1496  bool sub(const ivl_t &ivl) { return sub(mlist_t(ivl)); }
1497  bool sub(const mlist_t &lst) { return reg.sub(lst.reg) | mem.sub(lst.mem); }
1498  asize_t count(void) const { return reg.count() + mem.count(); }
1499  void hexapi print(qstring *vout) const;
1500  const char *hexapi dstr(void) const;
1501  bool empty(void) const { return reg.empty() && mem.empty(); }
1502  void clear(void) { reg.clear(); mem.clear(); }
1503  bool has(mreg_t r) const { return reg.has(r); }
1504  bool has_all(mreg_t r, int size) const { return reg.has_all(r, size); }
1505  bool has_any(mreg_t r, int size) const { return reg.has_any(r, size); }
1506  bool has_memory(void) const { return !mem.empty(); }
1507  bool has_allmem(void) const { return mem == ALLMEM; }
1508  bool has_common(const mlist_t &lst) const { return reg.has_common(lst.reg) || mem.has_common(lst.mem); }
1509  bool includes(const mlist_t &lst) const { return reg.includes(lst.reg) && mem.includes(lst.mem); }
1510  bool intersect(const mlist_t &lst) { return reg.intersect(lst.reg) | mem.intersect(lst.mem); }
1511  bool is_subset_of(const mlist_t &lst) const { return lst.includes(*this); }
1512 
1514  DEFINE_MEMORY_ALLOCATION_FUNCS()
1515 };
1516 DECLARE_TYPE_AS_MOVABLE(mlist_t);
1517 typedef qvector<mlist_t> mlistvec_t;
1518 DECLARE_TYPE_AS_MOVABLE(mlistvec_t);
1519 
1520 //-------------------------------------------------------------------------
1521 // abstract graph interface: gdl_graph_t extended with a title and an edge coloring flag
1522 class simple_graph_t : public gdl_graph_t
1523 {
1524 public:
1525  qstring title; // graph title
1526  bool colored_gdl_edges; // NOTE(review): no constructor initializes this flag here; presumably selects colored edges in GDL output - confirm
1527 };
1528 
1529 //-------------------------------------------------------------------------
1530 // various visitors:
1531 
1533 { // context shared by the visitors below; all pointers start as NULL
1534  mbl_array_t *mba; // current block array
1535  mblock_t *blk; // current block
1536  minsn_t *topins; // top level instruction (parent of curins or curins itself)
1537  minsn_t *curins; // currently visited instruction
1538  op_parent_info_t(void) : mba(NULL), blk(NULL), topins(NULL), curins(NULL) {}
1539  DEFINE_MEMORY_ALLOCATION_FUNCS()
1540  bool really_alloc(void) const;
1541 };
1542 
1543 // micro insn visitor
1545 {
1546  virtual int idaapi visit_minsn(void) = 0; // pure virtual callback: concrete visitors must implement it
1547 };
1548 
1549 // micro operand visitor
1551 {
1552  bool prune; // NOTE(review): presumably set by visit_mop() to skip sub-operands -- confirm; not initialized in the lines shown
1553  virtual int idaapi visit_mop(mop_t *op, const tinfo_t *type, bool is_target) = 0; // pure virtual callback: concrete visitors must implement it
1554 };
1555 
1556 // micro source operand visitor
1558 {
1559  virtual int idaapi visit_srcop(minsn_t &ui, mop_t &uop, bool is_src) = 0; // pure virtual callback: concrete visitors must implement it
1560 };
1561 
1562 // scattered mop: visit each of the scattered locations as a separate mop
1564 {
1565  virtual int idaapi visit_scif_mop(const mop_t &r, int off) = 0; // pure virtual callback: concrete visitors must implement it
1566 };
1567 
1568 //-------------------------------------------------------------------------
1569 // micro instruction operand types
1570 
1571 typedef uint8 mopt_t;
1572 const mopt_t
1573  mop_z = 0, // none
1574  mop_r = 1, // register LOW
1575  mop_n = 2, // immediate number constant
1576  mop_str = 3, // immediate string constant
1577  mop_d = 4, // result of another instruction
1578  mop_S = 5, // local stack variable (base:stack bottom) LOW
1579  mop_v = 6, // global variable
1580  mop_b = 7, // micro basic block (mblock_t)
1581  mop_f = 8, // list of arguments
1582  mop_l = 9, // local variable
1583  mop_a = 10, // mop_addr_t: address of operand (mop_l, mop_v, mop_S, mop_r)
1584  mop_h = 11, // helper function
1585  mop_c = 12, // mcases
1586  mop_fn = 13, // floating point constant
1587  mop_p = 14, // operand pair
1588  mop_sc = 15, // scattered
1589 
1590  // used only in the serialized form
  // (these codes reuse numeric values of the regular operand types above,
  // e.g. mop_sl == mop_str == 3 and mop_esc == mop_sc == 15, so the numeric
  // value alone does not tell which of the two sets a code belongs to)
1591  mop_sl = 3, // short local variable encoding (size SPWIDTH bytes)
1592  mop_sn = 13, // small positive constant (size SPWIDTH bytes)
1593  mop_esc = 15, // escape character for serialized form
1594  mop_sstr = 1, // serialized mop_str
1595  mop_sfn = 2, // serialized mop_fn
1596  mop_scs = 3; // serialized mop_sc
1597 
1598 const int NOSIZE = -1; // wrong operand size
1599 
1600 //-------------------------------------------------------------------------
1601 struct lvar_ref_t //-V690
1602 {
1603 public:
1604  mbl_array_t *const mba; // const pointer member: fixed at construction, cannot be reassigned
1605  sval_t off; // offset from the beginning of the variable
1606  int idx; // index into lvars_t
1607  lvar_ref_t(mbl_array_t *m, int i, sval_t o=0) : mba(m), off(o), idx(i) {}
  // assignment copies only 'off' and 'idx'; 'mba' is const and keeps its value
1608  lvar_ref_t &operator=(const lvar_ref_t &r)
1609  {
1610  off = r.off;
1611  idx = r.idx;
1612  return *this;
1613  }
1615  DEFINE_MEMORY_ALLOCATION_FUNCS()
  // exchanges only 'off' and 'idx'; the 'mba' members are left as they are
1616  void swap(lvar_ref_t &r)
1617  {
1618  std::swap(off, r.off);
1619  std::swap(idx, r.idx);
1620  }
1621  lvar_t &hexapi var(void) const; // returns a reference to an lvar_t; implemented outside this header
1622 };
1623 
1624 //-------------------------------------------------------------------------
1626 {
1627  mbl_array_t *const mba; // const pointer member: fixed at construction, cannot be reassigned
1628  sval_t off; // stack offset (from the stack bottom)
1629  stkvar_ref_t(mbl_array_t *m, sval_t o) : mba(m), off(o) {}
1631  DEFINE_MEMORY_ALLOCATION_FUNCS()
  // exchanges only 'off'; the 'mba' members are left as they are
1632  void swap(stkvar_ref_t &r)
1633  {
1634  std::swap(off, r.off);
1635  }
1636  member_t *hexapi get_stkvar(uval_t *p_off) const; // implemented outside this header; NOTE(review): p_off looks like an output parameter -- confirm
1637 };
1638 
1639 //-------------------------------------------------------------------------
1641 {
1642  uint64 value; // number value
1643  uint64 org_value; // original value before changing the operand size
  // '_ea' and 'n' are forwarded to the operand_locator_t base;
  // both 'value' and 'org_value' start out equal to 'v'.
1644  mnumber_t(uint64 v, ea_t _ea=BADADDR, int n=0)
1645  : operand_locator_t(_ea, n), value(v), org_value(v) {}
1646  DEFINE_MEMORY_ALLOCATION_FUNCS()
  // Three-way comparison on 'value' only (the declaration line that owns this
  // body is not part of this listing): returns -1 if value < r.value,
  // 1 if value > r.value and 0 if they are equal. 'org_value' is ignored.
1648  {
1649  if ( value < r.value )
1650  return -1;
1651  if ( value > r.value )
1652  return 1; // "greater" must be positive; returning -1 here would make both a<b and b<a hold
1653  return 0;
1654  }
1655  // always use this function instead of manually modifying the 'value' field
1656  void update_value(uint64 val64)
1657  {
1658  value = val64;
1659  org_value = val64; // keep the remembered original in sync with the new value
1660  }
1661 };
1662 
1663 //-------------------------------------------------------------------------
1664 // scattered operand info
1665 struct scif_t : public vdloc_t
1666 {
1667  mbl_array_t *mba;
1668  qstring name;
1669  tinfo_t type;
  // The name and the type are taken over by swapping: after construction
  // *n and *tif hold whatever the freshly constructed members contained.
  // Both pointers are dereferenced unconditionally.
1670  scif_t(mbl_array_t *_mba, qstring *n, tinfo_t *tif) : mba(_mba)
1671  {
1672  n->swap(name);
1673  tif->swap(type);
1674  }
  // Assigns only the vdloc_t base part; mba, name and type are not touched.
1675  scif_t &operator =(const vdloc_t &loc)
1676  {
1677  *(vdloc_t *)this = loc;
1678  return *this;
1679  }
1680 };
1681 
1682 //-------------------------------------------------------------------------
1683 struct fnumber_t /// Floating point constant.
1684  /// For more details, please see the ieee.h file from IDA SDK.
1685 {
1686  uint16 fnum[6]; ///< Internal representation of the number
1687  int nbytes; ///< Original size of the constant in bytes
1688  operator uint16 *(void) { return fnum; } // implicit conversion to the raw word array
1689  operator const uint16 *(void) const { return fnum; } // const flavour of the conversion above
1690  void hexapi print(qstring *vout) const;
1691  const char *hexapi dstr(void) const;
1692  DEFINE_MEMORY_ALLOCATION_FUNCS()
  // Comparison body (its declaration line is not part of this listing):
  // the result of ecmp() on the two word arrays is returned as is;
  // 'nbytes' does not take part in the comparison.
1694  {
1695  return ecmp(fnum, r.fnum);
1696  }
1697 };
1698 
1699 // Bits to control how we print instructions
1700 #define SHINS_NUMADDR 0x01 // display definition addresses for numbers
1701 #define SHINS_VALNUM 0x02 // display value numbers
1702 #define SHINS_SHORT 0x04 // do not display use-def chains and other attrs
1703 #define SHINS_LDXEA 0x08 // display address of ldx expressions (not used)
1704 
1705 
1706 //-------------------------------------------------------------------------
1707 // How to handle the side effects of change_size().
1708 // Sometimes we need to create a temporary operand and change its size in order
1709 // to check some hypothesis. If we revert our changes, we do not want the
1710 // database (global variables, stack frame, etc.) to change in any manner.
1711 enum side_effect_t
1712 {
1713  NO_SIDEFF, // change operand size but ignore side effects
1714  // if you decide to keep the changed operand,
1715  // handle_new_size() must be called
1716  WITH_SIDEFF, // change operand size and handle side effects
1717  ONLY_SIDEFF, // only handle side effects
1718  ANY_REGSIZE = 0x80, // any register size is permitted
  // NOTE(review): 0x80 looks like a flag bit meant to be
  // combined with one of the values above -- confirm
1719 };
1720 
1721 // max size of simple operands
1722 // there are many exceptions: udts, floating point, xmm/ymm, etc
1723 const int MAX_OPSIZE = 2 * sizeof(ea_t);
1724 const int DOUBLE_OPSIZE = 2 * MAX_OPSIZE;
1725 //-------------------------------------------------------------------------
1726 class mop_t
1727 {
1728  void hexapi copy(const mop_t &rop); // private helper; used by the copy constructor below
1729 public:
1730  // field definitions
1731  mopt_t t; // operand type, one of the mop_... constants
1732  uint8 oprops; // operand properties
1733 #define OPROP_IMPDONE 0x01 // imported operand (a pointer) has been dereferenced
1734 #define OPROP_UDT 0x02 // a struct or union
1735 #define OPROP_FLOAT 0x04 // possibly floating value
1736 #define OPROP_CCFLAGS 0x08 // condition codes register value
1737  uint16 valnum; // value number. 0 means unknown.
1738  // operands with the same value number are equal.
1739  int size; // operand size: 1,2,4,8 bytes or NOSIZE
1740  // for structures, other sizes are allowed
  // operand payload; the mop_... tag named on each line tells
  // for which value of 't' that member is the one in use
1741  union
1742  {
1743  mreg_t r; // mop_r register number
1744  mnumber_t *nnn; // mop_n immediate value
1745  minsn_t *d; // mop_d result (destination) of another instruction
1746  stkvar_ref_t *s; // mop_S stack variable
1747  ea_t g; // mop_v global variable (its linear address)
1748  int b; // mop_b block number (used in jmp,call instructions)
1749  mfuncinfo_t *f; // mop_f function call information
1750  lvar_ref_t *l; // mop_l local variable
1751  mop_addr_t *a; // mop_a variable whose address is taken
1752  char *helper; // mop_h helper function name
1753  char *cstr; // mop_str string constant
1754  mcases_t *c; // mop_c cases
1755  fnumber_t *fpc; // mop_fn floating point constant
1756  mop_pair_t *pair; // mop_p operand pair
1757  scif_t *scif; // mop_sc scattered operand info
1758  };
1759 
1760  // function definitions
  // (setters/getters for the OPROP_... bits; note that no setter for
  // OPROP_FLOAT or OPROP_CCFLAGS is defined in this class)
1761  void set_impptr_done(void) { oprops |= OPROP_IMPDONE; }
1762  void set_udt(void) { oprops |= OPROP_UDT; }
1763  bool is_impptr_done(void) const { return (oprops & OPROP_IMPDONE) != 0; }
1764  bool is_udt(void) const { return (oprops & OPROP_UDT) != 0; }
1765  bool probably_floating(void) const { return (oprops & OPROP_FLOAT) != 0; }
1766  bool is_ccflags(void) const { return (oprops & OPROP_CCFLAGS) != 0; }
1767 
1768  mop_t(void) { zero(); } // empty operand: mop_z, NOSIZE
1769  mop_t(mreg_t rg, int _size) { t = mop_r; oprops = 0; valnum = 0; r = rg; size = _size; } // register operand
1770  mop_t(const mop_t &rop) { copy(rop); }
1771  ~mop_t(void)
1772  {
1773  erase();
1774  }
1775  DEFINE_MEMORY_ALLOCATION_FUNCS()
1776  void zero(void) { t = mop_z; oprops = 0; valnum = 0; size = NOSIZE; } // resets the header fields only; the union is not written
1777  void hexapi swap(mop_t &rop);
1778  void hexapi erase(void);
1779  void erase_but_keep_size(void) { int s2 = size; erase(); size = s2; } // erase(), then restore the previous size
1780  bool hexapi create_from_rlist(const rlist_t &lst);
1781  bool hexapi create_from_mlist(mbl_array_t *mba, const mlist_t &lst, sval_t fullsize);
1782  bool hexapi create_from_ivlset(mbl_array_t *mba, const ivlset_t &ivs, sval_t fullsize);
1783  void hexapi create_from_vdloc(mbl_array_t *mba, const vdloc_t &loc, int _size);
1784  void hexapi create_from_scattered_vdloc(mbl_array_t *mba, const char *name, tinfo_t type, const vdloc_t &loc);
1785  mop_t &hexapi create_from_insn(const minsn_t *m);
1786  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
1787  const char *hexapi dstr(void) const;
1788  void hexapi create_pair(int loreg, int hireg, int halfsize);
1789  void hexapi make_number(uint64 _value, int _size, ea_t _ea=BADADDR, int opnum=0);
1790  bool hexapi make_fpnum(const void *bytes, size_t _size);
1791  // value() can be used only on mop_n operands:
  // ('nnn' is dereferenced without checking 't')
1792  uint64 value(bool is_signed) const { return extend_sign(nnn->value, size, is_signed); }
1793  int64 signed_value(void) const { return value(true); }
1794  uint64 unsigned_value(void) const { return value(false); }
1795  bool is_constant(uint64 *n=NULL, bool is_signed=true) const;
  // true if is_constant() succeeds and the value it stores equals 'n'
1796  bool is_equal_to(uint64 n, bool is_signed=type_signed) const
1797  {
1798  uint64 v;
1799  return is_constant(&v, is_signed) && v == n;
1800  }
1801  bool is_zero(void) const { return is_equal_to(0, false); }
1802  bool is_one(void) const { return is_equal_to(1, false); }
  // constant whose signed 64-bit interpretation is > 0 (zero is excluded)
1803  bool is_positive_constant(void) const
1804  {
1805  uint64 v;
1806  return is_constant(&v, true) && int64(v) > 0;
1807  }
  // constant whose signed 64-bit interpretation is < 0
1808  bool is_negative_constant(void) const
1809  {
1810  uint64 v;
1811  return is_constant(&v, true) && int64(v) < 0;
1812  }
1813  bool hexapi is01(void) const; // can be only 0 or 1?
1814  void hexapi make_helper(const char *name);
1815  bool hexapi may_use_aliased_memory(void) const;
1816  bool is_reg(mreg_t _r, int _size) const { return t == mop_r && r == _r && size == _size; }
1817  bool is_reg(mreg_t _r) const { return t == mop_r && r == _r; }
1818  bool is_reg(void) const { return t == mop_r; }
1819  bool is_cc(void) const { return is_reg() && r >= mr_cf && r < mr_first; } // register in the range [mr_cf, mr_first)
1820  static bool is_bit_reg(mreg_t reg);
1821  bool is_bit_reg(void) const { return is_reg() && is_bit_reg(r); }
1822  bool is_mob(int serial) const { return t == mop_b && b == serial; } // block operand referring to block 'serial'
1823  bool is_scattered(void) const { return t == mop_sc; }
1824  inline bool is_mreg(void) const;
1825  inline bool is_virtual_stack_reg(void) const;
1826  inline bool is_kreg(void) const;
1827  inline bool is_glbaddr() const;
1828  inline bool is_glbaddr(ea_t ea) const;
1829  inline bool is_stkaddr() const;
1830  inline bool is_insn(mcode_t code) const;
1831  bool is_insn(void) const { return t == mop_d; }
1832  bool hexapi has_side_effects(bool include_ldx=false) const;
1833  const minsn_t *get_insn(mcode_t code) const;
1834  minsn_t *get_insn(mcode_t code);
1835  bool is_promoted_arg(void) const { return t == mop_l && l->var().is_promoted_arg(); }
1836  member_t *get_stkvar(uval_t *p_off) const { return s->get_stkvar(p_off); } // dereferences 's' without checking 't' (s is the mop_S payload)
1837  // mop_S or a stack register or a scattered entirely mapped into a continuous stack region
1838  bool hexapi get_stkoff(sval_t *dest) const;
1839  bool equal_mops(const mop_t &rop, int eqflags) const; // eqflags: EQ_... constants
1840  bool operator==(const mop_t &rop) const { return equal_mops(rop, 0); }
1841  bool operator!=(const mop_t &rop) const { return !equal_mops(rop, 0); }
1842  mop_t &operator=(const mop_t &rop) { return assign(rop); }
1843  mop_t &hexapi assign(const mop_t &rop);
1844  bool hexapi low_half(int width);
1845  bool hexapi high_half(int width);
1846  bool hexapi first_half(int width);
1847  bool hexapi second_half(int width);
1848  bool hexapi shift_mop(int offset);
1849  bool hexapi is_sign_extended_from(int width) const;
1850  bool hexapi is_zero_extended_from(int width) const;
  // dispatches to the sign- or zero-extension check depending on 'is_signed'
1851  bool is_extended_from(int width, bool is_signed) const
1852  {
1853  if ( is_signed )
1854  return is_sign_extended_from(width);
1855  else
1856  return is_zero_extended_from(width);
1857  }
1858  bool hexapi change_size(int nsize, side_effect_t sideff=WITH_SIDEFF); // with all possible subtree
1859  bool double_size(side_effect_t sideff=WITH_SIDEFF) { return change_size(size*2, sideff); }
1860  mreg_t hexapi get_regnum(void) const;
1861  int hexapi for_all_ops(mop_visitor_t &mv, const tinfo_t *type=NULL, bool is_target=false);
1862 };
1864 
1866 {
1867 public:
1868  mop_t lop; // low operand
1869  mop_t hop; // high operand
1870  DEFINE_MEMORY_ALLOCATION_FUNCS()
1871 };
1872 
1873 // address of an operand (mop_l, mop_v, mop_S, mop_r)
1874 class mop_addr_t : public mop_t
1875 {
1876 public:
1877  int insize; // how many bytes of the pointed operand can be read
1878  int outsize; // how many bytes of the pointed operand can be written
1879 
1880  mop_addr_t(): insize(NOSIZE), outsize(NOSIZE) {} // empty base operand, both sizes unknown
1881  mop_addr_t(const mop_addr_t &ra)
1882  : mop_t(ra), insize(ra.insize), outsize(ra.outsize) {}
1883  mop_addr_t(const mop_t &ra, int isz, int osz)
1884  : mop_t(ra), insize(isz), outsize(osz) {}
1885 
  // The base part is assigned from a temporary mop_t copy of 'rop',
  // then both sizes are copied.
1886  mop_addr_t &operator=(const mop_addr_t &rop)
1887  {
1888  *(mop_t *)this = mop_t(rop);
1889  insize = rop.insize;
1890  outsize = rop.outsize;
1891  return *this;
1892  }
1893 };
1894 
1895 class mfuncarg_t : public mop_t // #funcarg
1896 {
1897 public:
1898  ea_t ea; // BADADDR after construction
1899  tinfo_t type;
1900  qstring name;
1901  argloc_t argloc; // ida argloc
1902  mfuncarg_t(void) : ea(BADADDR) {}
1903  mfuncarg_t(const mop_t &rarg) : mop_t(rarg), ea(BADADDR) {} // copies the operand part only
1904  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
1905  const char *hexapi dstr(void) const;
1906  void hexapi set_regarg(mreg_t mr, int sz, const tinfo_t &tif);
  // same as above, with the size taken from tif.get_size()
1907  void set_regarg(mreg_t mr, const tinfo_t &tif)
1908  {
1909  set_regarg(mr, tif.get_size(), tif);
1910  }
  // size comes from get_dtype_size(dt); the type is the integer type
  // of that width and the requested signedness
1911  void set_regarg(mreg_t mr, char dt, type_sign_t sign = type_unsigned)
1912  {
1913  int sz = get_dtype_size(dt);
1914  set_regarg(mr, sz, get_int_type_by_width_and_sign(sz, sign));
1915  }
  // BTF_INT typed number of inf.cc.size_i bytes holding 'val'
1916  void make_int(int val, ea_t val_ea, int opno = 0)
1917  {
1918  type = tinfo_t(BTF_INT);
1919  make_number(val, inf.cc.size_i, val_ea, opno);
1920  }
  // BTF_UINT typed number of inf.cc.size_i bytes holding 'val'
  // (note that 'val' is still passed as a signed int)
1921  void make_uint(int val, ea_t val_ea, int opno = 0)
1922  {
1923  type = tinfo_t(BTF_UINT);
1924  make_number(val, inf.cc.size_i, val_ea, opno);
1925  }
1926 };
1927 DECLARE_TYPE_AS_MOVABLE(mfuncarg_t);
1928 typedef qvector<mfuncarg_t> mfuncargs_t;
1929 
1930 enum funcrole_t // function roles
1931  // they are used to calculate use/def lists
1932  // and to recognize a function without using strcmp
1933 {
1934  ROLE_UNK, // unknown function role
1935  ROLE_EMPTY, // empty, does not do anything (maybe spoils regs)
1936  ROLE_MEMSET, // memset(void *dst, uchar value, size_t count);
1937  ROLE_MEMSET32, // memset32(void *dst, uint32 value, size_t count);
1938  ROLE_MEMSET64, // memset64(void *dst, uint64 value, size_t count);
1939  ROLE_MEMCPY, // memcpy(void *dst, const void *src, size_t count);
1940  ROLE_STRCPY, // strcpy(char *dst, const char *src);
1941  ROLE_STRLEN, // strlen(const char *src);
1942  ROLE_STRCAT, // strcat(char *dst, const char *src);
1943  ROLE_TAIL, // char *tail(const char *str);
1944  ROLE_BUG, // BUG() helper macro: never returns, causes exception
1945  ROLE_JUMPOUT, // unconditional jump out of function
1946  ROLE_ALLOCA, // alloca() function
1947  ROLE_BSWAP, // bswap() function (any size)
1948  ROLE_PRESENT, // present() function (used in patterns)
1949  ROLE_CONTAINING_RECORD, // CONTAINING_RECORD() macro
1950  ROLE_FASTFAIL, // __fastfail()
1951  ROLE_READFLAGS, // __readeflags, __readcallersflags
1952  ROLE_IS_MUL_OK, // is_mul_ok
1953  ROLE_SATURATED_MUL, // saturated_mul
1954  ROLE_BITTEST, // [lock] bt
1955  ROLE_BITTESTANDSET, // [lock] bts
1956  ROLE_BITTESTANDRESET, // [lock] btr
1957  ROLE_BITTESTANDCOMPLEMENT, // [lock] btc
1958  ROLE_VA_ARG, // va_arg() macro
1959  ROLE_VA_COPY, // va_copy() function
1960  ROLE_VA_START, // va_start() function
1961  ROLE_VA_END, // va_end() function
1962  ROLE_ROL, // rotate left
1963  ROLE_ROR, // rotate right
1964 };
1965 
1966 #define FUNC_NAME_MEMCPY "memcpy"
1967 #define FUNC_NAME_MEMSET "memset"
1968 #define FUNC_NAME_MEMSET32 "memset32"
1969 #define FUNC_NAME_MEMSET64 "memset64"
1970 #define FUNC_NAME_STRCPY "strcpy"
1971 #define FUNC_NAME_STRLEN "strlen"
1972 #define FUNC_NAME_STRCAT "strcat"
1973 #define FUNC_NAME_TAIL "tail"
1974 #define FUNC_NAME_VA_ARG "va_arg"
1975 #define FUNC_NAME_EMPTY "$empty"
1976 #define FUNC_NAME_PRESENT "$present"
1977 #define FUNC_NAME_CONTAINING_RECORD "CONTAINING_RECORD"
1978 
1979 
1980 // the default 256 function arguments is too big, we use a lower value
1981 #undef MAX_FUNC_ARGS
1982 #define MAX_FUNC_ARGS 64
1983 
1984 class mfuncinfo_t // #funcinfo
1985 {
  // NOTE(review): no constructor is declared in the lines shown, so the
  // scalar fields (callee, solid_args, call_spd, stkargs_top, cc, flags,
  // role) are not initialized by the class itself -- confirm who sets them
1986 public:
1987  ea_t callee; // called function address, if known
1988  int solid_args; // number of solid args
1989  // the rest is varargs
1990  int call_spd; // sp value at call insn
1991  int stkargs_top; // top of stkargs on the stack
1992  cm_t cc; // calling convention
1993  mfuncargs_t args; // function arguments
1994  mopvec_t retregs; // return register(s) (e.g., AX, AX:DX, etc.)
1995  // this vector is built from return_regs
1996  tinfo_t return_type;
1997  argloc_t return_argloc;
1998 
1999  mlist_t return_regs; // everything returned by the function
2000  mlist_t spoiled; // includes return_regs
2001  mlist_t pass_regs; // passthrough registers: registers that depend on input
2002  // values (this is subset of spoiled)
2003  ivlset_t visible_memory; // what memory is visible to the call?
2004  mlist_t dead_regs; // registers defined by the function but never used
2005  // upon propagation we do the following:
2006  // - dead_regs += return_regs
2007  // - ret.clear() since the call is propagated
2008  int flags; // combination of the FCI_... bits below
2009 #define FCI_PROP 0x01 // call has been propagated
2010 #define FCI_DEAD 0x02 // some return registers were determined dead
2011 #define FCI_FINAL 0x04 // function call type is final, should not be changed
2012 #define FCI_NORET 0x08 // function call does not return
2013 #define FCI_PURE 0x10 // pure function
2014 #define FCI_SPLOK 0x20 // function spoiled/visible_memory lists have been
2015  // optimized. for some functions we can reduce them
2016  // as soon as information about the arguments becomes
2017  // available. in order not to try optimize them again
2018  // we use this bit.
2019 #define FCI_NOSIDE 0x40 // function does not have side effects
2020  funcrole_t role; // function role
2021  void hexapi print(qstring *vout, int size=-1, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
2022  const char *hexapi dstr(void) const;
2023 };
2024 
2025 class mcases_t // #cases
2026 {
2027 public:
2028  casevec_t values; // expression values for each target
2029  intvec_t targets; // target block numbers
2030 
2031  void swap(mcases_t &r) { values.swap(r.values); targets.swap(r.targets); } // exchanges both vectors with 'r'
2033  DEFINE_MEMORY_ALLOCATION_FUNCS()
2034  const char *hexapi dstr(void) const;
2035 };
2036 
2037 //-------------------------------------------------------------------------
2038 // ud and du chains
2039 // we combine chains for several adjacent mreg_t if they are the same.
2040 // the number of combined chains is kept in "width"
2041 class chain_t : public intvec_t // sequence of block numbers
2042 {
2043 public:
2044  mreg_t reg; // number of the first chain in the current combination
2045  int width; // number of combined chains
2046  int varnum; // allocated variable (-1 - not allocated yet)
2047  uchar flags; // combination of:
2048 #define CHF_INITED 0x01 // is chain initialized? (valid only after lvar allocation)
2049 #define CHF_STKVAR 0x02 // stkvar chain?
2050 #define CHF_REPLACED 0x04 // chain operands have been replaced?
2051 #define CHF_OVER 0x08 // overlapped chain
2052 #define CHF_FAKE 0x10 // fake chain created by widen_chains()
2053 #define CHF_PASSTHRU 0x20 // pass-thru chain, must use the input variable to the block
2054 #define CHF_TERM 0x40 // terminating chain; the variable does not survive across the block
  // a new chain starts with only CHF_INITED set
2055  chain_t(mreg_t r=mr_none, int w=1, int v=-1)
2056  : reg(r), width(w), varnum(v), flags(CHF_INITED) {}
2057  bool is_inited(void) const { return (flags & CHF_INITED) != 0; }
2058  bool is_stkvar(void) const { return (flags & CHF_STKVAR) != 0; }
2059  bool is_replaced(void) const { return (flags & CHF_REPLACED) != 0; }
2060  bool is_overlapped(void) const { return (flags & CHF_OVER) != 0; }
2061  bool is_fake(void) const { return (flags & CHF_FAKE) != 0; }
2062  bool is_passreg(void) const { return (flags & CHF_PASSTHRU) != 0; }
2063  bool is_term(void) const { return (flags & CHF_TERM) != 0; }
  // setters exist only for INITED, REPLACED, OVER and TERM; the class
  // defines no setter for CHF_STKVAR, CHF_FAKE or CHF_PASSTHRU
2064  void set_inited(bool b) { setflag(flags, CHF_INITED, b); }
2065  void set_replaced(bool b) { setflag(flags, CHF_REPLACED, b); }
2066  void set_overlapped(bool b) { setflag(flags, CHF_OVER, b); }
2067  void set_term(bool b) { setflag(flags, CHF_TERM, b); }
2068  void hexapi print(qstring *vout) const;
2069  const char *hexapi dstr(void) const;
2070 };
2071 //-------------------------------------------------------------------------
2072 // Chains of one block.
2073 // Please note that this class is based on std::map and it must be accessed
2074 // using the block_chains_begin(), block_chains_find() and similar functions.
2075 // This is required because different compilers use different implementations
2076 // of std::map. Moreover, since the size of std::map depends on the compilation
2077 // options, we replace it with a byte array.
2078 #ifdef __NT__
2079 #define SIZEOF_BLOCK_CHAINS 24
2080 #else
2081 #define SIZEOF_BLOCK_CHAINS 56
2082 #endif
2083 
2085 {
2086  size_t body[SIZEOF_BLOCK_CHAINS/sizeof(size_t)]; // opaque std::map, uncopyable
2087 public:
2088  const chain_t *hexapi get_chain(mreg_t reg, int width=1) const;
  // non-const flavour: forwards to the const overload and casts the result back
2089  chain_t *get_chain(mreg_t reg, int width=1)
2090  { return (chain_t*)((const block_chains_t *)this)->get_chain(reg, width); }
2091  void hexapi print(qstring *vout) const;
2092  const char *hexapi dstr(void) const;
2093  DEFINE_MEMORY_ALLOCATION_FUNCS()
2094 };
2095 
2097 {
2098  block_chains_t *parent; // parent of the current chain
2099  chain_visitor_t(void) : parent(NULL) {} // no parent until one is assigned
2100  virtual int idaapi visit_chain(int nblock, chain_t &ch) = 0; // pure virtual callback: concrete visitors must implement it
2101 };
2102 
2103 class graph_chains_t : public qvector<block_chains_t> // chains of a graph
2104 {
2105  int lock; // lock counter: incremented by acquire(), zero means unlocked
2106 public:
2107  graph_chains_t(void) : lock(0) {}
2108  ~graph_chains_t(void) { QASSERT(50444, !lock); } // asserts that the object is not locked when destroyed
2109  int hexapi for_all_chains(chain_visitor_t &cv, int gca_flags); // gca_flags: combination of the GCA_... bits below
2110 #define GCA_EMPTY 0x01 // include empty chains
2111 #define GCA_SPEC 0x02 // include chains for special registers
2112 #define GCA_ALLOC 0x04 // enumerate only allocated chains
2113 #define GCA_NALLOC 0x08 // enumerate only non-allocated chains
2114 #define GCA_OFIRST 0x10 // consider only chains of the first block
2115 #define GCA_OLAST 0x20 // consider only chains of the last block
2116  bool is_locked(void) const { return lock != 0; }
2117  void acquire(void) { lock++; }
2118  void hexapi release(void); // implemented outside this header; NOTE(review): presumably the counterpart of acquire() -- confirm
  // exchanges both the vector contents and the lock counters
2119  void swap(graph_chains_t &r)
2120  {
2121  qvector<block_chains_t>::swap(r);
2122  std::swap(lock, r.lock);
2123  }
2124 };
2125 //-------------------------------------------------------------------------
2126 class minsn_t
2127 {
2128 public:
2129  mcode_t opcode;
2130  int iprops; // instruction properties (see IPROP_... constants)
2131  minsn_t *next; // double linked list. check also nexti(), previ()
2132  minsn_t *prev;
2133  ea_t ea;
2134  mop_t l; // left
2135  mop_t r; // right
2136  mop_t d; // destination
2137  // bits to be used in patterns:
2138 #define IPROP_OPTIONAL 0x0001 // optional instruction
2139 #define IPROP_PERSIST 0x0002 // persistent insn; they are not destroyed
2140 #define IPROP_WILDMATCH 0x0004 // match multiple insns
2141 
2142  // instruction attributes:
2143 #define IPROP_CLNPOP 0x0008 // the purpose of the instructions is to clean stack
2144  // (this flag may be set for pop ecx instruction)
2145 #define IPROP_FPINSN 0x0010 // floating point insn
2146 #define IPROP_FARCALL 0x0020 // call of a far function using push cs/call sequence
2147 #define IPROP_TAILCALL 0x0040 // tail call
2148 #define IPROP_ASSERT 0x0080 // assertion: usually mov #val, op
2149  // assertions help the decompiler with the operand values
2150  // ctree output is not generated for assertions
2151 
2152  // instruction history:
  // (IPROP_SPLIT is the mask of a 3-bit field in bits 8..10; the
  // IPROP_SPLITn values below are the codes 1..4 stored in that field)
2153 #define IPROP_SPLIT 0x0700 // the instruction has been split:
2154 #define IPROP_SPLIT1 0x0100 // into 1 byte
2155 #define IPROP_SPLIT2 0x0200 // into 2 bytes
2156 #define IPROP_SPLIT4 0x0300 // into 4 bytes
2157 #define IPROP_SPLIT8 0x0400 // into 8 bytes
2158 #define IPROP_COMBINED 0x0800 // insn has been modified because of partial reference
2159 #define IPROP_EXTSTX 0x1000 // this is m_ext propagated into m_stx
2160 #define IPROP_IGNLOWSRC 0x2000 // low part of the instruction source operand
2161  // has been created artificially
2162  // (this bit is used only for 'and x, 80...')
2163 #define IPROP_INV_JX 0x4000 // inverted conditional jump
2164 #define IPROP_WAS_NORET 0x8000 // was noret icall
2165 #define IPROP_MULTI_MOV 0x10000 // the minsn was generated as part of insn that moves multiple registers
2166  // (example: STM on ARM may transfer multiple registers)
2167 
2168  // bits that can be set by plugins:
2169 #define IPROP_DONT_PROP 0x20000 // may not propagate
2170 #define IPROP_DONT_COMB 0x40000 // may not combine this instruction with others
2171 
  // property getters; is_propagatable() and is_combinable() are true
  // when the corresponding IPROP_DONT_... bit is clear
2172  bool is_optional(void) const { return (iprops & IPROP_OPTIONAL) != 0; }
2173  bool is_combined(void) const { return (iprops & IPROP_COMBINED) != 0; }
2174  bool is_farcall(void) const { return (iprops & IPROP_FARCALL) != 0; }
2175  bool is_cleaning_pop(void) const { return (iprops & IPROP_CLNPOP) != 0; }
2176  bool is_extstx(void) const { return (iprops & IPROP_EXTSTX) != 0; }
2177  bool is_tailcall(void) const { return (iprops & IPROP_TAILCALL) != 0; }
2178  bool is_fpinsn(void) const { return (iprops & IPROP_FPINSN) != 0; }
2179  bool is_assert(void) const { return (iprops & IPROP_ASSERT) != 0; }
2180  bool is_persistent(void) const { return (iprops & IPROP_PERSIST) != 0; }
2181  bool is_wild_match(void) const { return (iprops & IPROP_WILDMATCH) != 0; }
2182  bool is_propagatable(void) const { return (iprops & IPROP_DONT_PROP) == 0; }
2183  bool is_ignlowsrc(void) const { return (iprops & IPROP_IGNLOWSRC) != 0; }
2184  bool is_inverted_jx(void) const { return (iprops & IPROP_INV_JX) != 0; }
2185  bool was_noret_icall(void) const { return (iprops & IPROP_WAS_NORET) != 0; }
2186  bool is_multimov(void) const { return (iprops & IPROP_MULTI_MOV) != 0; }
2187  bool is_combinable(void) const { return (iprops & IPROP_DONT_COMB) == 0; }
2188  bool was_split(void) const { return (iprops & IPROP_SPLIT) != 0; }
2189 
  // property setters; clr_propagatable()/clr_combinable() SET the
  // IPROP_DONT_... bit, set_combinable() clears it
2190  void set_optional(void) { iprops |= IPROP_OPTIONAL; }
2191  void set_combined(void);
2192  void clr_combined(void) { iprops &= ~IPROP_COMBINED; }
2193  void set_farcall(void) { iprops |= IPROP_FARCALL; }
2194  void set_cleaning_pop(void) { iprops |= IPROP_CLNPOP; }
2195  void set_extstx(void) { iprops |= IPROP_EXTSTX; }
2196  void set_tailcall(void) { iprops |= IPROP_TAILCALL; }
2197  void clr_tailcall(void) { iprops &= ~IPROP_TAILCALL; }
2198  void set_fpinsn(void) { iprops |= IPROP_FPINSN; }
2199  void clr_fpinsn(void) { iprops &= ~IPROP_FPINSN; }
2200  void set_assert(void) { iprops |= IPROP_ASSERT; }
2201  void clr_assert(void) { iprops &= ~IPROP_ASSERT; }
2202  void set_persistent(void) { iprops |= IPROP_PERSIST; }
2203  void set_wild_match(void) { iprops |= IPROP_WILDMATCH; }
2204  void clr_propagatable(void) { iprops |= IPROP_DONT_PROP; }
2205  void set_ignlowsrc(void) { iprops |= IPROP_IGNLOWSRC; }
2206  void clr_ignlowsrc(void) { iprops &= ~IPROP_IGNLOWSRC; }
2207  void set_inverted_jx(void) { iprops |= IPROP_INV_JX; }
2208  void set_noret_icall(void) { iprops |= IPROP_WAS_NORET; }
2209  void clr_noret_icall(void) { iprops &= ~IPROP_WAS_NORET; }
2210  void set_multimov(void) { iprops |= IPROP_MULTI_MOV; }
2211  void clr_multimov(void) { iprops &= ~IPROP_MULTI_MOV; }
2212  void set_combinable(void) { iprops &= ~IPROP_DONT_COMB; }
2213  void clr_combinable(void) { iprops |= IPROP_DONT_COMB; }
  // the split field is always cleared first, so an unsupported 's'
  // leaves the instruction marked as "not split"
2214  void set_split_size(int s)
2215  { // s may be only 1,2,4,8. other values are ignored
2216  iprops &= ~IPROP_SPLIT;
2217  iprops |= (s == 1 ? IPROP_SPLIT1
2218  : s == 2 ? IPROP_SPLIT2
2219  : s == 4 ? IPROP_SPLIT4
2220  : s == 8 ? IPROP_SPLIT8 : 0);
2221  }
  // decodes the split field: code 0 -> 0 (not split), code k -> 1 << (k-1) bytes
2222  int get_split_size(void) const
2223  {
2224  int cnt = (iprops & IPROP_SPLIT) >> 8;
2225  return cnt == 0 ? 0 : 1 << (cnt-1);
2226  }
2227 
2228  minsn_t(ea_t _ea) { init(_ea); }
2229  minsn_t(const minsn_t &m) { next = prev = NULL; copy(m); } // list links are reset before copy() is called
2230  DEFINE_MEMORY_ALLOCATION_FUNCS()
2231  void swap(minsn_t &m) { qswap(*this, m); }
2232  void hexapi swap_nolist(minsn_t &m); // do not modify prev/next fields
2233  minsn_t &operator=(const minsn_t &m) { copy(m); return *this; }
2234  void hexapi init(ea_t _ea);
2235  void hexapi copy(const minsn_t &m);
2236  void hexapi setaddr(ea_t nea);
2237  int optimize_flat(int optflags=0) { return optimize_subtree(NULL, NULL, NULL, NULL, optflags); } // optimize_subtree() without block/top/parent context
2238 #define OPTI_ADDREXPRS 0x0001 // optimize all address expressions (&x+N; &x-&y)
2239 #define OPTI_MINSTKREF 0x0002 // may update minstkref
2240  int hexapi optimize_subtree(
2241  mblock_t *blk,
2242  minsn_t *top,
2243  minsn_t *parent,
2244  minsn_t **converted_call,
2245  int optflags=OPTI_MINSTKREF);
2246 
2247  int hexapi for_src_ops(srcop_visitor_t &sv);
2248  int hexapi for_all_ops(mop_visitor_t &mv);
2249  int hexapi for_all_insns(minsn_visitor_t &mv);
2250  void hexapi _make_nop(void);
2251  bool hexapi is_between(const minsn_t *m1, const minsn_t *m2) const;
2252  bool is_after(const minsn_t *m) const { return m != NULL && is_between(m->next, NULL); } // false when m is NULL
2253  bool hexapi equal_insns(const minsn_t &m, int eqflags) const; // intelligent comparison
2254 #define EQ_IGNSIZE 0x0001 // ignore operand sizes
2255 #define EQ_IGNCODE 0x0002 // ignore instruction opcodes
2256 #define EQ_CMPDEST 0x0004 // compare instruction destinations
2257 #define EQ_OPTINSN 0x0008 // optimize mop_d operands
2258  bool contains_opcode(mcode_t mcode) const { return find_opcode(mcode) != NULL; }
  // const/non-const pairs below forward to the single hexapi
  // implementation and only adjust constness with CONST_CAST
2259  const minsn_t *find_opcode(mcode_t mcode) const { return (CONST_CAST(minsn_t*)(this))->find_opcode(mcode); }
2260  minsn_t *hexapi find_opcode(mcode_t mcode);
2261  const minsn_t *hexapi find_ins_op(const mop_t **other, mcode_t op=m_nop) const;
2262  const mop_t *hexapi find_num_op(const mop_t **other) const;
2263  mop_t *find_num_op(mop_t **other) { return CONST_CAST(mop_t*)((CONST_CAST(const minsn_t*)(this))->find_num_op((const mop_t**)other)); }
2264  minsn_t *find_ins_op(mop_t **other, mcode_t op=m_nop) { return CONST_CAST(minsn_t*)((CONST_CAST(const minsn_t*)(this))->find_ins_op((const mop_t**)other, op)); }
2265  // ops is combination of bits 1, 0x10, 0x100
2266  // 1-l, 0x10-r, 0x100-d
2267  bool change_insn_size(int new_size, int ops, side_effect_t sideff=WITH_SIDEFF);
2268  bool double_insn_size(int ops) { return change_insn_size(l.size*2, ops); } // new size is twice l.size; default side-effect handling
2269  bool is_mov(void) const { return opcode == m_mov || (opcode == m_f2f && l.size == d.size); } // m_mov, or m_f2f that keeps the size
2270  void hexapi print(qstring *vout, int shins_flags=SHINS_SHORT|SHINS_VALNUM) const;
2271  const char *hexapi dstr(void) const;
2272  minsn_t *hexapi find_call(bool with_helpers=false) const;
2273  bool hexapi has_side_effects(bool include_ldx=false) const;
2274  bool hexapi is_helper(const char *name) const;
2275  bool hexapi is_noret_call(bool ignore_noret_icall=false);
2276 private: // empty section: nothing is declared after this label
2277 };
2278 
2279 /// Skip assertions forward
2280 const minsn_t *hexapi getf_reginsn(const minsn_t *ins);
2281 /// Skip assertions backward
2282 const minsn_t *hexapi getb_reginsn(const minsn_t *ins);
2283 inline minsn_t *getf_reginsn(minsn_t *ins) { return CONST_CAST(minsn_t*)(getf_reginsn(CONST_CAST(const minsn_t *)(ins))); }
2284 inline minsn_t *getb_reginsn(minsn_t *ins) { return CONST_CAST(minsn_t*)(getb_reginsn(CONST_CAST(const minsn_t *)(ins))); }
2285 
2286 //-------------------------------------------------------------------------
/// Value range codes
enum vrcode_t
{
  VRC_FAILED = -1,      ///< failed to determine the value, it can be anything
  VRC_OK     = 0,       ///< determined the range ok
  VRC_NONE   = 1,       ///< could not find any assignments
};
2293 
2294 //-------------------------------------------------------------------------
/// Basic block types, classified by the number of successors
enum mblock_type_t
{
  BLT_NONE = 0,         ///< unknown block type
  BLT_STOP = 1,         ///< stops execution (must be the last block)
  BLT_0WAY = 2,         ///< does not have successors (e.g. tail is a noret function)
  BLT_1WAY = 3,         ///< passes execution to one block
  BLT_2WAY = 4,         ///< passes execution to two blocks
  BLT_NWAY = 5          ///< passes execution to many blocks
};
2304 
2305 //-------------------------------------------------------------------------
2307 {
2308  friend class codegen_t;
2309  DECLARE_UNCOPYABLE(mblock_t)
2310 public:
2311  mblock_t *nextb; // next block in the chain
2312  mblock_t *prevb; // previous block in the chain
2313  uint32 flags;
2314 #define MBL_PRIV 0x0001 // private block - no instructions except
2315  // the specified are accepted (used in patterns)
2316 #define MBL_NONFAKE 0x0000 // regular block
2317 #define MBL_FAKE 0x0002 // fake block (after a tail call)
2318 #define MBL_GOTO 0x0004 // this block is a goto target
2319 #define MBL_TCAL 0x0008 // aritifical call block for tail calls
2320 #define MBL_PUSH 0x0010 // needs "convert push/pop instructions"
2321 #define MBL_DMT64 0x0020 // needs "demote 64bits"
2322 #define MBL_COMB 0x0040 // needs "combine" pass
2323 #define MBL_PROP 0x0080 // needs 'propagation' pass
2324 #define MBL_DEAD 0x0100 // needs "eliminate deads" pass
2325 #define MBL_LIST 0x0200 // use/def lists are ready
2326 #define MBL_INCONST 0x0400 // inconsistent lists: we are building them
2327 #define MBL_CALL 0x0800 // call information has been built
2328 #define MBL_BACKPROP 0x1000 // performed backprop_cc
2329 #define MBL_NORET 0x2000 // dead end block: doesn't return execution control
2330  ea_t start; // start address
2331  ea_t end; // end address
2332  minsn_t *head; // pointer to the first instruction
2333  minsn_t *tail; // pointer to the last instruction
2334  mbl_array_t *mba; // the parent array
2335  int serial; // number of the block in the function
2336  mblock_type_t type; // type of block (BLT_NONE - not computed yet)
2337 
2338  mlist_t dead_at_start; // data that is dead at the block entry
2339  mlist_t mustbuse; // data that must be used by the block
2340  mlist_t maybuse; // data that may be used by the block
2341  mlist_t mustbdef; // data that must be defined by the block
2342  mlist_t maybdef; // data that may be defined by the block
2343  rlist_t dnu; // defined but not used data
2344 
2345  sval_t maxbsp; // maximal sp value in the block (0...stacksize)
2346  sval_t minbstkref; // lowest stack location accessible with indirect
2347  // addressing (offset from stack bottom)
2348  // initially 0 (not computed)
2349  sval_t minbargref; // the same for arguments
2350 
2351  intvec_t predset; // predecessors
2352  intvec_t succset; // successors
2353  qstring label; // block label. used for blocks loaded by load_gdl
2354 
2355  void mark_lists_dirty(void) { flags &= ~MBL_LIST; request_propagation(); }
2356  void request_propagation(void) { flags |= MBL_PROP; }
2357  bool needs_propagation(void) const { return (flags & MBL_PROP) != 0; }
2358  void request_demote64(void) { flags |= MBL_DMT64; }
2359  bool lists_dirty(void) const { return (flags & MBL_LIST) == 0; }
2360  bool lists_ready(void) const { return (flags & (MBL_LIST|MBL_INCONST)) == MBL_LIST; }
2361  int make_lists_ready(void) // returns number of changes
2362  {
2363  if ( lists_ready() )
2364  return 0;
2365  return build_lists(false);
2366  }
2367 
2368  int npred(void) const { return predset.size(); } // number of xrefs to the block
2369  int nsucc(void) const { return succset.size(); } // number of xrefs from the block
2370  int pred(int n) const { return predset[n]; }
2371  int succ(int n) const { return succset[n]; }
2372  virtual ~mblock_t(void);
2373  DEFINE_MEMORY_ALLOCATION_FUNCS()
2374  minsn_t *hexapi insert_into_block(minsn_t *nm, minsn_t *om); // insert 'nm' after 'om'
2375  minsn_t *hexapi remove_from_block(minsn_t *m); // returns next minsn. does not destroy m!
2376  void hexapi print(vd_printer_t &vp) const;
2377  void hexapi dump(void) const; // see mbl_array_t::dump for info
2378  AS_PRINTF(2, 0) void hexapi vdump_block(const char *title, va_list va) const;
2379  void hexapi print_block_header(qstrvec_t *vec) const;
2380  int hexapi build_lists(bool kill_deads); // build def-use lists and eliminate deads
2381  // returns the number of eliminated registers
2382 #define MAXRANGE bitrange_t(0, USHRT_MAX)
2383  void hexapi append_use_list(
2384  mlist_t *lst,
2385  const mop_t &op,
2386  maymust_t maymust,
2387  bitrange_t mask=MAXRANGE) const;
2388 
2389  void hexapi append_def_list(mlist_t *lst, const mop_t &op, maymust_t maymust) const;
2390  mlist_t hexapi build_use_list(const minsn_t &ins, maymust_t maymust) const;
2391  mlist_t hexapi build_def_list(const minsn_t &ins, maymust_t maymust) const;
2392  int hexapi for_all_ops(mop_visitor_t &mv);
2393  int hexapi for_all_insns(minsn_visitor_t &mv);
2394  void make_nop(minsn_t *m) { m->_make_nop(); mark_lists_dirty(); }
2395  bool is_used(mlist_t *list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const
2396  { return find_first_use(list, i1, i2, maymust) != NULL; }
2397  // find first insn using mlist_t in [i1, i2) range
2398  // upon return list will contain only regs not redefined by insns [i1..answer]
2399  const minsn_t *hexapi find_first_use(mlist_t *list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const;
2400  minsn_t *find_first_use(mlist_t *list, minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const
2401  {
2402  return CONST_CAST(minsn_t*)(find_first_use(list,
2403  CONST_CAST(const minsn_t*)(i1),
2404  i2,
2405  maymust));
2406  }
2407  bool is_redefined(const mlist_t &list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const
2408  { return find_redefinition(list, i1, i2, maymust) != NULL; }
2409  // is the mlist_t redefined in [i1,i2)?
2410  const minsn_t *hexapi find_redefinition(const mlist_t &list, const minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const;
2411  minsn_t *find_redefinition(const mlist_t &list, minsn_t *i1, const minsn_t *i2, maymust_t maymust=MAY_ACCESS) const
2412  {
2413  return CONST_CAST(minsn_t*)(find_redefinition(list,
2414  CONST_CAST(const minsn_t*)(i1),
2415  i2,
2416  maymust));
2417  }
2418  // is rhs of instruction redefined in [from, to)
2419  bool hexapi is_rhs_redefined(minsn_t *m, minsn_t *from, minsn_t *to);
2420 
2421  bool hexapi is_accessed(mlist_t *list, const minsn_t *i1, const minsn_t *i2, access_type_t access_type, maymust_t maymust=MAY_ACCESS) const;
2422  // visit all using operands
2423  // this function modified list: removes redefined parts from it
2424  int hexapi for_all_uses(mlist_t *list, minsn_t *i1, minsn_t *i2, struct mlist_mop_visitor_t &mmv);
2425 
2426  /// Find the instruction that accesses the specified operand.
2427  /// This function searches inside the current basic block.
2428  /// \param op operand to search for
2429  /// \param parent ptr to ptr to a top level instruction.
2430  /// denotes the beginning of the search range.
2431  /// \param mend end of the search range (excluded). NULL means to search
2432  /// until the block boundary.
2433  /// \fdflags combination of FD_... bits
2434  /// \return the instruction that accesses the operand. this instruction
2435  /// may be a sub-instruction. to find out the top level
2436  /// instruction, check out *parent.
2437  /// NULL means 'not found'.
2438  minsn_t *hexapi find_access(const mop_t &op, minsn_t **parent, const minsn_t *mend, int fdflags) const;
2439 #define FD_BACKWARD 0x0000 // search direction
2440 #define FD_FORWARD 0x0001 // search direction
2441 #define FD_USE 0x0000 // look for use
2442 #define FD_DEF 0x0002 // look for definition
2443 #define FD_DIRTY 0x0004 // ignore possible implicit definitions
2444  // by function calls and indirect memory access
2445  // (not implemented yet)
2446  minsn_t *find_def(const mop_t &op, minsn_t **parent, const minsn_t *mend, int fdflags) { return find_access(op, parent, mend, fdflags|FD_DEF); }
2447  minsn_t *find_use(const mop_t &op, minsn_t **parent, const minsn_t *mend, int fdflags) { return find_access(op, parent, mend, fdflags|FD_USE); }
2448  vrcode_t hexapi find_value_of(ivlset_t *vals, minsn_t *top, const mop_t &op);
2449 };
2450 //-------------------------------------------------------------------------
// Warning ids. The quoted text is the message associated with each id.
enum warnid_t
{
  WARN_VARARG_REGS    = 0,  // can not handle register arguments in vararg function, discarded them
  WARN_ILL_PURGED     = 1,  // odd caller purged bytes %d, correcting
  WARN_ILL_FUNCTYPE   = 2,  // invalid function type has been ignored
  WARN_VARARG_TCAL    = 3,  // can not handle tail call to vararg
  WARN_VARARG_NOSTK   = 4,  // call vararg without local stack
  WARN_VARARG_MANY    = 5,  // too many varargs, some ignored
  WARN_ADDR_OUTARGS   = 6,  // can not handle address arithmetics in outgoing argument area of stack frame -- unused
  WARN_DEP_UNK_CALLS  = 7,  // found interdependent unknown calls
  WARN_ILL_ELLIPSIS   = 8,  // erroneously detected ellipsis type has been ignored
  WARN_GUESSED_TYPE   = 9,  // using guessed type %s;
  WARN_EXP_LINVAR     = 10, // failed to expand a linear variable
  WARN_WIDEN_CHAINS   = 11, // failed to widen chains
  WARN_BAD_PURGED     = 12, // inconsistent function type and number of purged bytes
  WARN_CBUILD_LOOPS   = 13, // too many cbuild loops
  WARN_NO_SAVE_REST   = 14, // could not find valid save-restore pair for %s
  WARN_ODD_INPUT_REG  = 15, // odd input register %s
  WARN_ODD_ADDR_USE   = 16, // odd use of a variable address
  WARN_MUST_RET_FP    = 17, // function return type is incorrect (must be floating point)
  WARN_ILL_FPU_STACK  = 18, // inconsistent fpu stack
  WARN_SELFREF_PROP   = 19, // self-referencing variable has been detected
  WARN_WOULD_OVERLAP  = 20, // variables would overlap: %s
  WARN_ARRAY_INARG    = 21, // array has been used for an input argument
  WARN_MAX_ARGS       = 22, // too many input arguments, some ignored
  WARN_BAD_FIELD_TYPE = 23, // incorrect structure member type for %s::%s, ignored
  WARN_WRITE_CONST    = 24, // write access to const memory at %a has been detected
  WARN_BAD_RETVAR     = 25, // wrong return variable
  WARN_FRAG_LVAR      = 26, // fragmented variable at %s may be wrong
  WARN_HUGE_STKOFF    = 27, // exceedingly huge offset into the stack frame
  WARN_UNINITED_REG   = 28, // reference to an uninitialized register has been removed: %s
  WARN_FIXED_MACRO    = 29, // fixed broken macro-insn
  WARN_WRONG_VA_OFF   = 30, // wrong offset of va_list variable
  WARN_CR_NOFIELD     = 31, // CONTAINING_RECORD: no field '%s' in struct '%s' at %d
  WARN_CR_BADOFF      = 32, // CONTAINING_RECORD: too small offset %d for struct '%s'
  WARN_BAD_STROFF     = 33, // user specified stroff has not been processed: %s
  WARN_BAD_VARSIZE    = 34, // inconsistent variable size for '%s'
  WARN_UNSUPP_REG     = 35, // unsupported processor register '%s'
  WARN_UNALIGNED_ARG  = 36, // unaligned function argument '%s'
  WARN_BAD_STD_TYPE   = 37, // corrupted or unexisting local type '%s'
  WARN_BAD_CALL_SP    = 38, // bad sp value at call
  WARN_MISSED_SWITCH  = 39, // wrong markup of switch jump, skipped it
  WARN_BAD_SP         = 40, // positive sp value %a has been found
  WARN_BAD_STKPNT     = 41, // wrong sp change point

  WARN_MAX,                 // one past the last warning id
};
2499 
2500 // Warnings
2502 {
2503  ea_t ea;
2504  warnid_t id;
2505  qstring text;
2507  {
2508  if ( ea < r.ea )
2509  return -1;
2510  if ( ea > r.ea )
2511  return 1;
2512  if ( id < r.id )
2513  return -1;
2514  if ( id > r.id )
2515  return 1;
2516  return strcmp(text.c_str(), r.text.c_str());
2517  }
2518 };
2519 DECLARE_TYPE_AS_MOVABLE(hexwarn_t);
2520 typedef qvector<hexwarn_t> hexwarns_t;
2521 
2522 //-------------------------------------------------------------------------
/// Microcode maturity levels
enum mba_maturity_t
{
  MMAT_ZERO,            ///< microcode does not exist
  MMAT_GENERATED,       ///< generated microcode
  MMAT_PREOPTIMIZED,    ///< preoptimized pass is complete
  MMAT_LOCOPT,          ///< local optimization of each basic block is complete
  MMAT_CALLS,           ///< detected call arguments
  MMAT_GLBOPT1,         ///< performed the first pass of global optimization
  MMAT_GLBOPT2,         ///< most global optimization passes are done
  MMAT_GLBOPT3,         ///< completed all global optimization
  MMAT_LVARS,           ///< allocated local variables
};
2536 
2537 //-------------------------------------------------------------------------
2538 /// Ranges to decompile. Either a function, either explicit vector of ranges.
2540 {
2541  func_t *pfn; ///< function to decompile
2542  rangevec_t ranges; ///< empty ? snippet mode : function mode
2543  mba_ranges_t(func_t *_pfn=NULL) : pfn(_pfn) {}
2544  mba_ranges_t(const rangevec_t &r) : pfn(NULL), ranges(r) {}
2545  ea_t start(void) const { return (ranges.empty() ? *pfn : ranges[0]).start_ea; }
2546  bool empty(void) const { return pfn == NULL && ranges.empty(); }
2547  void clear(void) { pfn = NULL; ranges.clear(); }
2548  bool is_snippet(void) const { return !ranges.empty(); }
2549  bool range_contains(ea_t ea) const;
2550  bool is_fragmented(void) const { return ranges.empty() ? pfn->tailqty > 0 : ranges.size() > 1; }
2551 };
2552 
2553 /// Item iterator of arbitrary rangevec items
2555 {
2556  const rangevec_t *ranges;
2557  const range_t *rptr; // pointer into ranges
2558  ea_t cur; // current address
2559  range_item_iterator_t(void) : ranges(NULL), rptr(NULL), cur(BADADDR) {}
2560  bool set(const rangevec_t &r);
2561  bool next_code(void);
2562  ea_t current(void) const { return cur; }
2563 };
2564 
2565 /// Item iterator for mba_ranges_t
2567 {
2569  func_item_iterator_t fii; // this is used if rii.ranges==NULL
2570  bool is_snippet(void) const { return rii.ranges != NULL; }
2571  bool set(const mba_ranges_t &mbr)
2572  {
2573  if ( mbr.is_snippet() )
2574  return rii.set(mbr.ranges);
2575  else
2576  return fii.set(mbr.pfn);
2577  }
2578  bool next_code(void)
2579  {
2580  if ( is_snippet() )
2581  return rii.next_code();
2582  else
2583  return fii.next_code();
2584  }
2585  ea_t current(void) const
2586  {
2587  return is_snippet() ? rii.current() : fii.current();
2588  }
2589 };
2590 
2591 /// Chunk iterator of arbitrary rangevec items
2593 {
2594  const range_t *rptr; // pointer into ranges
2595  const range_t *rend;
2596  range_chunk_iterator_t(void) : rptr(NULL), rend(NULL) {}
2597  bool set(const rangevec_t &r) { rptr = r.begin(); rend = r.end(); return rptr != rend; }
2598  bool next(void) { return ++rptr != rend; }
2599  const range_t &chunk(void) const { return *rptr; }
2600 };
2601 
2602 /// Chunk iterator for mba_ranges_t
2604 {
2606  func_tail_iterator_t fii; // this is used if rii.rptr==NULL
2607  bool is_snippet(void) const { return rii.rptr != NULL; }
2608  bool set(const mba_ranges_t &mbr)
2609  {
2610  if ( mbr.is_snippet() )
2611  return rii.set(mbr.ranges);
2612  else
2613  return fii.set(mbr.pfn);
2614  }
2615  bool next(void)
2616  {
2617  if ( is_snippet() )
2618  return rii.next();
2619  else
2620  return fii.next();
2621  }
2622  const range_t &chunk(void) const
2623  {
2624  return is_snippet() ? rii.chunk() : fii.chunk();
2625  }
2626 };
2627 
2628 //-------------------------------------------------------------------------
2629 /// Array of micro blocks represents the currently decompiled function.
2630 /// The first micro block is the entry point, the last one is the exit point.
2631 /// The entry and exit blocks are always empty. The exit block is generated
2632 /// at MMAT_LOCOPT maturity level.
2633 class mbl_array_t
2634 {
2635  DECLARE_UNCOPYABLE(mbl_array_t)
2636  uint32 flags;
2637  uint32 flags2;
2638 public:
2639 /*
2640  +-----------+ <- inargtop
2641  | prmN |
2642  | ... | <- minargref
2643  | prm0 |
2644  +-----------+ <- inargoff
2645  |shadow_args|
2646  +-----------+
2647  | retaddr |
2648  frsize+frregs +-----------+ <- initial esp |
2649  | frregs | |
2650  +frsize +-----------+ <- typical ebp |
2651  | | | |
2652  | | | fpd |
2653  | | | |
2654  | frsize | <- current ebp |
2655  | | |
2656  | | |
2657  | | | stacksize
2658  | | |
2659  | | |
2660  | | <- minstkref |
2661  stkvar base off 0 +---.. | | | current
2662  | | | | stack
2663  | | | | pointer
2664  | | | | range
2665  | tmpstk | | | (what getspd() returns)
2666  | | | |
2667  | | | |
2668  +-----------+ <- minimal sp | | offset 0 for the decompiler (vd)
2669 */
2670 
2671  // convert a stack offset used in vd to a stack offset used in ida stack frame
2672  sval_t stkoff_vd2ida(sval_t off) const
2673  {
2674  return off - tmpstk_size;
2675  }
2676  // convert an ida stack frame offset to a stack offset used in vd
2677  sval_t stkoff_ida2vd(sval_t off) const
2678  {
2679  return off + tmpstk_size;
2680  }
 // retsize + stacksize; passed as 'spd' to the location converters below
2681  sval_t argbase() const
2682  {
2683  return retsize + stacksize;
2684  }
 // convert an ida argument location to a vd location
2685  static vdloc_t idaloc2vd(const argloc_t &loc, int width, sval_t spd);
 // the same, using argbase() of this mba as 'spd'
2686  vdloc_t idaloc2vd(const argloc_t &loc, int width) const
2687  {
2688  return idaloc2vd(loc, width, argbase());
2689  }
2690  // helper for mvm.get_func_output_regs
 // 'mba' may be NULL; 0 is used as 'spd' in that case
2691  static vdloc_t idaloc2vd(const mbl_array_t *mba, const argloc_t &loc, int width)
2692  {
2693  return mbl_array_t::idaloc2vd(loc, width, mba == NULL ? 0 : mba->argbase());
2694  }
2695 
 // convert a vd location to an ida argument location
2696  static argloc_t vd2idaloc(const vdloc_t &loc, int width, sval_t spd);
 // the same, using argbase() of this mba as 'spd'
2697  argloc_t vd2idaloc(const vdloc_t &loc, int width) const
2698  {
2699  return vd2idaloc(loc, width, argbase());
2700  }
2701 
 // is the variable located on the stack at or above inargoff?
2702  bool is_stkarg(const lvar_t &v) const
2703  {
2704  return v.location.is_stkoff() && v.location.stkoff() >= inargoff;
2705  }
2706  member_t *get_stkvar(sval_t vd_stkoff, uval_t *poff) const;
2707  // get lvar location
2708  argloc_t get_ida_argloc(const lvar_t &v) const
2709  {
2710  return vd2idaloc(v.location, v.width);
2711  }
2712  mba_ranges_t mbr;
2713  ea_t entry_ea;
2714  ea_t last_prolog_ea;
2715  ea_t first_epilog_ea;
2716  int qty; // number of basic blocks
2717  int npurged; // -1 - unknown
2718  cm_t cc; // calling convention
2719  sval_t tmpstk_size; // size of the temporary stack part
2720  // (which dynamically changes with push/pops)
2721  sval_t frsize; // size of local stkvars range in the stack frame
2722  sval_t frregs; // size of saved registers range in the stack frame
2723  sval_t fpd; // frame pointer delta
2724  int pfn_flags; // copy of func_t::flags
2725  int retsize; // size of return address in the stack frame
2726  int shadow_args; // size of shadow argument area
2727  sval_t fullsize; // Full stack size including incoming args
2728  sval_t stacksize; // The maximal size of the function stack including
2729  // bytes allocated for outgoing call arguments
2730  // (up to retaddr)
2731  sval_t inargoff; // offset of the first stack argument
2732  sval_t minstkref; // The lowest stack location whose address was taken
2733  ea_t minstkref_ea; // address with lowest minstkref (for debugging)
2734  sval_t minargref; // The lowest stack argument location whose address was taken
2735  // This location and locations above it can be aliased
2736  // It controls locations >= inargoff-shadow_args
2737  ivl_t aliased_vars; // Aliased stkvar locations
2738  ivl_t aliased_args; // Aliased stkarg locations
2739  ivlset_t gotoff_stkvars; // stkvars that hold .got offsets. considered to be unaliasable
2740  ivlset_t restricted_memory;
2741  ivlset_t aliased_memory; // aliased_memory+restricted_memory=ALLMEM
2742  mlist_t nodel_memory; // global dead elimination may not delete references to this area
2743  rlist_t consumed_argregs; // registers converted into stack arguments, should not be used as arguments
2744 
2745  mba_maturity_t maturity; // current maturity level
2746  mba_maturity_t reqmat; // required maturity level
2747 
2748  bool final_type; // is the function type final? (specified by the user)
2749  tinfo_t idb_type;
2750  reginfovec_t idb_spoiled; // MBL_SPLINFO && final_type: info in ida format
2751  mlist_t spoiled_list; // MBL_SPLINFO && !final_type: info in vd format
2752  int fti_flags; // FTI_... constants for the current function
2753 
2754  netnode idb_node;
2755 #define NALT_VD 2 // this index is not used by ida
2756 
2757  qstring label; // name of the function or pattern (colored)
2758  lvars_t vars; // local variables
2759  intvec_t argidx; // input arguments (indexes into 'vars')
2760  int retvaridx; // index of variable holding the return value
2761  // -1 means none
2762 
2763  ea_t error_ea; // during microcode generation holds ins.ea
2764  qstring error_strarg;
2765 
2766  mblock_t *blocks; // double linked list of blocks
2767  mblock_t **natural; // natural order of blocks
2768 
2769  mutable hexwarns_t notes;
2770  mutable uchar occurred_warns[32]; // occurred warning messages
2771  // (even disabled warnings are taken into account)
 // the helpers below test the bit of one warning id in occurred_warns
2772  bool write_to_const_detected(void) const
2773  {
2774  return test_bit(occurred_warns, WARN_WRITE_CONST);
2775  }
2776  bool bad_call_sp_detected(void) const
2777  {
2778  return test_bit(occurred_warns, WARN_BAD_CALL_SP);
2779  }
2780  bool regargs_is_not_aligned(void) const
2781  {
2782  return test_bit(occurred_warns, WARN_UNALIGNED_ARG);
2783  }
2784  bool has_bad_sp(void) const
2785  {
2786  return test_bit(occurred_warns, WARN_BAD_SP);
2787  }
2788 
2789  // the exact size of this class is not documented, there may be more fields
2790  char reserved[];
2791  mbl_array_t(void);
2792  ~mbl_array_t(void) { term(); }
2793  DEFINE_MEMORY_ALLOCATION_FUNCS()
2794  void hexapi term(void);
2795  void clear(void);
 // function being decompiled (mbr.pfn); NULL if there is none
2796  func_t *get_curfunc(void) const { return mbr.pfn; }
 // true if there is a function (mbr.pfn != NULL)
2797  bool use_frame(void) const { return mbr.pfn != NULL; }
2798  bool range_contains(ea_t ea) const { return mbr.range_contains(ea); }
2799  /// Optimize each basic block locally
 /// \param todo combination of LOCOPT_... bits
2800  /// \return number of changes. 0 means nothing changed
2801  int hexapi optimize_local(int todo); // locopt_all_blocks + refine_return_type
2802 #define LOCOPT_ALL 0x0001 // redo optimization for all blocks. if this bit
2803  // is not set, only dirty blocks will be optimized
2804 #define LOCOPT_REFINE 0x0002 // refine return type, ok to fail
2805 #define LOCOPT_REFINE2 0x0004 // refine return type, try harder
2806  /// Build control flow graph
2807  /// This function may be called only once. It calculates the type of each
2808  /// basic block and the adjacency list.
2809  /// \return error code
2810  merror_t hexapi build_graph(void);
2811  /// Get control graph
2812  mbl_graph_t *hexapi get_graph(void);
2813  /// Analyze calls and determine calling conventions
2814  /// \param acflags permitted actions that are necessary for successful detection
2815  /// of calling conventions (combination of ACFL_... bits).
2816  /// \return number of calls. -1 means error.
2817  int hexapi analyze_calls(int acflags);
2818 #define ACFL_LOCOPT 0x01 /// perform local propagation (requires ACFL_BLKOPT)
2819 #define ACFL_BLKOPT 0x02 /// perform interblock transformations
2820 #define ACFL_GLBPROP 0x04 /// perform global propagation
2821 #define ACFL_GLBDEL 0x08 /// perform dead code elimination
2822 #define ACFL_GUESS 0x10 /// may guess calling conventions
2823  /// Optimize microcode globally
2824  /// This function applies various optimization methods until we reach the
2825  /// fixed point. At all preallocates lvars unless reqmat forbids it.
2826  /// \return error code
2827  merror_t hexapi optimize_global(void);
2828  /// Allocate local variables.
2829  /// Must be called only immediately after optimize_global(). Converts registers,
2830  /// stack variables, and similar operands into mop_l. This call will not fail
2831  /// because all necessary checks were performed in optimize_global().
2832  /// After this call the microcode reaches its final state. However, data
2833  /// dependencies are lost after lvar allocation.
2834  void hexapi alloc_lvars(void);
2835  /// Dump microcode to a file
2836  /// The file will be created in the directory pointed by IDA_DUMPDIR envvar.
2837  /// Dump will be created only if IDA is run under debugger.
2838  void hexapi dump(void) const;
 // n-th block in natural order; 'n' is not range checked
2839  const mblock_t *get_mblock(int n) const { return natural[n]; }
2840  mblock_t *get_mblock(int n) { return CONST_CAST(mblock_t*)((CONST_CAST(const mbl_array_t *)(this))->get_mblock(n)); }
2841  int hexapi for_all_ops(mop_visitor_t &mv);
2842  int hexapi for_all_insns(minsn_visitor_t &mv);
2843  int hexapi for_all_topinsns(minsn_visitor_t &mv);
2844  bool hexapi remove_empty_blocks(void);
2845  bool hexapi combine_blocks(void);
2846  AS_PRINTF(3, 0) void hexapi vdump_mba(bool _verify, const char *title, va_list va) const;
2847  void hexapi print(vd_printer_t &vp) const;
2848  void hexapi serialize(bytevec_t &b) const; // See also deserialize_mbl_array()
2849  void hexapi verify(bool always) const;
2850  void hexapi make_chains_dirty(void);
2851  lvar_t &hexapi arg(int n);
 // const flavor: forwards to the non-const arg()
2852  const lvar_t &arg(int n) const { return CONST_CAST(mbl_array_t*)(this)->arg(n); }
2853 };
2854 //-------------------------------------------------------------------------
2855 // convenience class to release graph chains automatically
2857 {
2858  graph_chains_t *gc;
2859  chain_keeper_t &operator=(const chain_keeper_t &); // not defined
2860 public:
2861  chain_keeper_t(graph_chains_t *_gc) : gc(_gc) { QASSERT(50446, gc != NULL); gc->acquire(); }
2862  ~chain_keeper_t(void)
2863  {
2864  gc->release();
2865  }
2866  block_chains_t &operator[](size_t idx) { return (*gc)[idx]; }
2867  block_chains_t &front(void) { return gc->front(); }
2868  block_chains_t &back(void) { return gc->back(); }
2869  operator graph_chains_t &(void) { return *gc; }
2870  int for_all_chains(chain_visitor_t &cv, int gca) { return gc->for_all_chains(cv, gca); }
2871  DEFINE_MEMORY_ALLOCATION_FUNCS()
2872 };
2873 
2874 //-------------------------------------------------------------------------
/// Kind of du-ud chains to calculate
enum gctype_t
{
  GC_ONLY_REGS,                 ///< only registers
  GC_REGS_AND_STKVARS,          ///< registers and stkvars
  GC_ASR,                       ///< all the above and assertions
  GC_XDSU,                      ///< only registers calculated with FULL_XDSU
  GC_END,                       ///< number of chain types
  GC_DIRTY_ALL = (1 << (2*GC_END))-1, ///< bitmask to represent all chains
                                      ///< (two bits per chain type)
};
2885 
2886 #ifndef SWIG
2888 {
2889  mbl_array_t *mba;
2890  int dirty;
2891  int chain_stamp; // we increment this counter each time chains are recalculated
2892  graph_chains_t gcs[2*GC_END];
2893 
2894  bool hexapi is_accessed_globally(
2895  const mlist_t &list, // list to verify
2896  int b1, // starting block
2897  int b2, // ending block
2898  const minsn_t *m1, // starting instruction (in b1)
2899  const minsn_t *m2, // ending instruction (in b2)
2900  access_type_t access_type,
2901  maymust_t maymust) const;
2902 
2903  int get_ud_gc_idx(gctype_t gctype) const { return (gctype << 1); }
2904  int get_du_gc_idx(gctype_t gctype) const { return (gctype << 1)+1; }
2905  int get_ud_dirty_bit(gctype_t gctype) { return 1 << get_ud_gc_idx(gctype); }
2906  int get_du_dirty_bit(gctype_t gctype) { return 1 << get_du_gc_idx(gctype); }
2907 
2908 public:
2909  void make_chains_dirty(void) { dirty = GC_DIRTY_ALL; }
2910  bool is_ud_chain_dirty(gctype_t gctype)
2911  {
2912  int bit = get_ud_dirty_bit(gctype);
2913  return (dirty & bit) != 0;
2914  }
2915  bool is_du_chain_dirty(gctype_t gctype)
2916  {
2917  int bit = get_du_dirty_bit(gctype);
2918  return (dirty & bit) != 0;
2919  }
2920  int get_chain_stamp(void) const { return chain_stamp; }
2921 
2922  graph_chains_t *hexapi get_ud(gctype_t gctype);
2923  graph_chains_t *hexapi get_du(gctype_t gctype);
2924  // is the list redefined/used in the graph?
2925  // b2 may be = -1 and m2 = NULL. This means to follow all paths
2926  bool is_redefined_globally(const mlist_t &list, int b1, int b2, const minsn_t *m1, const minsn_t *m2, maymust_t maymust=MAY_ACCESS) const
2927  { return is_accessed_globally(list, b1, b2, m1, m2, WRITE_ACCESS, maymust); }
2928  bool is_used_globally(const mlist_t &list, int b1, int b2, const minsn_t *m1, const minsn_t *m2, maymust_t maymust=MAY_ACCESS) const
2929  { return is_accessed_globally(list, b1, b2, m1, m2, READ_ACCESS, maymust); }
2930  mblock_t *get_mblock(int n) const { return mba->get_mblock(n); }
2931  void compute_stkvar_chains(
2932  graph_chains_t &du,
2933  graph_chains_t &ud,
2934  int flags,
2935  block_chains_t *p_reachcall,
2936  int reachcall_block) const;
2937  void compute_stkvar_ud(graph_chains_t *sud, int chcalc_flags=0) const;
2938  void compute_stkvar_du(graph_chains_t *sdu, int chcalc_flags=0) const;
2939 };
2940 #endif
2941 
2942 //-------------------------------------------------------------------------
2943 // helper class to generate the initial microcode
2945 {
2946 public:
2947  mbl_array_t *mba;
2948  mblock_t *mb;
2949  insn_t insn;
2950  char ignore_micro;
2951 
2953  : mba(m), mb(NULL), ignore_micro(IM_NONE) {}
2954  virtual ~codegen_t(void)
2955  {
2956  }
2957 
2958  // Analyze prolog/epilog of the function to decompile
2959  // If found, allocate and fill 'mba->pi' structure.
2960  virtual merror_t idaapi analyze_prolog(
2961  const class qflow_chart_t &fc,
2962  const class bitset_t &reachable) = 0;
2963 
2964  // Generate microcode for one instruction
2965  virtual merror_t idaapi gen_micro() = 0;
2966 
2967  // Generate microcode to load one operand
2968  virtual mreg_t idaapi load_operand(int opnum) = 0;
2969 };
2970 
2971 //-------------------------------------------------------------------------
2972 /// Get decompiler version.
2973 /// The returned string is of the form <major>.<minor>.<revision>.<build-date>
2974 /// \return pointer to version string. For example: "2.0.0.140605"
2975 
2976 const char *hexapi get_hexrays_version(void);
2977 
2978 
2979 /// Open pseudocode window.
2980 /// The specified function is decompiled and the pseudocode window is opened.
2981 /// \param ea function to decompile
2982 /// \param new_window 0:reuse existing window; 1:open new window;
2983 /// -1: reuse existing window if the current view is pseudocode
2984 /// \return false if failed
2985 
2986 vdui_t *hexapi open_pseudocode(ea_t ea, int new_window);
2987 
2988 
2989 /// Close pseudocode window.
2990 /// \param f pointer to window
2991 /// \return false if failed
2992 
2994 
2995 
2996 /// Get the vdui_t instance associated to the TWidget
2997 /// \param f pointer to window
2998 /// \return a vdui_t *, or NULL
2999 
3001 
3002 
3003 /// \defgroup VDRUN_ Batch decompilation bits
3004 //@{
3005 #define VDRUN_NEWFILE 0x0000 ///< Create a new file or overwrite existing file
3006 #define VDRUN_APPEND 0x0001 ///< Create a new file or append to existing file
3007 #define VDRUN_ONLYNEW 0x0002 ///< Fail if output file already exists
3008 #define VDRUN_SILENT 0x0004 ///< Silent decompilation
3009 #define VDRUN_SENDIDB 0x0008 ///< Send problematic databases to hex-rays.com
3010 #define VDRUN_MAYSTOP 0x0010 ///< the user can cancel decompilation
3011 #define VDRUN_CMDLINE 0x0020 ///< called from ida's command line
3012 #define VDRUN_STATS 0x0040 ///< print statistics into vd_stats.txt
3013 //@}
3014 
3015 /// Batch decompilation.
3016 /// Decompile all or the specified functions
3017 /// \return true if no internal error occurred and the user has not cancelled decompilation
3018 /// \param outfile name of the output file
3019 /// \param funcaddrs list of functions to decompile.
3020 /// If NULL or empty, then decompile all nonlib functions
3021 /// \param flags \ref VDRUN_
3022 
3023 bool hexapi decompile_many(const char *outfile, eavec_t *funcaddrs, int flags);
3024 
3025 
3026 /// Exception object: decompiler failure information
3028 {
3029  merror_t code; ///< \ref MERR_
3030  ea_t errea; ///< associated address
3031  qstring str; ///< string information
3032  hexrays_failure_t(void) : code(MERR_OK), errea(BADADDR) {}
3033  hexrays_failure_t(merror_t c, ea_t ea, const char *buf=NULL) : code(c), errea(ea), str(buf) {}
3034  hexrays_failure_t(merror_t c, ea_t ea, const qstring &buf) : code(c), errea(ea), str(buf) {}
3035  qstring hexapi desc(void) const;
3036  DEFINE_MEMORY_ALLOCATION_FUNCS()
3037 };
3038 
3039 /// Exception object: decompiler exception. Derives from std::exception and carries a hexrays_failure_t.
3040 struct vd_failure_t : public std::exception
3041 {
3042  hexrays_failure_t hf; ///< failure details; every constructor below only initializes this member
3043  vd_failure_t(void) {} ///< leaves hf default-constructed
3044  vd_failure_t(merror_t code, ea_t ea, const char *buf=NULL) : hf(code, ea, buf) {} ///< error code, address, optional C string
3045  vd_failure_t(merror_t code, ea_t ea, const qstring &buf) : hf(code, ea, buf) {} ///< error code, address, qstring text
3046  vd_failure_t(const hexrays_failure_t &_hf) : hf(_hf) {} ///< copy an existing failure description
3047  qstring desc(void) const { return hf.desc(); } ///< forwards to hexrays_failure_t::desc()
3048 #ifdef __GNUC__
3049  ~vd_failure_t(void) throw() {} ///< empty destructor with a throw() specification, declared only when __GNUC__ is defined
3050 #endif
3051  DEFINE_MEMORY_ALLOCATION_FUNCS()
3052 };
3053 
3054 /// Exception object: decompiler internal error. A vd_failure_t whose error code is always MERR_INTERR.
3055 struct vd_interr_t : public vd_failure_t
3056 {
3057  vd_interr_t(ea_t ea, const qstring &buf) : vd_failure_t(MERR_INTERR, ea, buf) {} ///< address and qstring text
3058  vd_interr_t(ea_t ea, const char *buf) : vd_failure_t(MERR_INTERR, ea, buf) {} ///< address and C string text
3059 };
3060 
3061 /// Send the database to Hex-Rays.
3062 /// This function sends the current database to the hex-rays server.
3063 /// The database is sent in the compressed form over an encrypted (SSL) connection.
3064 /// \param err failure description object. Empty hexrays_failure_t object can be used if error information is not available.
3065 /// \param silent if false, a dialog box will be displayed before sending the database.
3066 
3067 void hexapi send_database(const hexrays_failure_t &err, bool silent);
3068 
3069 const char *hexapi dstr(const tinfo_t *tif);
3070 mbl_array_t *hexapi deserialize_mbl_array(const uchar *bytes, size_t nbytes);
3071 void hexapi remitem(const citem_t *e);
3072 //-------------------------------------------------------------------------
3073 /// Ctree element type. At the beginning of this list there are expression
3074 /// elements (cot_...), followed by statement elements (cit_...).
3076 {
3077  cot_empty = 0,
3078  cot_comma = 1, ///< x, y
3079  cot_asg = 2, ///< x = y
3080  cot_asgbor = 3, ///< x |= y
3081  cot_asgxor = 4, ///< x ^= y
3082  cot_asgband = 5, ///< x &= y
3083  cot_asgadd = 6, ///< x += y
3084  cot_asgsub = 7, ///< x -= y
3085  cot_asgmul = 8, ///< x *= y
3086  cot_asgsshr = 9, ///< x >>= y signed
3087  cot_asgushr = 10, ///< x >>= y unsigned
3088  cot_asgshl = 11, ///< x <<= y
3089  cot_asgsdiv = 12, ///< x /= y signed
3090  cot_asgudiv = 13, ///< x /= y unsigned
3091  cot_asgsmod = 14, ///< x %= y signed
3092  cot_asgumod = 15, ///< x %= y unsigned
3093  cot_tern = 16, ///< x ? y : z
3094  cot_lor = 17, ///< x || y
3095  cot_land = 18, ///< x && y
3096  cot_bor = 19, ///< x | y
3097  cot_xor = 20, ///< x ^ y
3098  cot_band = 21, ///< x & y
3099  cot_eq = 22, ///< x == y int or fpu (see EXFL_FPOP)
3100  cot_ne = 23, ///< x != y int or fpu (see EXFL_FPOP)
3101  cot_sge = 24, ///< x >= y signed or fpu (see EXFL_FPOP)
3102  cot_uge = 25, ///< x >= y unsigned
3103  cot_sle = 26, ///< x <= y signed or fpu (see EXFL_FPOP)
3104  cot_ule = 27, ///< x <= y unsigned
3105  cot_sgt = 28, ///< x > y signed or fpu (see EXFL_FPOP)
3106  cot_ugt = 29, ///< x > y unsigned
3107  cot_slt = 30, ///< x < y signed or fpu (see EXFL_FPOP)
3108  cot_ult = 31, ///< x < y unsigned
3109  cot_sshr = 32, ///< x >> y signed
3110  cot_ushr = 33, ///< x >> y unsigned
3111  cot_shl = 34, ///< x << y
3112  cot_add = 35, ///< x + y
3113  cot_sub = 36, ///< x - y
3114  cot_mul = 37, ///< x * y
3115  cot_sdiv = 38, ///< x / y signed
3116  cot_udiv = 39, ///< x / y unsigned
3117  cot_smod = 40, ///< x % y signed
3118  cot_umod = 41, ///< x % y unsigned
3119  cot_fadd = 42, ///< x + y fp
3120  cot_fsub = 43, ///< x - y fp
3121  cot_fmul = 44, ///< x * y fp
3122  cot_fdiv = 45, ///< x / y fp
3123  cot_fneg = 46, ///< -x fp
3124  cot_neg = 47, ///< -x
3125  cot_cast = 48, ///< (type)x
3126  cot_lnot = 49, ///< !x
3127  cot_bnot = 50, ///< ~x
3128  cot_ptr = 51, ///< *x, access size in 'ptrsize'
3129  cot_ref = 52, ///< &x
3130  cot_postinc = 53, ///< x++
3131  cot_postdec = 54, ///< x--
3132  cot_preinc = 55, ///< ++x
3133  cot_predec = 56, ///< --x
3134  cot_call = 57, ///< x(...)
3135  cot_idx = 58, ///< x[y]
3136  cot_memref = 59, ///< x.m
3137  cot_memptr = 60, ///< x->m, access size in 'ptrsize'
3138  cot_num = 61, ///< n
3139  cot_fnum = 62, ///< fpc
3140  cot_str = 63, ///< string constant
3141  cot_obj = 64, ///< obj_ea
3142  cot_var = 65, ///< v
3143  cot_insn = 66, ///< instruction in expression, internal representation only
3144  cot_sizeof = 67, ///< sizeof(x)
3145  cot_helper = 68, ///< arbitrary name
3146  cot_type = 69, ///< arbitrary type
3147  cot_last = cot_type,
3148  cit_empty = 70, ///< instruction types start here
3149  cit_block = 71, ///< block-statement: { ... }
3150  cit_expr = 72, ///< expression-statement: expr;
3151  cit_if = 73, ///< if-statement
3152  cit_for = 74, ///< for-statement
3153  cit_while = 75, ///< while-statement
3154  cit_do = 76, ///< do-statement
3155  cit_switch = 77, ///< switch-statement
3156  cit_break = 78, ///< break-statement
3157  cit_continue = 79, ///< continue-statement
3158  cit_return = 80, ///< return-statement
3159  cit_goto = 81, ///< goto-statement
3160  cit_asm = 82, ///< asm-statement
3161  cit_end
3162 };
3163 
3164 /// \defgroup fixtype_t C operator writing styles
3165 /// Used in operator_info_t::fixtype
3166 //@{
3167 const uchar
3168  FX_NONE = 0, ///< not applicable
3169  FX_INFIX = 1, ///< infix: a + b
3170  FX_PREFIX = 2, ///< prefix: *a
3171  FX_POSTFIX = 3, ///< postfix: a++
3172  FX_TERNARY = 4; ///< ternary: a ? b : c
3173 //@}
3174 
3175 /// \defgroup opattrs_t C operator attributes
3176 /// Used in operator_info_t::flags
3177 //@{
3178 const uchar
3179  COI_RL = 0x00, ///< right to left
3180  COI_LR = 0x01, ///< left to right
3181  COI_INT = 0x02, ///< requires integer operands
3182  COI_FP = 0x04, ///< requires floating point operands
3183  COI_SH = 0x08, ///< is shift operation?
3184  COI_SGN = 0x10, ///< sign sensitive?
3185  COI_SBN = 0x20; ///< is simple binary?
3186 //@}
3187 
3188 /// Information about C operator
3190 {
3191  DEFINE_MEMORY_ALLOCATION_FUNCS()
3192  const char *text; ///< Text representation
3193  uchar precedence; ///< Operator precedence (lower: more priority)
3194  uchar valency; ///< Number of operator arguments
3195  uchar fixtype; ///< \ref fixtype_t
3196  uchar flags; ///< \ref opattrs_t
3197 };
3198 
3199 
3200 
3201 /// Negate a comparison operator. For example, \ref cot_sge becomes \ref cot_slt
3203 /// Get operator sign. Meaningful for sign-dependent operators, like \ref cot_sdiv
3204 type_sign_t hexapi get_op_signness(ctype_t op);
3205 /// Convert plain operator into assignment operator. For example, \ref cot_add returns \ref cot_asgadd
3207 /// Convert assignment operator into plain operator. For example, \ref cot_asgadd returns \ref cot_add
3208 /// \return cot_empty if the input operator is not an assignment operator.
3210 /// Does operator use the 'x' field of cexpr_t? True for every code from cot_comma through cot_memptr.
3211 inline bool op_uses_x(ctype_t op) { return op >= cot_comma && op <= cot_memptr; }
3212 /// Does operator use the 'y' field of cexpr_t? True for cot_comma through cot_fdiv, plus cot_idx.
3213 inline bool op_uses_y(ctype_t op) { return (op >= cot_comma && op <= cot_fdiv) || op == cot_idx; }
3214 /// Does operator use the 'z' field of cexpr_t? Only cot_tern does.
3215 inline bool op_uses_z(ctype_t op) { return op == cot_tern; }
3216 /// Is binary operator? Any operator that uses 'y', except the ternary cot_tern.
3217 inline bool is_binary(ctype_t op) { return op_uses_y(op) && op != cot_tern; } // x,y
3218 /// Is unary operator? True for cot_fneg through cot_predec.
3219 inline bool is_unary(ctype_t op) { return op >= cot_fneg && op <= cot_predec; }
3220 /// Is comparison operator? True for cot_eq through cot_ult.
3221 inline bool is_relational(ctype_t op) { return op >= cot_eq && op <= cot_ult; }
3222 /// Is assignment operator? True for cot_asg through cot_asgumod (plain and compound assignments).
3223 inline bool is_assignment(ctype_t op) { return op >= cot_asg && op <= cot_asgumod; }
3224 /// Can operate on UDTs? True for cot_asg, cot_comma and every code above cot_last.
3225 inline bool accepts_udts(ctype_t op) { return op == cot_asg || op == cot_comma || op > cot_last; }
3226 /// Is pre/post increment/decrement operator? True for cot_postinc through cot_predec.
3227 inline bool is_prepost(ctype_t op) { return op >= cot_postinc && op <= cot_predec; }
3228 /// Is commutative operator? True for |, ^, &, +, *, fp +, fp *, != and ==.
3229 inline bool is_commutative(ctype_t op)
3230 {
3231  return op == cot_bor
3232  || op == cot_xor
3233  || op == cot_band
3234  || op == cot_add
3235  || op == cot_mul
3236  || op == cot_fadd
3237  || op == cot_fmul
3238  || op == cot_ne
3239  || op == cot_eq;
3240 }
3241 /// Is additive operator? True for integer and floating point + and - (cot_add, cot_sub, cot_fadd, cot_fsub).
3242 inline bool is_additive(ctype_t op)
3243 {
3244  return op == cot_add
3245  || op == cot_sub
3246  || op == cot_fadd
3247  || op == cot_fsub;
3248 }
3249 /// Is multiplicative operator?
3251 {
3252  return op == cot_mul
3253  || op == cot_sdiv
3254  || op == cot_udiv
3255  || op == cot_fmul
3256  || op == cot_fdiv;
3257 }
3258 
3259 /// Is bit related operator? True for |, ^, & and ~ (cot_bor, cot_xor, cot_band, cot_bnot); shifts are not included.
3260 inline bool is_bitop(ctype_t op)
3261 {
3262  return op == cot_bor
3263  || op == cot_xor
3264  || op == cot_band
3265  || op == cot_bnot;
3266 }
3267 
3268 /// Is logical operator? True for ||, && and ! (cot_lor, cot_land, cot_lnot).
3269 inline bool is_logical(ctype_t op)
3270 {
3271  return op == cot_lor
3272  || op == cot_land
3273  || op == cot_lnot;
3274 }
3275 
3276 /// Is loop statement code? True for the for-, while- and do-statements (cit_for, cit_while, cit_do).
3277 inline bool is_loop(ctype_t op)
3278 {
3279  return op == cit_for
3280  || op == cit_while
3281  || op == cit_do;
3282 }
3283 /// Does a break statement influence the specified statement code?
3285 {
3286  return is_loop(op) || op == cit_switch;
3287 }
3288 
3289 /// Is Lvalue operator? Decided from the operator code alone; see EXFL_LVALUE for expressions that are lvalues despite their code.
3290 inline bool is_lvalue(ctype_t op)
3291 {
3292  return op == cot_ptr // *x
3293  || op == cot_idx // x[y]
3294  || op == cot_memref // x.m
3295  || op == cot_memptr // x->m
3296  || op == cot_obj // obj_ea
3297  || op == cot_var; // v
3298 }
3299 
3300 /// Is the operator allowed on small struni (structure/union)?
3302 {
3303  return op == cit_return
3304  || op == cot_asg
3305  || op == cot_eq
3306  || op == cot_ne
3307  || op == cot_comma
3308  || op == cot_tern
3309  || (op > cot_last && op < cit_end); // any insn
3310 }
3311 
3312 /// An immediate number
3314 {
3315  uint64 _value; ///< its value
3316  number_format_t nf; ///< how to represent it
3317  cnumber_t(int _opnum=0) : _value(0), nf(_opnum) {}
3318 
3319  /// Get text representation
3320  /// \param vout output buffer
3321  /// \param type number type
3322  /// \param parent parent expression
3323  /// \param nice_stroff out: printed as stroff expression
3324  void hexapi print(
3325  qstring *vout,
3326  const tinfo_t &type,
3327  const citem_t *parent=NULL,
3328  bool *nice_stroff=NULL) const;
3329 
3330 
3331  /// Get value.
3332  /// This function will properly extend the number sign to 64bits
3333  /// depending on the type sign.
3334  uint64 hexapi value(const tinfo_t &type) const;
3335 
3336  /// Assign new value
3337  /// \param v new value
3338  /// \param nbytes size of the new value in bytes
3339  /// \param sign sign of the value
3340  void hexapi assign(uint64 v, int nbytes, type_sign_t sign);
3341 
3343 };
3344 
3345 /// Reference to a local variable
3347 {
3348  mbl_array_t *mba; ///< pointer to the underlying micro array
3349  int idx; ///< index into lvars_t
3350  DEFINE_MEMORY_ALLOCATION_FUNCS()
3352 };
3353 
3354 /// Vector of parents
3355 typedef qvector<citem_t *> ctree_items_t;
3356 typedef ctree_items_t parents_t;
3357 
3358 /// A generic helper class that is used for ctree traversal
3360 {
3361  DEFINE_MEMORY_ALLOCATION_FUNCS()
3362  int cv_flags; ///< \ref CV_
3363 /// \defgroup CV_ Ctree visitor property bits
3364 /// Used in ctree_visitor_t::cv_flags
3365 //@{
3366 #define CV_FAST 0x0000 ///< do not maintain parent information
3367 #define CV_PRUNE 0x0001 ///< this bit is set by visit...() to prune the walk
3368 #define CV_PARENTS 0x0002 ///< maintain parent information
3369 #define CV_POST 0x0004 ///< call the leave...() functions
3370 #define CV_RESTART 0x0008 ///< restart enumeration at the top expr (apply_to_exprs)
3371 #define CV_INSNS 0x0010 ///< visit only statements, prune all expressions
3372  ///< do not use before the final ctree maturity because
3373  ///< expressions may contain statements at intermediate
3374  ///< stages (see cot_insn). Otherwise you risk missing
3375  ///< statements embedded into expressions.
3376 //@}
3377  /// Should the parent information be maintained?
3378  bool maintain_parents(void) const { return (cv_flags & CV_PARENTS) != 0; }
3379  /// Should the traversal skip the children of the current item?
3380  bool must_prune(void) const { return (cv_flags & CV_PRUNE) != 0; }
3381  /// Should the traversal restart?
3382  bool must_restart(void) const { return (cv_flags & CV_RESTART) != 0; }
3383  /// Should the leave...() functions be called?
3384  bool is_postorder(void) const { return (cv_flags & CV_POST) != 0; }
3385  /// Should all expressions be automatically pruned?
3386  bool only_insns(void) const { return (cv_flags & CV_INSNS) != 0; }
3387  /// Prune children.
3388  /// This function may be called by a visitor() to skip all children of the current item.
3389  void prune_now(void) { cv_flags |= CV_PRUNE; }
3390  /// Do not prune children. This is an internal function, no need to call it.
3391  void clr_prune(void) { cv_flags &= ~CV_PRUNE; }
3392  /// Restart the traversal. Meaningful only in apply_to_exprs()
3393  void set_restart(void) { cv_flags |= CV_RESTART; }
3394  /// Do not restart. This is an internal function, no need to call it.
3395  void clr_restart(void) { cv_flags &= ~CV_RESTART; }
3396 
3397  parents_t parents; ///< Vector of parents of the current item
3398 
3399  /// Constructor.
3400  /// This constructor can be used with CV_FAST, CV_PARENTS
3401  /// combined with CV_POST, CV_INSNS
3402  ctree_visitor_t(int _flags) : cv_flags(_flags) {}
3403 
3404  DEFINE_VIRTUAL_DTOR(ctree_visitor_t);
3405  /// Traverse ctree.
3406  /// The traversal will start at the specified item and continue until
3407  /// one of the visit_...() functions returns a non-zero value.
3408  /// \param item root of the ctree to traverse
3409  /// \param parent parent of the specified item. can be specified as NULL.
3410  /// \return 0 or a non-zero value returned by a visit_...() function
3411  int hexapi apply_to(citem_t *item, citem_t *parent);
3412 
3413  /// Traverse only expressions.
3414  /// The traversal will start at the specified item and continue until
3415  /// one of the visit_...() functions returns a non-zero value.
3416  /// \param item root of the ctree to traverse
3417  /// \param parent parent of the specified item. can be specified as NULL.
3418  /// \return 0 or a non-zero value returned by a visit_...() function
3419  int hexapi apply_to_exprs(citem_t *item, citem_t *parent);
3420 
3421  /// Get parent of the current item as an expression
3422  cexpr_t *parent_expr(void) { return (cexpr_t *)parents.back(); }
3423  /// Get parent of the current item as a statement
3424  cinsn_t *parent_insn(void) { return (cinsn_t *)parents.back(); }
3425 
3426  // the following functions are redefined by the derived class
3427  // in order to perform the desired actions during the traversal
3428 
3429  /// Visit a statement.
3430  /// This is a visitor function which should be overridden by a derived
3431  /// class to do some useful work.
3432  /// This visitor performs pre-order traversal, i.e. an item is visited before
3433  /// its children.
3434  /// \return 0 to continue the traversal, nonzero to stop.
3435  virtual int idaapi visit_insn(cinsn_t *) { return 0; }
3436 
3437  /// Visit an expression.
3438  /// This is a visitor function which should be overridden by a derived
3439  /// class to do some useful work.
3440  /// This visitor performs pre-order traversal, i.e. an item is visited before
3441  /// its children.
3442  /// \return 0 to continue the traversal, nonzero to stop.
3443  virtual int idaapi visit_expr(cexpr_t *) { return 0; }
3444 
3445  /// Visit a statement after having visited its children
3446  /// This is a visitor function which should be overridden by a derived
3447  /// class to do some useful work.
3448  /// This visitor performs post-order traversal, i.e. an item is visited after
3449  /// its children.
3450  /// \return 0 to continue the traversal, nonzero to stop.
3451  virtual int idaapi leave_insn(cinsn_t *) { return 0; }
3452 
3453  /// Visit an expression after having visited its children
3454  /// This is a visitor function which should be overridden by a derived
3455  /// class to do some useful work.
3456  /// This visitor performs post-order traversal, i.e. an item is visited after
3457  /// its children.
3458  /// \return 0 to continue the traversal, nonzero to stop.
3459  virtual int idaapi leave_expr(cexpr_t *) { return 0; }
3460 };
3461 
3462 /// A helper ctree traversal class that maintains parent information
3464 {
3465  ctree_parentee_t(bool post=false)
3466  : ctree_visitor_t((post ? CV_POST : 0)|CV_PARENTS) {}
3467 
3468  /// Recalculate types of parent node.
3469  /// If a node type has been changed, the visitor must recalculate
3470  /// all parent types, otherwise the ctree becomes inconsistent.
3471  /// If during this recalculation a parent node is added/deleted,
3472  /// this function returns true. In this case it is recommended
3473  /// to restart the traversal because the information about parent nodes
3474  /// is stale.
3475  /// \return false-ok to continue the traversal, true-must stop.
3476  bool hexapi recalc_parent_types(void);
3477 };
3478 
3479 /// Class to traverse the whole function
3481 {
3482  cfunc_t *func; ///< Pointer to current function
3483  cfunc_parentee_t(cfunc_t *f, bool post=false)
3484  : ctree_parentee_t(post), func(f) {}
3485 
3486  /// Calculate rvalue type.
3487  /// This function tries to determine the type of the specified item
3488  /// based on its context. For example, if the current expression is the
3489  /// right side of an assignment operator, the type
3490  /// of its left side will be returned. This function can be used to determine the 'best'
3491  /// type of the specified expression.
3492  /// \param[in] e expression to determine the desired type
3493  /// \param[out] target 'best' type of the expression will be returned here
3494  /// \return false if failed
3495  bool hexapi calc_rvalue_type(tinfo_t *target, const cexpr_t *e);
3496 };
3497 
3498 /// Ctree maturity level. The level will increase
3499 /// as we switch from one phase of ctree generation to the next one
3501 {
3502  CMAT_ZERO, ///< does not exist
3503  CMAT_BUILT, ///< just generated
3504  CMAT_TRANS1, ///< applied first wave of transformations
3505  CMAT_NICE, ///< nicefied expressions
3506  CMAT_TRANS2, ///< applied second wave of transformations
3507  CMAT_CPA, ///< corrected pointer arithmetic
3508  CMAT_TRANS3, ///< applied third wave of transformations
3509  CMAT_CASTED, ///< added necessary casts
3510  CMAT_FINAL, ///< ready-to-use
3511 };
3512 
3513 //--------------------------------------------------------------------------
3514 /// Comment item preciser.
3515 /// Item preciser is used to assign comments to ctree items
3516 /// A ctree item may have several comments attached to it. For example,
3517 /// an if-statement may have the following comments: <pre>
3518 /// if ( ... ) // cmt1
3519 /// { // cmt2
3520 /// } // cmt3
3521 /// else // cmt4
3522 /// { -- usually the else block has a separate ea
3523 /// } </pre>
3524 /// The first 4 comments will have the same ea. In order to denote the exact
3525 /// line for the comment, we store the item_preciser along with ea.
3527 {
3528  // inner comments (comments within an expression)
3529  ITP_EMPTY, ///< nothing
3530  ITP_ARG1, ///< , (64 entries are reserved for 64 call arguments)
3531  ITP_ARG64 = ITP_ARG1+63, // ,
3532  ITP_BRACE1, // (
3533  ITP_INNER_LAST = ITP_BRACE1,
3534  // outer comments
3535  ITP_ASM, ///< __asm-line
3536  ITP_ELSE, ///< else-line
3537  ITP_DO, ///< do-line
3538  ITP_SEMI, ///< semicolon
3539  ITP_CURLY1, ///< {
3540  ITP_CURLY2, ///< }
3541  ITP_BRACE2, ///< )
3542  ITP_COLON, ///< : (label)
3543  ITP_BLOCK1, ///< opening block comment. this comment is printed before the item
3544  ///< (other comments are indented and printed after the item)
3545  ITP_BLOCK2, ///< closing block comment.
3546  ITP_CASE = 0x40000000, ///< bit for switch cases
3547  ITP_SIGN = 0x20000000, ///< if this bit is set too, then we have a negative case value
3548  // this is a hack, we better introduce special indexes for case values
3549  // case value >= ITP_CASE will be processed incorrectly
3550 };
3551 /// Ctree location. Used to denote comment locations.
3553 {
3554  DEFINE_MEMORY_ALLOCATION_FUNCS()
3555  ea_t ea;
3556  item_preciser_t itp;
3557  bool operator < (const treeloc_t &r) const
3558  {
3559  return ea < r.ea
3560  || (ea == r.ea && itp < r.itp);
3561  }
3562  bool operator == (const treeloc_t &r) const
3563  {
3564  return ea == r.ea && itp == r.itp;
3565  }
3566 };
3567 
3568 /// Comment retrieval type.
3569 /// Ctree remembers what comments have already been retrieved.
3570 /// This is done because our mechanism of item_precisers is still
3571 /// not perfect and in theory some listing lines can not be told
3572 /// apart. To avoid comment duplication, we remember if a comment
3573 /// has already been used or not.
3575 {
3576  RETRIEVE_ONCE, ///< Retrieve comment if it has not been used yet
3577  RETRIEVE_ALWAYS, ///< Retrieve comment even if it has been used
3578 };
3579 
3580 /// Ctree item comment.
3581 /// For each comment we remember its body (the inherited qstring) and the fact of its retrieval
3582 struct citem_cmt_t : public qstring
3583 {
3584  mutable bool used; ///< the comment has been retrieved? mutable, so it can be changed on a const object
3585  citem_cmt_t(void) : used(false) {} ///< empty comment, not retrieved yet
3586  citem_cmt_t(const char *s) : qstring(s), used(false) {} ///< comment with body 's', not retrieved yet
3587 };
3588 
3589 // Comments are attached to tree locations:
3590 typedef std::map<treeloc_t, citem_cmt_t> user_cmts_t;
3591 
3592 /// Generic ctree element locator. It can be used for instructions and some expression
3593 /// types. However, we need more precise locators for other items (e.g. for numbers)
3595 {
3596  ea_t ea; ///< citem address
3597  ctype_t op; ///< citem operation
3598 private:
3599  //forbid the default constructor
3600  citem_locator_t(void) {}
3601 public:
3602  citem_locator_t(ea_t _ea, ctype_t _op) : ea(_ea), op(_op) {}
3603  citem_locator_t(const citem_t *i);
3605  DEFINE_MEMORY_ALLOCATION_FUNCS()
3606 };
3607 
3608 // citem_t::iflags are attached to (ea,op) pairs
3609 typedef std::map<citem_locator_t, int32> user_iflags_t;
3610 
3611 // union field selections
3612 // they are represented as a vector of integers. each integer represents the
3613 // number of union field (0 means the first union field, etc)
3614 // the size of this vector is equal to the number of nested unions in the selection.
3615 typedef std::map<ea_t, intvec_t> user_unions_t;
3616 
3617 //--------------------------------------------------------------------------
3618 /// Basic ctree element. This is conceptually an abstract class (but we don't use
3619 /// virtual functions in ctree, so the compiler will not prevent you from creating
3620 /// citem_t instances). However, elements of pure citem_t type must never be created.
3621 /// Two classes, cexpr_t and cinsn_t, are derived from it.
3622 struct citem_t
3623 {
3624  ea_t ea; ///< address that corresponds to the item
3625  ctype_t op; ///< element type
3626  int label_num; ///< label number. -1 means no label. items of expression
3627  ///< types (cot_...) should not have labels at the final maturity
3628  ///< level, but at the intermediate levels any ctree element
3629  ///< may have a label. Labels must be unique. Usually
3630  ///< they correspond to the basic block numbers.
3631  mutable int index; ///< item index. meaningful only after print_func()
3632  citem_t(void) : ea(BADADDR), op(cot_empty), label_num(-1), index(-1) {} ///< empty item: no address, no label, no index
3633  citem_t(ctype_t o) : ea(BADADDR), op(o), label_num(-1), index(-1) {} ///< item of type 'o': no address, no label, no index
3634  /// Swap two citem_t. Exchanges ea, op and label_num; 'index' is left untouched.
3635  void swap(citem_t &r)
3636  {
3637  std::swap(ea, r.ea);
3638  std::swap(op, r.op);
3639  std::swap(label_num, r.label_num);
3640  }
3641  /// Is an expression? True for every code up to and including cot_last.
3642  bool is_expr(void) const { return op <= cot_last; }
3643  /// Does the item contain a label?
3644  bool hexapi contains_label(void) const;
3645  /// Find parent of the specified item.
3646  /// \param sitem Item to find the parent of. The search will be performed
3647  /// among the children of the item pointed by \c this.
3648  /// \return NULL if not found
3649  const citem_t *hexapi find_parent_of(const citem_t *sitem) const;
3650  citem_t *find_parent_of(const citem_t *item) ///< non-const overload: forwards to the const version and casts the result
3651  { return CONST_CAST(citem_t*)((CONST_CAST(const citem_t*)(this))->find_parent_of(item)); }
3652  citem_t *hexapi find_closest_addr(ea_t _ea); // NOTE(review): undocumented; presumably returns the item whose address is closest to _ea - confirm
3653  void print1(qstring *vout, const cfunc_t *func) const; // NOTE(review): undocumented here; cexpr_t::print1 prints an expression into one line - confirm this one behaves alike
3654  ~citem_t(void) // non-virtual destructor; its only action is the remitem() call below
3655  {
3656  remitem(this);
3657  }
3658  DEFINE_MEMORY_ALLOCATION_FUNCS()
3659 };
3660 
3661 /// Ctree element: expression.
3662 /// Depending on the exact expression item type, various fields of this structure are used.
3663 struct cexpr_t : public citem_t
3664 {
3665  union
3666  {
3667  cnumber_t *n; ///< used for \ref cot_num
3668  fnumber_t *fpc; ///< used for \ref cot_fnum
3669  struct
3670  {
3671  union
3672  {
3673  var_ref_t v; ///< used for \ref cot_var
3674  ea_t obj_ea; ///< used for \ref cot_obj
3675  };
3676  int refwidth; ///< how many bytes are accessed? (-1: none)
3677  };
3678  struct
3679  {
3680  cexpr_t *x; ///< the first operand of the expression
3681  union
3682  {
3683  cexpr_t *y; ///< the second operand of the expression
3684  carglist_t *a;///< argument list (used for \ref cot_call)
3685  uint32 m; ///< member offset (used for \ref cot_memptr, \ref cot_memref)
3686  ///< for unions, the member number
3687  };
3688  union
3689  {
3690  cexpr_t *z; ///< the third operand of the expression
3691  int ptrsize; ///< memory access size (used for \ref cot_ptr, \ref cot_memptr)
3692  };
3693  };
3694  cinsn_t *insn; ///< an embedded statement, they are prohibited
3695  ///< at the final maturity stage (\ref CMAT_FINAL)
3696  char *helper; ///< helper name (used for \ref cot_helper)
3697  char *string; ///< string constant (used for \ref cot_str)
3698  };
3699  tinfo_t type; ///< expression type. must be carefully maintained
3700  int exflags; ///< \ref EXFL_
3701 /// \defgroup EXFL_ Expression attributes
3702 /// Used in cexpr_t::exflags
3703 //@{
3704 #define EXFL_CPADONE 0x0001 ///< pointer arithmetic correction done
3705 #define EXFL_LVALUE 0x0002 ///< expression is lvalue even if it doesn't look like it
3706 #define EXFL_FPOP 0x0004 ///< floating point operation
3707 #define EXFL_ALONE 0x0008 ///< standalone helper
3708 #define EXFL_CSTR 0x0010 ///< string literal
3709 #define EXFL_PARTIAL 0x0020 ///< type of the expression is considered partial
3710 #define EXFL_ALL 0x003F ///< all currently defined bits
3711 //@}
3712  /// Pointer arithmetic correction done for this expression?
3713  bool cpadone(void) const { return (exflags & EXFL_CPADONE) != 0; }
3714  bool is_odd_lvalue(void) const { return (exflags & EXFL_LVALUE) != 0; }
3715  bool is_fpop(void) const { return (exflags & EXFL_FPOP) != 0; }
3716  bool is_cstr(void) const { return (exflags & EXFL_CSTR) != 0; }
3717  bool is_type_partial(void) const { return (exflags & EXFL_PARTIAL) != 0; }
3718 
3719 
3720  void set_cpadone(void) { exflags |= EXFL_CPADONE; }
3721  void set_type_partial(void) { exflags |= EXFL_PARTIAL; }
3722 
3723  cexpr_t(void) : x(NULL), y(NULL), z(NULL), exflags(0) {}
3724  cexpr_t(ctype_t cop, cexpr_t *_x) : citem_t(cop), x(_x), y(NULL), z(NULL), exflags(0) {}
3725  cexpr_t(ctype_t cop, cexpr_t *_x, cexpr_t *_y) : citem_t(cop), x(_x), y(_y), z(NULL), exflags(0) {}
3726  cexpr_t(ctype_t cop, cexpr_t *_x, cexpr_t *_y, cexpr_t *_z) : citem_t(cop), x(_x), y(_y), z(_z), exflags(0) {}
3727  cexpr_t(mbl_array_t *mba, const lvar_t &v);
3728  cexpr_t(const cexpr_t &r) : citem_t() { *this = r; }
3729  void swap(cexpr_t &r) { qswap(*this, r); }
3730  cexpr_t &operator=(const cexpr_t &r) { return assign(r); }
3731  cexpr_t &hexapi assign(const cexpr_t &r);
3733  ~cexpr_t(void) { cleanup(); }
3734 
3735  /// Replace the expression.
3736  /// The children of the expression are abandoned (not freed).
3737  /// The expression pointed by 'r' is moved to 'this' expression
3738  /// \param r the source expression. It is deleted after being copied
3739  void hexapi replace_by(cexpr_t *r);
3740 
3741  /// Cleanup the expression.
3742  /// This function properly deletes all children and sets the item type to cot_empty.
3743  void hexapi cleanup(void);
3744 
3745  /// Assign a number to the expression.
3746  /// \param func current function
3747  /// \param value number value
3748  /// \param nbytes size of the number in bytes
3749  /// \param sign number sign
3750  void hexapi put_number(cfunc_t *func, uint64 value, int nbytes, type_sign_t sign=no_sign);
3751 
3752  /// Print expression into one line.
3753  /// \param vout output buffer
3754  /// \param func parent function. This argument is used to find out the referenced variable names.
3755  void hexapi print1(qstring *vout, const cfunc_t *func) const;
3756 
3757  /// Calculate the type of the expression.
3758  /// Use this function to calculate the expression type when a new expression is built
3759  /// \param recursive if true, types of all children expression will be calculated
3760  /// before calculating our type
3761  void hexapi calc_type(bool recursive);
3762 
3763  /// Compare two expressions.
3764  /// This function tries to compare two expressions in an 'intelligent' manner.
3765  /// For example, it knows about commutative operators and can ignore useless casts.
3766  /// \param r the expression to compare against the current expression
3767  /// \return true expressions can be considered equal
3768  bool hexapi equal_effect(const cexpr_t &r) const;
3769 
3770  /// Verify if the specified item is our parent.
3771  /// \param parent possible parent item
3772  /// \return true if the specified item is our parent
3773  bool hexapi is_child_of(const citem_t *parent) const;
3774 
3775  /// Check if the expression contains the specified operator.
3776  /// \param needed_op operator code to search for
3777  /// \param times how many times the operator code should be present
3778  /// \return true if the expression has at least TIMES children with NEEDED_OP
3779  bool hexapi contains_operator(ctype_t needed_op, int times=1) const;
3780 
3781  /// Does the expression contain another expression?
3782  bool contains_expr(const cexpr_t *e) const;
3783  /// Does the expression contain a comma operator?
3784  bool contains_comma(int times=1) const { return contains_operator(cot_comma, times); }
3785  /// Does the expression contain an embedded statement operator?
3786  bool contains_insn(int times=1) const { return contains_operator(cot_insn, times); }
3787  /// Does the expression contain an embedded statement operator or a label?
3788  bool contains_insn_or_label(void) const { return contains_insn() || contains_label(); }
3789  /// Does the expression contain a comma operator or an embedded statement operator or a label?
3790  bool contains_comma_or_insn_or_label(int maxcommas=1) const { return contains_comma(maxcommas) || contains_insn_or_label(); }
3791  /// Is nice expression?
3792  /// Nice expressions do not contain comma operators, embedded statements, or labels.
3793  bool is_nice_expr(void) const { return !contains_comma_or_insn_or_label(); }
3794  /// Is nice condition?.
3795  /// Nice condition is a nice expression of the boolean type.
3796  bool is_nice_cond(void) const { return is_nice_expr() && type.is_bool(); }
3797  /// Is call object?
3798  /// \return true if our expression is the call object of the specified parent expression.
3799  bool is_call_object_of(const citem_t *parent) const { return parent != NULL && parent->op == cot_call && ((cexpr_t*)parent)->x == this; }
3800  /// Is call argument?
3801  /// \return true if our expression is a call argument of the specified parent expression.
3802  bool is_call_arg_of(const citem_t *parent) const { return parent != NULL && parent->op == cot_call && ((cexpr_t*)parent)->x != this; }
3803  /// Get expression sign
3804  type_sign_t get_type_sign(void) const { return type.get_sign(); }
3805  /// Is expression unsigned?
3806  bool is_type_unsigned(void) const { return type.is_unsigned(); }
3807  /// Is expression signed?
3808  bool is_type_signed(void) const { return type.is_signed(); }
3809  /// Get max number of bits that can really be used by the expression.
3810  /// For example, x % 16 can yield only 4 non-zero bits
3811  int hexapi get_high_nbit_bound(int pbits, type_sign_t psign, bool *p_maybe_negative=NULL) const;
3812  /// Get min number of bits that are always present in the expression.
3813  /// For example, 16 always uses 5 bits.
3814  int hexapi get_low_nbit_bound(type_sign_t psign, bool *p_maybe_negative=NULL) const;
3815  /// Check if the expression requires an lvalue.
3816  /// \param child The function will check if this child of our expression must be an lvalue.
3817  /// \return true if child must be an lvalue.
3818  bool hexapi requires_lvalue(const cexpr_t *child) const;
3819  /// Check if the expression has side effects.
3820  /// Calls, pre/post inc/dec, and assignments have side effects.
3821  bool hexapi has_side_effects(void) const;
3822  /// Check if the expression is aliasable.
3823  /// Simple registers and non-aliasable stack slots return false.
3824  bool is_aliasable(void) const;
3825  /// Get numeric value of the expression.
3826  /// This function can be called only on cot_num expressions!
3827  uint64 numval(void) const
3828  {
3829  QASSERT(50071, op == cot_num);
3830  return n->value(type);
3831  }
3832  /// Check if the expression is a number with the specified value.
3833  bool is_const_value(uint64 _v) const
3834  {
3835  return op == cot_num && numval() == _v;
3836  }
3837  /// Check if the expression is a negative number.
3838  bool is_negative_const(void) const
3839  {
3840  return op == cot_num && int64(numval()) < 0;
3841  }
3842  /// Check if the expression is a non-zero number.
3843  bool is_non_zero_const(void) const
3844  {
3845  return op == cot_num && numval() != 0;
3846  }
3847  /// Check if the expression is a zero.
3848  bool is_zero_const(void) const { return is_const_value(0); }
3849  /// Does the PARENT need the expression value
3850  bool is_value_used(const citem_t *parent) const;
3851  /// Get expression value.
3852  /// \param out Pointer to the variable where the expression value is returned.
3853  /// \return true if the expression is a number.
3854  bool get_const_value(uint64 *out) const
3855  {
3856  if ( op == cot_num )
3857  {
3858  if ( out != NULL )
3859  *out = numval();
3860  return true;
3861  }
3862  return false;
3863  }
3864  /// May the expression be a pointer?
3865  bool maybe_ptr(void) const
3866  {
3867  uint64 val;
3868  if ( get_const_value(&val)
3869  && (ea_t(val) != val || !is_mapped((ea_t)val)) )
3870  {
3871  return false;
3872  }
3873  return true;
3874  }
3875  /// Find pointer or array child.
3877  {
3878  if ( x->type.is_ptr_or_array() )
3879  return x;
3880  if ( y->type.is_ptr_or_array() )
3881  return y;
3882  return NULL;
3883  }
3884  /// Find the child with the specified operator.
3885  const cexpr_t *find_op(ctype_t _op) const
3886  {
3887  if ( x->op == _op )
3888  return x;
3889  if ( y->op == _op )
3890  return y;
3891  return NULL;
3892  }
3893  cexpr_t *find_op(ctype_t _op)
3894  {
3895  return (cexpr_t *)((const cexpr_t *)this)->find_op(_op);
3896  }
3897 
3898  /// Find the operand with a numeric value
3899  const cexpr_t *find_num_op(void) const { return find_op(cot_num); }
3900  cexpr_t *find_num_op(void) { return find_op(cot_num); }
3901  /// Find the pointer operand.
3902  /// This function returns the pointer operand for binary expressions.
3903  const cexpr_t *find_ptr_or_array(bool remove_eqsize_casts) const;
3904  /// Get the other operand.
3905  /// This function returns the other operand (not the specified one)
3906  /// for binary expressions.
3907  const cexpr_t *theother(const cexpr_t *what) const { return what == x ? y : x; }
3908  cexpr_t *theother(const cexpr_t *what) { return what == x ? y : x; }
3909 
3910  // these are inline functions, see below
3911  bool get_1num_op(cexpr_t **o1, cexpr_t **o2);
3912  bool get_1num_op(const cexpr_t **o1, const cexpr_t **o2) const;
3913 
3914 };
3915 
3916 /// Statement with an expression.
3917 /// This is a base class for various statements with expressions.
3918 struct ceinsn_t
3919 {
3920  DEFINE_MEMORY_ALLOCATION_FUNCS()
3921  cexpr_t expr; ///< Expression of the statement
3922 };
3923 
/// Should curly braces be printed?
/// Used as the 'use_curly' argument of cinsn_t::print().
enum use_curly_t
{
  CALC_CURLY_BRACES,    ///< print curly braces if necessary
  NO_CURLY_BRACES,      ///< don't print curly braces
  USE_CURLY_BRACES,     ///< print curly braces without any checks
};
3931 
3932 /// If statement
3933 struct cif_t : public ceinsn_t
3934 {
3935  cinsn_t *ithen; ///< Then-branch of the if-statement
3936  cinsn_t *ielse; ///< Else-branch of the if-statement. May be NULL.
3937  cif_t(void) : ithen(NULL), ielse(NULL) {}
3938  cif_t(const cif_t &r) : ceinsn_t(), ithen(NULL), ielse(NULL) { *this = r; }
3939  cif_t &operator=(const cif_t &r) { return assign(r); }
3940  cif_t &hexapi assign(const cif_t &r);
3942  ~cif_t(void) { cleanup(); }
3943  void cleanup(void);
3944 };
3945 
3946 /// Base class for loop statements
3947 struct cloop_t : public ceinsn_t
3948 {
3949  cinsn_t *body;
3950  cloop_t(void) : body(NULL) {}
3951  cloop_t(cinsn_t *b) : body(b) {}
3952  cloop_t(const cloop_t &r) : ceinsn_t(), body(NULL) { *this = r; }
3953  cloop_t &operator=(const cloop_t &r) { return assign(r); }
3954  cloop_t &hexapi assign(const cloop_t &r);
3955  ~cloop_t(void) { cleanup(); }
3956  void cleanup(void);
3957 };
3958 
3959 /// For-loop
3960 struct cfor_t : public cloop_t
3961 {
3962  cexpr_t init; ///< Initialization expression
3963  cexpr_t step; ///< Step expression
3965 };
3966 
3967 /// While-loop
3968 struct cwhile_t : public cloop_t
3969 {
3971 };
3972 
3973 /// Do-loop
3974 struct cdo_t : public cloop_t
3975 {
3977 };
3978 
3979 /// Return statement
3980 struct creturn_t : public ceinsn_t
3981 {
3983 };
3984 
3985 /// Goto statement
3986 struct cgoto_t
3987 {
3988  int label_num; ///< Target label number
3990  DEFINE_MEMORY_ALLOCATION_FUNCS()
3991  void print(const citem_t *parent, int indent, vc_printer_t &vp) const;
3992 };
3993 
3994 /// asm statement
3995 struct casm_t : public eavec_t
3996 {
3997  casm_t(ea_t ea) { push_back(ea); }
3998  casm_t(const casm_t &r) : eavec_t(eavec_t(r)) {}
4000  void print(const citem_t *parent, int indent, vc_printer_t &vp) const;
4001  bool one_insn(void) const { return size() == 1; }
4002  void genasm(qstring *buf, ea_t ea) const;
4003 };
4004 
4005 /// Vector of pointers to statements.
4006 typedef qvector<cinsn_t *> cinsnptrvec_t;
4007 
4008 /// Ctree element: statement.
4009 /// Depending on the exact statement type, various fields of the union are used.
4010 struct cinsn_t : public citem_t
4011 {
4012  union
4013  {
4014  cblock_t *cblock; ///< details of block-statement
4015  cexpr_t *cexpr; ///< details of expression-statement
4016  cif_t *cif; ///< details of if-statement
4017  cfor_t *cfor; ///< details of for-statement
4018  cwhile_t *cwhile; ///< details of while-statement
4019  cdo_t *cdo; ///< details of do-statement
4020  cswitch_t *cswitch; ///< details of switch-statement
4021  creturn_t *creturn; ///< details of return-statement
4022  cgoto_t *cgoto; ///< details of goto-statement
4023  casm_t *casm; ///< details of asm-statement
4024  };
4025 
4026  cinsn_t(void) : citem_t(cit_empty) {}
4027  cinsn_t(const cinsn_t &r) : citem_t(cit_empty) { *this = r; }
4028  void swap(cinsn_t &r) { citem_t::swap(r); std::swap(cblock, r.cblock); }
4029  cinsn_t &operator=(const cinsn_t &r) { return assign(r); }
4030  cinsn_t &hexapi assign(const cinsn_t &r);
4032  ~cinsn_t(void) { cleanup(); }
4033 
4034  /// Replace the statement.
4035  /// The children of the statement are abandoned (not freed).
4036  /// The statement pointed by 'r' is moved to 'this' statement
4037  /// \param r the source statement. It is deleted after being copied
4038  void hexapi replace_by(cinsn_t *r);
4039 
4040  /// Cleanup the statement.
4041  /// This function properly deletes all children and sets the item type to cit_empty.
4042  void hexapi cleanup(void);
4043 
4044  /// Overwrite with zeroes without cleaning memory or deleting children
4045  void zero(void) { op = cit_empty; cblock = NULL; }
4046 
4047  /// Create a new statement.
4048  /// The current statement must be a block. The new statement will be appended to it.
4049  /// \param insn_ea statement address
4050  cinsn_t &hexapi new_insn(ea_t insn_ea);
4051 
4052  /// Create a new if-statement.
4053  /// The current statement must be a block. The new statement will be appended to it.
4054  /// \param cnd if condition. It will be deleted after being copied.
4055  cif_t &hexapi create_if(cexpr_t *cnd);
4056 
4057  /// Print the statement into many lines.
4058  /// \param indent indention (number of spaces) for the statement
4059  /// \param vp printer helper class which will receive the generated text.
4060  /// \param use_curly if the statement is a block, how should curly braces be printed.
4061  void hexapi print(int indent, vc_printer_t &vp, use_curly_t use_curly=CALC_CURLY_BRACES) const;
4062 
4063  /// Print the statement into one line.
4064  /// Currently this function is not available.
4065  /// \param vout output buffer
4066  /// \param func parent function. This argument is used to find out the referenced variable names.
4067  void hexapi print1(qstring *vout, const cfunc_t *func) const;
4068 
4069  /// Check if the statement passes execution to the next statement.
4070  /// \return false if the statement breaks the control flow (like goto, return, etc)
4071  bool hexapi is_ordinary_flow(void) const;
4072 
4073  /// Check if the statement contains a statement of the specified type.
4074  /// \param type statement opcode to look for
4075  /// \param times how many times TYPE should be present
4076  /// \return true if the statement has at least TIMES children with opcode == TYPE
4077  bool hexapi contains_insn(ctype_t type, int times=1) const;
4078 
4079  /// Collect free \c break statements.
4080  /// This function finds all free \c break statements within the current statement.
4081  /// A \c break statement is free if it does not have a loop or switch parent that
4082  /// that is also within the current statement.
4083  /// \param breaks pointer to the variable where the vector of all found free
4084  /// \c break statements is returned. This argument can be NULL.
4085  /// \return true if some free \c break statements have been found
4086  bool hexapi collect_free_breaks(cinsnptrvec_t *breaks);
4087 
4088  /// Collect free \c continue statements.
4089  /// This function finds all free \c continue statements within the current statement.
4090  /// A \c continue statement is free if it does not have a loop parent that
4091  /// that is also within the current statement.
4092  /// \param continues pointer to the variable where the vector of all found free
4093  /// \c continue statements is returned. This argument can be NULL.
4094  /// \return true if some free \c continue statements have been found
4095  bool hexapi collect_free_continues(cinsnptrvec_t *continues);
4096 
4097  /// Check if the statement has free \c break statements.
4098  bool contains_free_break(void) const { return CONST_CAST(cinsn_t*)(this)->collect_free_breaks(NULL); }
4099  /// Check if the statement has free \c continue statements.
4100  bool contains_free_continue(void) const { return CONST_CAST(cinsn_t*)(this)->collect_free_continues(NULL); }
4101 
4102 };
4103 
4104 /// Compound statement (curly braces)
4105 struct cblock_t : public qlist<cinsn_t> // we need list to be able to manipulate
4106 { // its elements freely
4108 };
4109 
4110 /// Function argument
4111 struct carg_t : public cexpr_t
4112 {
4113  bool is_vararg; ///< is a vararg (matches ...)
4114  tinfo_t formal_type; ///< formal parameter type (if known)
4115  void consume_cexpr(cexpr_t *e)
4116  {
4117  qswap(*(cexpr_t*)this, *e);
4118  delete e;
4119  }
4120  carg_t(void) : is_vararg(false) {}
4122  {
4123  return cexpr_t::compare(r);
4124  }
4125 };
4126 DECLARE_TYPE_AS_MOVABLE(carg_t);
4127 
4128 /// Function argument list
4129 struct carglist_t : public qvector<carg_t>
4130 {
4131  tinfo_t functype; ///< function object type
4132  int flags; ///< call flags
4133 #define CFL_FINAL 0x0001 ///< call type is final, should not be changed
4134 #define CFL_HELPER 0x0002 ///< created from a decompiler helper function
4135  carglist_t(void) : flags(0) {}
4136  carglist_t(const tinfo_t &ftype, int fl = 0) : functype(ftype), flags(fl) {}
4138  void print(qstring *vout, const cfunc_t *func) const;
4139  int print(int curpos, vc_printer_t &vp) const;
4140 };
4141 
4142 /// Switch case. Usually cinsn_t is a block
4143 struct ccase_t : public cinsn_t
4144 {
4145  qvector<uint64> values; ///< List of case values.
4146  ///< if empty, then 'default' case
4148  void print(const cinsn_t *parent, int indent, vc_printer_t &vp) const;
4149  void set_insn(cinsn_t *i); // deletes 'i'
4150  size_t size(void) const { return values.size(); }
4151  const uint64 &value(int i) const { return values[i]; }
4152 };
4153 DECLARE_TYPE_AS_MOVABLE(ccase_t);
4154 
4155 /// Vector of switch cases
4156 struct ccases_t : public qvector<ccase_t>
4157 {
4159  void print(const cinsn_t *parent, int indent, vc_printer_t &vp) const;
4160  int find_value(uint64 v) const;
4161 };
4162 
4163 /// Switch statement
4164 struct cswitch_t : public ceinsn_t
4165 {
4166  cnumber_t mvnf; ///< Maximal switch value and number format
4167  ccases_t cases; ///< Switch cases: values and instructions
4169 };
4170 
4171 //---------------------------------------------------------------------------
4172 /// Invisible COLOR_ADDR tags in the output text are used to refer to ctree items and variables
4174 {
4175  uval_t value;
4176 #define ANCHOR_INDEX 0x1FFFFFFF
4177 #define ANCHOR_MASK 0xC0000000
4178 #define ANCHOR_CITEM 0x00000000 ///< c-tree item
4179 #define ANCHOR_LVAR 0x40000000 ///< declaration of local variable
4180 #define ANCHOR_ITP 0x80000000 ///< item type preciser
4181 #define ANCHOR_BLKCMT 0x20000000 ///< block comment (for ctree items)
4182  ctree_anchor_t(void) : value(BADADDR) {}
4183  int get_index(void) const { return value & ANCHOR_INDEX; }
4184  item_preciser_t get_itp(void) const { return item_preciser_t(value & ~ANCHOR_ITP); }
4185  bool is_valid_anchor(void) const { return value != BADADDR; }
4186  bool is_citem_anchor(void) const { return (value & ANCHOR_MASK) == ANCHOR_CITEM; }
4187  bool is_lvar_anchor(void) const { return (value & ANCHOR_MASK) == ANCHOR_LVAR; }
4188  bool is_itp_anchor(void) const { return (value & ANCHOR_ITP) != 0; }
4189  bool is_blkcmt_anchor(void) const { return (value & ANCHOR_BLKCMT) != 0; }
4190 };
4191 
/// Type of the cursor item.
/// Stored in ctree_item_t::citype.
enum cursor_item_type_t
{
  VDI_NONE, ///< undefined
  VDI_EXPR, ///< c-tree item
  VDI_LVAR, ///< declaration of local variable
  VDI_FUNC, ///< the function itself (the very first line with the function prototype)
  VDI_TAIL, ///< cursor is at (beyond) the line end (commentable line)
};
4201 
4202 /// Cursor item.
4203 /// Information about the item under the cursor
4205 {
4206  DEFINE_MEMORY_ALLOCATION_FUNCS()
4207  cursor_item_type_t citype; ///< Item type
4208  union
4209  {
4210  citem_t *it;
4211  cexpr_t *e; ///< VDI_EXPR: Expression
4212  cinsn_t *i; ///< VDI_EXPR: Statement
4213  lvar_t *l; ///< VDI_LVAR: Local variable
4214  cfunc_t *f; ///< VDI_FUNC: Function
4215  treeloc_t loc; ///< VDI_TAIL: Line tail
4216  };
4217 
4218  ctree_item_t(): citype(VDI_NONE) {}
4219 
4220  void verify(const mbl_array_t *mba) const;
4221 
4222  /// Get pointer to structure member.
4223  /// If the current item is a structure field,
4224  /// this function will return pointer to its definition.
4225  /// \return NULL if failed
4226  /// \param[out] p_sptr pointer to the variable where the pointer to the
4227  /// parent structure is returned. This parameter can be NULL.
4228 
4229  member_t *hexapi get_memptr(struc_t **p_sptr=NULL) const;
4230 
4231  /// Get pointer to local variable.
4232  /// If the current item is a local variable,
4233  /// this function will return pointer to its definition.
4234  /// \return NULL if failed
4235 
4236  lvar_t *hexapi get_lvar(void) const;
4237 
4238 
4239  /// Get address of the current item.
4240  /// Each ctree item has an address.
4241  /// \return BADADDR if failed
4242 
4243  ea_t hexapi get_ea(void) const;
4244 
4245 
4246  /// Get label number of the current item.
4247  /// \param[in] gln_flags Combination of \ref GLN_ bits
4248  /// \return -1 if failed or no label
4249 
4250  int hexapi get_label_num(int gln_flags) const;
4251 /// \defgroup GLN_ get_label_num control
4252 //@{
4253 #define GLN_CURRENT 0x01 ///< get label of the current item
4254 #define GLN_GOTO_TARGET 0x02 ///< get goto target
4255 #define GLN_ALL 0x03 ///< get both
4256 //@}
4257 
4258  /// Is the current item is a ctree item?
4259  bool is_citem(void) const { return citype == VDI_EXPR; }
4260 
4261 };
4262 
/// Unused label disposition.
/// Passed to cfunc_t::verify() to tell whether unused labels are acceptable.
enum allow_unused_labels_t
{
  FORBID_UNUSED_LABELS = 0,     ///< Unused labels cause interr
  ALLOW_UNUSED_LABELS = 1,      ///< Unused labels are permitted
};
4269 
4270 typedef std::map<int, qstring> user_labels_t;
4271 
4272 /// Logically negate the specified expression.
4273 /// The specified expression will be logically negated.
4274 /// For example, "x == y" is converted into "x != y" by this function.
4275 /// \param e expression to negate. After the call, e must not be used anymore
4276 /// because it can be changed by the function. The function return value
4277 /// must be used to refer to the expression.
4278 /// \return logically negated expression.
4279 
4280 cexpr_t *hexapi lnot(cexpr_t *e);
4281 
4282 
4283 /// Create a new block-statement.
4284 
4285 cinsn_t *hexapi new_block(void);
4286 
4287 
4288 /// Create a helper object.
4289 /// This function creates a helper object.
4290 /// The named function is not required to exist, the decompiler will only print
4291 /// its name in the output. Helper functions are usually used to represent arbitrary
4292 /// function or macro calls in the output.
4293 /// \param standalone false:helper must be called; true:helper can be used in any expression
4294 /// \param type type of the created function object
4295 /// \param format printf-style format string that will be used to create the function name.
4296 /// \param va additional arguments for printf
4297 /// \return the created expression.
4298 
4299 AS_PRINTF(3, 0) cexpr_t *hexapi vcreate_helper(bool standalone, const tinfo_t &type, const char *format, va_list va);
4300 
4301 /// Create a helper object..
4302 AS_PRINTF(3, 4) inline cexpr_t *create_helper(bool standalone, const tinfo_t &type, const char *format, ...)
4303 {
4304  va_list va;
4305  va_start(va, format);
4306  cexpr_t *e = vcreate_helper(standalone, type, format, va);
4307  va_end(va);
4308  return e;
4309 }
4310 
4311 
4312 /// Create a helper call expression.
4313 /// This function creates a new expression: a call of a helper function.
4314 /// \param rettype type of the whole expression.
4315 /// \param args helper arguments. this object will be consumed by the function.
4316 /// if there are no args, this parameter may be specified as NULL.
4317 /// \param format printf-style format string that will be used to create the function name.
4318 /// \param va additional arguments for printf
4319 /// \return the created expression.
4320 
4321 AS_PRINTF(3, 0) cexpr_t *hexapi vcall_helper(const tinfo_t &rettype, carglist_t *args, const char *format, va_list va);
4322 
4323 /// Create a helper call.
4324 AS_PRINTF(3, 4) inline cexpr_t *call_helper(
4325  const tinfo_t &rettype,
4326  carglist_t *args,
4327  const char *format, ...)
4328 {
4329  va_list va;
4330  va_start(va, format);
4331  cexpr_t *e = vcall_helper(rettype, args, format, va);
4332  va_end(va);
4333  return e;
4334 }
4335 
4336 
4337 /// Create a number expression
4338 /// \param n value
4339 /// \param func current function
4340 /// \param ea definition address of the number
4341 /// \param opnum operand number of the number (in the disassembly listing)
4342 /// \param sign number sign
4343 /// \param size size of number in bytes
4344 
4345 cexpr_t *hexapi make_num(uint64 n, cfunc_t *func=NULL, ea_t ea=BADADDR, int opnum=0, type_sign_t sign=no_sign, int size=0);
4346 
4347 
4348 /// Create a reference.
4349 /// This function performs the following conversion: "obj" => "&obj".
4350 /// It can handle casts, annihilate "&*", and process other special cases.
4351 
4353 
4354 
4355 /// Dereference a pointer.
4356 /// This function dereferences a pointer expression.
4357 /// It performs the following conversion: "ptr" => "*ptr"
4358 /// It can handle discrepancies in the pointer type and the access size.
4359 /// \param e expression to dereference
4360 /// \param ptrsize access size
4361 /// \param is_flt dereferencing for floating point access?
4362 /// \return dereferenced expression
4363 
4364 cexpr_t *hexapi dereference(cexpr_t *e, int ptrsize, bool is_flt=false);
4365 
4366 
4367 /// Save user defined labels into the database.
4368 /// \param func_ea the entry address of the function
4369 /// \param user_labels collection of user defined labels
4370 
4371 void hexapi save_user_labels(ea_t func_ea, const user_labels_t *user_labels);
4372 
4373 
4374 /// Save user defined comments into the database.
4375 /// \param func_ea the entry address of the function
4376 /// \param user_cmts collection of user defined comments
4377 
4378 void hexapi save_user_cmts(ea_t func_ea, const user_cmts_t *user_cmts);
4379 
4380 /// Save user defined number formats into the database.
4381 /// \param func_ea the entry address of the function
4382 /// \param numforms collection of user defined number formats
4383 
4384 void hexapi save_user_numforms(ea_t func_ea, const user_numforms_t *numforms);
4385 
4386 
4387 /// Save user defined citem iflags into the database.
4388 /// \param func_ea the entry address of the function
4389 /// \param iflags collection of user defined citem iflags
4390 
4391 void hexapi save_user_iflags(ea_t func_ea, const user_iflags_t *iflags);
4392 
4393 
4394 /// Save user defined union field selections into the database.
4395 /// \param func_ea the entry address of the function
4396 /// \param unions collection of union field selections
4397 
4398 void hexapi save_user_unions(ea_t func_ea, const user_unions_t *unions);
4399 
4400 
4401 /// Restore user defined labels from the database.
4402 /// \param func_ea the entry address of the function
4403 /// \return collection of user defined labels.
4404 /// The returned object must be deleted by the caller using delete_user_labels()
4405 
4407 
4408 
4409 /// Restore user defined comments from the database.
4410 /// \param func_ea the entry address of the function
4411 /// \return collection of user defined comments.
4412 /// The returned object must be deleted by the caller using delete_user_cmts()
4413 
4414 user_cmts_t *hexapi restore_user_cmts(ea_t func_ea);
4415 
4416 
4417 /// Restore user defined number formats from the database.
4418 /// \param func_ea the entry address of the function
4419 /// \return collection of user defined number formats.
4420 /// The returned object must be deleted by the caller using delete_user_numforms()
4421 
4423 
4424 
4425 /// Restore user defined citem iflags from the database.
4426 /// \param func_ea the entry address of the function
4427 /// \return collection of user defined iflags.
4428 /// The returned object must be deleted by the caller using delete_user_iflags()
4429 
4430 user_iflags_t *hexapi restore_user_iflags(ea_t func_ea);
4431 
4432 
4433 /// Restore user defined union field selections from the database.
4434 /// \param func_ea the entry address of the function
4435 /// \return collection of union field selections
4436 /// The returned object must be deleted by the caller using delete_user_unions()
4437 
4438 user_unions_t *hexapi restore_user_unions(ea_t func_ea);
4439 
4440 
4441 typedef std::map<ea_t, cinsnptrvec_t> eamap_t;
4442 // map of instruction boundaries. may contain INS_EPILOG for the epilog instructions
4443 typedef std::map<cinsn_t *, rangeset_t> boundaries_t;
4444 #define INS_EPILOG ((cinsn_t *)1)
4445 // Tags to find this location quickly: #cfunc_t #func_t
4446 //-------------------------------------------------------------------------
4447 /// Decompiled function. Decompilation result is kept here.
4448 struct cfunc_t
4449 {
4450  ea_t entry_ea; ///< function entry address
4451  mbl_array_t *mba; ///< underlying microcode
4452  cinsn_t body; ///< function body, must be a block
4453  intvec_t &argidx; ///< list of arguments (indexes into vars)
4454  ctree_maturity_t maturity; ///< maturity level
4455  // The following maps must be accessed using helper functions.
4456  // Example: for user_labels_t, see functions starting with "user_labels_".
4457  user_labels_t *user_labels;///< user-defined labels.
4458  user_cmts_t *user_cmts; ///< user-defined comments.
4459  user_numforms_t *numforms; ///< user-defined number formats.
4460  user_iflags_t *user_iflags;///< user-defined item flags \ref CIT_
4461  user_unions_t *user_unions;///< user-defined union field selections.
4462 /// \defgroup CIT_ ctree item iflags bits
4463 //@{
4464 #define CIT_COLLAPSED 0x0001 ///< display element in collapsed form
4465 //@}
4466  int refcnt; ///< reference count to this object. use cfuncptr_t
4467  int statebits; ///< current cfunc_t state. see \ref CFS_
4468 /// \defgroup CFS_ cfunc state bits
4469 #define CFS_BOUNDS 0x0001 ///< 'eamap' and 'boundaries' are ready
4470 #define CFS_TEXT 0x0002 ///< 'sv' is ready (and hdrlines)
4471 #define CFS_LVARS_HIDDEN 0x0004 ///< local variable definitions are collapsed
4472  eamap_t *eamap; ///< ea->insn map. use \ref get_eamap
4473  boundaries_t *boundaries; ///< map of instruction boundaries. use \ref get_boundaries
4474  strvec_t sv; ///< decompilation output: function text. use \ref get_pseudocode
4475  int hdrlines; ///< number of lines in the declaration area
4476  mutable ctree_items_t treeitems; ///< vector of ctree items
4477 
4478 public:
4479  cfunc_t(mbl_array_t *mba);
4480  ~cfunc_t(void) { cleanup(); }
4481  void release(void) { delete this; }
4482  DEFINE_MEMORY_ALLOCATION_FUNCS()
4483 
4484  /// Generate the function body.
4485  /// This function (re)generates the function body from the underlying microcode.
4486  void hexapi build_c_tree(void);
4487 
4488  /// Verify the ctree.
4489  /// This function verifies the ctree. If the ctree is malformed, an internal error
4490  /// is generated. Use it to verify the ctree after your modifications.
4491  /// \param aul Are unused labels acceptable?
4492  /// \param even_without_debugger if false and there is no debugger, the verification will be skipped
4493  void hexapi verify(allow_unused_labels_t aul, bool even_without_debugger) const;
4494 
4495  /// Print function prototype.
4496  /// \param vout output buffer
4497  void hexapi print_dcl(qstring *vout) const;
4498 
4499  /// Print function text.
4500  /// \param vp printer helper class to receive the generated text.
4501  void hexapi print_func(vc_printer_t &vp) const;
4502 
4503  /// Get the function type.
4504  /// \param type variable where the function type is returned
4505  /// \return false if failure
4506  bool hexapi get_func_type(tinfo_t *type) const;
4507 
4508  /// Get vector of local variables.
4509  /// \return pointer to the vector of local variables. If you modify this vector,
4510  /// the ctree must be regenerated in order to have correct cast operators.
4511  /// Use build_c_tree() for that.
4512  /// Removing lvars should be done carefully: all references in ctree
4513  /// and microcode must be corrected after that.
4514  lvars_t *hexapi get_lvars(void);
4515 
4516  /// Get stack offset delta.
4517  /// The local variable stack offsets retrieved by v.location.stkoff()
4518  /// should be adjusted before being used as stack frame offsets in IDA.
4519  /// \return the delta to apply.
4520  /// example: ida_stkoff = v.location.stkoff() - f->get_stkoff_delta()
4521  sval_t hexapi get_stkoff_delta(void);
4522 
4523  /// Find the label.
4524  /// \return pointer to the ctree item with the specified label number.
4525  citem_t *hexapi find_label(int label);
4526 
4527  /// Remove unused labels.
4528  /// This function checks which labels are really used by the function and
4529  /// removes the unused ones.
4530  void hexapi remove_unused_labels(void);
4531 
4532  /// Retrieve a user defined comment.
4533  /// \param loc ctree location
4534  /// \param rt should already retrieved comments be retrieved again?
4535  /// \return pointer to the comment string or NULL
4536  const char *hexapi get_user_cmt(const treeloc_t &loc, cmt_retrieval_type_t rt) const;
4537 
4538  /// Set a user defined comment.
4539  /// This function stores the specified comment in the cfunc_t structure.
4540  /// The save_user_cmts() function must be called after it.
4541  /// \param loc ctree location
4542  /// \param cmt new comment. if empty or NULL, then an existing comment is deleted.
4543  void hexapi set_user_cmt(const treeloc_t &loc, const char *cmt);
4544 
4545  /// Retrieve citem iflags.
4546  /// \param loc citem locator
4547  /// \return \ref CIT_ or 0
4548  int32 hexapi get_user_iflags(const citem_locator_t &loc) const;
4549 
4550  /// Set citem iflags.
4551  /// \param loc citem locator
4552  /// \param iflags new iflags
4553  void hexapi set_user_iflags(const citem_locator_t &loc, int32 iflags);
4554 
4555  /// Check if there are orphan comments.
4556  bool hexapi has_orphan_cmts(void) const;
4557 
4558  /// Delete all orphan comments.
4559  /// The save_user_cmts() function must be called after this call.
4560  int hexapi del_orphan_cmts(void);
4561 
4562  /// Retrieve a user defined union field selection.
4563  /// \param ea address
4564  /// \param path out: path describing the union selection.
4565  /// \return success (the selection itself is returned through \p path)
4566  bool hexapi get_user_union_selection(ea_t ea, intvec_t *path);
4567 
4568  /// Set a union field selection.
4569  /// The save_user_unions() function must be called after calling this function.
4570  /// \param ea address
4571  /// \param path in: path describing the union selection.
4572  void hexapi set_user_union_selection(ea_t ea, const intvec_t &path);
4573 
4574  /// Save user-defined labels into the database (forwards entry_ea and user_labels to ::save_user_labels())
4575  void save_user_labels(void) const { ::save_user_labels(entry_ea, user_labels); }
4576  /// Save user-defined comments into the database (forwards entry_ea and user_cmts to ::save_user_cmts())
4577  void save_user_cmts(void) const { ::save_user_cmts(entry_ea, user_cmts); }
4578  /// Save user-defined number formats into the database (forwards entry_ea and numforms to ::save_user_numforms())
4579  void save_user_numforms(void) const { ::save_user_numforms(entry_ea, numforms); }
4580  /// Save user-defined iflags into the database (forwards entry_ea and user_iflags to ::save_user_iflags())
4581  void save_user_iflags(void) const { ::save_user_iflags(entry_ea, user_iflags); }
4582  /// Save user-defined union field selections into the database (forwards entry_ea and user_unions to ::save_user_unions())
4583  void save_user_unions(void) const { ::save_user_unions(entry_ea, user_unions); }
4584 
4585  /// Get ctree item for the specified cursor position.
4586  /// \return false if failed to get the current item
4587  /// \param line line of decompilation text (element of \ref sv)
4588  /// \param x x cursor coordinate in the line
4589  /// \param is_ctree_line does the line belong to statement area? (if not, it is assumed to belong to the declaration area)
4590  /// \param phead ptr to the first item on the line (used to attach block comments). May be NULL
4591  /// \param pitem ptr to the current item. May be NULL
4592  /// \param ptail ptr to the last item on the line (used to attach indented comments). May be NULL
4593  /// \sa vdui_t::get_current_item()
4594  bool hexapi get_line_item(const char *line, int x, bool is_ctree_line, ctree_item_t *phead, ctree_item_t *pitem, ctree_item_t *ptail);
4595 
4596  /// Get information about decompilation warnings.
4597  /// \return reference to the vector of warnings
4598  hexwarns_t &hexapi get_warnings(void);
4599 
4600  /// Get reference to the ea->insn map.
4601  /// This function initializes eamap if not done yet.
4602  eamap_t &hexapi get_eamap(void);
4603 
4604  /// Get reference to the map of instruction boundaries.
4605  /// This function initializes the boundary map if not done yet.
4606  boundaries_t &hexapi get_boundaries(void);
4607 
4608  /// Get reference to the decompilation output: the pseudocode.
4609  /// This function generates pseudocode if not done yet.
4610  const strvec_t &hexapi get_pseudocode(void);
4611 
4612  bool hexapi gather_derefs(const ctree_item_t &ci, udt_type_data_t *udm=NULL) const;
4613  bool hexapi find_item_coords(const citem_t *item, int *px, int *py);
4614 private:
4615  /// Cleanup.
4616  /// Properly delete all children and free memory.
4617  void hexapi cleanup(void);
4618  DECLARE_UNCOPYABLE(cfunc_t)
4619 };
4620 typedef qrefcnt_t<cfunc_t> cfuncptr_t;
4621 
4622 /// \defgroup DECOMP_ decompile() flags
4623 //@{
4624 #define DECOMP_NO_WAIT 0x0001 ///< do not display waitbox
4625 #define DECOMP_NO_CACHE 0x0002 ///< do not use decompilation cache
4626 #define DECOMP_NO_FRAME 0x0004 ///< do not use function frame info (only snippet mode)
4627 //@}
4628 
4629 
4630 /// Decompile a snippet or a function.
4631 /// \param mbr what to decompile
4632 /// \param hf extended error information (if failed)
4633 /// \param flags bitwise combination of \ref DECOMP_... bits
4634 /// \return pointer to the decompilation result (a reference counted pointer).
4635 /// NULL if failed.
4636 
4637 cfuncptr_t hexapi decompile(
4638  const mba_ranges_t &mbr,
4639  hexrays_failure_t *hf,
4640  int flags=0);
4641 
4642 
4643 
4644 /// Decompile a function.
4645 /// Multiple decompilations of the same function return the same object.
4646 /// \param pfn pointer to function to decompile
4647 /// \param hf extended error information (if failed)
4648 /// \return pointer to the decompilation result (a reference counted pointer).
4649 /// NULL if failed.
4650 
4651 inline cfuncptr_t decompile_func(func_t *pfn, hexrays_failure_t *hf)
4652 {
4653  mba_ranges_t mbr(pfn); // wrap the function pointer in mba_ranges_t, as decompile() expects
4654  return decompile(mbr, hf, 0); // flags=0: no DECOMP_... bits are set
4655 }
4656 
4657 
4658 /// Decompile a snippet.
4659 /// \param ranges snippet ranges. ranges[0].start_ea is the entry point
4660 /// \param hf extended error information (if failed)
4661 /// \param flags bitwise combination of \ref DECOMP_... bits
4662 /// \return pointer to the decompilation result (a reference counted pointer).
4663 /// NULL if failed.
4664 
4665 inline cfuncptr_t decompile_snippet(
4666  const rangevec_t &ranges,
4667  hexrays_failure_t *hf,
4668  int flags=0)
4669 {
4670  mba_ranges_t mbr(ranges); // wrap the snippet ranges in mba_ranges_t, as decompile() expects
4671  return decompile(mbr, hf, flags); // flags are passed through unchanged
4672 }
4673 
4674 
4675 /// Generate microcode of an arbitrary code snippet
4676 /// \param mbr snippet ranges
4677 /// \param retlist list of registers the snippet returns
4678 /// \param hf extended error information (if failed)
4679 /// \param flags bitwise combination of \ref DECOMP_... bits
4680 /// \param reqmat required microcode maturity
4681 /// \return pointer to the microcode, NULL if failed.
4682 
4684  const mba_ranges_t &mbr,
4685  hexrays_failure_t *hf,
4686  const mlist_t *retlist=NULL,
4687  int flags=0,
4688  mba_maturity_t reqmat=MMAT_GLBOPT3);
4689 
4690 
4691 
4692 /// Flush the cached decompilation results.
4693 /// Erases a cache entry for the specified function.
4694 /// \param ea function to erase from the cache
4695 /// \param close_views close pseudocode windows that show the function
4696 /// \return true if a cache entry existed.
4697 
4698 bool hexapi mark_cfunc_dirty(ea_t ea, bool close_views=false);
4699 
4700 
4701 /// Flush all cached decompilation results.
4702 
4703 void hexapi clear_cached_cfuncs(void);
4704 
4705 
4706 /// Do we have a cached decompilation result for 'ea'?
4707 
4708 bool hexapi has_cached_cfunc(ea_t ea);
4709 
4710 //--------------------------------------------------------------------------
4711 // Now that the cinsn_t class is defined, define the cleanup functions:
4712 inline void cif_t::cleanup(void) { delete ithen; delete ielse; } // deletes ithen and ielse
4713 inline void cloop_t::cleanup(void) { delete body; } // deletes the loop body
4714 
4715 /// Print item into one line.
4716 /// \param vout output buffer
4717 /// \param func parent function. This argument is used to find out the referenced variable names.
4718 /// The function does not return a value; the generated text goes to the \p vout buffer.
4719 
4720 inline void citem_t::print1(qstring *vout, const cfunc_t *func) const
4721 {
4722  if ( is_expr() ) // expressions are printed as cexpr_t, everything else as cinsn_t
4723  ((cexpr_t*)this)->print1(vout, func);
4724  else
4725  ((cinsn_t*)this)->print1(vout, func);
4726 }
4727 
4728 /// Get pointers to operands. At least one operand should be a number.
4729 /// o1 will point to the number and o2 to the other operand; returns false if neither is a number.
4730 
4731 inline bool cexpr_t::get_1num_op(cexpr_t **o1, cexpr_t **o2)
4732 {
4733  if ( x->op == cot_num ) // x is a number: report the operands in their original order
4734  {
4735  *o1 = x;
4736  *o2 = y;
4737  }
4738  else
4739  {
4740  if ( y->op != cot_num ) // neither x nor y is a number
4741  return false;
4742  *o1 = y; // y is the number: swap so that o1 receives it
4743  *o2 = x;
4744  }
4745  return true;
4746 }
4747 
4748 inline bool cexpr_t::get_1num_op(const cexpr_t **o1, const cexpr_t **o2) const
4749 {
4750  return CONST_CAST(cexpr_t*)(this)->get_1num_op( // forward to the non-const overload
4751  CONST_CAST(cexpr_t**)(o1),
4752  CONST_CAST(cexpr_t**)(o2));
4753 }
4754 
4755 inline citem_locator_t::citem_locator_t(const citem_t *i) : ea(i->ea), op(i->op)
4756 { // nothing else to do: the locator only copies the item's ea and op
4757 }
4758 
4759 const char *hexapi get_ctype_name(ctype_t op);
4760 qstring hexapi create_field_name(const tinfo_t &type, uval_t offset=BADADDR);
4761 typedef void *hexdsp_t(int code, ...);
4762 const int64 HEXRAYS_API_MAGIC = 0x00DEC0DE00000002LL;
4763 
4764 /// Decompiler events.
4765 /// Use install_hexrays_callback() to install a handler for decompiler events.
4766 /// When the possible return value is not specified, your callback
4767 /// must return zero.
4769 {
4770  // When a function is decompiled, the following events occur:
4771 
4772  hxe_flowchart, ///< Flowchart has been generated.
4773  ///< qflow_chart_t *fc
4774 
4775  hxe_stkpnts, ///< SP change points have been calculated.
4776  ///< mbl_array_t *mba \n
4777  ///< stkpnts_t *stkpnts \n
4778  ///< return \ref MERR_ code
4779 
4780  hxe_prolog, ///< Prolog analysis has been finished.
4781  ///< mbl_array_t *mba \n
4782  ///< qflow_chart_t *fc \n
4783  ///< bitset_t *reachable_blocks
4784  ///< return \ref MERR_ code
4785 
4786  hxe_microcode, ///< Microcode has been generated.
4787  ///< mbl_array_t *mba
4788  ///< return \ref MERR_ code
4789 
4790  hxe_preoptimized, ///< Microcode has been preoptimized.
4791  ///< mbl_array_t *mba
4792  ///< return \ref MERR_ code
4793 
4794  hxe_locopt, ///< Basic block level optimization has been finished.
4795  ///< mbl_array_t *mba
4796  ///< return \ref MERR_ code
4797 
4798  hxe_prealloc, ///< Local variables: preallocation step begins. \n
4799  ///< mbl_array_t *mba \n
4800  ///< This event may occur several times \n
4801  ///< Should return: 1 if modified microcode \n
4802  ///< Negative values are \ref MERR_ error codes
4803 
4804  hxe_glbopt, ///< Global optimization has been finished.
4805  ///< mbl_array_t *mba
4806  ///< return \ref MERR_ code
4807 
4808  hxe_structural, ///< Structural analysis has been finished.
4809  ///< control_graph_t *ct
4810 
4811  hxe_maturity, ///< Ctree maturity level is being changed.
4812  ///< cfunc_t *cfunc \n
4813  ///< ctree_maturity_t new_maturity
4814 
4815  hxe_interr, ///< Internal error has occurred.
4816  ///< int errcode
4817 
4818  hxe_combine, ///< Trying to combine instructions of basic block.
4819  ///< mblock_t *blk \n
4820  ///< minsn_t *insn \n
4821  ///< Should return: 1 if combined the current instruction
4822  ///< with a preceding one
4823 
4824  hxe_print_func, ///< Printing ctree and generating text.
4825  ///< cfunc_t *cfunc \n
4826  ///< vc_printer_t *vp \n
4827  ///< Returns: 1 if text has been generated by the plugin
4828 
4829  hxe_func_printed, ///< Function text has been generated. Plugins may
4830  ///< modify the text in \ref cfunc_t::sv.
4831  ///< cfunc_t *cfunc
4832 
4833  hxe_resolve_stkaddrs, ///< The optimizer is about to resolve stack addresses.
4834  ///< mbl_array_t *mba
4835 
4836  // User interface related events:
4837 
4839  ///< New pseudocode view has been opened.
4840  ///< vdui_t *vu
4841 
4842  hxe_switch_pseudocode,///< Existing pseudocode view has been reloaded
4843  ///< with a new function. Its text has not been
4844  ///< refreshed yet, only cfunc and mba pointers are ready.\n
4845  ///< vdui_t *vu
4846 
4847  hxe_refresh_pseudocode,///< Existing pseudocode text has been refreshed.
4848  ///< Adding/removing pseudocode lines is forbidden in this event.
4849  ///< This event is obsolete, please use \ref hxe_func_printed.
4850  ///< vdui_t *vu \n
4851  ///< See also hxe_text_ready, which happens earlier
4852 
4853  hxe_close_pseudocode, ///< Pseudocode view is being closed.
4854  ///< vdui_t *vu
4855 
4856  hxe_keyboard, ///< Keyboard has been hit.
4857  ///< vdui_t *vu \n
4858  ///< int key_code (VK_...) \n
4859  ///< int shift_state \n
4860  ///< Should return: 1 if the event has been handled
4861 
4862  hxe_right_click, ///< Mouse right click.
4863  ///< Use hxe_populating_popup instead, in case you
4864  ///< want to add items in the popup menu.
4865  ///< vdui_t *vu
4866 
4867  hxe_double_click, ///< Mouse double click.
4868  ///< vdui_t *vu \n
4869  ///< int shift_state \n
4870  ///< Should return: 1 if the event has been handled
4871 
4872  hxe_curpos, ///< Current cursor position has been changed.
4873  ///< (for example, by left-clicking or using keyboard)\n
4874  ///< vdui_t *vu
4875 
4876  hxe_create_hint, ///< Create a hint for the current item.
4877  ///< vdui_t *vu \n
4878  ///< qstring *result_hint \n
4879  ///< int *implines \n
4880  ///< Possible return values: \n
4881  ///< 0: the event has not been handled \n
4882  ///< 1: hint has been created (should set *implines to nonzero as well)\n
4883  ///< 2: hint has been created but the standard hints must be
4884  ///< appended by the decompiler
4885 
4886  hxe_text_ready, ///< Decompiled text is ready.
4887  ///< vdui_t *vu \n
4888  ///< This event can be used to modify the output text (sv).
4889  ///< The text uses regular color codes (see lines.hpp)
4890  ///< COLOR_ADDR is used to store pointers to ctree elements
4891 
4892  hxe_populating_popup, ///< Populating popup menu. We can add menu items now.
4893  ///< TWidget *widget \n
4894  ///< TPopupMenu *popup_handle \n
4895  ///< vdui_t *vu \n
4896 
4897  lxe_lvar_name_changed,///< Local variable got renamed.
4898  ///< vdui_t *vu \n
4899  ///< lvar_t *v \n
4900  ///< const char *name \n
4901  ///< bool is_user_name \n
4902  ///< Please note that it is possible to read/write
4903  ///< user settings for lvars directly from the idb.
4904 
4905  lxe_lvar_type_changed,///< Local variable type got changed.
4906  ///< vdui_t *vu \n
4907  ///< lvar_t *v \n
4908  ///< const char *tinfo \n
4909  ///< Please note that it is possible to read/write
4910  ///< user settings for lvars directly from the idb.
4911 
4912  lxe_lvar_cmt_changed, ///< Local variable comment got changed.
4913  ///< vdui_t *vu \n
4914  ///< lvar_t *v \n
4915  ///< const char *cmt \n
4916  ///< Please note that it is possible to read/write
4917  ///< user settings for lvars directly from the idb.
4918 
4919  lxe_lvar_mapping_changed, ///< Local variable mapping got changed.
4920  ///< vdui_t *vu \n
4921  ///< lvar_t *from \n
4922  ///< lvar_t *to \n
4923  ///< Please note that it is possible to read/write
4924  ///< user settings for lvars directly from the idb.
4925 };
4926 
4927 /// Handler of decompiler events.
4928 /// \param ud user data. the value specified at the handler installation time
4929 /// is passed here.
4930 /// \param event decompiler event code
4931 /// \param va additional arguments
4932 /// \return as a rule the callback must return 0 unless specified otherwise
4933 /// in the event description.
4934 
4935 typedef ssize_t idaapi hexrays_cb_t(void *ud, hexrays_event_t event, va_list va);
4936 
4937 
4938 /// Install handler for decompiler events.
4939 /// \param callback handler to install
4940 /// \param ud user data. this pointer will be passed to your handler by the decompiler.
4941 /// \return false if failed
4942 
4943 bool hexapi install_hexrays_callback(hexrays_cb_t *callback, void *ud);
4944 
4945 /// Uninstall handler for decompiler events.
4946 /// \param callback handler to uninstall
4947 /// \param ud user data. if NULL, all handlers corresponding to \c callback are uninstalled.
4948 /// if not NULL, only the callback instance with the specified \c ud value is uninstalled.
4949 /// \return number of uninstalled handlers.
4950 
4951 int hexapi remove_hexrays_callback(hexrays_cb_t *callback, void *ud);
4952 
4953 
4954 
4955 //---------------------------------------------------------------------------
4956 /// \defgroup vdui User interface definitions
4957 //@{
4958 
4959 /// Type of the input device.
4960 /// How the user command has been invoked
4962 {
4963  USE_KEYBOARD = 0, ///< Keyboard
4964  USE_MOUSE = 1, ///< Mouse
4965 };
4966 //@}
4967 
4968 //---------------------------------------------------------------------------
4969 /// Cursor position in the output text (pseudocode).
4971 {
4972  int lnnum; ///< Line number
4973  int x; ///< x coordinate of the cursor within the window
4974  int y; ///< y coordinate of the cursor within the window
4975  /// Is the cursor in the ctree (statement) area, i.e. not in the variable/type declaration area?
4976  /// \param hdrlines Number of lines of the declaration area
4977  bool in_ctree(int hdrlines) const { return lnnum >= hdrlines; }
4978  /// Comparison operators
4980  {
4981  if ( lnnum < r.lnnum ) return -1;
4982  if ( lnnum > r.lnnum ) return 1;
4983  if ( x < r.x ) return -1;
4984  if ( x > r.x ) return 1;
4985  return 0;
4986  }
4987  ctext_position_t(int _lnnum=-1, int _x=0, int _y=0)
4988  : lnnum(_lnnum), x(_x), y(_y) {}
4989 };
4990 
4991 /// Navigation history item.
4992 /// Holds information about interactive decompilation history.
4993 /// Currently this is not saved in the database.
4995 {
4996  ea_t ea; ///< The entry address of the decompiled function
4997  ea_t end; ///< BADADDR-decompile function; otherwise end of the range
4998  history_item_t(ea_t _ea=BADADDR, int _lnnum=-1, int _x=0, int _y=0)
4999  : ctext_position_t(_lnnum, _x, _y), ea(_ea), end(BADADDR) {}
5000  history_item_t(ea_t _ea, const ctext_position_t &p)
5001  : ctext_position_t(p), ea(_ea), end(BADADDR) {}
5002 };
5003 
5004 /// Navigation history.
5005 typedef qstack<history_item_t> history_t;
5006 
5007 /// Comment types
5008 typedef int cmt_type_t;
5009 const cmt_type_t
5010  CMT_NONE = 0x0000, ///< No comment is possible
5011  CMT_TAIL = 0x0001, ///< Indented comment
5012  CMT_BLOCK1 = 0x0002, ///< Anterior block comment
5013  CMT_BLOCK2 = 0x0004, ///< Posterior block comment
5014  CMT_LVAR = 0x0008, ///< Local variable comment
5015  CMT_FUNC = 0x0010, ///< Function comment
5016  CMT_ALL = 0x001F; ///< All comments
5017 
5018 //---------------------------------------------------------------------------
5019 /// Information about pseudocode window
5020 struct vdui_t
5021 {
5022  int flags; ///< \ref VDUI_
5023 /// \defgroup VDUI_ Properties of pseudocode window
5024 /// Used in vdui_t::flags
5025 //@{
5026 #define VDUI_VISIBLE 0x0001 ///< is visible?
5027 #define VDUI_VALID 0x0002 ///< is valid?
5028 #define VDUI_LOCKED 0x0004 ///< is locked?
5029 //@}
5030 
5031  /// Is the pseudocode window visible?
5032  /// if not, it might be invisible or destroyed
5033  bool visible(void) const { return (flags & VDUI_VISIBLE) != 0; }
5034  /// Does the pseudocode window contain valid code?
5035  /// It can become invalid if the function type gets changed in IDA.
5036  bool valid(void) const { return (flags & VDUI_VALID) != 0; }
5037  /// Is the pseudocode window locked?
5038  /// We lock windows before modifying them
5039  bool locked(void) const { return (flags &