Functions that deal with the disassembling of program instructions. More...
Classes | |
class | op_t |
Operand of an instruction. More... | |
class | insn_t |
union | value_u |
This structure is used to pass values of bytes to helper functions. More... | |
struct | value_u::dq_t |
struct | value_u::dt_t |
struct | value_u::d128_t |
struct | outctx_base_t |
struct | outctx_t |
struct | macro_constructor_t |
Helper class for processor modules to build macro instructions. More... | |
Macros | |
#define | OF_NO_BASE_DISP 0x80 |
base displacement doesn't exist. More... | |
#define | OF_OUTER_DISP 0x40 |
outer displacement exists. More... | |
#define | PACK_FORM_DEF 0x20 |
packed factor defined. More... | |
#define | OF_NUMBER 0x10 |
the operand can be converted to a number only | |
#define | OF_SHOW 0x08 |
should the operand be displayed? | |
#define | dt_byte 0 |
8 bit integer | |
#define | dt_word 1 |
16 bit integer | |
#define | dt_dword 2 |
32 bit integer | |
#define | dt_float 3 |
4 byte floating point | |
#define | dt_double 4 |
8 byte floating point | |
#define | dt_tbyte 5 |
variable size ( processor_t::tbyte_size) floating point | |
#define | dt_packreal 6 |
packed real format for mc68040 | |
#define | dt_qword 7 |
64 bit integer | |
#define | dt_byte16 8 |
128 bit integer | |
#define | dt_code 9 |
ptr to code (not used?) | |
#define | dt_void 10 |
none | |
#define | dt_fword 11 |
48 bit | |
#define | dt_bitfild 12 |
bit field (mc680x0) | |
#define | dt_string 13 |
pointer to asciiz string | |
#define | dt_unicode 14 |
pointer to unicode string | |
#define | dt_ldbl 15 |
long double (which may be different from tbyte) | |
#define | dt_byte32 16 |
256 bit integer | |
#define | dt_byte64 17 |
512 bit integer | |
#define | dt_half 18 |
2-byte floating point | |
#define | DECLARE_INSN_HELPERS(decl) |
#define | Op1 ops[0] |
first operand | |
#define | Op2 ops[1] |
second operand | |
#define | Op3 ops[2] |
third operand | |
#define | Op4 ops[3] |
fourth operand | |
#define | Op5 ops[4] |
fifth operand | |
#define | Op6 ops[5] |
sixth operand | |
#define | Op7 ops[6] |
seventh operand | |
#define | Op8 ops[7] |
eighth operand | |
#define | INSN_MACRO 0x01 |
macro instruction | |
#define | INSN_MODMAC 0x02 |
may modify the database to make room for the macro insn | |
#define | INSN_64BIT 0x04 |
belongs to 64bit segment? | |
#define | CTXF_MAIN 0x00001 |
#define | CTXF_MULTI 0x00002 |
#define | CTXF_CODE 0x00004 |
#define | CTXF_STACK 0x00008 |
#define | CTXF_GEN_XREFS 0x00010 |
#define | CTXF_XREF_STATE 0x00060 |
#define | XREFSTATE_NONE 0x00 |
#define | XREFSTATE_GO 0x20 |
#define | XREFSTATE_DONE 0x40 |
#define | CTXF_GEN_CMT 0x00080 |
#define | CTXF_CMT_STATE 0x00300 |
#define | COMMSTATE_NONE 0x000 |
#define | COMMSTATE_GO 0x100 |
#define | COMMSTATE_DONE 0x200 |
#define | CTXF_VOIDS 0x00400 |
#define | CTXF_NORMAL_LABEL 0x00800 |
#define | CTXF_DEMANGLED_LABEL 0x01000 |
#define | CTXF_LABEL_OK 0x02000 |
#define | CTXF_DEMANGLED_OK 0x04000 |
#define | CTXF_OVSTORE_PRNT 0x08000 |
#define | CTXF_OUTCTX_T 0x10000 |
#define | CTXF_DBLIND_OPND 0x20000 |
#define | CTXF_BINOP_STATE 0xC0000 |
#define | BINOPSTATE_NONE 0x00000 |
#define | BINOPSTATE_GO 0x40000 |
#define | BINOPSTATE_DONE 0x80000 |
#define | CTXF_HIDDEN_ADDR 0x100000 |
#define | OOF_SIGNMASK 0x0003 |
sign symbol (+/-) output | |
#define | OOFS_IFSIGN 0x0000 |
output sign if needed | |
#define | OOFS_NOSIGN 0x0001 |
don't output sign, forbid the user to change the sign | |
#define | OOFS_NEEDSIGN 0x0002 |
always output sign (+/-) | |
#define | OOF_SIGNED 0x0004 |
output as signed if < 0 | |
#define | OOF_NUMBER 0x0008 |
always as a number | |
#define | OOF_WIDTHMASK 0x0070 |
width of value in bits | |
#define | OOFW_IMM 0x0000 |
take from x.dtype | |
#define | OOFW_8 0x0010 |
8 bit width | |
#define | OOFW_16 0x0020 |
16 bit width | |
#define | OOFW_24 0x0030 |
24 bit width | |
#define | OOFW_32 0x0040 |
32 bit width | |
#define | OOFW_64 0x0050 |
64 bit width | |
#define | OOF_ADDR 0x0080 |
output x.addr, otherwise x.value | |
#define | OOF_OUTER 0x0100 |
output outer operand | |
#define | OOF_ZSTROFF 0x0200 |
meaningful only if is_stroff(uFlag); append a struct field name if the field offset is zero? if AFL_ZSTROFF is set, then this flag is ignored. More... | |
#define | OOF_NOBNOT 0x0400 |
prohibit use of binary not | |
#define | OOF_SPACES 0x0800 |
do not suppress leading spaces; currently works only for floating point numbers | |
#define | OOF_ANYSERIAL 0x1000 |
if enum: select first available serial | |
#define | DEFAULT_INDENT 0xFFFF |
#define | MAKELINE_NONE 0x00 |
#define | MAKELINE_BINPREF 0x01 |
#define | MAKELINE_VOID 0x02 |
#define | MAKELINE_STACK 0x04 |
#define | GH_PRINT_PROC (1 << 0) |
#define | GH_PRINT_ASM (1 << 1) |
#define | GH_PRINT_BYTESEX (1 << 2) |
#define | GH_PRINT_HEADER (1 << 3) |
#define | GH_BYTESEX_HAS_HIGHBYTE (1 << 4) |
#define | GH_PRINT_PROC_AND_ASM (GH_PRINT_PROC | GH_PRINT_ASM) |
#define | GH_PRINT_PROC_ASM_AND_BYTESEX (GH_PRINT_PROC_AND_ASM | GH_PRINT_BYTESEX) |
#define | GH_PRINT_ALL (GH_PRINT_PROC_ASM_AND_BYTESEX | GH_PRINT_HEADER) |
#define | GH_PRINT_ALL_BUT_BYTESEX (GH_PRINT_PROC_AND_ASM | GH_PRINT_HEADER) |
Emulator helpers | |
#define | STKVAR_VALID_SIZE 0x0001 |
x.dtype contains correct variable type (for insns like 'lea' this bit must be off). More... | |
Typedefs | |
typedef uchar | optype_t |
see Operand types | |
Functions | |
CASSERT (sizeof(insn_t)==216) | |
idaman size_t ida_export | get_immvals (uval_t *out, ea_t ea, int n, flags64_t F, insn_t *cache=nullptr) |
Get immediate values at the specified address. More... | |
size_t | get_printable_immvals (uval_t *out, ea_t ea, int n, flags64_t F, insn_t *cache=nullptr) |
Get immediate ready-to-print values at the specified address. More... | |
idaman int ida_export | get_lookback (void) |
Number of instructions to look back. More... | |
Variables | |
const optype_t | o_void = 0 |
No Operand. | |
const optype_t | o_reg = 1 |
General Register (al,ax,es,ds...). More... | |
const optype_t | o_mem = 2 |
Direct Memory Reference (DATA). More... | |
const optype_t | o_phrase = 3 |
Memory Ref [Base Reg + Index Reg]. More... | |
const optype_t | o_displ = 4 |
Memory Ref [Base Reg + Index Reg + Displacement]. More... | |
const optype_t | o_imm = 5 |
Immediate Value. More... | |
const optype_t | o_far = 6 |
Immediate Far Address (CODE). More... | |
const optype_t | o_near = 7 |
Immediate Near Address (CODE). More... | |
const optype_t | o_idpspec0 = 8 |
processor specific type. | |
const optype_t | o_idpspec1 = 9 |
processor specific type. | |
const optype_t | o_idpspec2 = 10 |
processor specific type. | |
const optype_t | o_idpspec3 = 11 |
processor specific type. | |
const optype_t | o_idpspec4 = 12 |
processor specific type. | |
const optype_t | o_idpspec5 = 13 |
processor specific type. More... | |
Address translation | |
The following functions can be used by processor modules to map addresses from one region to another. They are especially useful for microprocessors that map the same memory region to multiple address ranges or use memory bank switching. The user can use the following techniques to describe address translations:
| |
#define | FCBF_CONT 0x00000001 |
don't stop on decoding, or any other kind of error | |
#define | FCBF_ERR_REPL 0x00000002 |
in case of an error, use a CP_REPLCHAR instead of a hex representation of the problematic byte | |
#define | FCBF_FF_LIT 0x00000004 |
in case of codepoints == 0xFF, use it as-is (i.e., LATIN SMALL LETTER Y WITH DIAERESIS). More... | |
#define | FCBF_DELIM 0x00000008 |
add the 'ash'-specified delimiters around the generated data. More... | |
idaman ea_t ida_export | calc_dataseg (const insn_t &insn, int n=-1, int rgnum=-1) |
Get data segment for the instruction operand. More... | |
ea_t | map_data_ea (const insn_t &insn, ea_t addr, int opnum=-1) |
Map a data address. More... | |
ea_t | map_data_ea (const insn_t &insn, const op_t &op) |
idaman ea_t ida_export | map_code_ea (const insn_t &insn, ea_t addr, int opnum) |
Map a code address. More... | |
ea_t | map_code_ea (const insn_t &insn, const op_t &op) |
ea_t | map_ea (const insn_t &insn, const op_t &op, bool iscode) |
ea_t | map_ea (const insn_t &insn, ea_t addr, int opnum, bool iscode) |
idaman outctx_base_t *ida_export | create_outctx (ea_t ea, flags64_t F=0, int suspop=0) |
Create a new output context. More... | |
idaman bool ida_export | print_insn_mnem (qstring *out, ea_t ea) |
Print instruction mnemonics. More... | |
idaman bool ida_export | format_charlit (qstring *out, const uchar **ptr, size_t size, uint32 flags=0, int encidx=0) |
Format character literal. More... | |
idaman bool ida_export | print_fpval (char *buf, size_t bufsize, const void *v, int size) |
Print a floating point value. More... | |
idaman flags64_t ida_export | get_dtype_flag (op_dtype_t dtype) |
Get flags for op_t::dtype field. | |
idaman size_t ida_export | get_dtype_size (op_dtype_t dtype) |
Get size of op_t::dtype field. | |
idaman op_dtype_t ida_export | get_dtype_by_size (asize_t size) |
Get op_t::dtype from size. | |
bool | is_floating_dtype (op_dtype_t dtype) |
Is a floating type operand? | |
idaman int ida_export | create_insn (ea_t ea, insn_t *out=nullptr) |
Create an instruction at the specified address. More... | |
idaman int ida_export | decode_insn (insn_t *out, ea_t ea) |
Analyze the specified address and fill 'out'. More... | |
bool | can_decode (ea_t ea) |
Can the bytes at address 'ea' be decoded as instruction? More... | |
idaman bool ida_export | print_operand (qstring *out, ea_t ea, int n, int getn_flags=0, struct printop_t *newtype=nullptr) |
Generate text representation for operand #n. More... | |
idaman ea_t ida_export | decode_prev_insn (insn_t *out, ea_t ea) |
Decode previous instruction if it exists, fill 'out'. More... | |
idaman ea_t ida_export | decode_preceding_insn (insn_t *out, ea_t ea, bool *p_farref=nullptr) |
Decode preceding instruction in the execution flow. More... | |
idaman bool ida_export | construct_macro2 (macro_constructor_t *_this, insn_t *insn, bool enable) |
idaman int ida_export | get_spoiled_reg (const insn_t &insn, const uint32 *regs, size_t n) |
Does the instruction spoil any register from 'regs'? More... | |
Detailed Description
Functions that deal with the disassembling of program instructions.
There are 2 kinds of functions:
- functions that are called from the kernel to disassemble an instruction. These functions call IDP module for it.
- functions that are called from IDP module to disassemble an instruction. We will call them 'helper functions'.
Disassembly of an instruction is made in three steps:
- analysis: ana.cpp
- emulation: emu.cpp
- conversion to text: out.cpp
The kernel calls the IDP module to perform these steps. At first, the kernel always calls the analysis. The analyzer must decode the instruction and fill the insn_t instance that it receives through its callback. It must not change anything in the database.
The second step, the emulation, is called for each instruction. This step must make necessary changes to the database, plan analysis of subsequent instructions, track register values, memory contents, etc. Please keep in mind that the kernel may call the emulation step for any address in the program - there is no ordering of addresses. Usually, the emulation is called for consecutive addresses but this is not guaranteed.
The last step, conversion to text, is called each time an instruction is displayed on the screen. The kernel will always call the analysis step before calling the text conversion step. The emulation and the text conversion steps should use the information stored in the insn_t instance they receive. They should not access the bytes of the instruction and decode it again - this should only be done in the analysis step.
Macro Definition Documentation
◆ OF_NO_BASE_DISP
#define OF_NO_BASE_DISP 0x80 |
base displacement doesn't exist.
meaningful only for o_displ type. if set, base displacement (op_t::addr) doesn't exist.
◆ OF_OUTER_DISP
#define OF_OUTER_DISP 0x40 |
outer displacement exists.
meaningful only for o_displ type. if set, outer displacement (op_t::value) exists.
◆ PACK_FORM_DEF
#define PACK_FORM_DEF 0x20 |
packed factor defined.
(!o_reg + dt_packreal)
◆ DECLARE_INSN_HELPERS
#define DECLARE_INSN_HELPERS | ( | decl | ) |
◆ STKVAR_VALID_SIZE
#define STKVAR_VALID_SIZE 0x0001 |
x.dtype contains correct variable type (for insns like 'lea' this bit must be off).
In general, dr_O references do not allow the variable size to be determined.
◆ OOF_ZSTROFF
#define OOF_ZSTROFF 0x0200 |
meaningful only if is_stroff(uFlag); append a struct field name if the field offset is zero? if AFL_ZSTROFF is set, then this flag is ignored.
◆ FCBF_FF_LIT
#define FCBF_FF_LIT 0x00000004 |
in case of codepoints == 0xFF, use it as-is (i.e., LATIN SMALL LETTER Y WITH DIAERESIS).
If both this and FCBF_ERR_REPL are specified, this will take precedence.
◆ FCBF_DELIM
#define FCBF_DELIM 0x00000008 |
add the 'ash'-specified delimiters around the generated data.
Note: if those are not defined and the INFFL_ALLASM is not set, format_charlit() will return an error
Function Documentation
◆ get_immvals()
idaman size_t ida_export get_immvals | ( | uval_t * | out, |
ea_t | ea, | ||
int | n, | ||
flags64_t | F, | ||
insn_t * | cache = nullptr |
||
) |
Get immediate values at the specified address.
This function decodes instruction at the specified address or inspects the data item. It finds immediate values and copies them to 'out'. This function will store the original value of the operands in 'out', unless the last bits of 'F' are "...0 11111111", in which case the transformed values (as needed for printing) will be stored instead.
- Parameters
-
out array of immediate values (at least 2*UA_MAXOP elements) ea address to analyze n number of operand (0..UA_MAXOP-1), -1 means all operands F flags for the specified address cache optional already decoded instruction or buffer for it. if the cache does not contain the decoded instruction, it will be updated (useful if we call get_immvals for the same address multiple times)
- Returns
- number of immediate values (0..2*UA_MAXOP)
◆ get_printable_immvals()
|
inline |
Get immediate ready-to-print values at the specified address.
- Parameters
-
out array of immediate values (at least 2*UA_MAXOP elements) ea address to analyze n number of operand (0..UA_MAXOP-1), -1 means all operands F flags for the specified address cache optional already decoded instruction or buffer for it. if the cache does not contain the decoded instruction, it will be updated (useful if we call get_immvals for the same address multiple times)
- Returns
- number of immediate values (0..2*UA_MAXOP)
◆ get_lookback()
idaman int ida_export get_lookback | ( | void | ) |
Number of instructions to look back.
This value is not used by the kernel. It may be specified in ida.cfg: LOOKBACK = <number>. The IDP module may use it as it sees fit (the TMS module uses it).
◆ calc_dataseg()
idaman ea_t ida_export calc_dataseg | ( | const insn_t & | insn, |
int | n = -1 , |
||
int | rgnum = -1 |
||
) |
Get data segment for the instruction operand.
'n' (the operand number) and 'rgnum' are meaningful only if the processor has segment registers.
◆ map_data_ea()
|
inline |
Map a data address.
- Parameters
-
insn the current instruction addr the referenced address to map opnum operand number
◆ map_code_ea()
idaman ea_t ida_export map_code_ea | ( | const insn_t & | insn, |
ea_t | addr, | ||
int | opnum | ||
) |
Map a code address.
This function takes into account the segment translations.
- Parameters
-
insn the current instruction addr the referenced address to map opnum operand number
◆ create_outctx()
idaman outctx_base_t *ida_export create_outctx | ( | ea_t | ea, |
flags64_t | F = 0 , |
||
int | suspop = 0 |
||
) |
Create a new output context.
To delete it, just use "delete pctx"
◆ print_insn_mnem()
idaman bool ida_export print_insn_mnem | ( | qstring * | out, |
ea_t | ea | ||
) |
Print instruction mnemonics.
- Parameters
-
out output buffer ea linear address of the instruction
- Returns
- success
◆ format_charlit()
idaman bool ida_export format_charlit | ( | qstring * | out, |
const uchar ** | ptr, | ||
size_t | size, | ||
uint32 | flags = 0 , |
||
int | encidx = 0 |
||
) |
Format character literal.
Try and format 'size' bytes pointed to by '*ptr', as literal characters, using the 'encidx' encoding, and with the specified 'flags' directives.
By default, format_charlit() will fail and return an error, in any of the following cases:
- a byte cannot be decoded using the specified (or default) encoding
- a codepoint is < 0x20 (i.e., ' ')
- a codepoint is present in 'ash.esccodes'
- a codepoint is 0xFF
- a codepoint is >= 0x80, and AS_NHIAS was specified in ash.flag
The function can be told to keep going instead of bailing out, for any of these situations, by using one of the FCBF_*_OK flags.
If the function is told to proceed on a specific error, by default it will format the byte as a C-encoded byte value (i.e., '\xNN'), unless the corresponding FCBF_*_REPL flag is passed, in which case the problematic byte/codepoint will be replaced by the Unicode replacement character in the output.
- Parameters
-
out output buffer (can be nullptr) ptr pointer to pointer to bytes to print (will be advanced by the number of bytes that were successfully printed) size size of input value in bytes flags format flags encidx the 1 byte-per-unit encoding to use (or 0 to use the default 1 BPU encoding)
- Returns
- success
◆ print_fpval()
idaman bool ida_export print_fpval | ( | char * | buf, |
size_t | bufsize, | ||
const void * | v, | ||
int | size | ||
) |
Print a floating point value.
- Parameters
-
buf output buffer. may be nullptr bufsize size of the output buffer v floating point value in processor native format size size of the value in bytes
- Returns
- true ok
- false can't represent as floating point number
◆ create_insn()
idaman int ida_export create_insn | ( | ea_t | ea, |
insn_t * | out = nullptr |
||
) |
Create an instruction at the specified address.
This function checks if an instruction is present at the specified address and will try to create one if there is none. It will fail if there is a data item or other items hindering the creation of the new instruction. This function will also fill the 'out' structure.
- Parameters
-
ea linear address out the resulting instruction
- Returns
- the length of the instruction or 0
◆ decode_insn()
idaman int ida_export decode_insn | ( | insn_t * | out, |
ea_t | ea | ||
) |
Analyze the specified address and fill 'out'.
This function does not modify the database. It just tries to interpret the specified address as an instruction and fills the 'out' structure.
- Parameters
-
out the resulting instruction ea linear address
- Returns
- the length of the (possible) instruction or 0
◆ can_decode()
|
inline |
Can the bytes at address 'ea' be decoded as instruction?
- Parameters
-
ea linear address
- Returns
- whether or not the contents at that address could be a valid instruction
◆ print_operand()
idaman bool ida_export print_operand | ( | qstring * | out, |
ea_t | ea, | ||
int | n, | ||
int | getn_flags = 0 , |
||
struct printop_t * | newtype = nullptr |
||
) |
Generate text representation for operand #n.
This function will generate the text representation of the specified operand (includes color codes.)
- Parameters
-
out output buffer ea the item address (instruction or data) n operand number (0,1,2...). meaningful only for instructions getn_flags Name expression flags Currently only GETN_NODUMMY is accepted. newtype if specified, print the operand using the specified type
- Returns
- success
◆ decode_prev_insn()
idaman ea_t ida_export decode_prev_insn | ( | insn_t * | out, |
ea_t | ea | ||
) |
Decode previous instruction if it exists, fill 'out'.
- Parameters
-
out the resulting instruction ea the address to decode the previous instruction from
- Returns
- the previous instruction address (BADADDR-no such insn)
◆ decode_preceding_insn()
idaman ea_t ida_export decode_preceding_insn | ( | insn_t * | out, |
ea_t | ea, | ||
bool * | p_farref = nullptr |
||
) |
Decode preceding instruction in the execution flow.
Prefer far xrefs from addresses < the current to ordinary flows.
- Parameters
-
out the resulting instruction ea the address to decode the preceding instruction from p_farref will contain 'true' if followed an xref, false otherwise.
- Returns
- the preceding instruction address (BADADDR-no such insn) and 'out'.
◆ get_spoiled_reg()
Does the instruction spoil any register from 'regs'?
This function checks the Instruction feature bits flags from the instructions array. Only o_reg operand types are consulted.
- Parameters
-
insn the instruction regs array with register indexes n size of 'regs'
- Returns
- index in the 'regs' array or -1
Variable Documentation
◆ o_reg
o_reg = 1 |
General Register (al,ax,es,ds...).
The register number should be stored in op_t::reg.
All processor registers, including special registers, can be represented by this operand type.
◆ o_mem
o_mem = 2 |
Direct Memory Reference (DATA).
A direct memory data reference whose target address is known at compilation time.
The target virtual address is stored in op_t::addr and the full address is calculated as to_ea( insn_t::cs, op_t::addr ). For the processors with complex memory organization the final address can be calculated using other segment registers. For flat memories, op_t::addr is the final address and insn_t::cs is usually equal to zero. In any case, the address within the segment should be stored in op_t::addr.
◆ o_phrase
o_phrase = 3 |
Memory Ref [Base Reg + Index Reg].
A memory reference using register contents.
Indexed, register based, and other addressing modes can be represented with the operand type. This addressing mode cannot contain immediate values (use o_displ instead). The phrase number should be stored in op_t::phrase. To denote the pre-increment and similar features please use additional operand fields like op_t::specflag... Usually op_t::phrase contains the register number and additional information is stored in op_t::specflags... Please note that this operand type cannot contain immediate values (except the scaling coefficients).
◆ o_displ
o_displ = 4 |
Memory Ref [Base Reg + Index Reg + Displacement].
A memory reference using register contents with displacement.
The displacement should be stored in the op_t::addr field. The rest of the information is stored the same way as in o_phrase.
◆ o_imm
o_imm = 5 |
Immediate Value.
Any operand consisting of only a number is represented by this operand type.
The value should be stored in op_t::value. You may sign extend short (1-2 byte) values. In any case don't forget to specify op_t::dtype (should be set for all operand types).
◆ o_far
o_far = 6 |
Immediate Far Address (CODE).
If the current processor has a special addressing mode for inter-segment references, then this operand type should be used instead of o_near.
If you want, you may use PR_CHK_XREF in processor_t::flag to disable inter-segment calls if o_near operand type is used. Currently only IBM PC uses this flag.
◆ o_near
o_near = 7 |
Immediate Near Address (CODE).
A direct memory code reference whose target address is known at the compilation time.
The target virtual address is stored in op_t::addr and the final address is always to_ea(insn_t::cs, op_t::addr). Usually this operand type is used for the branches and calls whose target address is known. If the current processor has 2 different types of references for inter-segment and intra-segment references, then this should be used only for intra-segment references.
If the above operand types do not cover all possible addressing modes, then use o_idpspec... operand types.
◆ o_idpspec5
const optype_t o_idpspec5 = 13 |
processor specific type.
(there can be more processor specific types)
Generated by