Safe Haskell | None |
---|---|
Language | Haskell2010 |
Data.Macaw.Discovery
Description
This module discovers the Functions and their internal Block CFG in target binaries.
Synopsis
- data DiscoveryState arch
- emptyDiscoveryState :: Memory (ArchAddrWidth arch) -> AddrSymMap (ArchAddrWidth arch) -> ArchitectureInfo arch -> DiscoveryState arch
- trustedFunctionEntryPoints :: forall arch f. Functor f => (Map (ArchSegmentOff arch) NoReturnFunStatus -> f (Map (ArchSegmentOff arch) NoReturnFunStatus)) -> DiscoveryState arch -> f (DiscoveryState arch)
- exploreFnPred :: forall arch f. Functor f => ((ArchSegmentOff arch -> Bool) -> f (ArchSegmentOff arch -> Bool)) -> DiscoveryState arch -> f (DiscoveryState arch)
- type AddrSymMap (w :: Nat) = Map (MemSegmentOff w) ByteString
- funInfo :: forall arch f. Functor f => (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)) -> f (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))) -> DiscoveryState arch -> f (DiscoveryState arch)
- exploredFunctions :: DiscoveryState arch -> [Some (DiscoveryFunInfo arch)]
- ppDiscoveryStateBlocks :: DiscoveryState arch -> Doc ann
- unexploredFunctions :: forall arch f. Functor f => (UnexploredFunctionMap arch -> f (UnexploredFunctionMap arch)) -> DiscoveryState arch -> f (DiscoveryState arch)
- cfgFromAddrs :: ArchitectureInfo arch -> Memory (ArchAddrWidth arch) -> AddrSymMap (ArchAddrWidth arch) -> [ArchSegmentOff arch] -> [(ArchSegmentOff arch, ArchSegmentOff arch)] -> DiscoveryState arch
- cfgFromAddrsAndState :: DiscoveryState arch -> [ArchSegmentOff arch] -> [(ArchSegmentOff arch, ArchSegmentOff arch)] -> DiscoveryState arch
- markAddrAsFunction :: FunctionExploreReason (ArchAddrWidth arch) -> ArchSegmentOff arch -> DiscoveryState arch -> DiscoveryState arch
- markAddrsAsFunction :: Foldable t => FunctionExploreReason (ArchAddrWidth arch) -> t (ArchSegmentOff arch) -> DiscoveryState arch -> DiscoveryState arch
- data FunctionExploreReason (w :: Nat)
- = PossibleWriteEntry !(MemSegmentOff w)
- | CallTarget !(MemSegmentOff w)
- | InitAddr
- | CodePointerInMem !(MemSegmentOff w)
- | UserRequest
- ppFunReason :: forall (w :: Nat). FunctionExploreReason w -> String
- data BlockExploreReason (w :: Nat)
- = NextIP !(MemSegmentOff w)
- | FunctionEntryPoint
- | SplitAt !(MemSegmentOff w) !(BlockExploreReason w)
- analyzeFunction :: ArchSegmentOff arch -> FunctionExploreReason (ArchAddrWidth arch) -> DiscoveryState arch -> (DiscoveryState arch, Some (DiscoveryFunInfo arch))
- analyzeDiscoveredFunctions :: DiscoveryState arch -> DiscoveryState arch
- addDiscoveredFunctionBlockTargets :: DiscoveryState arch -> DiscoveryFunInfo arch ids -> [(ArchSegmentOff arch, [ArchSegmentOff arch])] -> DiscoveryState arch
- discoverFunction :: DiscoveryOptions -> ArchSegmentOff arch -> FunctionExploreReason (ArchAddrWidth arch) -> DiscoveryState arch -> [(ArchSegmentOff arch, [ArchSegmentOff arch])] -> IncComp (DiscoveryEvent arch) (DiscoveryState arch, Some (DiscoveryFunInfo arch))
- completeDiscoveryState :: DiscoveryState arch -> DiscoveryOptions -> IO (DiscoveryState arch)
- incCompleteDiscovery :: DiscoveryState arch -> DiscoveryOptions -> IncCompM (DiscoveryEvent arch) r (DiscoveryState arch)
- data DiscoveryOptions = DiscoveryOptions {}
- defaultDiscoveryOptions :: DiscoveryOptions
- data DiscoveryEvent arch
- = ReportAnalyzeFunction !(ArchSegmentOff arch)
- | ReportAnalyzeFunctionDone (DiscoveryFunInfo arch ids)
- | ReportIdentifyFunction !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(FunctionExploreReason (ArchAddrWidth arch))
- | ReportAnalyzeBlock !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(Maybe (BlockExploreReason (ArchAddrWidth arch)))
- logDiscoveryEvent :: MemWidth (ArchAddrWidth arch) => AddrSymMap (ArchAddrWidth arch) -> DiscoveryEvent arch -> IO ()
- data DiscoveryFunInfo arch ids
- discoveredFunAddr :: DiscoveryFunInfo arch ids -> ArchSegmentOff arch
- discoveredFunName :: MemWidth (ArchAddrWidth arch) => DiscoveryFunInfo arch ids -> ByteString
- discoveredFunSymbol :: DiscoveryFunInfo arch ids -> Maybe ByteString
- discoveredClassifyFailureResolutions :: DiscoveryFunInfo arch ids -> [(ArchSegmentOff arch, [ArchSegmentOff arch])]
- parsedBlocks :: forall arch ids f. Functor f => (Map (ArchSegmentOff arch) (ParsedBlock arch ids) -> f (Map (ArchSegmentOff arch) (ParsedBlock arch ids))) -> DiscoveryFunInfo arch ids -> f (DiscoveryFunInfo arch ids)
- data NoReturnFunStatus
- data ParsedBlock arch ids
- pblockAddr :: ParsedBlock arch ids -> ArchSegmentOff arch
- blockSize :: ParsedBlock arch ids -> Int
- blockReason :: ParsedBlock arch ids -> BlockExploreReason (ArchAddrWidth arch)
- blockAbstractState :: ParsedBlock arch ids -> AbsBlockState (ArchReg arch)
- pblockStmts :: ParsedBlock arch ids -> [Stmt arch ids]
- pblockTermStmt :: ParsedBlock arch ids -> ParsedTermStmt arch ids
- data ParsedTermStmt arch ids
- = ParsedCall !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch))
- | PLTStub !(MapF (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) !VersionedSymbol
- | ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch)
- | ParsedBranch !(RegState (ArchReg arch) (Value arch ids)) !(Value arch ids BoolType) !(ArchSegmentOff arch) !(ArchSegmentOff arch)
- | ParsedLookupTable !(JumpTableLayout arch) !(RegState (ArchReg arch) (Value arch ids)) !(ArchAddrValue arch ids) !(Vector (ArchSegmentOff arch))
- | ParsedReturn !(RegState (ArchReg arch) (Value arch ids))
- | ParsedArchTermStmt !(ArchTermStmt arch (Value arch ids)) !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch))
- | ParsedTranslateError !Text
- | ClassifyFailure !(RegState (ArchReg arch) (Value arch ids)) [String]
- data JumpTableLayout arch
- jtlBackingAddr :: JumpTableLayout arch -> ArchSegmentOff arch
- jtlBackingSize :: JumpTableLayout arch -> Word64
- type BlockClassifier arch ids = BlockClassifierM arch ids (ParsedContents arch ids)
- defaultClassifier :: BlockClassifier arch ids
- branchClassifier :: BlockClassifier arch ids
- callClassifier :: BlockClassifier arch ids
- returnClassifier :: BlockClassifier arch ids
- directJumpClassifier :: BlockClassifier arch ids
- noreturnCallClassifier :: BlockClassifier arch ids
- tailCallClassifier :: BlockClassifier arch ids
- pltStubClassifier :: BlockClassifier arch ids
- jumpTableClassifier :: BlockClassifier arch ids
- eliminateDeadStmts :: ArchitectureInfo arch -> Block arch ids -> Block arch ids
- type ArchAddrWidth arch = RegAddrWidth (ArchReg arch)
DiscoveryInfo
data DiscoveryState arch Source #
Information discovered about the program
Arguments
:: Memory (ArchAddrWidth arch) | State of memory |
-> AddrSymMap (ArchAddrWidth arch) | Map from addresses to their symbol name (if any) |
-> ArchitectureInfo arch | architecture/OS specific information |
-> DiscoveryState arch |
Create empty discovery information.
trustedFunctionEntryPoints :: forall arch f. Functor f => (Map (ArchSegmentOff arch) NoReturnFunStatus -> f (Map (ArchSegmentOff arch) NoReturnFunStatus)) -> DiscoveryState arch -> f (DiscoveryState arch) Source #
Retrieves functions that are trusted entry points.
exploreFnPred :: forall arch f. Functor f => ((ArchSegmentOff arch -> Bool) -> f (ArchSegmentOff arch -> Bool)) -> DiscoveryState arch -> f (DiscoveryState arch) Source #
type AddrSymMap (w :: Nat) = Map (MemSegmentOff w) ByteString Source #
Maps code addresses to the associated symbol name if any.
funInfo :: forall arch f. Functor f => (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)) -> f (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))) -> DiscoveryState arch -> f (DiscoveryState arch) Source #
Get information for specific functions
exploredFunctions :: DiscoveryState arch -> [Some (DiscoveryFunInfo arch)] Source #
Return list of all functions discovered so far.
ppDiscoveryStateBlocks :: DiscoveryState arch -> Doc ann Source #
unexploredFunctions :: forall arch f. Functor f => (UnexploredFunctionMap arch -> f (UnexploredFunctionMap arch)) -> DiscoveryState arch -> f (DiscoveryState arch) Source #
List of functions to explore next.
Arguments
:: ArchitectureInfo arch | Architecture-specific information needed for doing control-flow exploration. |
-> Memory (ArchAddrWidth arch) | Memory to use when decoding instructions. |
-> AddrSymMap (ArchAddrWidth arch) | Map from addresses to the associated symbol name. |
-> [ArchSegmentOff arch] | Initial function entry points. |
-> [(ArchSegmentOff arch, ArchSegmentOff arch)] | Function entry points in memory to be explored after exploring function entry points. Each entry contains an address and the value stored in it. |
-> DiscoveryState arch |
Construct an empty discovery state and populate it by exploring from a given set of function entry points
Arguments
:: DiscoveryState arch | |
-> [ArchSegmentOff arch] | Initial function entry points. |
-> [(ArchSegmentOff arch, ArchSegmentOff arch)] | Function entry points in memory to be explored after exploring function entry points. Each entry contains an address and the value stored in it. |
-> DiscoveryState arch |
Expand an initial discovery state by exploring from a given set of function entry points.
Arguments
:: FunctionExploreReason (ArchAddrWidth arch) | Information about why the code address was discovered Used for debugging |
-> ArchSegmentOff arch | |
-> DiscoveryState arch | |
-> DiscoveryState arch |
Mark a escaped code pointer as a function entry.
markAddrsAsFunction :: Foldable t => FunctionExploreReason (ArchAddrWidth arch) -> t (ArchSegmentOff arch) -> DiscoveryState arch -> DiscoveryState arch Source #
Mark a list of addresses as function entries with the same reason.
data FunctionExploreReason (w :: Nat) Source #
This describes why we started exploring a given function.
Constructors
PossibleWriteEntry !(MemSegmentOff w) | Exploring because code at the given block writes it to memory. |
CallTarget !(MemSegmentOff w) | Exploring because address terminates with a call that jumps here. |
InitAddr | Identified as an entry point from initial information |
CodePointerInMem !(MemSegmentOff w) | A code pointer that was stored at the given address. |
UserRequest | The user requested that we analyze this address as a function. |
Instances
MemWidth w => Show (FunctionExploreReason w) Source # | |
Defined in Data.Macaw.Discovery.State Methods showsPrec :: Int -> FunctionExploreReason w -> ShowS # show :: FunctionExploreReason w -> String # showList :: [FunctionExploreReason w] -> ShowS # | |
Eq (FunctionExploreReason w) Source # | |
Defined in Data.Macaw.Discovery.State Methods (==) :: FunctionExploreReason w -> FunctionExploreReason w -> Bool # (/=) :: FunctionExploreReason w -> FunctionExploreReason w -> Bool # |
ppFunReason :: forall (w :: Nat). FunctionExploreReason w -> String Source #
Print exploration reason.
data BlockExploreReason (w :: Nat) Source #
This describes why we are exploring a given block within a function.
Constructors
NextIP !(MemSegmentOff w) | Exploring because the given block jumps here. |
FunctionEntryPoint | Identified as an entry point from initial information |
SplitAt !(MemSegmentOff w) !(BlockExploreReason w) | Added because the address split this block after it had been disassembled. Also includes the reason we thought the block should be there before we split it. |
Instances
MemWidth w => Show (BlockExploreReason w) Source # | |
Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> BlockExploreReason w -> ShowS # show :: BlockExploreReason w -> String # showList :: [BlockExploreReason w] -> ShowS # | |
Eq (BlockExploreReason w) Source # | |
Defined in Data.Macaw.Discovery.ParsedContents Methods (==) :: BlockExploreReason w -> BlockExploreReason w -> Bool # (/=) :: BlockExploreReason w -> BlockExploreReason w -> Bool # | |
MemWidth w => Pretty (BlockExploreReason w) Source # | |
Defined in Data.Macaw.Discovery.ParsedContents |
Arguments
:: ArchSegmentOff arch | The address to explore |
-> FunctionExploreReason (ArchAddrWidth arch) | Reason to provide for why we are analyzing this function This can be used to figure out why we decided a given address identified a code location. |
-> DiscoveryState arch | The current binary information. |
-> (DiscoveryState arch, Some (DiscoveryFunInfo arch)) |
This analyzes the function at a given address, possibly discovering new candidates.
This returns the updated state and the discovered control flow graph for this function.
analyzeDiscoveredFunctions :: DiscoveryState arch -> DiscoveryState arch Source #
Analyze addresses that we have marked as functions, but not yet analyzed to identify basic blocks, and discover new function candidates until we have analyzed all function entry points.
If an exploreFnPred function exists in the DiscoveryState, then do not analyze unexploredFunctions at addresses that do not satisfy this predicate.
addDiscoveredFunctionBlockTargets Source #
Arguments
:: DiscoveryState arch | |
-> DiscoveryFunInfo arch ids | The function for which we have learned additional information |
-> [(ArchSegmentOff arch, [ArchSegmentOff arch])] | |
-> DiscoveryState arch |
Extend the analysis of a previously analyzed function with new information about the transfers for a block in that function. The assumption is that the block in question previously had an unknown transfer state condition, and that the new transfer addresses were discovered by other means (e.g. SMT analysis). The block in question's terminal statement will be replaced by an ITE (from IP -> new addresses) and the new addresses will be added to the frontier for additional discovery.
Arguments
:: DiscoveryOptions | Options controlling discovery |
-> ArchSegmentOff arch | The address to explore |
-> FunctionExploreReason (ArchAddrWidth arch) | Reason to provide for why we are analyzing this function This can be used to figure out why we decided a given address identified a code location. |
-> DiscoveryState arch | The current binary information. |
-> [(ArchSegmentOff arch, [ArchSegmentOff arch])] | Additional identified intraprocedural jump targets The pairs are: (address of the block jumped from, jump targets) |
-> IncComp (DiscoveryEvent arch) (DiscoveryState arch, Some (DiscoveryFunInfo arch)) |
Run tfunction discovery to explore blocks.
Top level utilities
completeDiscoveryState Source #
Arguments
:: DiscoveryState arch | |
-> DiscoveryOptions | Options controlling discovery |
-> IO (DiscoveryState arch) |
Explore until we have found all functions we can.
This function is intended to make it easy to explore functions, and
can be controlled via DiscoveryOptions
.
Arguments
:: DiscoveryState arch | |
-> DiscoveryOptions | Options controlling discovery |
-> IncCompM (DiscoveryEvent arch) r (DiscoveryState arch) |
Explore until we have found all functions we can.
This function is a version of completeDiscoveryState that uses the new incremental computation monad rather than IO.
data DiscoveryOptions Source #
Options controlling completeDiscoveryState
.
Constructors
DiscoveryOptions | |
Fields
|
defaultDiscoveryOptions :: DiscoveryOptions Source #
Some default options
data DiscoveryEvent arch Source #
Events reported by discovery process.
Constructors
ReportAnalyzeFunction !(ArchSegmentOff arch) | Report that discovery has now started analyzing the function at the given offset. |
ReportAnalyzeFunctionDone (DiscoveryFunInfo arch ids) |
|
ReportIdentifyFunction !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(FunctionExploreReason (ArchAddrWidth arch)) |
|
ReportAnalyzeBlock !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(Maybe (BlockExploreReason (ArchAddrWidth arch))) |
N.B. This event is only emitted when |
logDiscoveryEvent :: MemWidth (ArchAddrWidth arch) => AddrSymMap (ArchAddrWidth arch) -> DiscoveryEvent arch -> IO () Source #
DiscoveryFunInfo
data DiscoveryFunInfo arch ids Source #
Information discovered about a particular function
Instances
ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) Source # | |
Defined in Data.Macaw.Discovery.State Methods pretty :: DiscoveryFunInfo arch ids -> Doc ann prettyList :: [DiscoveryFunInfo arch ids] -> Doc ann |
discoveredFunAddr :: DiscoveryFunInfo arch ids -> ArchSegmentOff arch Source #
Address of function entry block.
discoveredFunName :: MemWidth (ArchAddrWidth arch) => DiscoveryFunInfo arch ids -> ByteString Source #
Returns the "name" associated with a function.
This is either the symbol or the address.
discoveredFunSymbol :: DiscoveryFunInfo arch ids -> Maybe ByteString Source #
A symbol associated with the definition.
discoveredClassifyFailureResolutions :: DiscoveryFunInfo arch ids -> [(ArchSegmentOff arch, [ArchSegmentOff arch])] Source #
A side mapping that records jump targets for
ClassifyFailure
block terminators that have been
gleaned from an external source. When interpreting
the function, this map can be used to complete the
control flow of functions with ClassifyFailure
s.
parsedBlocks :: forall arch ids f. Functor f => (Map (ArchSegmentOff arch) (ParsedBlock arch ids) -> f (Map (ArchSegmentOff arch) (ParsedBlock arch ids))) -> DiscoveryFunInfo arch ids -> f (DiscoveryFunInfo arch ids) Source #
data NoReturnFunStatus Source #
Flags whether a function is labeled no return or not.
Constructors
NoReturnFun | Function labeled no return |
MayReturnFun | Function may retun |
Instances
Show NoReturnFunStatus Source # | |
Defined in Data.Macaw.Architecture.Info Methods showsPrec :: Int -> NoReturnFunStatus -> ShowS # show :: NoReturnFunStatus -> String # showList :: [NoReturnFunStatus] -> ShowS # | |
Pretty NoReturnFunStatus Source # | |
Defined in Data.Macaw.Architecture.Info |
Parsed block
data ParsedBlock arch ids Source #
A contiguous region of instructions in memory.
Instances
(ArchConstraints arch, Show (ArchBlockPrecond arch)) => Show (ParsedBlock arch ids) Source # | |
Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> ParsedBlock arch ids -> ShowS # show :: ParsedBlock arch ids -> String # showList :: [ParsedBlock arch ids] -> ShowS # | |
ArchConstraints arch => Pretty (ParsedBlock arch ids) Source # | |
Defined in Data.Macaw.Discovery.ParsedContents |
pblockAddr :: ParsedBlock arch ids -> ArchSegmentOff arch Source #
Address of region
blockSize :: ParsedBlock arch ids -> Int Source #
The size of the region of memory covered by this.
blockReason :: ParsedBlock arch ids -> BlockExploreReason (ArchAddrWidth arch) Source #
Reason that we marked this address as the start of a basic block.
blockAbstractState :: ParsedBlock arch ids -> AbsBlockState (ArchReg arch) Source #
Abstract state prior to the execution of this region.
pblockStmts :: ParsedBlock arch ids -> [Stmt arch ids] Source #
The non-terminal statements in the block
pblockTermStmt :: ParsedBlock arch ids -> ParsedTermStmt arch ids Source #
The terminal statement in the block.
data ParsedTermStmt arch ids Source #
This term statement is used to describe higher level expressions of how block ending with a a FetchAndExecute statement should be interpreted.
Constructors
ParsedCall !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch)) | A call with the current register values and location to return
to or Note that the semantics of this instruction assume that the program has already stored the return address in the appropriate location (which depends on the ABI). For example on X86_64 this is the top of the stack while on ARM this is the link register. |
PLTStub !(MapF (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) !VersionedSymbol |
This is a special case of a tail call. It has been added separately because it occurs frequently in dynamically linked code, and we can use this to recognize PLT stubs. The first argument maps registers that were changed to their
value. Other registers have the initial value. This should
typically be empty on The second argument is the address in the .GOT that the target function is stored at. The PLT stub sets the PC to the address stored here. The third and fourth arguments are used to resolve where the function should jump to. |
ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) | A jump to an explicit address within a function. |
ParsedBranch !(RegState (ArchReg arch) (Value arch ids)) !(Value arch ids BoolType) !(ArchSegmentOff arch) !(ArchSegmentOff arch) |
The value assigned to the IP in |
ParsedLookupTable !(JumpTableLayout arch) !(RegState (ArchReg arch) (Value arch ids)) !(ArchAddrValue arch ids) !(Vector (ArchSegmentOff arch)) | A lookup table that branches to one of a vector of addresses. The registers store the registers, the value contains the index to jump to, and the possible addresses as a table. If the index (when interpreted as an unsigned number) is larger than the number of entries in the vector, then the result is undefined. |
ParsedReturn !(RegState (ArchReg arch) (Value arch ids)) | A return with the given registers. |
ParsedArchTermStmt !(ArchTermStmt arch (Value arch ids)) !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch)) | An architecture-specific statement with the registers prior to execution, and the given next control flow address. |
ParsedTranslateError !Text | An error occured in translating the block |
ClassifyFailure !(RegState (ArchReg arch) (Value arch ids)) [String] | The classifier failed to identity the block. Includes registers with list of reasons for each classifer to fail |
Instances
ArchConstraints arch => Show (ParsedTermStmt arch ids) Source # | |
Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> ParsedTermStmt arch ids -> ShowS # show :: ParsedTermStmt arch ids -> String # showList :: [ParsedTermStmt arch ids] -> ShowS # |
data JumpTableLayout arch Source #
This describes the layout of a jump table. Beware: on some architectures, after reading from the jump table, the resulting addresses must be aligned. See the IPAlignment class.
Instances
RegisterInfo (ArchReg arch) => Show (JumpTableLayout arch) Source # | |
Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> JumpTableLayout arch -> ShowS # show :: JumpTableLayout arch -> String # showList :: [JumpTableLayout arch] -> ShowS # |
jtlBackingAddr :: JumpTableLayout arch -> ArchSegmentOff arch Source #
Return base address of table storing contents of jump table.
jtlBackingSize :: JumpTableLayout arch -> Word64 Source #
Returns the number of bytes in the layout
Block classifiers
type BlockClassifier arch ids = BlockClassifierM arch ids (ParsedContents arch ids) Source #
defaultClassifier :: BlockClassifier arch ids Source #
This is a good default set of block classifiers
Block classifiers determine how the code discovery engine interprets the final instruction in each block. The individual classifiers are also exported so that architecture backends (or even end users) can provide their own classifiers.
See Classifier
for the primitives necessary to define
new classifiers (e.g., classifiers that can produce architecture-specific
terminators).
Note that classifiers are an instance of Alternative
, so the
order they are applied in matters. While several are non-overlapping, at
least the order that the direct jump and tail call classifiers are applied in
matters, as they look substantially the same to the analysis. Being too eager
to flag jumps as tail calls classifies the jump targets as known function
entry points, which can interfere with other classifiers later in the
function.
branchClassifier :: BlockClassifier arch ids Source #
The classifier for conditional and unconditional branches
Note that this classifier can convert a conditional branch to an unconditional branch if (and only if) the condition is syntactically true or false after constant propagation. It never attempts sophisticated path trimming.
callClassifier :: BlockClassifier arch ids Source #
Use the architecture-specific callback to check if last statement was a call.
Note that in some cases the call is known not to return, and thus this code
will never jump to the return value; in that case, the
noreturnCallClassifier
should fire. As such, callClassifier
should always
be attempted *after* noreturnCallClassifier
.
returnClassifier :: BlockClassifier arch ids Source #
Check this block ends with a return as identified by the
architecture-specific processing. Basic return identification
can be performed by detecting when the Instruction Pointer
(ip_reg) contains the ReturnAddr
symbolic value (initially
placed on the top of the stack or in the Link Register by the
architecture-specific state initializer). However, some
architectures perform expression evaluations on this value before
loading the IP (e.g. ARM will clear the low bit in T32 mode or
the low 2 bits in A32 mode), so the actual detection process is
deferred to architecture-specific functionality.
directJumpClassifier :: BlockClassifier arch ids Source #
Classifies jumps to concrete addresses as unconditional jumps. Note that
this logic is substantially similar to the tailCallClassifier
in cases
where the function does not establish a stack frame (i.e., leaf functions).
Note that known call targets are not eligible to be intra-procedural jump
targets (see classifyDirectJump
). This means that we need to conservatively
prefer to mis-classify terminators as jumps rather than tail calls. The
downside of this choice is that code that could be considered a tail-called
function may be duplicated in some cases (i.e., considered part of multiple
functions).
The alternative interpretation (eagerly preferring tail calls) can cause a
section of a function to be marked as a tail-called function, thereby
blocking the directJumpClassifier
or the branchClassifier
from
recognizing the "callee" as an intra-procedural jump. This results in
classification failures that we don't have any mitigations for.
noreturnCallClassifier :: BlockClassifier arch ids Source #
Attempt to recognize a call to a function that is known to not return. These are effectively tail calls, even if the compiler did not obviously generate a tail call instruction sequence.
This classifier is important because compilers often place garbage instructions (for alignment, or possibly the next function) after calls to no-return functions. Without knowledge of no-return functions, macaw would otherwise think that the callee could return to the garbage instructions, causing later classification failures.
This functionality depends on a set of known non-return functions are
specified as an input to the code discovery process (see pctxKnownFnEntries
).
Note that this classifier should always be run before the callClassifier
.
tailCallClassifier :: BlockClassifier arch ids Source #
Attempt to recognize tail call
The current heuristic is that the target looks like a call, except the stack height in the caller is 0.
Note that, in leaf functions (i.e., with no stack usage), tail calls and jumps look substantially similar. We typically apply the jump classifier first to prefer them, which means that we very rarely recognize tail calls in leaf functions.
pltStubClassifier :: BlockClassifier arch ids Source #
A classifier that attempts to recognize PLT stubs
jumpTableClassifier :: BlockClassifier arch ids Source #
A classifier for jump tables
This classifier employs a number of heuristics, but is of course incomplete
Simplification
eliminateDeadStmts :: ArchitectureInfo arch -> Block arch ids -> Block arch ids Source #
Eliminate all dead statements in blocks
Re-exports
type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) Source #
Number of bits in addreses for architecture.