Safe Haskell	None
Language	Haskell2010

Data.Macaw.Discovery

Contents

DiscoveryInfo
Top level utilities
DiscoveryFunInfo
Parsed block
Block classifiers
Simplification
Re-exports

Description

This module discovers the Functions and their internal Block CFG in target binaries.

Synopsis

data DiscoveryState arch
emptyDiscoveryState :: Memory (ArchAddrWidth arch) -> AddrSymMap (ArchAddrWidth arch) -> ArchitectureInfo arch -> DiscoveryState arch
trustedFunctionEntryPoints :: forall arch f. Functor f => (Map (ArchSegmentOff arch) NoReturnFunStatus -> f (Map (ArchSegmentOff arch) NoReturnFunStatus)) -> DiscoveryState arch -> f (DiscoveryState arch)
exploreFnPred :: forall arch f. Functor f => ((ArchSegmentOff arch -> Bool) -> f (ArchSegmentOff arch -> Bool)) -> DiscoveryState arch -> f (DiscoveryState arch)
type AddrSymMap (w :: Nat) = Map (MemSegmentOff w) ByteString
funInfo :: forall arch f. Functor f => (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)) -> f (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))) -> DiscoveryState arch -> f (DiscoveryState arch)
exploredFunctions :: DiscoveryState arch -> [Some (DiscoveryFunInfo arch)]
ppDiscoveryStateBlocks :: DiscoveryState arch -> Doc ann
unexploredFunctions :: forall arch f. Functor f => (UnexploredFunctionMap arch -> f (UnexploredFunctionMap arch)) -> DiscoveryState arch -> f (DiscoveryState arch)
cfgFromAddrs :: ArchitectureInfo arch -> Memory (ArchAddrWidth arch) -> AddrSymMap (ArchAddrWidth arch) -> [ArchSegmentOff arch] -> [(ArchSegmentOff arch, ArchSegmentOff arch)] -> DiscoveryState arch
cfgFromAddrsAndState :: DiscoveryState arch -> [ArchSegmentOff arch] -> [(ArchSegmentOff arch, ArchSegmentOff arch)] -> DiscoveryState arch
markAddrAsFunction :: FunctionExploreReason (ArchAddrWidth arch) -> ArchSegmentOff arch -> DiscoveryState arch -> DiscoveryState arch
markAddrsAsFunction :: Foldable t => FunctionExploreReason (ArchAddrWidth arch) -> t (ArchSegmentOff arch) -> DiscoveryState arch -> DiscoveryState arch
data FunctionExploreReason (w :: Nat)
- = PossibleWriteEntry !(MemSegmentOff w)
- | CallTarget !(MemSegmentOff w)
- | InitAddr
- | CodePointerInMem !(MemSegmentOff w)
- | UserRequest
ppFunReason :: forall (w :: Nat). FunctionExploreReason w -> String
data BlockExploreReason (w :: Nat)
- = NextIP !(MemSegmentOff w)
- | FunctionEntryPoint
- | SplitAt !(MemSegmentOff w) !(BlockExploreReason w)
analyzeFunction :: ArchSegmentOff arch -> FunctionExploreReason (ArchAddrWidth arch) -> DiscoveryState arch -> (DiscoveryState arch, Some (DiscoveryFunInfo arch))
analyzeDiscoveredFunctions :: DiscoveryState arch -> DiscoveryState arch
addDiscoveredFunctionBlockTargets :: DiscoveryState arch -> DiscoveryFunInfo arch ids -> [(ArchSegmentOff arch, [ArchSegmentOff arch])] -> DiscoveryState arch
discoverFunction :: DiscoveryOptions -> ArchSegmentOff arch -> FunctionExploreReason (ArchAddrWidth arch) -> DiscoveryState arch -> [(ArchSegmentOff arch, [ArchSegmentOff arch])] -> IncComp (DiscoveryEvent arch) (DiscoveryState arch, Some (DiscoveryFunInfo arch))
completeDiscoveryState :: DiscoveryState arch -> DiscoveryOptions -> IO (DiscoveryState arch)
incCompleteDiscovery :: DiscoveryState arch -> DiscoveryOptions -> IncCompM (DiscoveryEvent arch) r (DiscoveryState arch)
data DiscoveryOptions = DiscoveryOptions {
- exploreFunctionSymbols :: !Bool
- exploreCodeAddrInMem :: !Bool
- logAtAnalyzeFunction :: !Bool
- logAtAnalyzeBlock :: !Bool
}
defaultDiscoveryOptions :: DiscoveryOptions
data DiscoveryEvent arch
- = ReportAnalyzeFunction !(ArchSegmentOff arch)
- | ReportAnalyzeFunctionDone (DiscoveryFunInfo arch ids)
- | ReportIdentifyFunction !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(FunctionExploreReason (ArchAddrWidth arch))
- | ReportAnalyzeBlock !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(Maybe (BlockExploreReason (ArchAddrWidth arch)))
logDiscoveryEvent :: MemWidth (ArchAddrWidth arch) => AddrSymMap (ArchAddrWidth arch) -> DiscoveryEvent arch -> IO ()
data DiscoveryFunInfo arch ids
discoveredFunAddr :: DiscoveryFunInfo arch ids -> ArchSegmentOff arch
discoveredFunName :: MemWidth (ArchAddrWidth arch) => DiscoveryFunInfo arch ids -> ByteString
discoveredFunSymbol :: DiscoveryFunInfo arch ids -> Maybe ByteString
discoveredClassifyFailureResolutions :: DiscoveryFunInfo arch ids -> [(ArchSegmentOff arch, [ArchSegmentOff arch])]
parsedBlocks :: forall arch ids f. Functor f => (Map (ArchSegmentOff arch) (ParsedBlock arch ids) -> f (Map (ArchSegmentOff arch) (ParsedBlock arch ids))) -> DiscoveryFunInfo arch ids -> f (DiscoveryFunInfo arch ids)
data NoReturnFunStatus
- = NoReturnFun
- | MayReturnFun
data ParsedBlock arch ids
pblockAddr :: ParsedBlock arch ids -> ArchSegmentOff arch
blockSize :: ParsedBlock arch ids -> Int
blockReason :: ParsedBlock arch ids -> BlockExploreReason (ArchAddrWidth arch)
blockAbstractState :: ParsedBlock arch ids -> AbsBlockState (ArchReg arch)
pblockStmts :: ParsedBlock arch ids -> [Stmt arch ids]
pblockTermStmt :: ParsedBlock arch ids -> ParsedTermStmt arch ids
data ParsedTermStmt arch ids
- = ParsedCall !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch))
- | PLTStub !(MapF (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) !VersionedSymbol
- | ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch)
- | ParsedBranch !(RegState (ArchReg arch) (Value arch ids)) !(Value arch ids BoolType) !(ArchSegmentOff arch) !(ArchSegmentOff arch)
- | ParsedLookupTable !(JumpTableLayout arch) !(RegState (ArchReg arch) (Value arch ids)) !(ArchAddrValue arch ids) !(Vector (ArchSegmentOff arch))
- | ParsedReturn !(RegState (ArchReg arch) (Value arch ids))
- | ParsedArchTermStmt !(ArchTermStmt arch (Value arch ids)) !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch))
- | ParsedTranslateError !Text
- | ClassifyFailure !(RegState (ArchReg arch) (Value arch ids)) [String]
data JumpTableLayout arch
jtlBackingAddr :: JumpTableLayout arch -> ArchSegmentOff arch
jtlBackingSize :: JumpTableLayout arch -> Word64
type BlockClassifier arch ids = BlockClassifierM arch ids (ParsedContents arch ids)
defaultClassifier :: BlockClassifier arch ids
branchClassifier :: BlockClassifier arch ids
callClassifier :: BlockClassifier arch ids
returnClassifier :: BlockClassifier arch ids
directJumpClassifier :: BlockClassifier arch ids
noreturnCallClassifier :: BlockClassifier arch ids
tailCallClassifier :: BlockClassifier arch ids
pltStubClassifier :: BlockClassifier arch ids
jumpTableClassifier :: BlockClassifier arch ids
eliminateDeadStmts :: ArchitectureInfo arch -> Block arch ids -> Block arch ids
type ArchAddrWidth arch = RegAddrWidth (ArchReg arch)

DiscoveryInfo

data DiscoveryState arch Source #

Information discovered about the program

emptyDiscoveryState Source #

Arguments

:: Memory (ArchAddrWidth arch)	State of memory
-> AddrSymMap (ArchAddrWidth arch)	Map from addresses to their symbol name (if any)
-> ArchitectureInfo arch	architecture/OS specific information
-> DiscoveryState arch

Create empty discovery information.

trustedFunctionEntryPoints :: forall arch f. Functor f => (Map (ArchSegmentOff arch) NoReturnFunStatus -> f (Map (ArchSegmentOff arch) NoReturnFunStatus)) -> DiscoveryState arch -> f (DiscoveryState arch) Source #

Retrieves functions that are trusted entry points.

exploreFnPred :: forall arch f. Functor f => ((ArchSegmentOff arch -> Bool) -> f (ArchSegmentOff arch -> Bool)) -> DiscoveryState arch -> f (DiscoveryState arch) Source #

type AddrSymMap (w :: Nat) = Map (MemSegmentOff w) ByteString Source #

Maps code addresses to the associated symbol name if any.

funInfo :: forall arch f. Functor f => (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)) -> f (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))) -> DiscoveryState arch -> f (DiscoveryState arch) Source #

Get information for specific functions

exploredFunctions :: DiscoveryState arch -> [Some (DiscoveryFunInfo arch)] Source #

Return list of all functions discovered so far.

ppDiscoveryStateBlocks :: DiscoveryState arch -> Doc ann Source #

unexploredFunctions :: forall arch f. Functor f => (UnexploredFunctionMap arch -> f (UnexploredFunctionMap arch)) -> DiscoveryState arch -> f (DiscoveryState arch) Source #

List of functions to explore next.

cfgFromAddrs Source #

Arguments

:: ArchitectureInfo arch	Architecture-specific information needed for doing control-flow exploration.
-> Memory (ArchAddrWidth arch)	Memory to use when decoding instructions.
-> AddrSymMap (ArchAddrWidth arch)	Map from addresses to the associated symbol name.
-> [ArchSegmentOff arch]	Initial function entry points.
-> [(ArchSegmentOff arch, ArchSegmentOff arch)]	Function entry points in memory to be explored after exploring function entry points. Each entry contains an address and the value stored in it.
-> DiscoveryState arch

Construct an empty discovery state and populate it by exploring from a given set of function entry points

cfgFromAddrsAndState Source #

Arguments

:: DiscoveryState arch
-> [ArchSegmentOff arch]	Initial function entry points.
-> [(ArchSegmentOff arch, ArchSegmentOff arch)]	Function entry points in memory to be explored after exploring function entry points. Each entry contains an address and the value stored in it.
-> DiscoveryState arch

Expand an initial discovery state by exploring from a given set of function entry points.

markAddrAsFunction Source #

Arguments

:: FunctionExploreReason (ArchAddrWidth arch)	Information about why the code address was discovered Used for debugging
-> ArchSegmentOff arch
-> DiscoveryState arch
-> DiscoveryState arch

Mark a escaped code pointer as a function entry.

markAddrsAsFunction :: Foldable t => FunctionExploreReason (ArchAddrWidth arch) -> t (ArchSegmentOff arch) -> DiscoveryState arch -> DiscoveryState arch Source #

Mark a list of addresses as function entries with the same reason.

data FunctionExploreReason (w :: Nat) Source #

This describes why we started exploring a given function.

Constructors

PossibleWriteEntry !(MemSegmentOff w)	Exploring because code at the given block writes it to memory.
CallTarget !(MemSegmentOff w)	Exploring because address terminates with a call that jumps here.
InitAddr	Identified as an entry point from initial information
CodePointerInMem !(MemSegmentOff w)	A code pointer that was stored at the given address.
UserRequest	The user requested that we analyze this address as a function.

Instances

Instances details

MemWidth w => Show (FunctionExploreReason w) Source #
Instance details Defined in Data.Macaw.Discovery.State Methods showsPrec :: Int -> FunctionExploreReason w -> ShowS # show :: FunctionExploreReason w -> String # showList :: [FunctionExploreReason w] -> ShowS #
Eq (FunctionExploreReason w) Source #
Instance details Defined in Data.Macaw.Discovery.State Methods (==) :: FunctionExploreReason w -> FunctionExploreReason w -> Bool # (/=) :: FunctionExploreReason w -> FunctionExploreReason w -> Bool #

ppFunReason :: forall (w :: Nat). FunctionExploreReason w -> String Source #

Print exploration reason.

data BlockExploreReason (w :: Nat) Source #

This describes why we are exploring a given block within a function.

Constructors

NextIP !(MemSegmentOff w)	Exploring because the given block jumps here.
FunctionEntryPoint	Identified as an entry point from initial information
SplitAt !(MemSegmentOff w) !(BlockExploreReason w)	Added because the address split this block after it had been disassembled. Also includes the reason we thought the block should be there before we split it.

Instances

Instances details

MemWidth w => Show (BlockExploreReason w) Source #
Instance details Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> BlockExploreReason w -> ShowS # show :: BlockExploreReason w -> String # showList :: [BlockExploreReason w] -> ShowS #
Eq (BlockExploreReason w) Source #
Instance details Defined in Data.Macaw.Discovery.ParsedContents Methods (==) :: BlockExploreReason w -> BlockExploreReason w -> Bool # (/=) :: BlockExploreReason w -> BlockExploreReason w -> Bool #
MemWidth w => Pretty (BlockExploreReason w) Source #
Instance details Defined in Data.Macaw.Discovery.ParsedContents Methods pretty :: BlockExploreReason w -> Doc ann prettyList :: [BlockExploreReason w] -> Doc ann

analyzeFunction Source #

Arguments

:: ArchSegmentOff arch	The address to explore
-> FunctionExploreReason (ArchAddrWidth arch)	Reason to provide for why we are analyzing this function This can be used to figure out why we decided a given address identified a code location.
-> DiscoveryState arch	The current binary information.
-> (DiscoveryState arch, Some (DiscoveryFunInfo arch))

This analyzes the function at a given address, possibly discovering new candidates.

This returns the updated state and the discovered control flow graph for this function.

analyzeDiscoveredFunctions :: DiscoveryState arch -> DiscoveryState arch Source #

Analyze addresses that we have marked as functions, but not yet analyzed to identify basic blocks, and discover new function candidates until we have analyzed all function entry points.

If an exploreFnPred function exists in the DiscoveryState, then do not analyze unexploredFunctions at addresses that do not satisfy this predicate.

addDiscoveredFunctionBlockTargets Source #

Arguments

:: DiscoveryState arch
-> DiscoveryFunInfo arch ids	The function for which we have learned additional information
-> [(ArchSegmentOff arch, [ArchSegmentOff arch])]
-> DiscoveryState arch

Extend the analysis of a previously analyzed function with new information about the transfers for a block in that function. The assumption is that the block in question previously had an unknown transfer state condition, and that the new transfer addresses were discovered by other means (e.g. SMT analysis). The block in question's terminal statement will be replaced by an ITE (from IP -> new addresses) and the new addresses will be added to the frontier for additional discovery.

discoverFunction Source #

Arguments

:: DiscoveryOptions	Options controlling discovery
-> ArchSegmentOff arch	The address to explore
-> FunctionExploreReason (ArchAddrWidth arch)	Reason to provide for why we are analyzing this function This can be used to figure out why we decided a given address identified a code location.
-> DiscoveryState arch	The current binary information.
-> [(ArchSegmentOff arch, [ArchSegmentOff arch])]	Additional identified intraprocedural jump targets The pairs are: (address of the block jumped from, jump targets)
-> IncComp (DiscoveryEvent arch) (DiscoveryState arch, Some (DiscoveryFunInfo arch))

Run tfunction discovery to explore blocks.

Top level utilities

completeDiscoveryState Source #

Arguments

:: DiscoveryState arch
-> DiscoveryOptions	Options controlling discovery
-> IO (DiscoveryState arch)

Explore until we have found all functions we can.

This function is intended to make it easy to explore functions, and can be controlled via DiscoveryOptions.

incCompleteDiscovery Source #

Arguments

:: DiscoveryState arch
-> DiscoveryOptions	Options controlling discovery
-> IncCompM (DiscoveryEvent arch) r (DiscoveryState arch)

Explore until we have found all functions we can.

This function is a version of completeDiscoveryState that uses the new incremental computation monad rather than IO.

data DiscoveryOptions Source #

Options controlling completeDiscoveryState.

Constructors

DiscoveryOptions

Fields

exploreFunctionSymbols :: !Bool
If True, completeDiscoveryState should automatically explore all addresses in the address-to-symbol map.
exploreCodeAddrInMem :: !Bool
If True, completeDiscoveryState will explore all potential code addresses in memory after exploring other potential functions.
This is effectively a hack that sometimes allows discovering functions. If you need it, let the author's of Macaw know so that we can find a more principled way.
logAtAnalyzeFunction :: !Bool
Print a message each time we apply discovery analysis to a new function.
logAtAnalyzeBlock :: !Bool
Print a message each time we analyze a block within a function.

defaultDiscoveryOptions :: DiscoveryOptions Source #

Some default options

data DiscoveryEvent arch Source #

Events reported by discovery process.

Constructors

ReportAnalyzeFunction !(ArchSegmentOff arch)	Report that discovery has now started analyzing the function at the given offset.
ReportAnalyzeFunctionDone (DiscoveryFunInfo arch ids)	`ReoptAnalyzeFunctionDone f` we completed discovery and yielded function `f`.
ReportIdentifyFunction !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(FunctionExploreReason (ArchAddrWidth arch))	`ReportIdentifyFunction src tgt rsn` indicates Macaw identified a candidate funciton entry point `tgt` from analyzing `src` for the given reason `rsn`.
ReportAnalyzeBlock !(ArchSegmentOff arch) !(ArchSegmentOff arch) !(Maybe (BlockExploreReason (ArchAddrWidth arch)))	`ReportAnalyzeBlock faddr baddr reason` indicates discovery identified a block at `baddr` in `faddr`. `reason` is the reason why this block is explored (or sometimes re-explored). N.B. This event is only emitted when `logAtAnalyzeBlock` is true.

logDiscoveryEvent :: MemWidth (ArchAddrWidth arch) => AddrSymMap (ArchAddrWidth arch) -> DiscoveryEvent arch -> IO () Source #

DiscoveryFunInfo

data DiscoveryFunInfo arch ids Source #

Information discovered about a particular function

Instances

Instances details

ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) Source #
Instance details Defined in Data.Macaw.Discovery.State Methods pretty :: DiscoveryFunInfo arch ids -> Doc ann prettyList :: [DiscoveryFunInfo arch ids] -> Doc ann

discoveredFunAddr :: DiscoveryFunInfo arch ids -> ArchSegmentOff arch Source #

Address of function entry block.

discoveredFunName :: MemWidth (ArchAddrWidth arch) => DiscoveryFunInfo arch ids -> ByteString Source #

Returns the "name" associated with a function.

This is either the symbol or the address.

discoveredFunSymbol :: DiscoveryFunInfo arch ids -> Maybe ByteString Source #

A symbol associated with the definition.

discoveredClassifyFailureResolutions :: DiscoveryFunInfo arch ids -> [(ArchSegmentOff arch, [ArchSegmentOff arch])] Source #

A side mapping that records jump targets for ClassifyFailure block terminators that have been gleaned from an external source. When interpreting the function, this map can be used to complete the control flow of functions with ClassifyFailures.

parsedBlocks :: forall arch ids f. Functor f => (Map (ArchSegmentOff arch) (ParsedBlock arch ids) -> f (Map (ArchSegmentOff arch) (ParsedBlock arch ids))) -> DiscoveryFunInfo arch ids -> f (DiscoveryFunInfo arch ids) Source #

data NoReturnFunStatus Source #

Flags whether a function is labeled no return or not.

Constructors

NoReturnFun	Function labeled no return
MayReturnFun	Function may retun

Instances

Instances details

Show NoReturnFunStatus Source #
Instance details Defined in Data.Macaw.Architecture.Info Methods showsPrec :: Int -> NoReturnFunStatus -> ShowS # show :: NoReturnFunStatus -> String # showList :: [NoReturnFunStatus] -> ShowS #
Pretty NoReturnFunStatus Source #
Instance details Defined in Data.Macaw.Architecture.Info Methods pretty :: NoReturnFunStatus -> Doc ann prettyList :: [NoReturnFunStatus] -> Doc ann

Parsed block

data ParsedBlock arch ids Source #

A contiguous region of instructions in memory.

Instances

Instances details

(ArchConstraints arch, Show (ArchBlockPrecond arch)) => Show (ParsedBlock arch ids) Source #
Instance details Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> ParsedBlock arch ids -> ShowS # show :: ParsedBlock arch ids -> String # showList :: [ParsedBlock arch ids] -> ShowS #
ArchConstraints arch => Pretty (ParsedBlock arch ids) Source #
Instance details Defined in Data.Macaw.Discovery.ParsedContents Methods pretty :: ParsedBlock arch ids -> Doc ann prettyList :: [ParsedBlock arch ids] -> Doc ann

pblockAddr :: ParsedBlock arch ids -> ArchSegmentOff arch Source #

Address of region

blockSize :: ParsedBlock arch ids -> Int Source #

The size of the region of memory covered by this.

blockReason :: ParsedBlock arch ids -> BlockExploreReason (ArchAddrWidth arch) Source #

Reason that we marked this address as the start of a basic block.

blockAbstractState :: ParsedBlock arch ids -> AbsBlockState (ArchReg arch) Source #

Abstract state prior to the execution of this region.

pblockStmts :: ParsedBlock arch ids -> [Stmt arch ids] Source #

The non-terminal statements in the block

pblockTermStmt :: ParsedBlock arch ids -> ParsedTermStmt arch ids Source #

The terminal statement in the block.

data ParsedTermStmt arch ids Source #

This term statement is used to describe higher level expressions of how block ending with a a FetchAndExecute statement should be interpreted.

Constructors

ParsedCall !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch))	A call with the current register values and location to return to or `Nothing` if this is a tail call. Note that the semantics of this instruction assume that the program has already stored the return address in the appropriate location (which depends on the ABI). For example on X86_64 this is the top of the stack while on ARM this is the link register.
PLTStub !(MapF (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) !VersionedSymbol	`PLTStub regs addr sym symVer` denotes a terminal statement that has been identified as a PLT stub for jumping to the given symbol (with optional version information). This is a special case of a tail call. It has been added separately because it occurs frequently in dynamically linked code, and we can use this to recognize PLT stubs. The first argument maps registers that were changed to their value. Other registers have the initial value. This should typically be empty on `X86_64` PLT stubs. The second argument is the address in the .GOT that the target function is stored at. The PLT stub sets the PC to the address stored here. The third and fourth arguments are used to resolve where the function should jump to.
ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch)	A jump to an explicit address within a function.
ParsedBranch !(RegState (ArchReg arch) (Value arch ids)) !(Value arch ids BoolType) !(ArchSegmentOff arch) !(ArchSegmentOff arch)	`ParsedBranch regs cond trueAddr falseAddr` represents a conditional branch that jumps to `trueAddr` if `cond` is true and `falseAddr` otherwise. The value assigned to the IP in `regs` should reflect this if-then-else structure.
ParsedLookupTable !(JumpTableLayout arch) !(RegState (ArchReg arch) (Value arch ids)) !(ArchAddrValue arch ids) !(Vector (ArchSegmentOff arch))	A lookup table that branches to one of a vector of addresses. The registers store the registers, the value contains the index to jump to, and the possible addresses as a table. If the index (when interpreted as an unsigned number) is larger than the number of entries in the vector, then the result is undefined.
ParsedReturn !(RegState (ArchReg arch) (Value arch ids))	A return with the given registers.
ParsedArchTermStmt !(ArchTermStmt arch (Value arch ids)) !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch))	An architecture-specific statement with the registers prior to execution, and the given next control flow address.
ParsedTranslateError !Text	An error occured in translating the block
ClassifyFailure !(RegState (ArchReg arch) (Value arch ids)) [String]	The classifier failed to identity the block. Includes registers with list of reasons for each classifer to fail

Instances

Instances details

ArchConstraints arch => Show (ParsedTermStmt arch ids) Source #
Instance details Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> ParsedTermStmt arch ids -> ShowS # show :: ParsedTermStmt arch ids -> String # showList :: [ParsedTermStmt arch ids] -> ShowS #

data JumpTableLayout arch Source #

This describes the layout of a jump table. Beware: on some architectures, after reading from the jump table, the resulting addresses must be aligned. See the IPAlignment class.

Instances

Instances details

RegisterInfo (ArchReg arch) => Show (JumpTableLayout arch) Source #
Instance details Defined in Data.Macaw.Discovery.ParsedContents Methods showsPrec :: Int -> JumpTableLayout arch -> ShowS # show :: JumpTableLayout arch -> String # showList :: [JumpTableLayout arch] -> ShowS #

jtlBackingAddr :: JumpTableLayout arch -> ArchSegmentOff arch Source #

Return base address of table storing contents of jump table.

jtlBackingSize :: JumpTableLayout arch -> Word64 Source #

Returns the number of bytes in the layout

Block classifiers

type BlockClassifier arch ids = BlockClassifierM arch ids (ParsedContents arch ids) Source #

defaultClassifier :: BlockClassifier arch ids Source #

This is a good default set of block classifiers

Block classifiers determine how the code discovery engine interprets the final instruction in each block. The individual classifiers are also exported so that architecture backends (or even end users) can provide their own classifiers.

See Classifier for the primitives necessary to define new classifiers (e.g., classifiers that can produce architecture-specific terminators).

Note that classifiers are an instance of Alternative, so the order they are applied in matters. While several are non-overlapping, at least the order that the direct jump and tail call classifiers are applied in matters, as they look substantially the same to the analysis. Being too eager to flag jumps as tail calls classifies the jump targets as known function entry points, which can interfere with other classifiers later in the function.

branchClassifier :: BlockClassifier arch ids Source #

The classifier for conditional and unconditional branches

Note that this classifier can convert a conditional branch to an unconditional branch if (and only if) the condition is syntactically true or false after constant propagation. It never attempts sophisticated path trimming.

callClassifier :: BlockClassifier arch ids Source #

Use the architecture-specific callback to check if last statement was a call.

Note that in some cases the call is known not to return, and thus this code will never jump to the return value; in that case, the noreturnCallClassifier should fire. As such, callClassifier should always be attempted *after* noreturnCallClassifier.

returnClassifier :: BlockClassifier arch ids Source #

Check this block ends with a return as identified by the architecture-specific processing. Basic return identification can be performed by detecting when the Instruction Pointer (ip_reg) contains the ReturnAddr symbolic value (initially placed on the top of the stack or in the Link Register by the architecture-specific state initializer). However, some architectures perform expression evaluations on this value before loading the IP (e.g. ARM will clear the low bit in T32 mode or the low 2 bits in A32 mode), so the actual detection process is deferred to architecture-specific functionality.

directJumpClassifier :: BlockClassifier arch ids Source #

Classifies jumps to concrete addresses as unconditional jumps. Note that this logic is substantially similar to the tailCallClassifier in cases where the function does not establish a stack frame (i.e., leaf functions).

Note that known call targets are not eligible to be intra-procedural jump targets (see classifyDirectJump). This means that we need to conservatively prefer to mis-classify terminators as jumps rather than tail calls. The downside of this choice is that code that could be considered a tail-called function may be duplicated in some cases (i.e., considered part of multiple functions).

The alternative interpretation (eagerly preferring tail calls) can cause a section of a function to be marked as a tail-called function, thereby blocking the directJumpClassifier or the branchClassifier from recognizing the "callee" as an intra-procedural jump. This results in classification failures that we don't have any mitigations for.

noreturnCallClassifier :: BlockClassifier arch ids Source #

Attempt to recognize a call to a function that is known to not return. These are effectively tail calls, even if the compiler did not obviously generate a tail call instruction sequence.

This classifier is important because compilers often place garbage instructions (for alignment, or possibly the next function) after calls to no-return functions. Without knowledge of no-return functions, macaw would otherwise think that the callee could return to the garbage instructions, causing later classification failures.

This functionality depends on a set of known non-return functions are specified as an input to the code discovery process (see pctxKnownFnEntries).

Note that this classifier should always be run before the callClassifier.

tailCallClassifier :: BlockClassifier arch ids Source #

Attempt to recognize tail call

The current heuristic is that the target looks like a call, except the stack height in the caller is 0.

Note that, in leaf functions (i.e., with no stack usage), tail calls and jumps look substantially similar. We typically apply the jump classifier first to prefer them, which means that we very rarely recognize tail calls in leaf functions.

pltStubClassifier :: BlockClassifier arch ids Source #

A classifier that attempts to recognize PLT stubs

jumpTableClassifier :: BlockClassifier arch ids Source #

A classifier for jump tables

This classifier employs a number of heuristics, but is of course incomplete

Simplification

eliminateDeadStmts :: ArchitectureInfo arch -> Block arch ids -> Block arch ids Source #

Eliminate all dead statements in blocks

Re-exports

type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) Source #

Number of bits in addreses for architecture.