{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}

-- | Functionality for computing the names and addresses of PLT stub functions
-- in a dynamically linked ELF binary.
--
-- Note that the API in this library is somewhat experimental, and as we further
-- develop the underlying heuristics involved (see @Note [PLT stub names]@), we
-- may need to change the API accordingly.
module Data.Macaw.Memory.ElfLoader.PLTStubs
  ( PLTStubInfo(..)
  , pltStubSymbols
  , noPLTStubInfo
  , NoRelocationType(..)
  ) where

import qualified Data.ByteString.Char8 as BSC
import qualified Data.ElfEdit as EE
import           Data.Foldable (Foldable(..))
import qualified Data.Map as Map
import           Data.Maybe ( fromMaybe, listToMaybe )
import           Data.Word ( Word32 )
import           GHC.TypeLits ( Nat )

import qualified Data.Macaw.Memory as DMM
import qualified Data.Macaw.Memory.LoadCommon as MML

-- | Heuristics about how large (in bytes) the sizes of PLT-related stub
-- functions are. See @Note [PLT stub names]@ for more information. These
-- heuristics are tailored to each architecture's dynamic relocations, which is
-- why this is parameterized by a @reloc@ type.
--
-- For more information about how to add your own value of type 'PLTStubInfo',
-- see @Note [Creating a new PLTStubInfo value]@.
data PLTStubInfo reloc = PLTStubInfo
  { forall reloc. PLTStubInfo reloc -> Integer
pltFunSize :: Integer
    -- ^ The size of the @.plt@ function, which is the first function in the
    -- @.plt@ section in most cases.
  , forall reloc. PLTStubInfo reloc -> Integer
pltStubSize :: Integer
    -- ^ The size of each PLT stub in the @.plt@ section.
  , forall reloc. PLTStubInfo reloc -> Integer
pltGotStubSize :: Integer
    -- ^ The size of each PLT stub in the @.plt.got@ section. Note that not
    -- all architectures put stubs in a @.plt.got@ section, so it is
    -- permissible to implement this using 'error' on those architectures.
  }

-- | Match up names to PLT stub entries in a dynamically linked ELF binary.
--
-- Calls to functions in shared libraries are issued through PLT stubs. These
-- are short sequences included in the binary by the compiler that jump to the
-- *real* function implementation in the shared library via the Global Offset
-- Table. The GOT is populated by the dynamic loader.
--
-- See @Note [PLT stub names]@ for more details.
pltStubSymbols
  :: forall reloc w
   . ( w ~ EE.RelocationWidth reloc
     , DMM.MemWidth w
     , EE.IsRelocationType reloc
     )
  => PLTStubInfo reloc
     -- ^ Heuristics about how large PLT stubs should be.
  -> MML.LoadOptions
     -- ^ Options configuring how to load the address of each PLT stub.
  -> EE.ElfHeaderInfo w
     -- ^ The dynamically linked ELF binary.
  -> Map.Map (DMM.MemWord w) (EE.VersionedSymbol (EE.ElfWordType w))
pltStubSymbols :: forall reloc (w :: Nat).
(w ~ RelocationWidth reloc, MemWidth w, IsRelocationType reloc) =>
PLTStubInfo reloc
-> LoadOptions
-> ElfHeaderInfo w
-> Map (MemWord w) (VersionedSymbol (ElfWordType w))
pltStubSymbols PLTStubInfo reloc
pltStubInfo LoadOptions
loadOptions ElfHeaderInfo w
ehi =
  ElfClass w
-> (ElfWidthConstraints w =>
    Map (MemWord w) (VersionedSymbol (ElfWordType w)))
-> Map (MemWord w) (VersionedSymbol (ElfWordType w))
forall (w :: Nat) a.
ElfClass w -> (ElfWidthConstraints w => a) -> a
EE.elfClassInstances ElfClass w
elfClass ((ElfWidthConstraints w =>
  Map (MemWord w) (VersionedSymbol (ElfWordType w)))
 -> Map (MemWord w) (VersionedSymbol (ElfWordType w)))
-> (ElfWidthConstraints w =>
    Map (MemWord w) (VersionedSymbol (ElfWordType w)))
-> Map (MemWord w) (VersionedSymbol (ElfWordType w))
forall a b. (a -> b) -> a -> b
$
  [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Map (MemWord w) (VersionedSymbol (ElfWordType w))
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(MemWord w, VersionedSymbol (ElfWordType w))]
 -> Map (MemWord w) (VersionedSymbol (ElfWordType w)))
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Map (MemWord w) (VersionedSymbol (ElfWordType w))
forall a b. (a -> b) -> a -> b
$ [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
 -> [(MemWord w, VersionedSymbol (ElfWordType w))])
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a b. (a -> b) -> a -> b
$ do
    VirtAddrMap w
vam <- ByteString -> [Phdr w] -> Maybe (VirtAddrMap w)
forall (t :: Type -> Type) (w :: Nat).
(Foldable t, Integral (ElfWordType w)) =>
ByteString -> t (Phdr w) -> Maybe (VirtAddrMap w)
EE.virtAddrMap ByteString
elfBytes [Phdr w]
elfPhdrs

    ByteString
dynBytes <- case (Phdr w -> Bool) -> [Phdr w] -> [Phdr w]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Phdr w
p -> Phdr w -> PhdrType
forall (w :: Nat). Phdr w -> PhdrType
EE.phdrSegmentType Phdr w
p PhdrType -> PhdrType -> Bool
forall a. Eq a => a -> a -> Bool
== PhdrType
EE.PT_DYNAMIC) [Phdr w]
elfPhdrs of
      [Phdr w
dynPhdr] -> ByteString -> Maybe ByteString
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return (FileRange (ElfWordType w) -> ByteString -> ByteString
forall w. Integral w => FileRange w -> ByteString -> ByteString
EE.slice (Phdr w -> FileRange (ElfWordType w)
forall (w :: Nat). Phdr w -> FileRange (ElfWordType w)
EE.phdrFileRange Phdr w
dynPhdr) ByteString
elfBytes)
      [Phdr w]
_         -> Maybe ByteString
forall a. Maybe a
Nothing

    DynamicSection w
dynSec <- case ElfData
-> ElfClass w
-> ByteString
-> Either DynamicError (DynamicSection w)
forall (w :: Nat).
ElfData
-> ElfClass w
-> ByteString
-> Either DynamicError (DynamicSection w)
EE.dynamicEntries (Elf w -> ElfData
forall (w :: Nat). Elf w -> ElfData
EE.elfData Elf w
elf) ElfClass w
elfClass ByteString
dynBytes of
      Left DynamicError
_dynErr -> Maybe (DynamicSection w)
forall a. Maybe a
Nothing
      Right DynamicSection w
dynSec -> DynamicSection w -> Maybe (DynamicSection w)
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return DynamicSection w
dynSec

    VersionDefMap
vdefm <- case DynamicSection w
-> VirtAddrMap w -> Either DynamicError VersionDefMap
forall (w :: Nat).
DynamicSection w
-> VirtAddrMap w -> Either DynamicError VersionDefMap
EE.dynVersionDefMap DynamicSection w
dynSec VirtAddrMap w
vam of
      Left DynamicError
_dynErr -> Maybe VersionDefMap
forall a. Maybe a
Nothing
      Right VersionDefMap
vm -> VersionDefMap -> Maybe VersionDefMap
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return VersionDefMap
vm

    VersionDefMap
vreqm <- case DynamicSection w
-> VirtAddrMap w -> Either DynamicError VersionDefMap
forall (w :: Nat).
DynamicSection w
-> VirtAddrMap w -> Either DynamicError VersionDefMap
EE.dynVersionReqMap DynamicSection w
dynSec VirtAddrMap w
vam of
      Left DynamicError
_dynErr -> Maybe VersionDefMap
forall a. Maybe a
Nothing
      Right VersionDefMap
vm -> VersionDefMap -> Maybe VersionDefMap
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return VersionDefMap
vm

    let pltAddrs :: [(MemWord w, VersionedSymbol (ElfWordType w))]
pltAddrs = case Integral (ElfWordType w) =>
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
extractPltAddrs DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm of
          Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
Nothing -> []
          Just [(MemWord w, VersionedSymbol (ElfWordType w))]
addrs -> [(MemWord w, VersionedSymbol (ElfWordType w))]
addrs

    let pltGotAddrs :: [(MemWord w, VersionedSymbol (ElfWordType w))]
pltGotAddrs = case Integral (ElfWordType w) =>
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
extractPltGotAddrs DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm of
          Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
Nothing -> []
          Just [(MemWord w, VersionedSymbol (ElfWordType w))]
addrs -> [(MemWord w, VersionedSymbol (ElfWordType w))]
addrs

    [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return ([(MemWord w, VersionedSymbol (ElfWordType w))]
pltAddrs [(MemWord w, VersionedSymbol (ElfWordType w))]
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a. [a] -> [a] -> [a]
++ [(MemWord w, VersionedSymbol (ElfWordType w))]
pltGotAddrs)
  where
    ([ElfParseError]
_, Elf w
elf) = ElfHeaderInfo w -> ([ElfParseError], Elf w)
forall (w :: Nat). ElfHeaderInfo w -> ([ElfParseError], Elf w)
EE.getElf ElfHeaderInfo w
ehi
    elfPhdrs :: [Phdr w]
elfPhdrs = ElfHeaderInfo w -> [Phdr w]
forall (w :: Nat). ElfHeaderInfo w -> [Phdr w]
EE.headerPhdrs ElfHeaderInfo w
ehi
    elfBytes :: ByteString
elfBytes = ElfHeaderInfo w -> ByteString
forall (w :: Nat). ElfHeaderInfo w -> ByteString
EE.headerFileContents ElfHeaderInfo w
ehi
    elfClass :: ElfClass w
elfClass = Elf w -> ElfClass w
forall (w :: Nat). Elf w -> ElfClass w
EE.elfClass Elf w
elf

    pltStubAddress :: forall relOrRela
                    . EE.DynamicSection w
                   -> EE.VirtAddrMap w
                   -> EE.VersionDefMap
                   -> EE.VersionReqMap
                   -> (relOrRela reloc -> Word32)
                      -- ^ The function for extracting the index of a REL or
                      -- RELA relocation in the Global Offset Table
                   -> [EE.VersionedSymbol (EE.ElfWordType w)]
                      -- The list of PLT-related function symbols accumulated
                      -- so far
                   -> relOrRela reloc
                      -- ^ The REL or RELA relocation, from which we will find
                      -- the corresponding PLT stub
                   -> [EE.VersionedSymbol (EE.ElfWordType w)]
    pltStubAddress :: forall (relOrRela :: Type -> Type).
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> (relOrRela reloc -> Word32)
-> [VersionedSymbol (ElfWordType w)]
-> relOrRela reloc
-> [VersionedSymbol (ElfWordType w)]
pltStubAddress DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm relOrRela reloc -> Word32
getRelSymIdx [VersionedSymbol (ElfWordType w)]
accum relOrRela reloc
rel
      | Right VersionedSymbol (ElfWordType w)
entry <- DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Word32
-> Either DynamicError (VersionedSymbol (ElfWordType w))
forall (w :: Nat).
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Word32
-> Either DynamicError (VersionedSymbol (ElfWordType w))
EE.dynSymEntry DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm (relOrRela reloc -> Word32
getRelSymIdx relOrRela reloc
rel) =
          VersionedSymbol (ElfWordType w)
entry VersionedSymbol (ElfWordType w)
-> [VersionedSymbol (ElfWordType w)]
-> [VersionedSymbol (ElfWordType w)]
forall a. a -> [a] -> [a]
: [VersionedSymbol (ElfWordType w)]
accum
      | Bool
otherwise = [VersionedSymbol (ElfWordType w)]
accum

    -- Build an association list of PLT stub addresses and their corresponding
    -- function symbols.
    buildAssocList :: forall versym
                    . [(Integer, versym)]
                      -- ^ The PLT stub addresses (as raw Integers) and their
                      -- corresponding symbol names.
                   -> Integer
                      -- ^ The starting address of the section containing the PLT stubs
                   -> Integer
                      -- ^ The size of a PLT stub
                   -> [(DMM.MemWord w, versym)]
    buildAssocList :: forall versym.
[(Integer, versym)] -> Integer -> Integer -> [(MemWord w, versym)]
buildAssocList [(Integer, versym)]
nameRelaMap Integer
baseAddr Integer
stubSize =
      let loadOffset :: Integer
loadOffset = Word64 -> Integer
forall a. Integral a => a -> Integer
toInteger (Word64 -> Integer) -> Word64 -> Integer
forall a b. (a -> b) -> a -> b
$ Word64 -> Maybe Word64 -> Word64
forall a. a -> Maybe a -> a
fromMaybe Word64
0 (LoadOptions -> Maybe Word64
MML.loadOffset LoadOptions
loadOptions) in
      [ (Word64 -> MemWord w
forall (w :: Nat). MemWidth w => Word64 -> MemWord w
DMM.memWord (Integer -> Word64
forall a. Num a => Integer -> a
fromInteger Integer
addr), versym
versym)
      | (Integer
idx, versym
versym) <- [(Integer, versym)]
nameRelaMap
      , let addr :: Integer
addr = Integer
loadOffset Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
baseAddr Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
idx Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
stubSize
      ]

    -- Build an association list from addresses of stubs in the .plt section to
    -- their function names.
    extractPltAddrs :: Integral (EE.ElfWordType w)
                    => EE.DynamicSection w
                    -> EE.VirtAddrMap w
                    -> EE.VersionDefMap
                    -> EE.VersionReqMap
                    -> Maybe [(DMM.MemWord w, EE.VersionedSymbol (EE.ElfWordType w))]
    extractPltAddrs :: Integral (ElfWordType w) =>
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
extractPltAddrs DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm = do
      SomeRel [relOrRela reloc]
rels relOrRela reloc -> Word32
getRelSymIdx <- case forall tp.
IsRelocationType tp =>
DynamicSection (RelocationWidth tp)
-> VirtAddrMap (RelocationWidth tp)
-> Either DynamicError (PLTEntries tp)
EE.dynPLTRel @reloc DynamicSection w
DynamicSection (RelocationWidth reloc)
dynSec VirtAddrMap w
VirtAddrMap (RelocationWidth reloc)
vam of
        Right (EE.PLTRela [RelaEntry reloc]
relas) -> SomeRel reloc -> Maybe (SomeRel reloc)
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return ([RelaEntry reloc] -> (RelaEntry reloc -> Word32) -> SomeRel reloc
forall (relOrRela :: Type -> Type) reloc.
[relOrRela reloc] -> (relOrRela reloc -> Word32) -> SomeRel reloc
SomeRel [RelaEntry reloc]
relas RelaEntry reloc -> Word32
forall tp. RelaEntry tp -> Word32
EE.relaSym)
        Right (EE.PLTRel [RelEntry reloc]
rels) -> SomeRel reloc -> Maybe (SomeRel reloc)
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return ([RelEntry reloc] -> (RelEntry reloc -> Word32) -> SomeRel reloc
forall (relOrRela :: Type -> Type) reloc.
[relOrRela reloc] -> (relOrRela reloc -> Word32) -> SomeRel reloc
SomeRel [RelEntry reloc]
rels RelEntry reloc -> Word32
forall tp. RelEntry tp -> Word32
EE.relSym)
        Either DynamicError (PLTEntries reloc)
_ -> Maybe (SomeRel reloc)
forall a. Maybe a
Nothing
      let revNameRelaMap :: [VersionedSymbol (ElfWordType w)]
revNameRelaMap = ([VersionedSymbol (ElfWordType w)]
 -> relOrRela reloc -> [VersionedSymbol (ElfWordType w)])
-> [VersionedSymbol (ElfWordType w)]
-> [relOrRela reloc]
-> [VersionedSymbol (ElfWordType w)]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: Type -> Type) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> (relOrRela reloc -> Word32)
-> [VersionedSymbol (ElfWordType w)]
-> relOrRela reloc
-> [VersionedSymbol (ElfWordType w)]
forall (relOrRela :: Type -> Type).
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> (relOrRela reloc -> Word32)
-> [VersionedSymbol (ElfWordType w)]
-> relOrRela reloc
-> [VersionedSymbol (ElfWordType w)]
pltStubAddress DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm relOrRela reloc -> Word32
getRelSymIdx) [] [relOrRela reloc]
rels
      let nameRelaMap :: [(Integer, VersionedSymbol (ElfWordType w))]
nameRelaMap = [Integer]
-> [VersionedSymbol (ElfWordType w)]
-> [(Integer, VersionedSymbol (ElfWordType w))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0..] ([VersionedSymbol (ElfWordType w)]
-> [VersionedSymbol (ElfWordType w)]
forall a. [a] -> [a]
reverse [VersionedSymbol (ElfWordType w)]
revNameRelaMap)
      ElfSection (ElfWordType w)
pltSec <- [ElfSection (ElfWordType w)] -> Maybe (ElfSection (ElfWordType w))
forall a. [a] -> Maybe a
listToMaybe (ByteString -> Elf w -> [ElfSection (ElfWordType w)]
forall (w :: Nat).
ByteString -> Elf w -> [ElfSection (ElfWordType w)]
EE.findSectionByName (String -> ByteString
BSC.pack String
".plt") Elf w
elf)
      let pltBase :: ElfWordType w
pltBase = ElfSection (ElfWordType w) -> ElfWordType w
forall w. ElfSection w -> w
EE.elfSectionAddr ElfSection (ElfWordType w)
pltSec
      let pltFunSz :: Integer
pltFunSz  = PLTStubInfo reloc -> Integer
forall reloc. PLTStubInfo reloc -> Integer
pltFunSize PLTStubInfo reloc
pltStubInfo
      let pltStubSz :: Integer
pltStubSz = PLTStubInfo reloc -> Integer
forall reloc. PLTStubInfo reloc -> Integer
pltStubSize PLTStubInfo reloc
pltStubInfo
      [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return ([(MemWord w, VersionedSymbol (ElfWordType w))]
 -> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))])
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a b. (a -> b) -> a -> b
$ [(Integer, VersionedSymbol (ElfWordType w))]
-> Integer
-> Integer
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
forall versym.
[(Integer, versym)] -> Integer -> Integer -> [(MemWord w, versym)]
buildAssocList [(Integer, VersionedSymbol (ElfWordType w))]
nameRelaMap (Integer
pltFunSz Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ ElfWordType w -> Integer
forall a. Integral a => a -> Integer
toInteger ElfWordType w
pltBase) Integer
pltStubSz

    -- Build an association list from addresses of stubs in the .plt.got section
    -- to their function names.
    extractPltGotAddrs :: Integral (EE.ElfWordType w)
                       => EE.DynamicSection w
                       -> EE.VirtAddrMap w
                       -> EE.VersionDefMap
                       -> EE.VersionReqMap
                       -> Maybe [(DMM.MemWord w, EE.VersionedSymbol (EE.ElfWordType w))]
    extractPltGotAddrs :: Integral (ElfWordType w) =>
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
extractPltGotAddrs DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm = do
      [RelaEntry reloc]
relsGot <- case forall tp.
IsRelocationType tp =>
DynamicSection (RelocationWidth tp)
-> VirtAddrMap (RelocationWidth tp)
-> Either DynamicError [RelaEntry tp]
EE.dynRelaEntries @reloc DynamicSection w
DynamicSection (RelocationWidth reloc)
dynSec VirtAddrMap w
VirtAddrMap (RelocationWidth reloc)
vam of
        Right [RelaEntry reloc]
relas -> [RelaEntry reloc] -> Maybe [RelaEntry reloc]
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return [RelaEntry reloc]
relas
        Left DynamicError
_ -> Maybe [RelaEntry reloc]
forall a. Maybe a
Nothing
      let revNameRelaMapGot :: [VersionedSymbol (ElfWordType w)]
revNameRelaMapGot = ([VersionedSymbol (ElfWordType w)]
 -> RelaEntry reloc -> [VersionedSymbol (ElfWordType w)])
-> [VersionedSymbol (ElfWordType w)]
-> [RelaEntry reloc]
-> [VersionedSymbol (ElfWordType w)]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: Type -> Type) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> (RelaEntry reloc -> Word32)
-> [VersionedSymbol (ElfWordType w)]
-> RelaEntry reloc
-> [VersionedSymbol (ElfWordType w)]
forall (relOrRela :: Type -> Type).
DynamicSection w
-> VirtAddrMap w
-> VersionDefMap
-> VersionDefMap
-> (relOrRela reloc -> Word32)
-> [VersionedSymbol (ElfWordType w)]
-> relOrRela reloc
-> [VersionedSymbol (ElfWordType w)]
pltStubAddress DynamicSection w
dynSec VirtAddrMap w
vam VersionDefMap
vdefm VersionDefMap
vreqm RelaEntry reloc -> Word32
forall tp. RelaEntry tp -> Word32
EE.relaSym) [] [RelaEntry reloc]
relsGot
      let nameRelaMapGot :: [(Integer, VersionedSymbol (ElfWordType w))]
nameRelaMapGot = [Integer]
-> [VersionedSymbol (ElfWordType w)]
-> [(Integer, VersionedSymbol (ElfWordType w))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0..] ([VersionedSymbol (ElfWordType w)]
-> [VersionedSymbol (ElfWordType w)]
forall a. [a] -> [a]
reverse [VersionedSymbol (ElfWordType w)]
revNameRelaMapGot)
      ElfSection (ElfWordType w)
pltGotSec <- [ElfSection (ElfWordType w)] -> Maybe (ElfSection (ElfWordType w))
forall a. [a] -> Maybe a
listToMaybe (ByteString -> Elf w -> [ElfSection (ElfWordType w)]
forall (w :: Nat).
ByteString -> Elf w -> [ElfSection (ElfWordType w)]
EE.findSectionByName (String -> ByteString
BSC.pack String
".plt.got") Elf w
elf)
      let pltGotBase :: ElfWordType w
pltGotBase = ElfSection (ElfWordType w) -> ElfWordType w
forall w. ElfSection w -> w
EE.elfSectionAddr ElfSection (ElfWordType w)
pltGotSec
      let pltGotStubSz :: Integer
pltGotStubSz = PLTStubInfo reloc -> Integer
forall reloc. PLTStubInfo reloc -> Integer
pltGotStubSize PLTStubInfo reloc
pltStubInfo
      [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a. a -> Maybe a
forall (m :: Type -> Type) a. Monad m => a -> m a
return ([(MemWord w, VersionedSymbol (ElfWordType w))]
 -> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))])
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
-> Maybe [(MemWord w, VersionedSymbol (ElfWordType w))]
forall a b. (a -> b) -> a -> b
$ [(Integer, VersionedSymbol (ElfWordType w))]
-> Integer
-> Integer
-> [(MemWord w, VersionedSymbol (ElfWordType w))]
forall versym.
[(Integer, versym)] -> Integer -> Integer -> [(MemWord w, versym)]
buildAssocList [(Integer, VersionedSymbol (ElfWordType w))]
nameRelaMapGot (ElfWordType w -> Integer
forall a. Integral a => a -> Integer
toInteger ElfWordType w
pltGotBase) Integer
pltGotStubSz

-- | A wrapper type that existentially closes over whether we are dealing with a
-- REL or RELA relocation. This makes it easier to extract and process both
-- relocation types at the same time.
data SomeRel reloc where
  SomeRel :: [relOrRela reloc] -> (relOrRela reloc -> Word32) -> SomeRel reloc

-- | A dummy relocation type that is used for architectures that do not yet have
-- a dynamic relocation type defined in @elf-edit@. The corresponding
-- 'EE.IsRelocationType' instance will simply error.
data NoRelocationType (w :: Nat) = NoRelocationType
  deriving Int -> NoRelocationType w -> ShowS
[NoRelocationType w] -> ShowS
NoRelocationType w -> String
(Int -> NoRelocationType w -> ShowS)
-> (NoRelocationType w -> String)
-> ([NoRelocationType w] -> ShowS)
-> Show (NoRelocationType w)
forall (w :: Nat). Int -> NoRelocationType w -> ShowS
forall (w :: Nat). [NoRelocationType w] -> ShowS
forall (w :: Nat). NoRelocationType w -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall (w :: Nat). Int -> NoRelocationType w -> ShowS
showsPrec :: Int -> NoRelocationType w -> ShowS
$cshow :: forall (w :: Nat). NoRelocationType w -> String
show :: NoRelocationType w -> String
$cshowList :: forall (w :: Nat). [NoRelocationType w] -> ShowS
showList :: [NoRelocationType w] -> ShowS
Show

noRelocationTypeError :: a
noRelocationTypeError :: forall a. a
noRelocationTypeError = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> String -> a
forall a b. (a -> b) -> a -> b
$
  String
"Attempting to use dynamic relocations on an architecture " String -> ShowS
forall a. [a] -> [a] -> [a]
++
  String
"that has not yet been configured to use them."

-- | A dummy 'EE.IsRelocationType' instance that will simply error if used.
instance Show (EE.ElfWordType w) => EE.IsRelocationType (NoRelocationType w) where
  -- The particular choice of width doesn't matter here, but we do need /some/
  -- choice of Nat to make IsRelocationType's superclass instances work out.
  type RelocationWidth (NoRelocationType w) = w

  relaWidth :: NoRelocationType w
-> ElfClass (RelocationWidth (NoRelocationType w))
relaWidth = NoRelocationType w -> ElfClass w
NoRelocationType w
-> ElfClass (RelocationWidth (NoRelocationType w))
forall a. a
noRelocationTypeError
  toRelocType :: Word32 -> NoRelocationType w
toRelocType = Word32 -> NoRelocationType w
forall a. a
noRelocationTypeError
  isRelative :: NoRelocationType w -> Bool
isRelative = NoRelocationType w -> Bool
forall a. a
noRelocationTypeError
  relocTargetBits :: NoRelocationType w -> Int
relocTargetBits = NoRelocationType w -> Int
forall a. a
noRelocationTypeError

noPLTStubInfo :: String -> PLTStubInfo (NoRelocationType w)
noPLTStubInfo :: forall (w :: Nat). String -> PLTStubInfo (NoRelocationType w)
noPLTStubInfo String
arch = String -> PLTStubInfo (NoRelocationType w)
forall a. HasCallStack => String -> a
error (String -> PLTStubInfo (NoRelocationType w))
-> String -> PLTStubInfo (NoRelocationType w)
forall a b. (a -> b) -> a -> b
$
  String
"The " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
arch String -> ShowS
forall a. [a] -> [a] -> [a]
++
  String
" architecture has not yet been configured to support PLT stubs."

{-
Note [PLT stub names]
~~~~~~~~~~~~~~~~~~~~~
In a dynamically linked binary, the compiler issues calls to shared library
functions by jumping to a PLT stub. The PLT stub jumps to an address taken from
the Global Offset Table (GOT), which is populated by the dynamic loader based
on where the shared library is mapped into memory.

These PLT stubs are not inherently assigned names, but we would like to have
sensible names that we can simulate the functions of the same names in the
corresponding shared library.

PLT stubs do not have their own names in any symbol table. Instead, entries in
the Global Offset Table have names in the form of dynamic PLT relocations.  We
get those from elf-edit via the 'dynPLTRel' function. Note that these
relocations do not have their own directly associated names; instead, there is
a list of rela entries and a separate list of symbols. The list of rela entries
contains function relocations while the list of dynamic symbols ('dynSymEntry')
contains both function and object symbols. To align them, we must just discard
the non-function entries. We do this by checking if the current symbol entry is
of function type; if it is not, we just grab the next function symbol in the
list.

That step gives us names for global offset table entries, but *not* names for
PLT stubs. We rely on the fact that the list of PLT stubs is in the same order
as the list of global offset table entries. The previous step gives us the
*index* of each entry and a name for that entry. To get the name of the PLT
stub itself, we just compute the relevant offset from the base of the .plt
section. Each PLT stub is 16 bytes on most architectures. For example, on
x86_64 the address of the PLT stub of an entry is @addrOf(.plt) + 16 * idx@.

Ultimately, the approach above relies on the assumption that PLT stubs will
always be the same size in each binary on a particular architecture.
Unfortunately, this is not true in practice, as the exact size of a PLT stub
can vary depending on factors such as:

* What linker you use. For example, the `mold` linker produces PLT stubs that
  are 8 bytes large on x86-64 (instead of 16 bytes) by default. To override
  this, one must pass -Wl,-z,lazy to `mold`.

* Whether you compile with instrumentation of control-flow transfers. This
  is something that recent versions of Ubuntu use in their distribution of
  `gcc`, which has the side effect of producing PLT stubs that are larger
  than 16 bytes. To override this, one must pass `-fcf-protection=none` to
  `gcc`.

Getting this right in all cases would likely require doing a more detailed
analysis of the underlying machine code, which is what tools like `angr` do.
(See this comment in `angr`, which is very relevant to this discussion:
https://github.com/angr/cle/blob/4a7e4f7a6f1151f5587bf8bfa919da0064bd2449/cle/backends/elf/metaelf.py#L110-L116 )
For now, we settle for getting the "common-case" heuristics right.

Note [Creating a new PLTStubInfo value]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We factor out the heuristics used to compute the sizes of PLT-related stubs
into the PLTStubInfo data type. Currently, not all architectures have a
corresponding PLTStubInfo value, and it is possible that you will need to add
such a value for the architecture you are using. If that is the case, then you
can do so by following these steps. As a running example, we will trace through
the steps needed to define a PLTStubInfo value for x86-64:

1. Make sure that the @elf-edit@ library defines a dynamic relocation type for
   the architecture that you care about. For instance, @elf-edit@ provides an
   @X86_64_RelocationType@ for x86-64 in Data.ElfEdit.Relocations.X86_64, along
   with a corresponding IsRelocationType instance. The pltStubSymbols function
   makes use of this instance, so make sure to define it.

2. Next, obtain a C compiler for your architecture. In the case of x86-64, it
   should be relatively straightforward to download one from your package
   manager. For other architectures, the https://musl.cc website contains a
   wide variety of cross-compilers for different architectures.

3. Next, we need to see how large the sizes of PLT stubs are so that we can
   develop appropriate heuristics. One way to accomplish this is to use this
   C program as a smoke test:

     #include <stdlib.h>
     #include <unistd.h>
     #include <sys/types.h>

     int main(void) {
       void* (*m)(size_t) = &malloc;
       void* (*c)(size_t, size_t) = &calloc;

       int* x = malloc(sizeof(int));
       int* y = calloc(sizeof(int), 1);

       pid_t pid = getpid();
       pid_t ppid = getppid();

       return 0;
     }

   This directly invokes two functions defined in a shared library (libc), and
   it also invokes two shared library functions indirectly by way of function
   pointers. This provides a healthy combination of different ways to
   dynamically link against functions, which will be helpful for our heuristics.

   Compile this program like so:

     <path-to-c-compiler> -no-pie -fcf-protection=none test.c -o test.exe

   We pass `-no-pie` and `-fcf-protection=none` here to get the "common case"
   for how the compiler lays out its PLT-related address space. (Again, we
   are developing heuristics, and these heuristics aren't perfect.)

4. Now run `objdump` (which comes shipped with https://musl.cc binary
   distributions) to disassemble the test executable:

     <path-to-objdump> -d test.exe

   You will see output like this:

     test.exe:     file format elf64-x86-64


     Disassembly of section .init:

     0000000000401000 <_init>:
       401000:       50                      push   %rax
       401001:       58                      pop    %rax
       401002:       c3                      retq

     Disassembly of section .plt:

     0000000000401010 <.plt>:
       401010:       ff 35 f2 2f 00 00       pushq  0x2ff2(%rip)        # 404008 <_GLOBAL_OFFSET_TABLE_+0x8>
       401016:       ff 25 f4 2f 00 00       jmpq   *0x2ff4(%rip)        # 404010 <_GLOBAL_OFFSET_TABLE_+0x10>
       40101c:       0f 1f 40 00             nopl   0x0(%rax)

     0000000000401020 <getpid@plt>:
       401020:       ff 25 f2 2f 00 00       jmpq   *0x2ff2(%rip)        # 404018 <getpid>
       401026:       68 00 00 00 00          pushq  $0x0
       40102b:       e9 e0 ff ff ff          jmpq   401010 <.plt>

     0000000000401030 <getppid@plt>:
       401030:       ff 25 ea 2f 00 00       jmpq   *0x2fea(%rip)        # 404020 <getppid>
       401036:       68 01 00 00 00          pushq  $0x1
       40103b:       e9 d0 ff ff ff          jmpq   401010 <.plt>

     0000000000401040 <__libc_start_main@plt>:
       401040:       ff 25 e2 2f 00 00       jmpq   *0x2fe2(%rip)        # 404028 <__libc_start_main>
       401046:       68 02 00 00 00          pushq  $0x2
       40104b:       e9 c0 ff ff ff          jmpq   401010 <.plt>

     Disassembly of section .plt.got:

     0000000000401050 <__cxa_finalize@plt>:
       401050:       ff 25 82 2f 00 00       jmpq   *0x2f82(%rip)        # 403fd8 <__cxa_finalize>
       401056:       66 90                   xchg   %ax,%ax

     0000000000401058 <malloc@plt>:
       401058:       ff 25 82 2f 00 00       jmpq   *0x2f82(%rip)        # 403fe0 <malloc>
       40105e:       66 90                   xchg   %ax,%ax

     0000000000401060 <calloc@plt>:
       401060:       ff 25 82 2f 00 00       jmpq   *0x2f82(%rip)        # 403fe8 <calloc>
       401066:       66 90                   xchg   %ax,%ax

     Disassembly of section .text:

     ...

   We will be using the information above to compute the three values in the
   PLTStubInfo data constructor.

5. First, we need to compute pltFunSize, which is the size (in bytes) of the
   <.plt> function above. This is straightforward enough to do in GHCi:

     > 0x401020 - 0x401010
     16

   Here, 0x401010 is the address for <.plt>, and 0x401020 is the address of the
   <getpid@plt> stub that directly follows <.plt>. GHCi tells us that the
   difference is 16 bytes, so that is the value that we use for pltFunSize.

6. Second, we need to compute pltStubSize, which is the size of each stub in the
   .plt section. Each stub function's name ends with @plt, e.g., <getpid@plt>.
   We can again use GHCi to compute the size of <getpid@plt>:

     > 0x401030 - 0x401020
     16

7. Finally, we need to compute pltGotStubSize, which is the size of each stub in
   the .plt.got section. This is a special section that some architectures
   reserve for things like function pointers, such as &malloc and &calloc in
   the program above. GHCi tells us the size of <malloc@plt>:

     > 0x401060 - 0x401058
     8

   Note that not all architectures have .plt.got sections (e.g., AArch32).
   For these architectures, the pltStubSymbols function will never make use of
   the value of pltGotStubSize, so it is permissible to implement it using the
   `error` function.

Putting all of these together, we arrive at the definition of x86_64PLTStubInfo
in Data.Macaw.X86:

  x86_64PLTStubInfo :: PLTStubInfo X86_64_RelocationType
  x86_64PLTStubInfo = PLTStubInfo
    { pltFunSize     = 16
    , pltStubSize    = 16
    , pltGotStubSize = 8
    }
-}