diff options
Diffstat (limited to 'bindings/python/llvm')
-rw-r--r-- | bindings/python/llvm/__init__.py | 0 | ||||
-rw-r--r-- | bindings/python/llvm/common.py | 106 | ||||
-rw-r--r-- | bindings/python/llvm/core.py | 98 | ||||
-rw-r--r-- | bindings/python/llvm/disassembler.py | 134 | ||||
-rw-r--r-- | bindings/python/llvm/enumerations.py | 211 | ||||
-rw-r--r-- | bindings/python/llvm/object.py | 523 | ||||
-rw-r--r-- | bindings/python/llvm/tests/__init__.py | 0 | ||||
-rw-r--r-- | bindings/python/llvm/tests/base.py | 32 | ||||
-rw-r--r-- | bindings/python/llvm/tests/test_core.py | 23 | ||||
-rw-r--r-- | bindings/python/llvm/tests/test_disassembler.py | 28 | ||||
-rw-r--r-- | bindings/python/llvm/tests/test_object.py | 67 |
11 files changed, 1222 insertions, 0 deletions
diff --git a/bindings/python/llvm/__init__.py b/bindings/python/llvm/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/bindings/python/llvm/__init__.py diff --git a/bindings/python/llvm/common.py b/bindings/python/llvm/common.py new file mode 100644 index 0000000..0c5fcd0 --- /dev/null +++ b/bindings/python/llvm/common.py @@ -0,0 +1,106 @@ +#===- common.py - Python LLVM Bindings -----------------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +from ctypes import POINTER +from ctypes import c_void_p +from ctypes import cdll + +import ctypes.util + +__all__ = [ + 'c_object_p', + 'find_library', + 'get_library', +] + +c_object_p = POINTER(c_void_p) + +class LLVMObject(object): + """Base class for objects that are backed by an LLVM data structure. + + This class should never be instantiated outside of this package. + """ + def __init__(self, ptr, ownable=True, disposer=None): + assert isinstance(ptr, c_object_p) + + self._ptr = self._as_parameter_ = ptr + + self._self_owned = True + self._ownable = ownable + self._disposer = disposer + + self._owned_objects = [] + + def take_ownership(self, obj): + """Take ownership of another object. + + When you take ownership of another object, you are responsible for + destroying that object. In addition, a reference to that object is + placed inside this object so the Python garbage collector will not + collect the object while it is still alive in libLLVM. + + This method should likely only be called from within modules inside + this package. + """ + assert isinstance(obj, LLVMObject) + + self._owned_objects.append(obj) + obj._self_owned = False + + def from_param(self): + """ctypes function that converts this object to a function parameter.""" + return self._as_parameter_ + + def __del__(self): + if not hasattr(self, '_self_owned') or not hasattr(self, '_disposer'): + return + + if self._self_owned and self._disposer: + self._disposer(self) + +class CachedProperty(object): + """Decorator that caches the result of a property lookup. + + This is a useful replacement for @property. It is recommended to use this + decorator on properties that invoke C API calls for which the result of the + call will be idempotent. + """ + def __init__(self, wrapped): + self.wrapped = wrapped + try: + self.__doc__ = wrapped.__doc__ + except: # pragma: no cover + pass + + def __get__(self, instance, instance_type=None): + if instance is None: + return self + + value = self.wrapped(instance) + setattr(instance, self.wrapped.__name__, value) + + return value + +def find_library(): + # FIXME should probably have build system define absolute path of shared + # library at install time. + for lib in ['LLVM-3.1svn', 'libLLVM-3.1svn', 'LLVM', 'libLLVM']: + result = ctypes.util.find_library(lib) + if result: + return result + + return None + +def get_library(): + """Obtain a reference to the llvm library.""" + lib = find_library() + if not lib: + raise Exception('LLVM shared library not found!') + + return cdll.LoadLibrary(lib) diff --git a/bindings/python/llvm/core.py b/bindings/python/llvm/core.py new file mode 100644 index 0000000..6756637 --- /dev/null +++ b/bindings/python/llvm/core.py @@ -0,0 +1,98 @@ +#===- core.py - Python LLVM Bindings -------------------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +from .common import LLVMObject +from .common import c_object_p +from .common import get_library + +from . import enumerations + +from ctypes import POINTER +from ctypes import byref +from ctypes import c_char_p + +__all__ = [ + "lib", + "MemoryBuffer", +] + +lib = get_library() + +class OpCode(object): + """Represents an individual OpCode enumeration.""" + + _value_map = {} + + def __init__(self, name, value): + self.name = name + self.value = value + + def __repr__(self): + return 'OpCode.%s' % self.name + + @staticmethod + def from_value(value): + """Obtain an OpCode instance from a numeric value.""" + result = OpCode._value_map.get(value, None) + + if result is None: + raise ValueError('Unknown OpCode: %d' % value) + + return result + + @staticmethod + def register(name, value): + """Registers a new OpCode enumeration. + + This is called by this module for each enumeration defined in + enumerations. You should not need to call this outside this module. + """ + if value in OpCode._value_map: + raise ValueError('OpCode value already registered: %d' % value) + + opcode = OpCode(name, value) + OpCode._value_map[value] = opcode + setattr(OpCode, name, opcode) + +class MemoryBuffer(LLVMObject): + """Represents an opaque memory buffer.""" + + def __init__(self, filename=None): + """Create a new memory buffer. + + Currently, we support creating from the contents of a file at the + specified filename. + """ + if filename is None: + raise Exception("filename argument must be defined") + + memory = c_object_p() + out = c_char_p(None) + + result = lib.LLVMCreateMemoryBufferWithContentsOfFile(filename, + byref(memory), byref(out)) + + if result: + raise Exception("Could not create memory buffer: %s" % out.value) + + LLVMObject.__init__(self, memory, disposer=lib.LLVMDisposeMemoryBuffer) + +def register_library(library): + library.LLVMCreateMemoryBufferWithContentsOfFile.argtypes = [c_char_p, + POINTER(c_object_p), POINTER(c_char_p)] + library.LLVMCreateMemoryBufferWithContentsOfFile.restype = bool + + library.LLVMDisposeMemoryBuffer.argtypes = [MemoryBuffer] + +def register_enumerations(): + for name, value in enumerations.OpCodes: + OpCode.register(name, value) + +register_library(lib) +register_enumerations() diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py new file mode 100644 index 0000000..5030b98 --- /dev/null +++ b/bindings/python/llvm/disassembler.py @@ -0,0 +1,134 @@ +#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +from ctypes import CFUNCTYPE +from ctypes import POINTER +from ctypes import addressof +from ctypes import byref +from ctypes import c_byte +from ctypes import c_char_p +from ctypes import c_int +from ctypes import c_size_t +from ctypes import c_ubyte +from ctypes import c_uint64 +from ctypes import c_void_p +from ctypes import cast + +from .common import LLVMObject +from .common import c_object_p +from .common import get_library + +__all__ = [ + 'Disassembler', +] + +lib = get_library() +callbacks = {} + +class Disassembler(LLVMObject): + """Represents a disassembler instance. + + Disassembler instances are tied to specific "triple," which must be defined + at creation time. + + Disassembler instances can disassemble instructions from multiple sources. + """ + def __init__(self, triple): + """Create a new disassembler instance. + + The triple argument is the triple to create the disassembler for. This + is something like 'i386-apple-darwin9'. + """ + ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0), + callbacks['op_info'](0), callbacks['symbol_lookup'](0)) + if not ptr.contents: + raise Exception('Could not obtain disassembler for triple: %s' % + triple) + + LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose) + + def get_instruction(self, source, pc=0): + """Obtain the next instruction from an input source. + + The input source should be a str or bytearray or something that + represents a sequence of bytes. + + This function will start reading bytes from the beginning of the + source. + + The pc argument specifies the address that the first byte is at. + + This returns a 2-tuple of: + + long number of bytes read. 0 if no instruction was read. + str representation of instruction. This will be the assembly that + represents the instruction. + """ + buf = cast(c_char_p(source), POINTER(c_ubyte)) + out_str = cast((c_byte * 255)(), c_char_p) + + result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)), + c_uint64(pc), out_str, 255) + + return (result, out_str.value) + + def get_instructions(self, source, pc=0): + """Obtain multiple instructions from an input source. + + This is like get_instruction() except it is a generator for all + instructions within the source. It starts at the beginning of the + source and reads instructions until no more can be read. + + This generator returns 3-tuple of: + + long address of instruction. + long size of instruction, in bytes. + str representation of instruction. + """ + source_bytes = c_char_p(source) + out_str = cast((c_byte * 255)(), c_char_p) + + # This could probably be written cleaner. But, it does work. + buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents + offset = 0 + address = pc + end_address = pc + len(source) + while address < end_address: + b = cast(addressof(buf) + offset, POINTER(c_ubyte)) + result = lib.LLVMDisasmInstruction(self, b, + c_uint64(len(source) - offset), c_uint64(address), + out_str, 255) + + if result == 0: + break + + yield (address, result, out_str.value) + + address += result + offset += result + + +def register_library(library): + library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int, + callbacks['op_info'], callbacks['symbol_lookup']] + library.LLVMCreateDisasm.restype = c_object_p + + library.LLVMDisasmDispose.argtypes = [Disassembler] + + library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte), + c_uint64, c_uint64, c_char_p, c_size_t] + library.LLVMDisasmInstruction.restype = c_size_t + +callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64, + c_int, c_void_p) +callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64, + POINTER(c_uint64), c_uint64, + POINTER(c_char_p)) + +register_library(lib) diff --git a/bindings/python/llvm/enumerations.py b/bindings/python/llvm/enumerations.py new file mode 100644 index 0000000..f49d2fa --- /dev/null +++ b/bindings/python/llvm/enumerations.py @@ -0,0 +1,211 @@ +#===- enumerations.py - Python LLVM Enumerations -------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +r""" +LLVM Enumerations +================= + +This file defines enumerations from LLVM. + +Each enumeration is exposed as a list of 2-tuples. These lists are consumed by +dedicated types elsewhere in the package. The enumerations are centrally +defined in this file so they are easier to locate and maintain. +""" + +__all__ = [ + 'Attributes', + 'OpCodes', + 'TypeKinds', + 'Linkages', + 'Visibility', + 'CallConv', + 'IntPredicate', + 'RealPredicate', + 'LandingPadClauseTy', +] + +Attributes = [ + ('ZExt', 1 << 0), + ('MSExt', 1 << 1), + ('NoReturn', 1 << 2), + ('InReg', 1 << 3), + ('StructRet', 1 << 4), + ('NoUnwind', 1 << 5), + ('NoAlias', 1 << 6), + ('ByVal', 1 << 7), + ('Nest', 1 << 8), + ('ReadNone', 1 << 9), + ('ReadOnly', 1 << 10), + ('NoInline', 1 << 11), + ('AlwaysInline', 1 << 12), + ('OptimizeForSize', 1 << 13), + ('StackProtect', 1 << 14), + ('StackProtectReq', 1 << 15), + ('Alignment', 31 << 16), + ('NoCapture', 1 << 21), + ('NoRedZone', 1 << 22), + ('ImplicitFloat', 1 << 23), + ('Naked', 1 << 24), + ('InlineHint', 1 << 25), + ('StackAlignment', 7 << 26), + ('ReturnsTwice', 1 << 29), + ('UWTable', 1 << 30), + ('NonLazyBind', 1 << 31), +] + +OpCodes = [ + ('Ret', 1), + ('Br', 2), + ('Switch', 3), + ('IndirectBr', 4), + ('Invoke', 5), + ('Unreachable', 7), + ('Add', 8), + ('FAdd', 9), + ('Sub', 10), + ('FSub', 11), + ('Mul', 12), + ('FMul', 13), + ('UDiv', 14), + ('SDiv', 15), + ('FDiv', 16), + ('URem', 17), + ('SRem', 18), + ('FRem', 19), + ('Shl', 20), + ('LShr', 21), + ('AShr', 22), + ('And', 23), + ('Or', 24), + ('Xor', 25), + ('Alloca', 26), + ('Load', 27), + ('Store', 28), + ('GetElementPtr', 29), + ('Trunc', 30), + ('ZExt', 31), + ('SExt', 32), + ('FPToUI', 33), + ('FPToSI', 34), + ('UIToFP', 35), + ('SIToFP', 36), + ('FPTrunc', 37), + ('FPExt', 38), + ('PtrToInt', 39), + ('IntToPtr', 40), + ('BitCast', 41), + ('ICmp', 42), + ('FCmpl', 43), + ('PHI', 44), + ('Call', 45), + ('Select', 46), + ('UserOp1', 47), + ('UserOp2', 48), + ('AArg', 49), + ('ExtractElement', 50), + ('InsertElement', 51), + ('ShuffleVector', 52), + ('ExtractValue', 53), + ('InsertValue', 54), + ('Fence', 55), + ('AtomicCmpXchg', 56), + ('AtomicRMW', 57), + ('Resume', 58), + ('LandingPad', 59), +] + +TypeKinds = [ + ('Void', 0), + ('Half', 1), + ('Float', 2), + ('Double', 3), + ('X86_FP80', 4), + ('FP128', 5), + ('PPC_FP128', 6), + ('Label', 7), + ('Integer', 8), + ('Function', 9), + ('Struct', 10), + ('Array', 11), + ('Pointer', 12), + ('Vector', 13), + ('Metadata', 14), + ('X86_MMX', 15), +] + +Linkages = [ + ('External', 0), + ('AvailableExternally', 1), + ('LinkOnceAny', 2), + ('LinkOnceODR', 3), + ('WeakAny', 4), + ('WeakODR', 5), + ('Appending', 6), + ('Internal', 7), + ('Private', 8), + ('DLLImport', 9), + ('DLLExport', 10), + ('ExternalWeak', 11), + ('Ghost', 12), + ('Common', 13), + ('LinkerPrivate', 14), + ('LinkerPrivateWeak', 15), + ('LinkerPrivateWeakDefAuto', 16), +] + +Visibility = [ + ('Default', 0), + ('Hidden', 1), + ('Protected', 2), +] + +CallConv = [ + ('CCall', 0), + ('FastCall', 8), + ('ColdCall', 9), + ('X86StdcallCall', 64), + ('X86FastcallCall', 65), +] + +IntPredicate = [ + ('EQ', 32), + ('NE', 33), + ('UGT', 34), + ('UGE', 35), + ('ULT', 36), + ('ULE', 37), + ('SGT', 38), + ('SGE', 39), + ('SLT', 40), + ('SLE', 41), +] + +RealPredicate = [ + ('PredicateFalse', 0), + ('OEQ', 1), + ('OGT', 2), + ('OGE', 3), + ('OLT', 4), + ('OLE', 5), + ('ONE', 6), + ('ORD', 7), + ('UNO', 8), + ('UEQ', 9), + ('UGT', 10), + ('UGE', 11), + ('ULT', 12), + ('ULE', 13), + ('UNE', 14), + ('PredicateTrue', 15), +] + +LandingPadClauseTy = [ + ('Catch', 0), + ('Filter', 1), +] diff --git a/bindings/python/llvm/object.py b/bindings/python/llvm/object.py new file mode 100644 index 0000000..473aa3a --- /dev/null +++ b/bindings/python/llvm/object.py @@ -0,0 +1,523 @@ +#===- object.py - Python Object Bindings --------------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +r""" +Object File Interface +===================== + +This module provides an interface for reading information from object files +(e.g. binary executables and libraries). + +Using this module, you can obtain information about an object file's sections, +symbols, and relocations. These are represented by the classes ObjectFile, +Section, Symbol, and Relocation, respectively. + +Usage +----- + +The only way to use this module is to start by creating an ObjectFile. You can +create an ObjectFile by loading a file (specified by its path) or by creating a +llvm.core.MemoryBuffer and loading that. + +Once you have an object file, you can inspect its sections and symbols directly +by calling get_sections() and get_symbols() respectively. To inspect +relocations, call get_relocations() on a Section instance. + +Iterator Interface +------------------ + +The LLVM bindings expose iteration over sections, symbols, and relocations in a +way that only allows one instance to be operated on at a single time. This is +slightly annoying from a Python perspective, as it isn't very Pythonic to have +objects that "expire" but are still active from a dynamic language. + +To aid working around this limitation, each Section, Symbol, and Relocation +instance caches its properties after first access. So, if the underlying +iterator is advanced, the properties can still be obtained provided they have +already been retrieved. + +In addition, we also provide a "cache" method on each class to cache all +available data. You can call this on each obtained instance. Or, you can pass +cache=True to the appropriate get_XXX() method to have this done for you. + +Here are some examples on how to perform iteration: + + obj = ObjectFile(filename='/bin/ls') + + # This is OK. Each Section is only accessed inside its own iteration slot. + section_names = [] + for section in obj.get_sections(): + section_names.append(section.name) + + # This is NOT OK. You perform a lookup after the object has expired. + symbols = list(obj.get_symbols()) + for symbol in symbols: + print symbol.name # This raises because the object has expired. + + # In this example, we mix a working and failing scenario. + symbols = [] + for symbol in obj.get_symbols(): + symbols.append(symbol) + print symbol.name + + for symbol in symbols: + print symbol.name # OK + print symbol.address # NOT OK. We didn't look up this property before. + + # Cache everything up front. + symbols = list(obj.get_symbols(cache=True)) + for symbol in symbols: + print symbol.name # OK + +""" + +from ctypes import c_char_p +from ctypes import c_uint64 + +from .common import CachedProperty +from .common import LLVMObject +from .common import c_object_p +from .common import get_library +from .core import MemoryBuffer + +__all__ = [ + "lib", + "ObjectFile", + "Relocation", + "Section", + "Symbol", +] + +class ObjectFile(LLVMObject): + """Represents an object/binary file.""" + + def __init__(self, filename=None, contents=None): + """Construct an instance from a filename or binary data. + + filename must be a path to a file that can be opened with open(). + contents can be either a native Python buffer type (like str) or a + llvm.core.MemoryBuffer instance. + """ + if contents: + assert isinstance(contents, MemoryBuffer) + + if filename is not None: + contents = MemoryBuffer(filename=filename) + + if contents is None: + raise Exception('No input found.') + + ptr = lib.LLVMCreateObjectFile(contents) + LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile) + self.take_ownership(contents) + + def get_sections(self, cache=False): + """Obtain the sections in this object file. + + This is a generator for llvm.object.Section instances. + + Sections are exposed as limited-use objects. See the module's + documentation on iterators for more. + """ + sections = lib.LLVMGetSections(self) + last = None + while True: + if lib.LLVMIsSectionIteratorAtEnd(self, sections): + break + + last = Section(sections) + if cache: + last.cache() + + yield last + + lib.LLVMMoveToNextSection(sections) + last.expire() + + if last is not None: + last.expire() + + lib.LLVMDisposeSectionIterator(sections) + + def get_symbols(self, cache=False): + """Obtain the symbols in this object file. + + This is a generator for llvm.object.Symbol instances. + + Each Symbol instance is a limited-use object. See this module's + documentation on iterators for more. + """ + symbols = lib.LLVMGetSymbols(self) + last = None + while True: + if lib.LLVMIsSymbolIteratorAtEnd(self, symbols): + break + + last = Symbol(symbols, self) + if cache: + last.cache() + + yield last + + lib.LLVMMoveToNextSymbol(symbols) + last.expire() + + if last is not None: + last.expire() + + lib.LLVMDisposeSymbolIterator(symbols) + +class Section(LLVMObject): + """Represents a section in an object file.""" + + def __init__(self, ptr): + """Construct a new section instance. + + Section instances can currently only be created from an ObjectFile + instance. Therefore, this constructor should not be used outside of + this module. + """ + LLVMObject.__init__(self, ptr) + + self.expired = False + + @CachedProperty + def name(self): + """Obtain the string name of the section. + + This is typically something like '.dynsym' or '.rodata'. + """ + if self.expired: + raise Exception('Section instance has expired.') + + return lib.LLVMGetSectionName(self) + + @CachedProperty + def size(self): + """The size of the section, in long bytes.""" + if self.expired: + raise Exception('Section instance has expired.') + + return lib.LLVMGetSectionSize(self) + + @CachedProperty + def contents(self): + if self.expired: + raise Exception('Section instance has expired.') + + return lib.LLVMGetSectionContents(self) + + @CachedProperty + def address(self): + """The address of this section, in long bytes.""" + if self.expired: + raise Exception('Section instance has expired.') + + return lib.LLVMGetSectionAddress(self) + + def has_symbol(self, symbol): + """Returns whether a Symbol instance is present in this Section.""" + if self.expired: + raise Exception('Section instance has expired.') + + assert isinstance(symbol, Symbol) + return lib.LLVMGetSectionContainsSymbol(self, symbol) + + def get_relocations(self, cache=False): + """Obtain the relocations in this Section. + + This is a generator for llvm.object.Relocation instances. + + Each instance is a limited used object. See this module's documentation + on iterators for more. + """ + if self.expired: + raise Exception('Section instance has expired.') + + relocations = lib.LLVMGetRelocations(self) + last = None + while True: + if lib.LLVMIsRelocationIteratorAtEnd(self, relocations): + break + + last = Relocation(relocations) + if cache: + last.cache() + + yield last + + lib.LLVMMoveToNextRelocation(relocations) + last.expire() + + if last is not None: + last.expire() + + lib.LLVMDisposeRelocationIterator(relocations) + + def cache(self): + """Cache properties of this Section. + + This can be called as a workaround to the single active Section + limitation. When called, the properties of the Section are fetched so + they are still available after the Section has been marked inactive. + """ + getattr(self, 'name') + getattr(self, 'size') + getattr(self, 'contents') + getattr(self, 'address') + + def expire(self): + """Expire the section. + + This is called internally by the section iterator. + """ + self.expired = True + +class Symbol(LLVMObject): + """Represents a symbol in an object file.""" + def __init__(self, ptr, object_file): + assert isinstance(ptr, c_object_p) + assert isinstance(object_file, ObjectFile) + + LLVMObject.__init__(self, ptr) + + self.expired = False + self._object_file = object_file + + @CachedProperty + def name(self): + """The str name of the symbol. + + This is often a function or variable name. Keep in mind that name + mangling could be in effect. + """ + if self.expired: + raise Exception('Symbol instance has expired.') + + return lib.LLVMGetSymbolName(self) + + @CachedProperty + def address(self): + """The address of this symbol, in long bytes.""" + if self.expired: + raise Exception('Symbol instance has expired.') + + return lib.LLVMGetSymbolAddress(self) + + @CachedProperty + def file_offset(self): + """The offset of this symbol in the file, in long bytes.""" + if self.expired: + raise Exception('Symbol instance has expired.') + + return lib.LLVMGetSymbolFileOffset(self) + + @CachedProperty + def size(self): + """The size of the symbol, in long bytes.""" + if self.expired: + raise Exception('Symbol instance has expired.') + + return lib.LLVMGetSymbolSize(self) + + @CachedProperty + def section(self): + """The Section to which this Symbol belongs. + + The returned Section instance does not expire, unlike Sections that are + commonly obtained through iteration. + + Because this obtains a new section iterator each time it is accessed, + calling this on a number of Symbol instances could be expensive. + """ + sections = lib.LLVMGetSections(self._object_file) + lib.LLVMMoveToContainingSection(sections, self) + + return Section(sections) + + def cache(self): + """Cache all cacheable properties.""" + getattr(self, 'name') + getattr(self, 'address') + getattr(self, 'file_offset') + getattr(self, 'size') + + def expire(self): + """Mark the object as expired to prevent future API accesses. + + This is called internally by this module and it is unlikely that + external callers have a legitimate reason for using it. + """ + self.expired = True + +class Relocation(LLVMObject): + """Represents a relocation definition.""" + def __init__(self, ptr): + """Create a new relocation instance. + + Relocations are created from objects derived from Section instances. + Therefore, this constructor should not be called outside of this + module. See Section.get_relocations() for the proper method to obtain + a Relocation instance. + """ + assert isinstance(ptr, c_object_p) + + LLVMObject.__init__(self, ptr) + + self.expired = False + + @CachedProperty + def address(self): + """The address of this relocation, in long bytes.""" + if self.expired: + raise Exception('Relocation instance has expired.') + + return lib.LLVMGetRelocationAddress(self) + + @CachedProperty + def offset(self): + """The offset of this relocation, in long bytes.""" + if self.expired: + raise Exception('Relocation instance has expired.') + + return lib.LLVMGetRelocationOffset(self) + + @CachedProperty + def symbol(self): + """The Symbol corresponding to this Relocation.""" + if self.expired: + raise Exception('Relocation instance has expired.') + + ptr = lib.LLVMGetRelocationSymbol(self) + return Symbol(ptr) + + @CachedProperty + def type_number(self): + """The relocation type, as a long.""" + if self.expired: + raise Exception('Relocation instance has expired.') + + return lib.LLVMGetRelocationType(self) + + @CachedProperty + def type_name(self): + """The relocation type's name, as a str.""" + if self.expired: + raise Exception('Relocation instance has expired.') + + return lib.LLVMGetRelocationTypeName(self) + + @CachedProperty + def value_string(self): + if self.expired: + raise Exception('Relocation instance has expired.') + + return lib.LLVMGetRelocationValueString(self) + + def expire(self): + """Expire this instance, making future API accesses fail.""" + self.expired = True + + def cache(self): + """Cache all cacheable properties on this instance.""" + getattr(self, 'address') + getattr(self, 'offset') + getattr(self, 'symbol') + getattr(self, 'type') + getattr(self, 'type_name') + getattr(self, 'value_string') + +def register_library(library): + """Register function prototypes with LLVM library instance.""" + + # Object.h functions + library.LLVMCreateObjectFile.argtypes = [MemoryBuffer] + library.LLVMCreateObjectFile.restype = c_object_p + + library.LLVMDisposeObjectFile.argtypes = [ObjectFile] + + library.LLVMGetSections.argtypes = [ObjectFile] + library.LLVMGetSections.restype = c_object_p + + library.LLVMDisposeSectionIterator.argtypes = [c_object_p] + + library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p] + library.LLVMIsSectionIteratorAtEnd.restype = bool + + library.LLVMMoveToNextSection.argtypes = [c_object_p] + + library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p] + + library.LLVMGetSymbols.argtypes = [ObjectFile] + library.LLVMGetSymbols.restype = c_object_p + + library.LLVMDisposeSymbolIterator.argtypes = [c_object_p] + + library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p] + library.LLVMIsSymbolIteratorAtEnd.restype = bool + + library.LLVMMoveToNextSymbol.argtypes = [c_object_p] + + library.LLVMGetSectionName.argtypes = [c_object_p] + library.LLVMGetSectionName.restype = c_char_p + + library.LLVMGetSectionSize.argtypes = [c_object_p] + library.LLVMGetSectionSize.restype = c_uint64 + + library.LLVMGetSectionContents.argtypes = [c_object_p] + library.LLVMGetSectionContents.restype = c_char_p + + library.LLVMGetSectionAddress.argtypes = [c_object_p] + library.LLVMGetSectionAddress.restype = c_uint64 + + library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p] + library.LLVMGetSectionContainsSymbol.restype = bool + + library.LLVMGetRelocations.argtypes = [c_object_p] + library.LLVMGetRelocations.restype = c_object_p + + library.LLVMDisposeRelocationIterator.argtypes = [c_object_p] + + library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p] + library.LLVMIsRelocationIteratorAtEnd.restype = bool + + library.LLVMMoveToNextRelocation.argtypes = [c_object_p] + + library.LLVMGetSymbolName.argtypes = [Symbol] + library.LLVMGetSymbolName.restype = c_char_p + + library.LLVMGetSymbolAddress.argtypes = [Symbol] + library.LLVMGetSymbolAddress.restype = c_uint64 + + library.LLVMGetSymbolFileOffset.argtypes = [Symbol] + library.LLVMGetSymbolFileOffset.restype = c_uint64 + + library.LLVMGetSymbolSize.argtypes = [Symbol] + library.LLVMGetSymbolSize.restype = c_uint64 + + library.LLVMGetRelocationAddress.argtypes = [c_object_p] + library.LLVMGetRelocationAddress.restype = c_uint64 + + library.LLVMGetRelocationOffset.argtypes = [c_object_p] + library.LLVMGetRelocationOffset.restype = c_uint64 + + library.LLVMGetRelocationSymbol.argtypes = [c_object_p] + library.LLVMGetRelocationSymbol.restype = c_object_p + + library.LLVMGetRelocationType.argtypes = [c_object_p] + library.LLVMGetRelocationType.restype = c_uint64 + + library.LLVMGetRelocationTypeName.argtypes = [c_object_p] + library.LLVMGetRelocationTypeName.restype = c_char_p + + library.LLVMGetRelocationValueString.argtypes = [c_object_p] + library.LLVMGetRelocationValueString.restype = c_char_p + +lib = get_library() +register_library(lib) diff --git a/bindings/python/llvm/tests/__init__.py b/bindings/python/llvm/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/bindings/python/llvm/tests/__init__.py diff --git a/bindings/python/llvm/tests/base.py b/bindings/python/llvm/tests/base.py new file mode 100644 index 0000000..ff9eb2f --- /dev/null +++ b/bindings/python/llvm/tests/base.py @@ -0,0 +1,32 @@ +import os.path +import unittest + +POSSIBLE_TEST_BINARIES = [ + 'libreadline.so.5', + 'libreadline.so.6', +] + +POSSIBLE_TEST_BINARY_PATHS = [ + '/usr/lib/debug', + '/lib', + '/usr/lib', + '/usr/local/lib', + '/lib/i386-linux-gnu', +] + +class TestBase(unittest.TestCase): + def get_test_binary(self): + """Helper to obtain a test binary for object file testing. + + FIXME Support additional, highly-likely targets or create one + ourselves. + """ + for d in POSSIBLE_TEST_BINARY_PATHS: + for lib in POSSIBLE_TEST_BINARIES: + path = os.path.join(d, lib) + + if os.path.exists(path): + return path + + raise Exception('No suitable test binaries available!') + get_test_binary.__test__ = False diff --git a/bindings/python/llvm/tests/test_core.py b/bindings/python/llvm/tests/test_core.py new file mode 100644 index 0000000..545abc8 --- /dev/null +++ b/bindings/python/llvm/tests/test_core.py @@ -0,0 +1,23 @@ +from .base import TestBase +from ..core import OpCode +from ..core import MemoryBuffer + +class TestCore(TestBase): + def test_opcode(self): + self.assertTrue(hasattr(OpCode, 'Ret')) + self.assertTrue(isinstance(OpCode.Ret, OpCode)) + self.assertEqual(OpCode.Ret.value, 1) + + op = OpCode.from_value(1) + self.assertTrue(isinstance(op, OpCode)) + self.assertEqual(op, OpCode.Ret) + + def test_memory_buffer_create_from_file(self): + source = self.get_test_binary() + + MemoryBuffer(filename=source) + + def test_memory_buffer_failing(self): + with self.assertRaises(Exception): + MemoryBuffer(filename="/hopefully/this/path/doesnt/exist") + diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py new file mode 100644 index 0000000..545e866 --- /dev/null +++ b/bindings/python/llvm/tests/test_disassembler.py @@ -0,0 +1,28 @@ +from .base import TestBase + +from ..disassembler import Disassembler + +class TestDisassembler(TestBase): + def test_instantiate(self): + Disassembler('i686-apple-darwin9') + + def test_basic(self): + sequence = '\x67\xe3\x81' # jcxz -127 + triple = 'i686-apple-darwin9' + + disassembler = Disassembler(triple) + + count, s = disassembler.get_instruction(sequence) + self.assertEqual(count, 3) + self.assertEqual(s, '\tjcxz\t-127') + + def test_get_instructions(self): + sequence = '\x67\xe3\x81\x01\xc7' # jcxz -127; addl %eax, %edi + + disassembler = Disassembler('i686-apple-darwin9') + + instructions = list(disassembler.get_instructions(sequence)) + self.assertEqual(len(instructions), 2) + + self.assertEqual(instructions[0], (0, 3, '\tjcxz\t-127')) + self.assertEqual(instructions[1], (3, 2, '\taddl\t%eax, %edi')) diff --git a/bindings/python/llvm/tests/test_object.py b/bindings/python/llvm/tests/test_object.py new file mode 100644 index 0000000..7ff981b --- /dev/null +++ b/bindings/python/llvm/tests/test_object.py @@ -0,0 +1,67 @@ +from .base import TestBase +from ..object import ObjectFile +from ..object import Relocation +from ..object import Section +from ..object import Symbol + +class TestObjectFile(TestBase): + def get_object_file(self): + source = self.get_test_binary() + return ObjectFile(filename=source) + + def test_create_from_file(self): + self.get_object_file() + + def test_get_sections(self): + o = self.get_object_file() + + count = 0 + for section in o.get_sections(): + count += 1 + assert isinstance(section, Section) + assert isinstance(section.name, str) + assert isinstance(section.size, long) + assert isinstance(section.contents, str) + assert isinstance(section.address, long) + + self.assertGreater(count, 0) + + for section in o.get_sections(): + section.cache() + + def test_get_symbols(self): + o = self.get_object_file() + + count = 0 + for symbol in o.get_symbols(): + count += 1 + assert isinstance(symbol, Symbol) + assert isinstance(symbol.name, str) + assert isinstance(symbol.address, long) + assert isinstance(symbol.size, long) + assert isinstance(symbol.file_offset, long) + + self.assertGreater(count, 0) + + for symbol in o.get_symbols(): + symbol.cache() + + def test_symbol_section_accessor(self): + o = self.get_object_file() + + for symbol in o.get_symbols(): + section = symbol.section + assert isinstance(section, Section) + + break + + def test_get_relocations(self): + o = self.get_object_file() + for section in o.get_sections(): + for relocation in section.get_relocations(): + assert isinstance(relocation, Relocation) + assert isinstance(relocation.address, long) + assert isinstance(relocation.offset, long) + assert isinstance(relocation.type_number, long) + assert isinstance(relocation.type_name, str) + assert isinstance(relocation.value_string, str) |