# Source code for mrcrowbar.utils

"""General utility functions useful for reverse engineering."""

import io
import json
import logging
import math
import mmap
import re
import time
logger = logging.getLogger( __name__ )

from mrcrowbar import ansi, colour, encoding as enco, statistics, sound
from mrcrowbar.common import is_bytes, read, bounds

# Copy every name returned by the encoding module's raw type loader into this
# module's global namespace, so they can be referenced directly from here.
# NOTE(review): presumably these are the raw integer/float codec helpers;
# confirm against mrcrowbar.encoding._load_raw_types.
globals().update( enco._load_raw_types() )

def enable_logging( level='WARNING' ):
    """Enable sending logs to stderr. Useful for shell sessions.

    level
        Logging threshold, as defined in the logging module of the Python
        standard library. Defaults to 'WARNING'.

    Every call attaches a new stream handler to the 'mrcrowbar' logger.
    """
    handler = logging.StreamHandler()
    handler.setLevel( level )
    handler.setFormatter( logging.Formatter( '[%(levelname)s] %(name)s - %(message)s' ) )

    package_log = logging.getLogger( 'mrcrowbar' )
    package_log.setLevel( level )
    package_log.addHandler( handler )
# Foreground colour indices used when rendering diffs: index 0 is applied to
# the "removed" ('-') side and index 1 to the "added" ('+') side.
DIFF_COLOUR_MAP = (9, 10)
def grep_iter( pattern, source, encoding='utf8', fixed_string=False, hex_format=False, ignore_case=False ):
    """Return an iterator that finds the contents of a byte string that matches a regular expression pattern.

    pattern
        Pattern to match, as a Python string

    source
        Byte string to inspect

    encoding
        Convert strings in the pattern to a specific Python encoding (default: utf8)

    fixed_string
        Interpret the pattern as a fixed string (disable regular expressions)

    hex_format
        Interpret the pattern as raw hexadecimal (default: false)

    ignore_case
        Perform a case-insensitive search

    The iterator produces re match objects, as returned by finditer().
    """
    assert isinstance( pattern, str )
    assert is_bytes( source )

    # DOTALL is always on so that '.' also matches newline bytes
    flags = (re.DOTALL | re.IGNORECASE) if ignore_case else re.DOTALL
    byte_pattern = enco.regex_pattern_to_bytes(
        pattern, encoding=encoding, fixed_string=fixed_string, hex_format=hex_format
    )
    return re.compile( byte_pattern, flags ).finditer( source )
def grep( pattern, source, encoding='utf8', fixed_string=False, hex_format=False, ignore_case=False ):
    """Find the contents of a byte string that matches a regular expression pattern.

    pattern
        Pattern to match, as a Python string

    source
        Source byte string to search

    encoding
        Convert strings in the pattern to a specific Python encoding (default: utf8)

    fixed_string
        Interpret the pattern as a fixed string (disable regular expressions)

    hex_format
        Interpret the pattern as raw hexadecimal (default: false)

    ignore_case
        Perform a case-insensitive search

    Returns a list with every match produced by grep_iter().
    """
    return list( grep_iter(
        pattern, source, encoding=encoding, fixed_string=fixed_string,
        hex_format=hex_format, ignore_case=ignore_case
    ) )
def find_iter( substring, source, start=None, end=None, length=None, overlap=False, ignore_case=False, encodings=['utf_8'], brute=False, char_size=1 ):
    """Return an iterator that finds every location of a substring in a source byte string, checking against multiple encodings.

    substring
        Substring(s) to match, as a single Python string/byte string or a list

    source
        Source Python string or byte string to search

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    overlap
        Return overlapping matches (default: false)

    ignore_case
        Perform a case-insensitive search (default: false)

    encodings
        A list of encodings to try, or 'all' for every supported encoding (default: ['utf_8'])

    brute
        Brute-force an encoding based on recurring letter patterns (default: false)

    char_size
        Size in bytes for each character for brute-forcing (default: 1)

    Each result is a tuple of (start offset, end offset, substring, info).
    In normal mode info is 'str', 'bytes' or the list of encodings which
    produced the matching byte sequence; in brute mode it is a dict mapping
    each letter of the substring to the bytes matched for it.

    Raises TypeError if the source is neither a byte string nor a Python
    string paired with a Python string substring.
    """
    start, end = bounds( start, end, length, len( source ) )

    # a lone str/bytes substring is wrapped in a list so that both calling
    # conventions share the same code path
    substrings = substring if isinstance( substring, list ) else [substring]

    if ignore_case:
        source = source.lower()
        substrings = [s.lower() for s in substrings]

    if not brute:
        for substring in substrings:
            # maps each concrete needle to a description of where it came from
            subs = {}
            if isinstance( source, str ) and isinstance( substring, str ):
                subs[substring] = 'str'
            elif isinstance( source, bytes ) and isinstance( substring, bytes ):
                subs[substring] = 'bytes'
            elif not is_bytes( source ):
                raise TypeError( 'Source should be of type bytes, or both source and substring should be of type str!' )
            else:
                if encodings == 'all' or encodings == ['all']:
                    encodings = enco.CODECS
                elif isinstance( encodings, str ):
                    encodings = [encodings]
                for encoding in encodings:
                    # strip out the automatic byte-order mark
                    encoding_test = encoding.lower().replace( ' ', '' ).replace( '-', '' ).replace( '_', '' )
                    if encoding_test == 'utf16':
                        encoding = 'utf-16-le'
                    elif encoding_test == 'utf32':
                        encoding = 'utf-32-le'

                    try:
                        test = substring.encode( encoding )
                        if test not in subs:
                            subs[test] = []
                        subs[test].append( encoding )
                    except UnicodeEncodeError:
                        continue

            for test in subs:
                # always advance by at least one position, otherwise an empty
                # needle would be found at the same offset forever
                step = 1 if overlap else max( len( test ), 1 )
                result = source.find( test, start, end )
                while result != -1:
                    yield (result, result + len( test ), substring, subs[test])
                    result = source.find( test, result + step, end )
    else:
        matches = []

        # create a mapping of substring letters to substrings
        letter_map = {}
        if len( substrings ) > 1:
            for index, sub in enumerate( substrings ):
                for letter in sub:
                    if letter not in letter_map:
                        letter_map[letter] = set()
                    letter_map[letter].add( index )

        for substring in substrings:
            submatches = []
            unk_dict, pattern = enco.regex_unknown_encoding_match( substring, char_size=char_size )
            if overlap:
                # a lookahead consumes nothing, so later matches may overlap
                pattern = b'(?=(' + pattern + b'))'
            regex = re.compile( pattern, flags=re.DOTALL )
            for match in regex.finditer( source[start:end] ):
                groups = match.groups()
                span_0, span_1 = match.span()
                if overlap:
                    # the lookahead match itself is zero-width; the real
                    # content is the extra outer group added above
                    span_1 += len( groups[0] )
                    groups = groups[1:]
                submatches.append((span_0 + start, span_1 + start, substring, {key: groups[unk_dict[key]] for key in sorted( unk_dict.keys() )}))
            matches.append(submatches)

        # repeat until all eliminations are made
        changed = True
        while changed:
            changed = False
            # filter out matches that don't have a letter occurring in all substrings
            for letter, subs in letter_map.items():
                subs = list( subs )
                if len( subs ) == 1:
                    continue
                # find the letter mappings that appear in all substrings
                mappings = set( m[3][letter] for m in matches[subs[0]] )
                for sub_id in subs[1:]:
                    mappings.intersection_update( set( m[3][letter] for m in matches[sub_id] ) )

                # remove the letter mappings which are unique
                for sub_id in subs:
                    new_matches = [m for m in matches[sub_id] if m[3][letter] in mappings]
                    if len( new_matches ) != len( matches[sub_id] ):
                        changed = True
                    matches[sub_id] = new_matches

        for submatches in matches:
            yield from submatches
def find( substring, source, start=None, end=None, length=None, overlap=False, ignore_case=False, encodings=['utf_8'], brute=False, char_size=1 ):
    """Find every location of a substring in a source byte string, checking against multiple encodings.

    substring
        Substring to match, as a single Python string/byte string or a list

    source
        Source Python string or byte string to search

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    overlap
        Return overlapping matches (default: false)

    ignore_case
        Perform a case-insensitive search

    encodings
        A list of encodings to try, or 'all' for every supported encoding (default: ['utf_8'])

    brute
        Brute-force an encoding based on recurring letter patterns (default: false)

    char_size
        Size in bytes for each character for brute-forcing (default: 1)

    Returns a list with every result produced by find_iter().
    """
    return list( find_iter(
        substring, source, start=start, end=end, length=length, overlap=overlap,
        ignore_case=ignore_case, encodings=encodings, brute=brute, char_size=char_size
    ) )
def grepdump_iter( pattern, source, start=None, end=None, length=None, encoding='utf8', fixed_string=False, hex_format=False, ignore_case=False, major_len=8, minor_len=4, colour=True, address_base=None, before=2, after=2, title=None, format='hex' ):
    """Return an iterator that finds the contents of a byte string that matches a regular expression pattern and renders the result.

    pattern
        Pattern to match, as a Python string

    source
        The byte string to print.

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    encoding
        Convert strings in the pattern to a specific Python encoding (default: utf8)

    fixed_string
        Interpret the pattern as a fixed string (disable regular expressions)

    hex_format
        Interpret the pattern as raw hexadecimal (default: false)

    ignore_case
        Perform a case-insensitive search

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    address_base
        Base address to use for labels (default: start)

    before
        Number of lines of context preceding a match to show

    after
        Number of lines of context following a match to show

    title
        Name to print as a heading if there's a match. Useful for file names.

    format
        Output format; one of 'hex', 'text' or 'json' (default: 'hex')

    Raises ValueError if both end and length are defined.
    """
    assert is_bytes( source )
    start, end = bounds( start, end, length, len( source ) )
    if len( source ) == 0 or (start == end == 0):
        return
    address_base_offset = 0 if address_base is None else address_base-start
    assert format in ('hex', 'text', 'json')

    # the search runs over the selected window only, so every span it reports
    # is relative to start
    hits = grep_iter( pattern, source[start:end], encoding, fixed_string, hex_format, ignore_case )

    if format == 'hex':
        hb = ansi.HexdumpHighlightBuffer( source, start, end, None, major_len, minor_len, colour, address_base, before, after, title )
        for match in hits:
            lo, hi = match.span()
            hb.add_span( (lo+start, hi+start) )
            yield from hb.flush()
        yield from hb.flush( final=True )

    elif format == 'text':
        for match in hits:
            lo, hi = match.span()
            pad = max( 8, math.floor( math.log( end+address_base_offset )/math.log( 16 ) ) )
            digits = '{:0'+str( pad )+'x}'
            line = (digits+':'+digits).format( lo+start+address_base_offset, hi+start+address_base_offset )
            line += ':{}'.format( repr( match.group( 0 ) ) )
            if title:
                line = '{}:'.format( title ) + line
            yield line

    elif format == 'json':
        for match in hits:
            lo, hi = match.span()
            yield json.dumps( {
                'title': title,
                'start_offset': lo + start + address_base_offset,
                'end_offset': hi + start + address_base_offset,
                'match': match.group( 0 ).hex()
            } )
def grepdump( pattern, source, start=None, end=None, length=None, encoding='utf8', fixed_string=False, hex_format=False, ignore_case=False, major_len=8, minor_len=4, colour=True, address_base=None, before=2, after=2, title=None, format='hex' ):
    """Find the contents of a byte string that matches a regular expression pattern and print the rendered result.

    pattern
        Pattern to match, as a Python string

    source
        The byte string to print.

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    encoding
        Convert strings in the pattern to a specific Python encoding (default: utf8)

    fixed_string
        Interpret the pattern as a fixed string (disable regular expressions)

    hex_format
        Interpret the pattern as raw hexadecimal (default: false)

    ignore_case
        Perform a case-insensitive search

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    address_base
        Base address to use for labels (default: start)

    before
        Number of lines of context preceding a match to show

    after
        Number of lines of context following a match to show

    title
        Name to print as a heading if there's a match. Useful for file names.

    format
        Output format; one of 'hex', 'text' or 'json' (default: 'hex')

    Raises ValueError if both end and length are defined.
    """
    rendered = grepdump_iter(
        pattern, source, start=start, end=end, length=length, encoding=encoding,
        fixed_string=fixed_string, hex_format=hex_format, ignore_case=ignore_case,
        major_len=major_len, minor_len=minor_len, colour=colour,
        address_base=address_base, before=before, after=after, title=title,
        format=format
    )
    for output_line in rendered:
        print( output_line )
def search_iter( pattern, source, prefix='source', depth=None, encoding='utf8', fixed_string=False, hex_format=False, ignore_case=False ):
    """Return an iterator that finds the Fields inside a Block that match a regular expression pattern.

    pattern
        Pattern to match, as a Python string

    source
        Block object to inspect

    prefix
        Name of the base element, used to build the path of each result
        (default: 'source')

    depth
        Decremented and forwarded to nested searches; it is not otherwise
        consulted by this function.

    encoding
        Convert strings in the pattern to a specific Python encoding (default: utf8)

    fixed_string
        Interpret the pattern as a fixed string (disable regular expressions)

    hex_format
        Interpret the pattern as raw hexadecimal (default: false)

    ignore_case
        Perform a case-insensitive search

    Each result is a dotted path string such as 'source.field[3].obj.name'.
    """
    from mrcrowbar.models import Block, Chunk

    depth = None if depth is None else depth-1

    exported = source.export_data()
    match_list = [m.span() for m in grep( pattern, exported, encoding, fixed_string, hex_format, ignore_case )]

    fields = source.get_field_names()

    def check_field( offset, size, data, pref ):
        # Report the field (or the matching fields nested inside it) as soon
        # as one match boundary lands inside the field's byte range.
        limit = offset + size
        for m_start, m_end in match_list:
            if not (offset <= m_start < limit or offset <= m_end < limit):
                continue

            if isinstance( data, Block ):
                nested = (data, pref)
            elif isinstance( data, Chunk ):
                nested = (data.obj, '{}.obj'.format( pref ))
            else:
                nested = None

            if nested is None:
                yield pref
            else:
                found = False
                for hit in search_iter( pattern, nested[0], nested[1], depth, encoding, fixed_string, hex_format, ignore_case ):
                    found = True
                    yield hit
                if not found:
                    # no exact match, yield entire object
                    yield pref
            # one overlapping match is enough for this field
            break

    for name in fields:
        offset = source.get_field_start_offset( name )
        size = source.get_field_size( name )
        data = getattr( source, name )
        if type( data ) is not list:
            yield from check_field( offset, size, data, '{}.{}'.format( prefix, name ) )
            continue

        # field contents is a list, check each of the individual elements
        el_offset = offset
        for i, el in enumerate( data ):
            el_size = source.get_field_size( name, index=i )
            yield from check_field( el_offset, el_size, el, '{}.{}[{}]'.format( prefix, name, i ) )
            el_offset += el_size
def finddump_iter( substring, source, start=None, end=None, length=None, overlap=False, ignore_case=False, encodings=['utf_8'], brute=False, char_size=1, major_len=8, minor_len=4, colour=True, address_base=None, before=2, after=2, title=None, format='hex' ):
    """Return an iterator that finds every location of a substring in a source byte string, checking against multiple encodings, and renders the result.

    substring
        Substring(s) to match, as a single Python string/byte string or a list

    source
        Source byte string to search.

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    overlap
        Whether to return overlapping matches (default: false)

    ignore_case
        Perform a case-insensitive search

    encodings
        A list of encodings to try, or 'all' for every supported encoding (default: ['utf_8'])

    brute
        Brute-force an encoding based on recurring letter patterns (default: false)

    char_size
        Size in bytes for each character for brute-forcing (default: 1)

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    address_base
        Base address to use for labels (default: start)

    before
        Number of lines of context preceding a match to show

    after
        Number of lines of context following a match to show

    title
        Name to print as a heading if there's a match. Useful for file names.

    format
        Output format; one of 'hex', 'text' or 'json' (default: 'hex')

    In 'hex' format every match is rendered as a header line (the substring
    plus its encodings or alphabet, prefixed by the title if given) followed
    by the highlighted hexdump lines. The header is yielded like any other
    line instead of being printed, so the iterator has no output side effects.
    """
    assert is_bytes( source )
    start, end = bounds( start, end, length, len( source ) )
    if len( source ) == 0 or (start == end == 0):
        return
    address_base_offset = address_base-start if address_base is not None else 0
    assert format in ('hex', 'text', 'json')

    # start/end are already resolved above, so no length is forwarded
    findings = find_iter( substring, source, start, end, None, overlap, ignore_case, encodings, brute, char_size )

    if format == 'hex':
        for match in findings:
            if isinstance( match[3], dict ):
                # brute-force result: show the guessed alphabet as hex
                header = repr( match[2] ) + ' - ' + repr( {k: v.hex() for k, v in match[3].items()} )
            else:
                header = repr( match[2] ) + ' - ' + repr( match[3] )
            if title:
                header = title + ' - ' + header
            yield header

            # each match gets its own buffer so it is rendered with its own
            # context lines
            hb = ansi.HexdumpHighlightBuffer( source, start, end, None, major_len, minor_len, colour, address_base, before, after )
            hb.add_span( (match[0], match[1]) )
            yield from hb.flush( final=True )

    elif format == 'text':
        for match in findings:
            start_off = match[0] + address_base_offset
            end_off = match[1] + address_base_offset
            digits = '{:0'+str( max( 8, math.floor( math.log( end + address_base_offset )/math.log( 16 ) ) ) )+'x}'
            line = (digits+':'+digits).format( start_off, end_off )
            line += ':{}'.format( repr( source[match[0]:match[1]] ) )
            if isinstance( match[3], list ):
                line += ':{}'.format( repr( match[3] ) )
            elif isinstance( match[3], dict ):
                line += ':{}'.format( repr( {k: v.hex() for k, v in match[3].items()} ) )
            if title:
                line = '{}:'.format( title ) + line
            yield line

    elif format == 'json':
        for match in findings:
            result = {
                'title': title,
                'start_offset': match[0] + address_base_offset,
                'end_offset': match[1] + address_base_offset,
                'match': source[match[0]:match[1]].hex(),
            }
            if isinstance( match[3], list ):
                result['encodings'] = match[3]
            elif isinstance( match[3], dict ):
                result['alphabet'] = {k: v.hex() for k, v in match[3].items()}
            yield json.dumps( result )
def finddump( substring, source, start=None, end=None, length=None, overlap=False, ignore_case=False, encodings=['utf_8'], brute=False, char_size=1, major_len=8, minor_len=4, colour=True, address_base=None, before=2, after=2, title=None, format='hex' ):
    """Find every location of a substring in a source byte string, checking against multiple encodings, and print the rendered result.

    substring
        Substring(s) to match, as a single Python string/byte string or a list

    source
        Source byte string to search.

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    overlap
        Whether to return overlapping matches (default: false)

    ignore_case
        Perform a case-insensitive search

    encodings
        A list of encodings to try, or 'all' for every supported encoding (default: ['utf_8'])

    brute
        Brute-force an encoding based on recurring letter patterns (default: false)

    char_size
        Size in bytes for each character for brute-forcing (default: 1)

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    address_base
        Base address to use for labels (default: start)

    before
        Number of lines of context preceding a match to show

    after
        Number of lines of context following a match to show

    title
        Name to print as a heading if there's a match. Useful for file names.

    format
        Output format; one of 'hex', 'text' or 'json' (default: 'hex')
    """
    rendered = finddump_iter(
        substring, source, start=start, end=end, length=length, overlap=overlap,
        ignore_case=ignore_case, encodings=encodings, brute=brute,
        char_size=char_size, major_len=major_len, minor_len=minor_len,
        colour=colour, address_base=address_base, before=before, after=after,
        title=title, format=format
    )
    for output_line in rendered:
        print( output_line )
def diff( source1, source2, start=None, end=None ):
    """Perform a diff between two equal-sized binary strings and
    return a list of (offset, size) tuples denoting the differences.

    source1
        The first byte string source.

    source2
        The second byte string source.

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    Only the range shared by both sources is compared; anything past the end
    of the shorter source is ignored.
    """
    shortest = min( len( source1 ), len( source2 ) )
    first = 0 if start is None else start
    last = shortest if end is None else min( end, shortest )

    results = []
    run_start = None    # offset where the current run of differences began
    for pointer in range( first, last ):
        differs = source1[pointer] != source2[pointer]
        if differs and run_start is None:
            run_start = pointer
        elif not differs and run_start is not None:
            results.append( (run_start, pointer-run_start) )
            run_start = None

    # a run still open here extends to the end of the compared range
    if run_start is not None:
        results.append( (run_start, last-run_start) )

    return results
def objdiff_iter( source1, source2, prefix='source', depth=None ):
    """Return an iterator that finds differences between two objects.

    source1
        The first source.

    source2
        The second source.

    prefix
        The name of the base element to display.

    depth
        Maximum number of levels to traverse.

    Each result is a (path, value1, value2) tuple. For list elements present
    on one side only, the missing side is None.
    """
    depth = None if depth is None else depth-1

    def abbr( src ):
        # plain values are shown as-is, everything else as (module, class name)
        if type( src ) in (int, float, str, bytes, bytearray):
            return src
        return (src.__class__.__module__, src.__class__.__name__)

    # mismatched types are a difference on their own, unless both sides are
    # byte strings of some flavour
    if (type( source1 ) != type( source2 )) and not (is_bytes( source1 ) and is_bytes( source2 )):
        yield (prefix, abbr( source1 ), abbr( source2 ))
        return

    if type( source1 ) == list:
        size1, size2 = len( source1 ), len( source2 )
        for i in range( max( size1, size2 ) ):
            prefix_mod = prefix+'[{}]'.format( i )
            if i < size1 and i < size2:
                yield from objdiff_iter( source1[i], source2[i], prefix=prefix_mod, depth=depth )
            elif i >= size2:
                yield (prefix_mod, abbr( source1[i] ), None)
            else:
                yield (prefix_mod, None, abbr( source2[i] ))
        return

    # byte strings, plain scalars and anything without a 'serialised'
    # attribute are compared directly
    if is_bytes( source1 ) or type( source1 ) in (int, float, str) or not hasattr( source1, 'serialised' ):
        if source1 != source2:
            yield (prefix, source1, source2)
        return

    # Block
    s1 = source1.serialised
    s2 = source2.serialised
    if s1 != s2 and depth is not None and depth <= 0:
        # depth budget exhausted: report the objects without descending
        yield (prefix, source1, source2)
        return

    assert s1[0] == s2[0]
    assert len( s1[1] ) == len( s2[1] )
    for field1, field2 in zip( s1[1], s2[1] ):
        assert field1[0] == field2[0]
        if field1[1] != field2[1]:
            name = field1[0]
            yield from objdiff_iter( getattr( source1, name ), getattr( source2, name ), prefix='{}.{}'.format( prefix, name ), depth=depth )
def objdiff( source1, source2, prefix='source', depth=None ):
    """Find differences between two objects.

    source1
        The first source.

    source2
        The second source.

    prefix
        The name of the base element to display.

    depth
        Maximum number of levels to traverse.

    Returns a list with every result produced by objdiff_iter().
    """
    return list( objdiff_iter( source1, source2, prefix=prefix, depth=depth ) )
def objdiffdump_iter( source1, source2, prefix='source', depth=None ):
    """Return an iterator that renders a list of differences between two objects.

    source1
        First source object

    source2
        Second source object

    prefix
        The name of the base element to display.

    depth
        Maximum number of levels to traverse.

    Differences between two byte strings are rendered as a '* path:' heading
    followed by the output of diffdump_iter(); all other differences are
    rendered as coloured '- path: value' and '+ path: value' lines.

    The generator's return value (carried by StopIteration) is True if no
    differences were found.
    """
    same = True
    # objdiff_iter yields the (path, value1, value2) tuples consumed here
    for p, s1, s2 in objdiff_iter( source1, source2, prefix, depth ):
        if is_bytes( s1 ) and is_bytes( s2 ):
            yield '* {}:'.format( p )
            yield from diffdump_iter( s1, s2 )
            same = False
            continue
        if s1 is not None:
            yield ansi.format_string( '- {}: {}'.format( p, s1 ), foreground=DIFF_COLOUR_MAP[0] )
            same = False
        if s2 is not None:
            yield ansi.format_string( '+ {}: {}'.format( p, s2 ), foreground=DIFF_COLOUR_MAP[1] )
            same = False
    return same
def objdiffdump( source1, source2, prefix='source', depth=None ):
    """Print a list of differences between two objects.

    source1
        First source object

    source2
        Second source object

    prefix
        The name of the base element to display.

    depth
        Maximum number of levels to traverse.
    """
    rendered = objdiffdump_iter( source1, source2, prefix=prefix, depth=depth )
    for output_line in rendered:
        print( output_line )
def histdump_iter( source, start=None, end=None, length=None, samples=0x10000, width=64, address_base=None ):
    """Return an iterator that renders a histogram of a byte string.

    source
        Source byte string to measure

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    samples
        Number of samples per histogram slice (default: 0x10000)

    width
        Width of rendered histogram (default: 64)

    address_base
        Base address to use for labelling (default: start)
    """
    assert is_bytes( source )
    start, end = bounds( start, end, length, len( source ) )
    if len( source ) == 0 or (start == end == 0):
        return

    address_base_offset = 0 if address_base is None else address_base-start

    # one rendered line per slice of `samples` bytes
    yield from (
        ansi.format_histdump_line( source, offset, length=samples, end=end, width=width, address_base_offset=address_base_offset )
        for offset in range( start, end, samples )
    )
def histdump( source, start=None, end=None, length=None, samples=0x10000, width=64, address_base=None ):
    """Print a histogram of a byte string.

    source
        Source byte string to measure

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    samples
        Number of samples per histogram slice (default: 0x10000)

    width
        Width of rendered histogram (default: 64)

    address_base
        Base address to use for labelling (default: start)
    """
    rendered = histdump_iter(
        source, start=start, end=end, length=length, samples=samples,
        width=width, address_base=address_base
    )
    for output_line in rendered:
        print( output_line )
def hexdump_iter( source, start=None, end=None, length=None, major_len=8, minor_len=4, colour=True, address_base=None, show_offsets=True, show_glyphs=True ):
    """Return an iterator that renders a byte string in tabular hexadecimal/ASCII format.

    source
        Source byte string to render

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    address_base
        Base address to use for labels (default: start)

    show_offsets
        Display offsets at the start of each line (default: true)

    show_glyphs
        Display glyph map at the end of each line (default: true)

    Raises ValueError if both end and length are defined.
    """
    assert is_bytes( source )
    start, end = bounds( start, end, length, len( source ) )
    if len( source ) == 0 or (start == end == 0):
        return

    address_base_offset = 0 if address_base is None else address_base-start

    # number of bytes rendered on each output line
    stride = minor_len*major_len
    yield from (
        ansi.format_hexdump_line( source, offset, end, major_len, minor_len, colour, address_base_offset=address_base_offset, show_offsets=show_offsets, show_glyphs=show_glyphs )
        for offset in range( start, end, stride )
    )
def hexdump( source, start=None, end=None, length=None, major_len=8, minor_len=4, colour=True, address_base=None, show_offsets=True, show_glyphs=True ):
    """Print a byte string in tabular hexadecimal/ASCII format.

    source
        Source byte string to print

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    address_base
        Base address to use for labels (default: start)

    show_offsets
        Display offsets at the start of each line (default: true)

    show_glyphs
        Display glyph map at the end of each line (default: true)

    Raises ValueError if both end and length are defined.
    """
    rendered = hexdump_iter(
        source, start=start, end=end, length=length, major_len=major_len,
        minor_len=minor_len, colour=colour, address_base=address_base,
        show_offsets=show_offsets, show_glyphs=show_glyphs
    )
    for output_line in rendered:
        print( output_line )
def diffdump_iter( source1, source2, start=None, end=None, length=None, major_len=8, minor_len=4, colour=True, before=2, after=2, address_base=None ):
    """Return an iterator that renders the differences between two byte strings.

    source1
        First byte string source

    source2
        Second byte string source

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    before
        Number of lines of context preceding a match to show

    after
        Number of lines of context following a match to show

    address_base
        Base address to use for labels (default: start)

    Passing None for before or after shows every line. Runs of hidden,
    unchanged lines are collapsed into a single '...' line.

    Raises ValueError if both end and length are defined.
    """
    stride = minor_len*major_len
    start = start if start is not None else 0
    if (end is not None) and (length is not None):
        raise ValueError( 'Can\'t define both an end and a length!' )
    elif length is not None:
        end = start+length

    # the longer source decides how far the dump can reach
    longest = max( len( source1 ), len( source2 ) )
    end = end if end is not None else longest
    start = max( start, 0 )
    end = min( end, longest )
    address_base_offset = address_base-start if address_base is not None else 0

    line_offsets = range( start, end, stride )
    diff_lines = [
        offset for offset in line_offsets
        if source1[offset:offset+stride] != source2[offset:offset+stride]
    ]
    changed = set( diff_lines )

    # line states: 2 = contains a difference, 1 = shown as context, 0 = hidden
    show_all = before is None or after is None
    if show_all:
        show_lines = {x: (2 if x in changed else 1) for x in line_offsets}
    else:
        show_lines = {x: (2 if x in changed else 0) for x in line_offsets}
        for index in diff_lines:
            for b in [index-(x+1)*stride for x in range( before )]:
                if b in show_lines and show_lines[b] == 0:
                    show_lines[b] = 1
            for a in [index+(x+1)*stride for x in range( after )]:
                if a in show_lines and show_lines[a] == 0:
                    show_lines[a] = 1

    skip = False
    for offset in sorted( show_lines ):
        state = show_lines[offset]
        if skip and state != 0:
            # close the run of hidden lines that preceded this one
            yield '...'
            skip = False

        if state == 2:
            # highlight the differing bytes on both the '-' and '+' lines
            differing = [i for (o, l) in diff( source1, source2, start=offset, end=offset+stride ) for i in range( o, o+l )]
            if offset < len( source1 ):
                highlights = {i: DIFF_COLOUR_MAP[0] for i in differing}
                yield ansi.format_hexdump_line( source1, offset, min( end, len( source1 ) ), major_len, minor_len, colour, prefix='-', highlight_addr=DIFF_COLOUR_MAP[0], highlight_map=highlights, address_base_offset=address_base_offset )
            if offset < len( source2 ):
                highlights = {i: DIFF_COLOUR_MAP[1] for i in differing}
                yield ansi.format_hexdump_line( source2, offset, min( end, len( source2 ) ), major_len, minor_len, colour, prefix='+' , highlight_addr=DIFF_COLOUR_MAP[1], highlight_map=highlights, address_base_offset=address_base_offset )
        elif state == 1:
            yield ansi.format_hexdump_line( source1, offset, end, major_len, minor_len, colour, prefix=' ', address_base_offset=address_base_offset )
        elif state == 0:
            skip = True

    if skip:
        yield '...'
def diffdump( source1, source2, start=None, end=None, length=None, major_len=8, minor_len=4, colour=True, before=2, after=2, address_base=None ):
    """Print the differences between two byte strings.

    source1
        First byte string source

    source2
        Second byte string source

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    major_len
        Number of hexadecimal groups per line

    minor_len
        Number of bytes per hexadecimal group

    colour
        Add ANSI colour formatting to output (default: true)

    before
        Number of lines of context preceding a match to show

    after
        Number of lines of context following a match to show

    address_base
        Base address to use for labels (default: start)

    Raises ValueError if both end and length are defined.
    """
    rendered = diffdump_iter(
        source1, source2, start=start, end=end, length=length,
        major_len=major_len, minor_len=minor_len, colour=colour,
        before=before, after=after, address_base=address_base
    )
    for output_line in rendered:
        print( output_line )
def stats( source, start=None, end=None, length=None, width=64, height=12 ):
    """Print histogram graph for a byte string.

    source
        Source byte string to render

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    width
        Width of graph to render in pixels (default: 64)

    height
        Height of graph to render in pixels (default: 12)
    """
    start, end = bounds( start, end, length, len( source ) )
    window = source[start:end]
    statistics.Stats( window ).print( width=width, height=height )
def pixdump_iter( source, start=None, end=None, length=None, width=64, height=None, palette=None ):
    """Return an iterator which renders the contents of a byte string as a 256 colour image.

    source
        Source byte string to render

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    width
        Width of image to render in pixels (default: 64)

    height
        Height of image to render in pixels (default: auto)

    palette
        List of Colours to use (default: test palette)

    Raises ValueError if both end and length are defined.
    """
    assert is_bytes( source )

    palette = palette or colour.TEST_PALETTE

    if start is None:
        start = 0

    if (end is not None) and (length is not None):
        raise ValueError( 'Can\'t define both an end and a length!' )
    if length is not None:
        end = start+length
    elif end is None:
        end = len( source )

    # clamp the window to the data actually available
    start = max( start, 0 )
    end = min( end, len( source ) )
    if len( source ) == 0 or (start == end == 0):
        return iter(())

    if height is None:
        # enough rows to fit the whole window at the requested width
        height = math.ceil( (end-start)/width )

    def data_fetch( x_pos, y_pos, frame ):
        # each byte is one pixel; positions past the window are transparent
        index = y_pos*width + x_pos + start
        if index < end:
            return palette[source[index]]
        return colour.Transparent()

    return ansi.format_image_iter( data_fetch, width=width, height=height )
def pixdump( source, start=None, end=None, length=None, width=64, height=None, palette=None ):
    """Print the contents of a byte string as a 256 colour image.

    source
        Source byte string to print

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    width
        Width of image to render in pixels (default: 64)

    height
        Height of image to render in pixels (default: auto)

    palette
        List of Colours to use (default: test palette)
    """
    rendered = pixdump_iter(
        source, start=start, end=end, length=length, width=width,
        height=height, palette=palette
    )
    for output_line in rendered:
        print( output_line )
def pixdump_sweep( source, range=(64,), delay=None, start=None, end=None, length=None, height=None, palette=None ):
    """Test printing the contents of a byte string as a 256 colour image for a range of widths.

    source
        The byte string to print.

    range
        List of widths to render (default: [64])

    delay
        Number of seconds to wait between each print (default: no wait)

    start
        Start offset to read from (default: start)

    end
        End offset to stop reading at (default: end)

    length
        Length to read in (optional replacement for end)

    height
        Height of image to render in pixels (default: auto)

    palette
        List of Colours to use (default: test palette)
    """
    # NOTE: the 'range' parameter shadows the builtin inside this function
    for image_width in range:
        # label each rendering with the width that produced it
        print( image_width )
        rendered = pixdump_iter( source, start, end, length, image_width, height, palette )
        for output_line in rendered:
            print( output_line )
        print()
        if delay is None:
            continue
        time.sleep( delay )