Source code for mrcrowbar.statistics
from __future__ import annotations
import array
import math
from collections import Counter
from mrcrowbar.common import BytesReadType
class Stats:
    """Helper class for performing some basic statistical analysis on binary data.

    The analysis is performed once, in the constructor, and exposed through
    the following attributes:

    samples
        Number of items in the source data.

    histo
        Byte histogram for the source data (256 counters, one per byte value).

    entropy
        Shannon entropy of the source data, computed with a base-2 logarithm.
    """

    def __init__( self, buffer: BytesReadType ):
        """Generate a Stats instance for a byte string and analyse the data.

        buffer
            Source data to analyse. Only values in the range 0-255 are
            counted in the histogram.
        """
        #: Number of items in the source data.
        self.samples = len( buffer )

        # Python's Counter object uses a fast path
        cc = Counter( buffer )

        #: Byte histogram for the source data.
        self.histo = array.array( "L", (cc.get( i, 0 ) for i in range( 256 )) )

        #: Shannon entropy calculated for the source data.
        self.entropy = 0.0
        for count in self.histo:
            # Empty slots contribute nothing, and skipping them avoids
            # log2( 0 ); it also keeps an empty buffer from dividing by zero.
            if count != 0:
                cover = count / self.samples
                self.entropy += -cover * math.log2( cover )

    def histogram( self, width: int ) -> list[int]:
        """Return the byte histogram resampled to a given number of buckets.

        width
            Number of buckets to return. Must be a positive divisor of 256.

        Each bucket is the sum of (256 // width) adjacent byte counts.
        Raises ValueError if width is not in the range 1-256 or does not
        divide 256 evenly.
        """
        # Range checks go first: a divisibility test on zero would raise
        # ZeroDivisionError, and no value above 256 can divide 256.
        if width <= 0:
            raise ValueError( "Width of the histogram must be greater than zero" )
        if width > 256:
            raise ValueError(
                "Width of the histogram must be less than or equal to 256"
            )
        if (256 % width) != 0:
            raise ValueError( "Width of the histogram must be a divisor of 256" )

        bucket = 256 // width
        return [sum( self.histo[i : i + bucket] ) for i in range( 0, 256, bucket )]

    def ansi_format( self, width: int = 64, height: int = 12 ) -> str:
        """Return a human readable ANSI-terminal printout of the stats.

        width
            Custom width for the graph (in characters).

        height
            Custom height for the graph (in characters).

        Raises ValueError if width is not a positive divisor of 256.
        """
        # histogram() validates width, so invalid values are rejected
        # before the rendering helper is imported.
        buckets = self.histogram( width )

        from mrcrowbar.ansi import format_bar_graph_iter

        result: list[str] = []
        for line in format_bar_graph_iter( buckets, width=width, height=height ):
            result.append( f" {line}\n" )
        # Bottom border of the graph, followed by the summary figures.
        result.append( f'╘{"═"*width}╛\n' )
        result.append( f"entropy: {self.entropy:.10f}\n" )
        result.append( f"samples: {self.samples}" )
        return "".join( result )

    def print( self, width: int = 64, height: int = 12 ):
        """Print the graphical version of the results produced by ansi_format()."""
        print( self.ansi_format( width=width, height=height ) )

    def __str__( self ):
        """Return the output of ansi_format() using the default dimensions."""
        return self.ansi_format()