Source code for scythe.file
from scythe.base import BaseSingleFileExtractor
from hashlib import sha512
from warnings import warn
import json
import os
try:
import magic
except ImportError as e:
if 'failed to find libmagic' in str(e):
warn('The libmagic library is not installed. '
'See: https://github.com/ahupp/python-magic#installation')
else:
warn('The python wrapper for libmagic is not installed. '
'If desired, call: https://github.com/ahupp/python-magic#installation')
magic = None
[docs]class GenericFileExtractor(BaseSingleFileExtractor):
"""Gather basic file information"""
def __init__(self, store_path=True, compute_hash=True):
"""
Args:
store_path (bool): Whether to record the path of the file
compute_hash (bool): Whether to compute the hash of a file
"""
super().__init__()
self.store_path = store_path
self.compute_hash = compute_hash
def _extract_file(self, path, context=None):
output = {
"length": os.path.getsize(path),
"filename": os.path.basename(path),
}
# If magic imported properly, use it
if magic is not None:
output["mime_type"] = magic.from_file(path, mime=True)
output["data_type"] = magic.from_file(path)
if self.store_path:
output['path'] = path
if self.compute_hash:
sha = sha512()
with open(path, 'rb') as fp:
while True:
data = fp.read(65536)
if not data:
break
sha.update(data)
output['sha512'] = sha.hexdigest()
return output
def implementors(self):
return ['Logan Ward']
def version(self):
return '0.0.1'
@property
def schema(self):
with open(os.path.join(os.path.dirname(__file__), 'schemas', 'file.json')) as fp:
return json.load(fp)