#!/usr/bin/python
from imohash.imohash import hashfile as _imohash
import argparse
import hashlib
import os
import psutil
import sqlite3


# https://stackoverflow.com/a/17782753
def file_hash(path, block_size=4096 * 8):
    '''
    Hash a file with BLAKE2b.

    The block size should be a multiple of your filesystem's block size to
    avoid performance issues; reads here are based on 4096-octet blocks
    (the NTFS default).
    '''
    _hash = hashlib.blake2b()
    # Stream the file in block_size chunks only when it does not fit in
    # available memory; otherwise read it in one go.
    split = os.path.getsize(path) >= psutil.virtual_memory().available
    with open(path, 'rb') as f:
        if split:
            for chunk in iter(lambda: f.read(block_size), b''):
                _hash.update(chunk)
        else:
            _hash.update(f.read())
    return _hash.digest()


def backup(top, db_path):
    # hashify(top) is expected to yield one 3-tuple per file, matching the
    # three columns of the `paths` table.
    paths = hashify(top)
    con = __init_database__(db_path)
    cur = con.cursor()
    cur.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);', paths)
    con.commit()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('dir')
    parser.add_argument('-d', '--database', default='fs.db')
    args = parser.parse_args()
    __init_database__(args.database)
    backup(args.dir, args.database)
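

# NOTE: `hashify` and `__init_database__` are called above but are not defined
# in this script as given. The sketches below are one plausible reading, not
# the original implementations: they assume the `paths` table stores
# (path, size, digest) rows and reuse file_hash() from above. To actually run,
# they would need to sit before the `if __name__ == "__main__":` block that
# ends up calling them.

def __init_database__(db_path):
    '''Open (or create) the SQLite database and ensure the `paths` table exists.'''
    con = sqlite3.connect(db_path)
    con.execute('''
        CREATE TABLE IF NOT EXISTS `paths` (
            path   TEXT PRIMARY KEY,
            size   INTEGER,
            digest BLOB
        );
    ''')
    con.commit()
    return con


def hashify(top):
    '''Walk `top` and yield one (path, size, digest) row per regular file.'''
    for root, _dirs, files in os.walk(top):
        for name in files:
            path = os.path.join(root, name)
            try:
                yield (path, os.path.getsize(path), file_hash(path))
            except OSError:
                # Skip files that disappear or cannot be read mid-walk.
                continue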