"""Index a directory tree into an SQLite database of file hashes.

Every file gets a cheap sampled hash (imohash); a full BLAKE2b digest is
computed only for files whose sampled hash has already been seen during
the same walk.

Recovered from a ``git format-patch`` export whose line breaks had been
lost: commit 5516ffcee94091672719aa7397d7e21c780a3d07 by Raphael Roberts,
Tue, 13 Nov 2018 22:11:54 -0600, subject "using sqlite3 + blake as backup
hash algo".  The same commit also creates an empty ``restore.py``.
"""
import hashlib
import multiprocessing  # kept from the original import list; not referenced below
import os
import sqlite3
from typing import List, Optional, Tuple

import psutil  # kept from the original import list; no longer referenced below
from imohash.imohash import hashfile as _imohash


# https://stackoverflow.com/a/17782753
def file_hash(path: str, block_size: int = 4096 * 8) -> Tuple[str, bytes]:
    """Return ``(path, digest)`` with the BLAKE2b digest of the file at *path*.

    The file is always streamed in *block_size* chunks, so memory use stays
    bounded regardless of file size; the digest is identical to hashing the
    whole content in one call.

    Args:
        path: File to hash; opened in binary mode.
        block_size: Bytes read per chunk.  Ideally a multiple of the
            filesystem block size (4096 bytes is the NTFS default) to avoid
            performance issues.

    Returns:
        The unchanged *path* and the raw (binary) BLAKE2b digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.blake2b()
    with open(path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(block_size), b''):
            digest.update(chunk)
    return path, digest.digest()


def hashify(top: str) -> List[Tuple[str, bytes, Optional[bytes]]]:
    """Walk *top* and return one ``(path, imohash, real_hash)`` row per file.

    The working directory is switched to *top* for the duration of the walk
    so the recorded paths are relative (they start with ``.``); it is always
    restored afterwards, even if the walk fails.

    ``real_hash`` is the BLAKE2b digest from :func:`file_hash`, computed only
    when the file's imohash was already produced by an earlier file in this
    walk.  The first file with a given imohash therefore keeps ``None``.

    Files that cannot be hashed are reported on stdout and skipped; the rest
    of their directory is still processed.

    Raises:
        OSError: If *top* cannot be entered.
    """
    rows = []
    seen_imohashes = set()
    previous_dir = os.getcwd()
    os.chdir(top)
    try:
        for root, _dirs, files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root, name)
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in seen_imohashes:
                        # file_hash returns (path, digest); only the digest
                        # can be bound to the BLOB column.
                        _, real_hash = file_hash(filepath)
                except PermissionError:
                    print('Access denied:', filepath)
                    continue
                except Exception as err:
                    # Deliberately broad: the walk is best-effort and one
                    # bad file must not abort the whole run.
                    print(err, name)
                    continue
                seen_imohashes.add(imohash)
                rows.append((filepath, imohash, real_hash))
    finally:
        os.chdir(previous_dir)
    return rows


def __init_database__(path):
    """Open the SQLite database at *path* and ensure the ``paths`` table exists.

    Returns:
        The open ``sqlite3.Connection``; the caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot
            be created (the connection is closed before re-raising).
    """
    con = sqlite3.connect(path)
    try:
        # NOTE(review): SQLite treats NULLs as distinct inside a UNIQUE
        # constraint, so rows whose `blake` is NULL are not deduplicated by
        # INSERT OR IGNORE on repeated runs -- confirm whether that is wanted.
        con.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
        con.commit()
    except sqlite3.Error:
        con.close()
        raise
    return con


def backup(top, db_path):
    """Hash every file under *top* and record the rows in the DB at *db_path*.

    Rows already present (per the table's UNIQUE constraint) are ignored.
    The insert runs in a single transaction that is committed on success and
    rolled back on error; the connection is closed in either case.
    """
    paths = hashify(top)
    con = __init_database__(db_path)
    try:
        with con:  # commit on success, roll back on exception
            con.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);', paths)
    finally:
        con.close()


if __name__ == "__main__":
    # NOTE: %userprofile% is only expanded by os.path.expandvars on Windows.
    backup(os.path.expandvars('%userprofile%'), 'test.db')