|
|
|
@ -0,0 +1,69 @@ |
|
|
|
from imohash.imohash import hashfile as _imohash |
|
|
|
import hashlib |
|
|
|
import hashlib |
|
|
|
import multiprocessing |
|
|
|
import os |
|
|
|
import psutil |
|
|
|
import sqlite3 |
|
|
|
# https://stackoverflow.com/a/17782753 |
|
|
|
def file_hash(path, block_size=4096*8):
    """Return ``(path, digest)`` with the BLAKE2b digest of the file at *path*.

    The file is always streamed in ``block_size``-byte chunks, so memory use
    stays bounded no matter how large the file is. The digest is identical to
    hashing the whole content in one call.

    Args:
        path: Path of the file to hash.
        block_size: Number of bytes requested per read. A multiple of the
            filesystem block size keeps reads aligned (the default is
            8 blocks of 4096 bytes). Non-positive or missing values fall back
            to the default so a bad size can never yield an empty hash.

    Returns:
        A 2-tuple ``(path, digest)`` where ``path`` is the argument unchanged
        and ``digest`` is the raw BLAKE2b digest as ``bytes``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b'' immediately, which would end the loop before any
    # data was hashed, so guard against unusable chunk sizes.
    chunk_size = block_size if block_size and block_size > 0 else 4096 * 8

    hasher = hashlib.blake2b()
    with open(path, 'rb') as stream:
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:
                break
            hasher.update(chunk)
    return path, hasher.digest()
|
|
|
|
|
|
|
def hashify(top):
    """Walk *top* and return one ``(path, imohash, blake)`` tuple per file.

    Every file gets the sampled hash from ``_imohash``. A full BLAKE2b digest
    is computed only when a file's imohash has already been seen earlier in
    the same walk; otherwise the third element is ``None``. The first file
    seen with a given imohash therefore always keeps ``None``.

    The process working directory is switched to *top* for the duration of
    the walk, so the returned paths are relative (they start with ``.``),
    and it is restored before returning, even if the walk is interrupted.

    Args:
        top: Directory to scan recursively.

    Returns:
        A list of ``(filepath, imohash, real_hash)`` tuples, where
        ``real_hash`` is the raw digest from ``file_hash`` or ``None``.
        Files that cannot be hashed are reported on stdout and omitted.

    Raises:
        OSError: If *top* cannot be entered.
    """
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    imo_hashes = set()
    try:
        for root, _dirs, files in os.walk('.'):
            for file in files:
                filepath = os.path.join(root, file)
                # Errors are handled per file so that one unreadable file
                # does not drop the remaining files of its directory.
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in imo_hashes:
                        # file_hash returns (path, digest); only the digest
                        # belongs in the result row.
                        _, real_hash = file_hash(filepath)
                        print(file)
                    else:
                        # Remember the hash so later files with the same
                        # imohash trigger the full-content hash.
                        imo_hashes.add(imohash)
                except PermissionError:
                    print('Access denied:', filepath)
                    continue
                except Exception as e:
                    print(e, file)
                    continue
                ret.append((filepath, imohash, real_hash))
    finally:
        # Always restore the caller's working directory.
        os.chdir(old_dir)
    return ret
|
|
|
def __init_database__(path):
    """Open the SQLite database at *path* and make sure its schema exists.

    Creates the ``paths`` table (columns ``path``, ``imohash``, ``blake``
    with a combined UNIQUE constraint) when it is not already present,
    commits, and hands back the open connection.

    Args:
        path: Database location passed straight to ``sqlite3.connect``.

    Returns:
        The open ``sqlite3.Connection``; the caller owns it.
    """
    schema = 'CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));'
    connection = sqlite3.connect(path)
    # Connection.execute is the documented shortcut for cursor().execute().
    connection.execute(schema)
    connection.commit()
    return connection
|
|
|
def backup(top, db_path):
    """Hash every file under *top* and record the results in a SQLite file.

    Args:
        top: Directory to scan; forwarded to ``hashify``.
        db_path: SQLite database file; created with the ``paths`` table by
            ``__init_database__`` if needed.

    Returns:
        None. Rows are inserted with ``INSERT OR IGNORE``, so a row that
        violates the table's UNIQUE constraint is skipped silently.

    NOTE(review): rows whose third value is ``None`` are stored as NULL, and
    SQLite treats NULLs as distinct inside UNIQUE constraints, so such rows
    are not de-duplicated across repeated runs. Confirm whether that matters.
    """
    paths = hashify(top)
    con = __init_database__(db_path)
    try:
        con.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);', paths)
        con.commit()
    finally:
        # Release the database handle even when the insert fails.
        con.close()
|
|
|
if __name__ == "__main__":
    # expanduser('~') resolves the home directory on every platform, whereas
    # expandvars only understands the '%userprofile%' syntax on Windows and
    # would otherwise pass the literal string on to os.chdir.
    backup(os.path.expanduser('~'), 'test.db')