You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
77 lines
2.2 KiB
77 lines
2.2 KiB
#!/usr/bin/python
|
|
from imohash.imohash import hashfile as _imohash
|
|
import argparse
|
|
import hashlib
|
|
import os
|
|
import psutil
|
|
import sqlite3
|
|
|
|
# https://stackoverflow.com/a/17782753
|
|
def file_hash(path, block_size=4096*8):
    '''
    Compute the BLAKE2b digest of the file at ``path``.

    The file is always streamed in chunks of ``block_size`` bytes, so memory
    use stays bounded whatever the file size. Feeding the hash chunk by chunk
    yields exactly the same digest as hashing the whole content in one call.

    Block size directly depends on the block size of your filesystem
    to avoid performance issues. The default is 8 blocks of 4096 bytes
    (4096 being the default NTFS block size, per the original author).

    Returns a ``(path, digest)`` tuple, where ``digest`` is the raw bytes
    produced by ``hashlib.blake2b().digest()``.
    '''
    _hash = hashlib.blake2b()
    with open(path, 'rb') as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(block_size), b''):
            _hash.update(chunk)
    return path, _hash.digest()
|
|
|
|
def hashify(top):
    '''
    Walk the tree under ``top`` and collect one record per file.

    Each record is a ``(filepath, imohash, real_hash)`` tuple:

    * ``filepath`` is relative to ``top`` (the walk starts from ``'.'``);
    * ``imohash`` is the value returned by ``_imohash`` for the file;
    * ``real_hash`` is the digest from ``file_hash`` when an earlier file in
      this walk produced the same imohash, otherwise ``None``. The first
      file seen with a given imohash therefore keeps ``None``.

    The working directory is switched to ``top`` for the duration of the
    walk and is restored before returning, even if an exception propagates.
    A file that raises is reported on stdout and skipped; the remaining
    files are still processed.
    '''
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    imo_hashes = set()
    try:
        for root, dirs, files in os.walk('.'):
            for file in files:
                filepath = os.path.join(root, file)
                # Handle errors per file so that one unreadable file does
                # not skip every other file of the same directory.
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in imo_hashes:
                        # file_hash returns (path, digest); keep only the
                        # digest, which is what gets stored alongside the
                        # path and imohash.
                        real_hash = file_hash(filepath)[1]
                    else:
                        # Remember the imohash so later files that share it
                        # are fully hashed.
                        imo_hashes.add(imohash)
                except PermissionError:
                    print('Access denied:', filepath)
                    continue
                except Exception as e:
                    print(e, file)
                    continue
                ret.append((filepath, imohash, real_hash))
    finally:
        os.chdir(old_dir)
    return ret
|
|
|
|
def __init_database__(path):
    '''
    Open the SQLite database at ``path`` and make sure the `paths` table
    exists, then return the open connection.

    The table has three columns (`path` TEXT, `imohash` BLOB, `blake` BLOB)
    with a UNIQUE constraint over all three. The statement uses
    ``IF NOT EXISTS``, so calling this on an already initialised database
    leaves the existing table untouched.
    '''
    schema = 'CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));'
    connection = sqlite3.connect(path)
    connection.execute(schema)
    connection.commit()
    return connection
|
|
|
|
def backup(top,db_path):
    '''
    Hash every file under ``top`` and record the results in the SQLite
    database at ``db_path``.

    The rows come from ``hashify(top)`` and are inserted into the `paths`
    table with ``INSERT OR IGNORE``, so a row that violates the table's
    UNIQUE constraint is skipped instead of raising.

    NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints, so
    rows whose third value is ``None`` are presumably not deduplicated
    across runs -- confirm whether that is intended.

    The connection is always closed before returning. The insert runs in a
    transaction that is committed on success and rolled back on error.
    '''
    paths = hashify(top)
    con = __init_database__(db_path)
    try:
        # "with con" commits on success and rolls back if the insert raises;
        # it does not close the connection, hence the explicit close below.
        with con:
            con.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);',paths)
    finally:
        con.close()
|
|
|
|
if __name__ == "__main__":
    # Command line: a positional directory to scan and an optional database
    # path (-d/--database, defaulting to 'fs.db').
    parser = argparse.ArgumentParser()
    parser.add_argument('dir')
    parser.add_argument('-d','--database',default='fs.db')
    args = parser.parse_args()
    # Create the database and table before the directory walk starts.
    # backup() opens its own connection, so close this one immediately
    # rather than leaving it to be garbage-collected.
    __init_database__(args.database).close()
    backup(args.dir,args.database)
|