You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

77 lines
2.2 KiB

#!/usr/bin/python
from imohash.imohash import hashfile as _imohash
import argparse
import hashlib
import os
import psutil
import sqlite3
# https://stackoverflow.com/a/17782753
def file_hash(path, block_size=4096*8):
    '''
    Compute the BLAKE2b digest of the file at ``path``.

    The file is always read in chunks of ``block_size`` bytes, so memory
    use stays bounded no matter how large the file is. Feeding the hash
    chunk by chunk yields exactly the same digest as hashing the whole
    content in a single call.

    ``block_size`` should be a multiple of the block size of your
    filesystem to avoid performance issues. The default is 8 blocks of
    4096 bytes (4096 being the NTFS default). A ``None`` or non-positive
    value falls back to that default, because reading 0 bytes at a time
    would look like an immediate end of file.

    Returns a ``(path, digest)`` tuple, where ``digest`` is the raw
    ``bytes`` digest. Raises ``OSError`` if the file cannot be opened or
    read.
    '''
    if block_size is None or block_size <= 0:
        block_size = 4096 * 8
    hasher = hashlib.blake2b()
    with open(path, 'rb') as stream:
        while True:
            chunk = stream.read(block_size)
            if not chunk:
                # An empty read means end of file.
                break
            hasher.update(chunk)
    return path, hasher.digest()
def hashify(top):
    '''
    Walk ``top`` recursively and build one hash record per file.

    Each record is a ``(filepath, imohash, real_hash)`` tuple:

    * ``filepath`` is relative to ``top`` (the walk starts at ``'.'``
      after changing into ``top``).
    * ``imohash`` is the value returned by the imohash ``hashfile``
      function for that file.
    * ``real_hash`` is the BLAKE2b digest from ``file_hash``. It is only
      computed when the same imohash was already seen earlier in this
      walk; otherwise it is ``None``.

    NOTE: the first file seen with a given imohash keeps
    ``real_hash = None``; only later files sharing that imohash get a
    full hash.

    Files that cannot be hashed are reported on stdout and skipped. The
    process working directory is changed to ``top`` for the duration of
    the walk and is always restored afterwards.
    '''
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    imo_hashes = set()
    try:
        for root, _dirs, files in os.walk('.'):
            for file in files:
                filepath = os.path.join(root, file)
                # Handle errors per file so one unreadable file does not
                # skip the remaining files of the same directory.
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in imo_hashes:
                        # file_hash returns (path, digest); only the
                        # digest can be stored as a BLOB.
                        _, real_hash = file_hash(filepath)
                    # Remember this imohash so later files sharing it
                    # get a full hash.
                    imo_hashes.add(imohash)
                except PermissionError:
                    print('Access denied:', filepath)
                    continue
                except Exception as e:
                    # Best effort: report the problem and keep walking.
                    print(e, filepath)
                    continue
                ret.append((filepath, imohash, real_hash))
    finally:
        # Restore the caller's working directory even if the walk fails.
        os.chdir(old_dir)
    return ret
def __init_database__(path):
    '''
    Open the SQLite database at ``path`` and make sure the ``paths``
    table exists.

    The table has the columns ``path`` (TEXT), ``imohash`` (BLOB) and
    ``blake`` (BLOB), with a UNIQUE constraint over all three.

    Returns the open connection; the caller is responsible for closing it.
    '''
    schema = 'CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));'
    connection = sqlite3.connect(path)
    cursor = connection.cursor()
    cursor.execute(schema)
    connection.commit()
    return connection
def backup(top,db_path):
    '''
    Hash every file below ``top`` and record the results in the SQLite
    database at ``db_path``.

    The records produced by ``hashify(top)`` are inserted into the
    ``paths`` table with ``INSERT OR IGNORE``, so rows that would violate
    the table's UNIQUE constraint are skipped. The connection is always
    closed before returning, including when the insert fails.

    NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints,
    so records whose BLAKE hash is ``None`` are presumably inserted again
    on every run. Confirm whether that is intended.
    '''
    paths = hashify(top)
    con = __init_database__(db_path)
    try:
        cur = con.cursor()
        cur.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);', paths)
        con.commit()
    finally:
        # Release the database handle whether or not the insert succeeded.
        con.close()
if __name__ == "__main__":
    # Command-line entry point: hash a directory tree into a SQLite file.
    parser = argparse.ArgumentParser()
    parser.add_argument('dir', help='directory to walk and hash')
    parser.add_argument('-d', '--database', default='fs.db',
                        help='SQLite database file to write (default: fs.db)')
    args = parser.parse_args()
    # backup() opens the database and creates the table itself, so no
    # separate initialisation call is needed here. The previous extra call
    # opened a second connection that was never closed.
    backup(args.dir, args.database)