You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
#!/usr/bin/pythonfrom imohash.imohash import hashfile as _imohashimport argparseimport hashlibimport osimport psutilimport sqlite3
# https://stackoverflow.com/a/17782753
def file_hash(path, block_size=4096*8):
    '''
    Return the BLAKE2b digest (raw bytes) of the file at ``path``.

    The file is always streamed in ``block_size``-byte chunks, so memory use
    stays bounded no matter how large the file is and the result does not
    depend on how much memory happens to be free at call time.

    The block size should be a multiple of the filesystem block size to
    avoid performance issues; the default is 8 blocks of 4096 bytes
    (4096 being the default NTFS block size).

    Raises ValueError if ``block_size`` is smaller than 1, and OSError if
    the file cannot be opened or read.
    '''
    if block_size < 1:
        # read(0) returns b'' immediately, which would silently produce the
        # digest of an empty input instead of the file's digest.
        raise ValueError('block_size must be a positive number of bytes')

    _hash = hashlib.blake2b()
    with open(path, 'rb') as f:
        # iter() with a sentinel keeps calling f.read() until it returns b'' (EOF).
        for chunk in iter(lambda: f.read(block_size), b''):
            _hash.update(chunk)
    return _hash.digest()
def hashify(top):
    '''
    Walk the directory tree under ``top`` and hash every file found.

    Returns a list of ``(path, imohash, full_hash)`` tuples. ``path`` is
    relative to ``top`` (it starts with ``'.'``), ``imohash`` is the value
    returned by ``_imohash`` and ``full_hash`` is the ``file_hash`` digest,
    computed only for files whose imohash was also produced by another file
    in the same walk; it is ``None`` otherwise.

    When a second file with an already-seen imohash shows up, the first
    file is hashed retroactively as well, so that both sides of a possible
    duplicate carry a full hash that can actually be compared.

    Errors are handled per file: a file that cannot be hashed is reported
    on stdout and skipped, and the remaining files of the same directory
    are still processed. The process working directory is temporarily
    changed to ``top`` and is always restored, even if the walk is
    interrupted.
    '''
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    # imohash -> index in ``ret`` of the first file seen with that hash, or
    # None once that first entry has had (or failed to get) its full hash.
    first_seen = {}
    try:
        for root, _dirs, files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root, name)
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in first_seen:
                        # Possible duplicate: report it and compute the
                        # expensive full hash to allow confirmation.
                        print(filepath)
                        real_hash = file_hash(filepath)
                except PermissionError:
                    print('Access denied:', filepath)
                    continue
                except Exception as e:
                    # Best-effort scan: report and move on to the next file.
                    print(e, filepath)
                    continue

                if imohash not in first_seen:
                    first_seen[imohash] = len(ret)
                elif first_seen[imohash] is not None:
                    # First collision for this imohash: give the earlier
                    # file a full hash too.
                    first_index = first_seen[imohash]
                    first_path = ret[first_index][0]
                    try:
                        ret[first_index] = (first_path, imohash, file_hash(first_path))
                    except Exception as e:
                        print(e, first_path)
                    # Whether it worked or not, do not retry on later collisions.
                    first_seen[imohash] = None

                ret.append((filepath, imohash, real_hash))
    finally:
        os.chdir(old_dir)
    return ret
def __init_database__(path):
    '''
    Open the SQLite database at ``path`` and ensure the ``paths`` table exists.

    The table has the columns ``path`` (TEXT), ``imohash`` (BLOB) and
    ``blake`` (BLOB) with a UNIQUE constraint over all three. The creation
    is committed before returning.

    Returns the open ``sqlite3`` connection; the caller is responsible for
    closing it.
    '''
    create_paths_table = 'CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));'
    connection = sqlite3.connect(path)
    # Connection.execute() creates a temporary cursor and runs the statement on it.
    connection.execute(create_paths_table)
    connection.commit()
    return connection
def backup(top, db_path):
    '''
    Hash every file under ``top`` and record the results in the SQLite
    database at ``db_path``.

    Each ``(path, imohash, blake)`` tuple returned by ``hashify`` is stored
    in the ``paths`` table unless an identical row is already present. The
    existence check uses ``IS`` so that rows whose ``blake`` is NULL are
    deduplicated as well: SQLite's UNIQUE constraint treats NULLs as
    distinct, so ``INSERT OR IGNORE`` alone would add such rows again on
    every run.

    The database connection is always closed before returning.
    '''
    paths = hashify(top)
    con = __init_database__(db_path)
    try:
        con.executemany(
            'INSERT OR IGNORE INTO `paths` (`path`, `imohash`, `blake`) '
            'SELECT ?, ?, ? WHERE NOT EXISTS ('
            'SELECT 1 FROM `paths` '
            'WHERE `path` IS ? AND `imohash` IS ? AND `blake` IS ?);',
            # Each row is bound twice: once for the values to insert and
            # once for the existence check.
            (row + row for row in paths),
        )
        con.commit()
    finally:
        con.close()
if __name__ == "__main__":
    # Command-line entry point: hash the tree under ``dir`` and store the
    # results in the SQLite database given by -d/--database.
    parser = argparse.ArgumentParser()
    parser.add_argument('dir', help='directory tree to hash')
    parser.add_argument('-d', '--database', default='fs.db',
                        help='SQLite database file to write to (default: fs.db)')
    args = parser.parse_args()
    # Create/verify the database up front so that an unusable database path
    # fails before the (potentially long) directory walk; the connection is
    # closed immediately because backup() opens its own.
    __init_database__(args.database).close()
    backup(args.dir, args.database)
|