You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
import hashlib
import multiprocessing
import os
import sqlite3

import psutil
from imohash.imohash import hashfile as _imohash


# https://stackoverflow.com/a/17782753
def file_hash(path, block_size=4096*8):
    """Return ``(path, digest)`` with the BLAKE2b digest of the file's content.

    The file is always streamed in ``block_size``-byte chunks, so memory use
    stays bounded regardless of file size. The digest is identical to hashing
    the whole content in one call.

    Args:
        path: Path of the file to hash.
        block_size: Bytes read per chunk. Ideally a multiple of the
            filesystem block size to avoid performance issues; the default
            is 8 blocks of 4096 bytes (the NTFS default).

    Returns:
        A tuple ``(path, digest)`` where ``digest`` is the raw BLAKE2b digest
        as ``bytes``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    _hash = hashlib.blake2b()
    with open(path, 'rb') as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(block_size), b''):
            _hash.update(chunk)
    return path, _hash.digest()


def hashify(top):
    """Walk *top* and collect ``(path, imohash, blake)`` records for its files.

    Every file gets the fast hash from ``_imohash``. The full-content hash
    from ``file_hash`` is computed only for files whose fast hash has already
    been seen during this walk; in that case the first file with that fast
    hash is back-filled too, so both sides of a suspected duplicate can be
    compared. Files with a unique fast hash keep ``None`` as their full hash.

    Args:
        top: Directory to scan. Recorded paths are relative to it.

    Returns:
        A list of ``(relative_path, imohash, blake_digest_or_None)`` tuples.
    """
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    # Maps each fast hash to the index in ``ret`` of the first file having it.
    first_seen = {}
    try:
        for root, dirs, files in os.walk('.'):
            for file in files:
                filepath = os.path.join(root, file)
                # Handle errors per file so one unreadable file does not
                # skip the remaining files of the same directory.
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in first_seen:
                        # file_hash returns (path, digest); store the digest only.
                        real_hash = file_hash(filepath)[1]
                        print(file)
                    ret.append((filepath, imohash, real_hash))
                    if imohash not in first_seen:
                        first_seen[imohash] = len(ret) - 1
                    else:
                        first_index = first_seen[imohash]
                        first_path, _, first_real = ret[first_index]
                        if first_real is None:
                            ret[first_index] = (
                                first_path,
                                imohash,
                                file_hash(first_path)[1],
                            )
                except PermissionError:
                    print('Access denied:', filepath)
                except Exception as e:
                    # Best-effort scan: report the problem and keep going.
                    print(e, file)
    finally:
        # Always restore the caller's working directory.
        os.chdir(old_dir)
    return ret


def __init_database__(path):
    """Open the SQLite database at *path* and ensure the ``paths`` table exists.

    Args:
        path: Database file path, passed to ``sqlite3.connect``.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for closing it.
    """
    con = sqlite3.connect(path)
    try:
        # NOTE(review): SQLite treats NULLs as distinct inside UNIQUE
        # constraints, so rows whose `blake` is NULL are not deduplicated by
        # INSERT OR IGNORE across runs. Fixing that needs a schema change.
        con.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
        con.commit()
    except Exception:
        con.close()
        raise
    return con


def backup(top, db_path):
    """Hash every file under *top* and record the results in *db_path*.

    Args:
        top: Directory to scan.
        db_path: Path of the SQLite database to create or update.
    """
    paths = hashify(top)
    con = __init_database__(db_path)
    try:
        # The connection context manager commits on success and rolls back
        # on error; it does not close the connection.
        with con:
            con.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);', paths)
    finally:
        con.close()


if __name__ == "__main__":
    backup(os.path.expandvars('%userprofile%'), 'test.db')
|