Browse Source

using sqlite3 + blake as backup hash algo

restore_refactor
Raphael Roberts 7 years ago
parent
commit
5516ffcee9
  1. 69
      backup.py
  2. 0
      restore.py

69
backup.py

@ -0,0 +1,69 @@
from imohash.imohash import hashfile as _imohash
import hashlib
import hashlib
import multiprocessing
import os
import psutil
import sqlite3
# https://stackoverflow.com/a/17782753
def file_hash(path, block_size=4096*8):
    '''
    Return ``(path, digest)`` where ``digest`` is the BLAKE2b hash of the file.

    The file is always streamed in chunks so memory use stays bounded no
    matter how large the file is. Feeding the hasher chunk by chunk yields
    exactly the same digest as hashing the whole content in one call.

    Args:
        path: Path of the file to hash.
        block_size: Number of bytes read per chunk. A multiple of the
            filesystem block size (4096 bytes is the NTFS default) avoids
            partial-block reads. A non-positive value reads the file in a
            single call.

    Returns:
        A ``(path, digest)`` tuple; ``digest`` is the raw BLAKE2b digest
        as ``bytes``.

    Raises:
        OSError: If the file cannot be opened or read.
    '''
    hasher = hashlib.blake2b()
    # read(0) returns b'' immediately, which would end the loop before any
    # data was hashed; map non-positive sizes to -1 ("read everything").
    read_size = block_size if block_size > 0 else -1
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(read_size), b''):
            hasher.update(chunk)
    return path, hasher.digest()
def hashify(top):
    '''
    Walk ``top`` and collect a fast hash (and, on collision, a full hash)
    for every file below it.

    Each file gets an imohash. When two files share the same imohash, the
    BLAKE2b digest of both is computed with ``file_hash`` so they can be
    told apart; files with a unique imohash keep ``None`` as full hash.

    The process working directory is changed to ``top`` for the duration of
    the walk (so stored paths are relative, starting with ``.``) and is
    always restored afterwards, even if the walk is interrupted.

    Files that cannot be hashed are reported on stdout and skipped; the
    remaining files of the same directory are still processed.

    Args:
        top: Directory to scan.

    Returns:
        A list of ``(filepath, imohash, real_hash)`` tuples, where
        ``real_hash`` is either ``None`` or the BLAKE2b digest bytes.

    Raises:
        OSError: If ``top`` cannot be entered.
    '''
    old_dir = os.getcwd()
    os.chdir(top)
    try:
        ret = []
        # imohash -> index in ``ret`` of the first file seen with that hash,
        # so its full hash can be filled in once a collision shows up.
        first_seen = {}
        for root, dirs, files in os.walk('.'):
            for file in files:
                filepath = os.path.join(root, file)
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in first_seen:
                        first_index = first_seen[imohash]
                        first_path, _, first_real = ret[first_index]
                        if first_real is None:
                            # file_hash returns (path, digest); keep the digest only.
                            ret[first_index] = (
                                first_path,
                                imohash,
                                file_hash(first_path)[1],
                            )
                        real_hash = file_hash(filepath)[1]
                        print(file)
                except PermissionError:
                    print('Access denied:', filepath)
                    continue
                except Exception as e:
                    # Best effort: report and move on to the next file.
                    print(e, file)
                    continue
                first_seen.setdefault(imohash, len(ret))
                ret.append(
                    (
                        filepath,
                        imohash,
                        real_hash
                    )
                )
        return ret
    finally:
        os.chdir(old_dir)
def __init_database__(path):
con = sqlite3.connect(path)
cur = con.cursor()
cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
con.commit()
return con
def backup(top, db_path):
    '''
    Hash every file below ``top`` and record the results in the database.

    The rows produced by ``hashify`` are inserted into the ``paths`` table
    of the database at ``db_path``; rows that violate the table's
    uniqueness rules are ignored. The connection is always closed before
    returning, including when the insert fails.

    Args:
        top: Directory to scan.
        db_path: Path of the SQLite database file to write to.
    '''
    paths = hashify(top)
    con = __init_database__(db_path)
    try:
        con.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);', paths)
        con.commit()
    finally:
        # Release the database file handle even if the insert raised.
        con.close()
if __name__ == "__main__":
    # expanduser('~') resolves to the user's profile directory on Windows
    # and to the home directory elsewhere, whereas '%userprofile%' is only
    # expanded on Windows and would be passed through literally on other
    # platforms.
    backup(os.path.expanduser('~'), 'test.db')

0
restore.py

Loading…
Cancel
Save