|
|
#!/usr/bin/python
import functools
import os
import sqlite3

from imohash import hashfile as _imohash


def hash_file(path, hash_if_less, sample):
    """Hash the file at *path* by delegating to ``imohash.hashfile``.

    This thin wrapper exists so the two tuning values can be bound by
    keyword (see ``functools.partial`` usage in ``fs.__init__``) while still
    being handed to imohash positionally.

    Args:
        path: Path of the file to hash.
        hash_if_less: Passed as imohash's second positional argument
            (presumably the size threshold below which the whole file is
            hashed -- confirm against the imohash documentation).
        sample: Passed as imohash's third positional argument (presumably
            the sample size in bytes -- confirm against imohash).

    Returns:
        Whatever ``imohash.hashfile`` returns for these arguments.
    """
    return _imohash(path, hash_if_less, sample)
def hashify(top, hashfunc):
    """Hash every file below *top* and return ``(path, hash)`` pairs.

    The walk runs with *top* as the working directory, so every returned
    path is relative to *top* and starts with ``'.'``. The original working
    directory is always restored, even if the walk is interrupted.

    Hashing is best-effort: a file that cannot be hashed is reported on
    stdout and skipped, and the remaining files in the same directory are
    still processed.

    Args:
        top: Directory to walk.
        hashfunc: Callable taking a file path and returning its hash.

    Returns:
        A list of ``(filepath, hash)`` tuples, one per successfully hashed
        file.

    Raises:
        OSError: If *top* cannot be entered (raised by ``os.chdir``).
    """
    # Local import so this function works even when the module is imported
    # rather than run as a script.
    import os

    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    try:
        for root, _dirs, files in os.walk('.'):
            for file in files:
                filepath = os.path.join(root, file)
                # Guard each file on its own so one failure does not drop
                # the rest of the directory.
                try:
                    digest = hashfunc(filepath)
                except PermissionError:
                    print('Access denied:', filepath)
                except Exception as e:
                    # Deliberate best-effort: report and keep going.
                    print(e, file)
                else:
                    ret.append((filepath, digest))
    finally:
        os.chdir(old_dir)
    return ret
def __init_database__(con):
    """Create the ``paths`` and ``backups`` tables if they are missing.

    Args:
        con: An open ``sqlite3`` connection. The schema changes are
            committed on it before returning.
    """
    ddl_statements = (
        '''CREATE TABLE IF NOT EXISTS `paths` (
`path` TEXT, `hash` BLOB, `id` INTEGER)''',
        '''CREATE TABLE IF NOT EXISTS `backups` (
`id` INTEGER, `hash_threshold` INTEGER, `sample_size` INTEGER, `top` TEXT, PRIMARY KEY(`id`) );''',
    )
    cursor = con.cursor()
    for statement in ddl_statements:
        cursor.execute(statement)
    con.commit()
class fs:
    """File-hash snapshot of a directory tree, persisted in SQLite.

    Attributes:
        top: Top-level directory the snapshot describes.
        hash_threshold, sample_size: Parameters bound into ``hashfunc``.
        hash_files: In-memory mapping ``{hash: [filepath, ...]}``.
        hashfunc: ``hash_file`` with the two parameters above pre-bound.
        db_path: Path of the SQLite database file.
        con, cur: Open connection and cursor on that database.
        id: Backup id used as the key in both tables.
        new: True while no ``backups`` row exists yet for ``id``.
    """

    def __init__(self, top, db_path=None, hash_threshold=1024**2,
                 sample_size=128*1024, id=None):
        """Open (creating if needed) the database and pick a backup id.

        Args:
            top: Top-level directory to snapshot.
            db_path: SQLite file to use; defaults to ``'fs.db'``.
            hash_threshold: Bound as ``hash_if_less`` of ``hash_file``.
            sample_size: Bound as ``sample`` of ``hash_file``.
            id: Existing backup id to attach to. When ``None`` a fresh id
                (current maximum + 1, or 0 for an empty table) is chosen.

        Raises:
            ValueError: If *id* is given but no such backup exists.
        """
        self.top = top
        self.hash_threshold = hash_threshold
        self.sample_size = sample_size
        self.hash_files = {}
        self.hashfunc = functools.partial(
            hash_file,
            hash_if_less=self.hash_threshold,
            sample=self.sample_size,
        )
        self.db_path = 'fs.db' if db_path is None else db_path
        self.con = sqlite3.connect(self.db_path)
        __init_database__(self.con)
        self.cur = self.con.cursor()

        if id is None:
            # MAX() always yields exactly one row; its value is NULL when
            # the table is empty.
            self.cur.execute('SELECT MAX(id) FROM backups')
            (latest,) = self.cur.fetchone()
            self.new = True
            self.id = 0 if latest is None else latest + 1
        else:
            self.new = False
            self.cur.execute('SELECT id FROM backups WHERE id=?', [id])
            # fetchone() returns None (not a row) when nothing matched.
            if self.cur.fetchone() is None:
                self.con.close()
                raise ValueError('id not found')
            self.id = id

    def close(self):
        """Close the underlying database connection."""
        self.con.close()

    def write_to_db(self):
        '''Store self.hash_files in the database, along with the backup id,
        hash function parameters, and top level dir.

        Path rows are appended; existing rows for this id are not removed.
        The whole write is one transaction: it is committed on success and
        rolled back if any statement fails.
        '''
        rows = [
            (path, digest, self.id)
            for digest, paths in self.hash_files.items()
            for path in paths
        ]
        params = {
            'id': self.id,
            'hash_threshold': self.hash_threshold,
            'sample_size': self.sample_size,
            'top': self.top,
        }
        with self.con:  # commit on success, roll back on error
            self.cur.executemany('INSERT INTO paths VALUES (?,?,?)', rows)
            if self.new:
                self.cur.execute(
                    'INSERT INTO backups VALUES '
                    '(:id,:hash_threshold,:sample_size,:top)',
                    params,
                )
            else:
                self.cur.execute(
                    'UPDATE backups SET hash_threshold=:hash_threshold, '
                    'sample_size=:sample_size, top=:top WHERE id=:id',
                    params,
                )
        # The backups row now exists, so later writes must UPDATE it.
        self.new = False

    def read_from_db(self):
        """Merge the stored paths for ``self.id`` into ``self.hash_files``."""
        self.cur.execute('SELECT path,hash FROM paths WHERE id=?', [self.id])
        for path, digest in self.cur.fetchall():
            self.hash_files.setdefault(digest, []).append(path)

    def id_from_top(self):
        """Set ``self.id`` to the stored backup whose ``top`` matches.

        With exactly one match it is selected directly. With several, the
        candidate ids are printed and the user is prompted to choose. With
        none, ``'Not found'`` is printed and the instance is left unchanged.

        Raises:
            ValueError: If the answer to the prompt is not an integer or is
                not one of the listed ids.
        """
        self.cur.execute('SELECT id FROM backups WHERE top=?', [self.top])
        candidates = [row[0] for row in self.cur.fetchall()]
        if not candidates:
            print('Not found')
            return
        if len(candidates) == 1:
            chosen = candidates[0]
        else:
            print('\n'.join(map(str, candidates)))
            chosen = int(input('Which one?: '))
            if chosen not in candidates:
                raise ValueError('id not found')
        self.id = chosen
        # The selected backup already has a row, so writes must UPDATE.
        self.new = False

    def restore(self):
        '''Intended to rename/copy all files in self.top to the order
        prescribed in the database.

        Currently this only loads the stored paths into self.hash_files;
        no files are touched.
        '''
        self.read_from_db()

    def backup(self):
        '''Fill in self.hash_files as {hash: [filepaths]} by hashing every
        file below self.top.'''
        for filepath, digest in hashify(self.top, self.hashfunc):
            self.hash_files.setdefault(digest, []).append(filepath)
# Script entry point: a manual smoke test that only constructs an `fs`
# object for the '.git' directory. Because no db_path is passed, this opens
# (and creates if missing) 'fs.db' in the current working directory.
if __name__ == "__main__":
    # NOTE(review): hashify() uses `os`, and this is the only place visible
    # here that imports it, so it is only in scope when run as a script.
    import os
    test = fs('.git')
    # Disabled steps that would hash the tree and persist the result:
    # test.backup()
    # test.write_to_db()
|