diff --git a/.sql.swp b/.sql.swp
new file mode 100644
index 0000000..901c4f4
Binary files /dev/null and b/.sql.swp differ
diff --git a/backup.py b/backup.py
index e54d6f4..a2dd90e 100755
--- a/backup.py
+++ b/backup.py
@@ -27,44 +27,6 @@ def file_hash(path, block_size=4096*8):
             _hash.update(bytes)
     return _hash.digest()
 
-def hashify(top):
-    old_dir = os.getcwd()
-    os.chdir(top)
-    ret = []
-    imo_hashes = set()
-    for root,dirs,files in os.walk('.'):
-        try:
-            for file in files:
-                filepath = os.path.join(root,file)
-                imohash = _imohash(filepath)
-                real_hash = None
-                if imohash in imo_hashes:
-                    print(filepath)
-                    real_hash = file_hash(filepath)
-                else:
-                    imo_hashes.add(imohash)
-                ret.append(
-                    (
-                        filepath,
-                        imohash,
-                        real_hash
-                    )
-                )
-        except PermissionError:
-            print('Access denied:',root)
-        except Exception as e:
-            print(e,file)
-    os.chdir(old_dir)
-    #print(imo_hashes)
-    return ret
-
-def __init_database__(path):
-    con = sqlite3.connect(path)
-    cur = con.cursor()
-    cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
-    con.commit()
-    return con
-
 def backup(top,db_path):
     paths = hashify(top)
     con = __init_database__(db_path)
diff --git a/fs_class.py b/fs_class.py
new file mode 100755
index 0000000..fe428a2
--- /dev/null
+++ b/fs_class.py
@@ -0,0 +1,166 @@
+#!/usr/bin/python
+'''Snapshot a directory tree as {hash: [paths]} and keep it in SQLite.'''
+import functools
+import os
+import sqlite3
+from imohash import hashfile as _imohash
+
+def hash_file(path,hash_if_less,sample):
+    '''Return imohash's digest of path.
+
+    hash_if_less and sample are handed to imohash.hashfile unchanged
+    (presumably its size threshold and sample size - confirm against imohash).
+    '''
+    return _imohash(path,hash_if_less,sample)
+
+def hashify(top,hashfunc):
+    '''Walk top and return a list of (path, hash) tuples.
+
+    hashfunc is called with each path relative to top while the working
+    directory is top. Files that raise are reported on stdout and skipped.
+    '''
+    old_dir = os.getcwd()
+    os.chdir(top)
+    ret = []
+    try:
+        for root,dirs,files in os.walk('.'):
+            for file in files:
+                filepath = os.path.join(root,file)
+                # guard each file on its own so one failure does not drop
+                # the remaining files of the directory
+                try:
+                    ret.append((filepath,hashfunc(filepath)))
+                except PermissionError:
+                    print('Access denied:',filepath)
+                except Exception as e:
+                    print(e,filepath)
+    finally:
+        # put the caller's working directory back even if the walk fails
+        os.chdir(old_dir)
+    return ret
+
+def __init_database__(con):
+    '''Create the paths and backups tables on con when they are missing.'''
+    cur = con.cursor()
+    cur.execute('''CREATE TABLE IF NOT EXISTS `paths` (
+            `path` TEXT,
+            `hash` BLOB,
+            `id` INTEGER)''')
+    cur.execute('''CREATE TABLE IF NOT EXISTS `backups` (
+            `id` INTEGER,
+            `hash_threshold` INTEGER,
+            `sample_size` INTEGER,
+            `top` TEXT,
+            PRIMARY KEY(`id`)
+            );''')
+    con.commit()
+
+class fs:
+    '''Snapshot of the tree under top, stored under one id in the backups table.'''
+
+    def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None):
+        '''Open the database (default 'fs.db') and settle the backup id.
+
+        id=None picks MAX(id)+1 (0 for an empty table) and marks the
+        snapshot as new. Any other id must already be in backups,
+        otherwise ValueError is raised.
+        '''
+        self.top=top
+        self.hash_threshold=hash_threshold
+        self.sample_size=sample_size
+        self.hash_files = {}
+        self.hashfunc = functools.partial(
+                hash_file,
+                hash_if_less=self.hash_threshold,
+                sample=self.sample_size
+                )
+        if db_path is None:
+            self.db_path = 'fs.db'
+        else:
+            self.db_path=db_path
+        self.con = sqlite3.connect(self.db_path)
+        __init_database__(self.con)
+        self.cur = self.con.cursor()
+
+        if id is None:
+            self.cur.execute('SELECT MAX(id) FROM backups')
+            row = self.cur.fetchone()
+            self.new = True
+            if row[0] is None:
+                self.id = 0
+            else:
+                self.id = row[0]+1
+        else:
+            self.new = False
+            self.cur.execute('SELECT id FROM backups WHERE id=?',[id])
+            # no match makes fetchone() return None itself, not a row
+            if self.cur.fetchone() is None:
+                raise ValueError('id not found')
+            self.id = id
+
+    def write_to_db(self):
+        '''stores self.hash_files in database, along with a backup id, hash_func parameters, and top level dir'''
+        for hash,files in self.hash_files.items():
+            for file in files:
+                self.cur.execute('INSERT INTO paths VALUES (?,?,?)',[file,hash,self.id])
+
+        params = {
+                'id':self.id,
+                'hash_threshold':self.hash_threshold,
+                'sample_size':self.sample_size,
+                'top':self.top,
+                }
+
+        if self.new:
+            self.cur.execute('INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',params)
+        else:
+            # SET takes column=value pairs; the bare value list did not parse
+            self.cur.execute('UPDATE backups SET hash_threshold=:hash_threshold,sample_size=:sample_size,top=:top WHERE id=:id',params)
+
+        self.con.commit()
+        # the backups row exists from here on, so later calls must UPDATE
+        self.new = False
+
+    def read_from_db(self):
+        '''adds the paths stored under self.id to self.hash_files'''
+        self.cur.execute('SELECT path,hash FROM paths WHERE id=?',[self.id])
+        for path,hash in self.cur.fetchall():
+            self.hash_files.setdefault(hash,[]).append(path)
+
+    def id_from_top(self):
+        '''points self.id at the stored backup whose top equals self.top
+
+        Several matches: the ids are printed and the user picks one.
+        No match: 'Not found' is printed and self.id is left alone.
+        '''
+        self.cur.execute('SELECT id FROM backups WHERE top=?',[self.top])
+        ids = [row[0] for row in self.cur.fetchall()]
+        if not ids:
+            print('Not found')
+            return
+        if len(ids) == 1:
+            self.id = ids[0]
+        else:
+            print('\n'.join(map(str,ids)))
+            # store the answer on self; it used to be dropped in a local
+            self.id = int(input('Which one?: '))
+        # an adopted id already has a backups row, so write_to_db must UPDATE
+        self.new = False
+
+    def restore(self):
+        '''renames/copies all files in self.top to the order prescribed in database
+
+        TODO: only loads the stored snapshot so far, nothing is moved yet
+        '''
+        self.read_from_db()
+
+    def backup(self):
+        '''fills in self.hash_files {hash:[filepaths]}'''
+        paths = hashify(self.top,self.hashfunc)
+        for filepath,hash in paths:
+            self.hash_files.setdefault(hash,[]).append(filepath)
+
+if __name__ == "__main__":
+    test = fs('.git')
+    # test.backup()
+    # test.write_to_db()
diff --git a/restore.py b/restore.py
index e13c094..505fbe8 100755
--- a/restore.py
+++ b/restore.py
@@ -7,11 +7,38 @@ import sqlite3
 import time
 import time
 
-def create_temp():
+def create_temp_dir():
     name= 'fsb{}'.format(hex(time.time_ns())[2:])
     os.mkdir(name)
     return name
 
+def copy_file_temp(path):
+    '''copies path to a fresh fsb<hex timestamp> file in the cwd and returns its name'''
+    import shutil  # os has no copy(); imported here because the import block is outside this hunk
+    name= 'fsb{}'.format(hex(time.time_ns())[2:])
+    shutil.copy(path,name)
+    return name
+
+def super_rename(src,dst):
+    '''renames src to dst, creating dst's parent directory first
+
+    NOTE(review): the committed body stopped at "parent =" / "if not o";
+    this completion is a guess at the intent - confirm before relying on it
+    '''
+    parent = os.path.dirname(dst)
+    if parent and not os.path.isdir(parent):
+        os.makedirs(parent)
+    os.rename(src,dst)
+
+def create_hash_lookup(top):
+    '''groups the paths returned by hashify(top) by their hashes: {hashes: [paths]}'''
+    hashes = hashify(top)
+    ret = {}
+    for _hash in hashes:
+        path,*two_hashes = _hash
+        # star-unpacking yields a list, which cannot be a dict key
+        ret.setdefault(tuple(two_hashes),[]).append(path)
+    return ret
 def lookup(cur,imohash,realhash=None):
     if realhash is not None:
         cur.execute('SELECT path FROM paths where blake=?',[realhash])
@@ -23,9 +50,11 @@ def lookup(cur,imohash,realhash=None):
 def restore(database,source,destination):
     con=sqlite3.connect(database)
     cur=con.cursor()
-    temp = create_temp()
+    temp = create_hash_lookup(source)
     hashes = hashify(source)
-    for path,imohash,realhash in hashes:
+    for _hashes,paths in hashes.items():
+        search_results = lookup(cur,*_hashes)
+
         ppath = os.path.join(source,path)
         qpath=lookup(cur,imohash,realhash)
         qpath=os.path.join(temp,qpath)