From 121b6e31513f22d600318e65a8564dda681efefc Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Thu, 6 Dec 2018 16:59:25 -0600 Subject: [PATCH] added read_from_db,id_from_top, and checker to ensure id is in dataset --- fs_class.py | 77 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 18 deletions(-) diff --git a/fs_class.py b/fs_class.py index aa09383..fe428a2 100755 --- a/fs_class.py +++ b/fs_class.py @@ -29,21 +29,25 @@ def hashify(top,hashfunc): def __init_database__(con): cur = con.cursor() - cur.execute('''CREATE TABLE IF NOT EXISTS `PATHS` ( - `PATH` TEXT, - `HASH` BLOB, - `ID` INTEGER)''') - cur.execute('''CREATE TABLE IF NOT EXISTS `BACKUPS` ( - `ID` INTEGER, - `HASH_THRESHOLD` INTEGER, - `SAMPLE_SIZE` INTEGER)''') + cur.execute('''CREATE TABLE IF NOT EXISTS `paths` ( + `path` TEXT, + `hash` BLOB, + `id` INTEGER)''') + cur.execute('''CREATE TABLE IF NOT EXISTS `backups` ( + `id` INTEGER, + `hash_threshold` INTEGER, + `sample_size` INTEGER, + `top` TEXT, + PRIMARY KEY(`id`) + );''') con.commit() + class fs: + def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None): self.top=top self.hash_threshold=hash_threshold self.sample_size=sample_size - self.hash_files = {} self.hashfunc = functools.partial( hash_file, @@ -61,33 +65,70 @@ class fs: if id is None: self.cur.execute('SELECT MAX(id) FROM backups') id=self.cur.fetchone() + self.new = True if id[0] is None: self.id = 0 else: self.id = id[0]+1 else: + self.new = False + self.cur.execute('SELECT id FROM backups WHERE id=?',[id]) + res = self.cur.fetchone() + if res[0] is None: + raise ValueError('id not found') self.id = id + def write_to_db(self): - '''stores self.hash_files in database, along with a backup id and hash_func parameters''' + '''stores self.hash_files in database, along with a backup id, hash_func parameters, and top level dir''' for hash,files in self.hash_files.items(): for file in files: - self.cur.execute('INSERT INTO paths VALUES (?,?,?)',[hash,file,self.id]) + self.cur.execute('INSERT INTO paths VALUES (?,?,?)',[file,hash,self.id]) + + params = { + 'id':self.id, + 'hash_threshold':self.hash_threshold, + 'sample_size':self.sample_size, + 'top':self.top, + } + + if self.new: + self.cur.execute('INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',params) + else: + self.cur.execute('UPDATE backups SET (:hash_threshold,:sample_size,:top) WHERE id=:id',params) + self.con.commit() - def morph(self,other): - '''renames/copies all files in self.top to match other''' - pass + def read_from_db(self): + self.cur.execute('SELECT path,hash FROM paths WHERE id=?',[self.id]) + for path,hash in self.cur.fetchall(): + self.hash_files.setdefault(hash,[]).append(path) + + def id_from_top(self): + self.cur.execute('SELECT id FROM backups WHERE top=?',[self.top]) + res = self.cur.fetchall() + if len(res) > 0: + if len(res) == 1: + self.id = res[0][0] + else: + print('\n'.join(map(str,(row[0] for row in res)))) + which = input('Which one?: ') + id = int(which) + else: + print('Not found') + + def restore(self): + '''renames/copies all files in self.top to the order prescribed in database''' + self.read_from_db() def backup(self): '''fills in self.hash_file {hash:[filepaths]}''' paths = hashify(self.top,self.hashfunc) for filepath,hash in paths: self.hash_files.setdefault(hash,[]).append(filepath) - if __name__ == "__main__": import os - test = fs(os.getcwd()) - test.backup() - test.write_to_db() + test = fs('.git') + # test.backup() + # test.write_to_db()