From 229a84d34ef6b13bf81dd01b82ca8c2c9375cdde Mon Sep 17 00:00:00 2001 From: rlbr Date: Tue, 27 Nov 2018 17:10:07 -0600 Subject: [PATCH 1/9] Added some functions replacing for loop statements --- backup.py | 1 - restore.py | 24 +++++++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/backup.py b/backup.py index e54d6f4..f5b0d51 100755 --- a/backup.py +++ b/backup.py @@ -39,7 +39,6 @@ def hashify(top): imohash = _imohash(filepath) real_hash = None if imohash in imo_hashes: - print(filepath) real_hash = file_hash(filepath) else: imo_hashes.add(imohash) diff --git a/restore.py b/restore.py index e13c094..505fbe8 100755 --- a/restore.py +++ b/restore.py @@ -7,11 +7,27 @@ import sqlite3 import time import time -def create_temp(): +def create_temp_dir(): name= 'fsb{}'.format(hex(time.time_ns())[2:]) os.mkdir(name) return name +def copy_file_temp(path): + name= 'fsb{}'.format(hex(time.time_ns())[2:]) + os.copy(path,name) + return name + +def super_rename(src,dst): + parent = + if not o + +def create_hash_lookup(top): + hashes = hasifiy(top) + ret = {} + for _hash in hashes: + path,*two_hashes = _hash + ret.setdefault(two_hashes,[]).append(path) + return ret def lookup(cur,imohash,realhash=None): if realhash is not None: cur.execute('SELECT path FROM paths where blake=?',[realhash]) @@ -23,9 +39,11 @@ def lookup(cur,imohash,realhash=None): def restore(database,source,destination): con=sqlite3.connect(database) cur=con.cursor() - temp = create_temp() + temp = create_hash_lookup(source) hashes = hashify(source) - for path,imohash,realhash in hashes: + for _hashes,paths in hashes.items(): + search_results = lookup(cur,*_hashes) + ppath = os.path.join(source,path) qpath=lookup(cur,imohash,realhash) qpath=os.path.join(temp,qpath) From 92bab46fec499f41004423ecf4164876b0f522d7 Mon Sep 17 00:00:00 2001 From: rlbr Date: Wed, 28 Nov 2018 14:41:20 -0600 Subject: [PATCH 2/9] added fs class --- fs_class.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 fs_class.py diff --git a/fs_class.py b/fs_class.py new file mode 100644 index 0000000..477f6a8 --- /dev/null +++ b/fs_class.py @@ -0,0 +1,23 @@ +#/usr/bin/python +def hash_file(path): + +class fs: + def __init__(self,top,db_path=None,hash_threshold=1024**2): + self.top=top + self.hash_threshold=hash_threshold + if db_path is not None: + self.db_path=db_path + else: + self.db_path = 'fs.db' + + def from_db(self,top,db_path,_id=0): + pass + + def write_to_db(self): + pass + + def morph(self,other): + pass + + def backup(self,other): + pass From d75f659408c56d9ee5e91b321585a0b36c3ca5b7 Mon Sep 17 00:00:00 2001 From: rlbr Date: Wed, 28 Nov 2018 14:55:58 -0600 Subject: [PATCH 3/9] Only using imohash this time --- fs_class.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs_class.py b/fs_class.py index 477f6a8..c8c7590 100644 --- a/fs_class.py +++ b/fs_class.py @@ -1,8 +1,8 @@ #/usr/bin/python -def hash_file(path): - +def hash_file(path,hash_if_less,sample): + return _imohash(path,hash_if_less,sample) class fs: - def __init__(self,top,db_path=None,hash_threshold=1024**2): + def __init__(self,top,db_path=None,hash_threshold=1024**2,sample=128*1024): self.top=top self.hash_threshold=hash_threshold if db_path is not None: From 01cb0a24b6995a1bfc417101639c8a3a63494ad1 Mon Sep 17 00:00:00 2001 From: rlbr Date: Wed, 28 Nov 2018 16:37:15 -0600 Subject: [PATCH 4/9] partial --- backup.py | 7 ------- fs_class.py | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/backup.py b/backup.py index f5b0d51..f26d74c 100755 --- a/backup.py +++ b/backup.py @@ -57,13 +57,6 @@ def hashify(top): #print(imo_hashes) return ret -def __init_database__(path): - con = sqlite3.connect(path) - cur = con.cursor() - cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));') - con.commit() - return con - def backup(top,db_path): paths = hashify(top) con = __init_database__(db_path) diff --git a/fs_class.py b/fs_class.py index c8c7590..793f191 100644 --- a/fs_class.py +++ b/fs_class.py @@ -1,6 +1,14 @@ #/usr/bin/python def hash_file(path,hash_if_less,sample): return _imohash(path,hash_if_less,sample) + +def __init_database__(path): + con = sqlite3.connect(path) + cur = con.cursor() + #cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));') + + con.commit() + class fs: def __init__(self,top,db_path=None,hash_threshold=1024**2,sample=128*1024): self.top=top From 68b9d73ec5bbc7ef8d6be60890f96e318c3de61b Mon Sep 17 00:00:00 2001 From: rlbr Date: Thu, 29 Nov 2018 13:23:47 -0600 Subject: [PATCH 5/9] table initializes and working on fs constructor --- fs_class.py | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) mode change 100644 => 100755 fs_class.py diff --git a/fs_class.py b/fs_class.py old mode 100644 new mode 100755 index 793f191..17e94d3 --- a/fs_class.py +++ b/fs_class.py @@ -1,25 +1,40 @@ -#/usr/bin/python +#!/usr/bin/python +import sqlite3 def hash_file(path,hash_if_less,sample): return _imohash(path,hash_if_less,sample) -def __init_database__(path): - con = sqlite3.connect(path) +def __init_database__(con): cur = con.cursor() - #cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));') - + cur.execute('''CREATE TABLE IF NOT EXISTS `PATHS` ( + `PATH` TEXT, + `HASH` BLOB, + `ID` INTEGER)''') + cur.execute('''CREATE TABLE IF NOT EXISTS `BACKUPS` ( + `ID` INTEGER, + `HASH_THRESHOLD` INTEGER, + `SAMPLE_SIZE` INTEGER)''') con.commit() - class fs: - def __init__(self,top,db_path=None,hash_threshold=1024**2,sample=128*1024): + def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None): self.top=top self.hash_threshold=hash_threshold - if db_path is not None: + self.sample_size=sample_size + if db_path is None: + self.db_path = 'fs.db' + else: self.db_path=db_path + self.con = sqlite3.connect(self.db_path) + __init_database__(self.con) + self.cur = self.con.cursor() + if id is None: + self.cur.execute('SELECT MAX(id) FROM backups') + id=self.cur.fetchone() + if id[0] is None: + self.id = 0 + else: + self.id = id[0]+1 else: - self.db_path = 'fs.db' - - def from_db(self,top,db_path,_id=0): - pass + self.id = id def write_to_db(self): pass @@ -29,3 +44,6 @@ class fs: def backup(self,other): pass +if __name__ == "__main__": + import os + test = fs(os.getcwd()) From 5908218cc206e0a3637d542360afa78eb2d943a8 Mon Sep 17 00:00:00 2001 From: rlbr Date: Thu, 29 Nov 2018 18:51:11 -0600 Subject: [PATCH 6/9] added hashify --- .sql.swp | Bin 0 -> 12288 bytes backup.py | 30 ------------------------------ fs_class.py | 32 +++++++++++++++++++++++++++++++- 3 files changed, 31 insertions(+), 31 deletions(-) create mode 100644 .sql.swp diff --git a/.sql.swp b/.sql.swp new file mode 100644 index 0000000000000000000000000000000000000000..901c4f4abbca6f10a0e9a8bdbb5068ecbd3443ae GIT binary patch literal 12288 zcmeI%%}T>S5C`zBcTw~OMm=aix{19Ao*G#(P+PPc?J=7)HK7TK-7WYE`qG-Ut@KnW zcq;#a*^lgOV1CCWcAtX>x^8={i9cNJ&s1j9Vl{On{>=7R;qZUI_ zm2H#G%K1B=r#1w%Qcs8%M^5ncQ&B9dYxFDc5)DpNX($nOpOW*--7nyz+XKX?qq^2S>6Z=K`Z HoIC6r;~zy^ literal 0 HcmV?d00001 diff --git a/backup.py b/backup.py index f26d74c..a2dd90e 100755 --- a/backup.py +++ b/backup.py @@ -27,36 +27,6 @@ def file_hash(path, block_size=4096*8): _hash.update(bytes) return _hash.digest() -def hashify(top): - old_dir = os.getcwd() - os.chdir(top) - ret = [] - imo_hashes = set() - for root,dirs,files in os.walk('.'): - try: - for file in files: - filepath = os.path.join(root,file) - imohash = _imohash(filepath) - real_hash = None - if imohash in imo_hashes: - real_hash = file_hash(filepath) - else: - imo_hashes.add(imohash) - ret.append( - ( - filepath, - imohash, - real_hash - ) - ) - except PermissionError: - print('Access denied:',root) - except Exception as e: - print(e,file) - os.chdir(old_dir) - #print(imo_hashes) - return ret - def backup(top,db_path): paths = hashify(top) con = __init_database__(db_path) diff --git a/fs_class.py b/fs_class.py index 17e94d3..b30a09d 100755 --- a/fs_class.py +++ b/fs_class.py @@ -1,8 +1,31 @@ #!/usr/bin/python import sqlite3 +import functools def hash_file(path,hash_if_less,sample): return _imohash(path,hash_if_less,sample) +def hashify(top,hashfunc): + old_dir = os.getcwd() + os.chdir(top) + ret = [] + for root,dirs,files in os.walk('.'): + try: + for file in files: + filepath = os.path.join(root,file) + hash = hashfunc(filepath) + ret.append( + ( + filepath, + hash + ) + ) + except PermissionError: + print('Access denied:',root) + except Exception as e: + print(e,file) + os.chdir(old_dir) + return ret + def __init_database__(con): cur = con.cursor() cur.execute('''CREATE TABLE IF NOT EXISTS `PATHS` ( @@ -19,6 +42,11 @@ class fs: self.top=top self.hash_threshold=hash_threshold self.sample_size=sample_size + self.hashfunc = functools.partial( + hashfile, + hash_if_less=self.hash_threshold, + sample=self.sample + ) if db_path is None: self.db_path = 'fs.db' else: @@ -26,6 +54,8 @@ class fs: self.con = sqlite3.connect(self.db_path) __init_database__(self.con) self.cur = self.con.cursor() + + self.hash_files = {} if id is None: self.cur.execute('SELECT MAX(id) FROM backups') id=self.cur.fetchone() @@ -43,7 +73,7 @@ class fs: pass def backup(self,other): - pass + if __name__ == "__main__": import os test = fs(os.getcwd()) From ff651ac9238e34202bcc5af18997af47e7bb0982 Mon Sep 17 00:00:00 2001 From: rlbr Date: Thu, 6 Dec 2018 12:45:21 -0600 Subject: [PATCH 7/9] filled out db_save and backup --- fs_class.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs_class.py b/fs_class.py index b30a09d..8ff1a89 100755 --- a/fs_class.py +++ b/fs_class.py @@ -67,13 +67,23 @@ class fs: self.id = id def write_to_db(self): - pass + '''stores self.hash_files in database, along with a backup id and hash_func parameters''' + for hash,files in self.hash_files.item() + for file in files: + self.cur.execue('INSERT INTO paths VALUES (?,?)',[hash,file]) + self.con.commit() def morph(self,other): + '''renames/copies all files in self.top to match other''' pass def backup(self,other): + '''fills in self.hash_file {hash:[filepaths]}''' + paths = hashify(self.top,self.hashfunc) + for filepath,hash in paths: + self.hash_file.setdefault(hash,[]).append(filepath) + if __name__ == "__main__": import os test = fs(os.getcwd()) From baeaaf9371fcfdc0f87c0830bbf42d926f4ac736 Mon Sep 17 00:00:00 2001 From: rlbr Date: Thu, 6 Dec 2018 13:15:08 -0600 Subject: [PATCH 8/9] tested backup and write_to_db --- fs_class.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs_class.py b/fs_class.py index 8ff1a89..aa09383 100755 --- a/fs_class.py +++ b/fs_class.py @@ -1,6 +1,7 @@ #!/usr/bin/python import sqlite3 import functools +from imohash import hashfile as _imohash def hash_file(path,hash_if_less,sample): return _imohash(path,hash_if_less,sample) @@ -42,10 +43,12 @@ class fs: self.top=top self.hash_threshold=hash_threshold self.sample_size=sample_size + + self.hash_files = {} self.hashfunc = functools.partial( - hashfile, + hash_file, hash_if_less=self.hash_threshold, - sample=self.sample + sample=self.sample_size ) if db_path is None: self.db_path = 'fs.db' @@ -55,7 +58,6 @@ class fs: __init_database__(self.con) self.cur = self.con.cursor() - self.hash_files = {} if id is None: self.cur.execute('SELECT MAX(id) FROM backups') id=self.cur.fetchone() @@ -68,22 +70,24 @@ class fs: def write_to_db(self): '''stores self.hash_files in database, along with a backup id and hash_func parameters''' - for hash,files in self.hash_files.item() + for hash,files in self.hash_files.items(): for file in files: - self.cur.execue('INSERT INTO paths VALUES (?,?)',[hash,file]) + self.cur.execute('INSERT INTO paths VALUES (?,?,?)',[hash,file,self.id]) self.con.commit() def morph(self,other): '''renames/copies all files in self.top to match other''' pass - def backup(self,other): + def backup(self): '''fills in self.hash_file {hash:[filepaths]}''' paths = hashify(self.top,self.hashfunc) for filepath,hash in paths: - self.hash_file.setdefault(hash,[]).append(filepath) + self.hash_files.setdefault(hash,[]).append(filepath) if __name__ == "__main__": import os test = fs(os.getcwd()) + test.backup() + test.write_to_db() From 121b6e31513f22d600318e65a8564dda681efefc Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Thu, 6 Dec 2018 16:59:25 -0600 Subject: [PATCH 9/9] added read_from_db,id_from_top, and checker to ensure id is in dataset --- fs_class.py | 77 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 18 deletions(-) diff --git a/fs_class.py b/fs_class.py index aa09383..fe428a2 100755 --- a/fs_class.py +++ b/fs_class.py @@ -29,21 +29,25 @@ def hashify(top,hashfunc): def __init_database__(con): cur = con.cursor() - cur.execute('''CREATE TABLE IF NOT EXISTS `PATHS` ( - `PATH` TEXT, - `HASH` BLOB, - `ID` INTEGER)''') - cur.execute('''CREATE TABLE IF NOT EXISTS `BACKUPS` ( - `ID` INTEGER, - `HASH_THRESHOLD` INTEGER, - `SAMPLE_SIZE` INTEGER)''') + cur.execute('''CREATE TABLE IF NOT EXISTS `paths` ( + `path` TEXT, + `hash` BLOB, + `id` INTEGER)''') + cur.execute('''CREATE TABLE IF NOT EXISTS `backups` ( + `id` INTEGER, + `hash_threshold` INTEGER, + `sample_size` INTEGER, + `top` TEXT, + PRIMARY KEY(`id`) + );''') con.commit() + class fs: + def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None): self.top=top self.hash_threshold=hash_threshold self.sample_size=sample_size - self.hash_files = {} self.hashfunc = functools.partial( hash_file, @@ -61,33 +65,70 @@ class fs: if id is None: self.cur.execute('SELECT MAX(id) FROM backups') id=self.cur.fetchone() + self.new = True if id[0] is None: self.id = 0 else: self.id = id[0]+1 else: + self.new = False + self.cur.execute('SELECT id FROM backups WHERE id=?',[id]) + res = self.cur.fetchone() + if res[0] is None: + raise ValueError('id not found') self.id = id + def write_to_db(self): - '''stores self.hash_files in database, along with a backup id and hash_func parameters''' + '''stores self.hash_files in database, along with a backup id, hash_func parameters, and top level dir''' for hash,files in self.hash_files.items(): for file in files: - self.cur.execute('INSERT INTO paths VALUES (?,?,?)',[hash,file,self.id]) + self.cur.execute('INSERT INTO paths VALUES (?,?,?)',[file,hash,self.id]) + + params = { + 'id':self.id, + 'hash_threshold':self.hash_threshold, + 'sample_size':self.sample_size, + 'top':self.top, + } + + if self.new: + self.cur.execute('INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',params) + else: + self.cur.execute('UPDATE backups SET (:hash_threshold,:sample_size,:top) WHERE id=:id',params) + self.con.commit() - def morph(self,other): - '''renames/copies all files in self.top to match other''' - pass + def read_from_db(self): + self.cur.execute('SELECT path,hash FROM paths WHERE id=?',[self.id]) + for path,hash in self.cur.fetchall(): + self.hash_files.setdefault(hash,[]).append(path) + + def id_from_top(self): + self.cur.execute('SELECT id FROM backups WHERE top=?',[self.top]) + res = self.cur.fetchall() + if len(res) > 0: + if len(res) == 1: + self.id = res[0][0] + else: + print('\n'.join(map(str,(row[0] for row in res)))) + which = input('Which one?: ') + id = int(which) + else: + print('Not found') + + def restore(self): + '''renames/copies all files in self.top to the order prescribed in database''' + self.read_from_db() def backup(self): '''fills in self.hash_file {hash:[filepaths]}''' paths = hashify(self.top,self.hashfunc) for filepath,hash in paths: self.hash_files.setdefault(hash,[]).append(filepath) - if __name__ == "__main__": import os - test = fs(os.getcwd()) - test.backup() - test.write_to_db() + test = fs('.git') + # test.backup() + # test.write_to_db()