4 changed files with 155 additions and 41 deletions
-
BIN.sql.swp
-
38backup.py
-
134fs_class.py
-
24restore.py
@ -0,0 +1,134 @@ |
|||||
|
#!/usr/bin/python |
||||
|
import functools
import os
import sqlite3

from imohash import hashfile as _imohash
||||
|
def hash_file(path,hash_if_less,sample):
    """Return the imohash digest of the file at *path*.

    Thin wrapper that forwards all three arguments positionally to
    ``imohash.hashfile``.  Presumably *hash_if_less* is the size threshold
    below which the whole file is hashed and *sample* is the sample size
    used for larger files -- confirm against the imohash documentation.
    """
    digest = _imohash(path, hash_if_less, sample)
    return digest
||||
|
|
||||
|
def hashify(top,hashfunc):
    """Hash every file below *top* and return ``(path, hash)`` pairs.

    The process temporarily changes its working directory to *top* so that
    the returned paths (and the paths handed to *hashfunc*) are relative to
    it, e.g. ``./sub/file.txt``.  The previous working directory is always
    restored, even if walking or hashing raises.

    Args:
        top: Directory to walk.
        hashfunc: Callable taking a single file path and returning its hash.

    Returns:
        A list of ``(relative_path, hash)`` tuples, one per file that could
        be hashed.  Files whose hashing fails are reported on stdout and
        skipped; they do not prevent the remaining files in the same
        directory from being processed.
    """
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    try:
        for root, _dirs, files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root, name)
                # Handle errors per file: one unreadable file must not make
                # us skip its siblings.
                try:
                    digest = hashfunc(filepath)
                except PermissionError:
                    print('Access denied:', filepath)
                except Exception as e:
                    # Deliberate best-effort: report and carry on.
                    print(e, filepath)
                else:
                    ret.append((filepath, digest))
    finally:
        # chdir is process-global state; never leave it changed.
        os.chdir(old_dir)
    return ret
||||
|
|
||||
|
def __init_database__(con): |
||||
|
cur = con.cursor() |
||||
|
cur.execute('''CREATE TABLE IF NOT EXISTS `paths` ( |
||||
|
`path` TEXT, |
||||
|
`hash` BLOB, |
||||
|
`id` INTEGER)''') |
||||
|
cur.execute('''CREATE TABLE IF NOT EXISTS `backups` ( |
||||
|
`id` INTEGER, |
||||
|
`hash_threshold` INTEGER, |
||||
|
`sample_size` INTEGER, |
||||
|
`top` TEXT, |
||||
|
PRIMARY KEY(`id`) |
||||
|
);''') |
||||
|
con.commit() |
||||
|
|
||||
|
class fs:
    """Hash-indexed snapshot of a directory tree, persisted in SQLite.

    An instance maps file hashes to the relative paths that have that hash
    (``self.hash_files``) and can store that mapping in, or load it from,
    the ``paths``/``backups`` tables of a SQLite database.

    Attributes set by ``__init__``:
        top, hash_threshold, sample_size: values passed by the caller.
        hash_files: ``{hash: [relative paths]}``, initially empty.
        hashfunc: ``hash_file`` with the threshold and sample size bound.
        db_path, con, cur: database path, connection and a shared cursor.
        id: backup id this instance reads and writes.
        new: True while no ``backups`` row exists yet for ``id``.
    """

    def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None):
        """Open the database and bind this instance to a backup id.

        Args:
            top: Top-level directory of the backup.
            db_path: SQLite file to use; defaults to ``'fs.db'`` in the
                current working directory.
            hash_threshold: Bound to ``hash_file`` as ``hash_if_less``.
            sample_size: Bound to ``hash_file`` as ``sample``.
            id: Existing backup id to attach to.  When None, a fresh id
                (largest stored id + 1, or 0 for an empty table) is used.

        Raises:
            ValueError: *id* was given but no such backup is stored.
        """
        self.top = top
        self.hash_threshold = hash_threshold
        self.sample_size = sample_size
        self.hash_files = {}
        self.hashfunc = functools.partial(
            hash_file,
            hash_if_less=self.hash_threshold,
            sample=self.sample_size,
        )
        self.db_path = 'fs.db' if db_path is None else db_path
        self.con = sqlite3.connect(self.db_path)
        __init_database__(self.con)
        self.cur = self.con.cursor()

        if id is None:
            # MAX() always yields exactly one row; its value is NULL/None
            # when the table is empty.
            self.cur.execute('SELECT MAX(id) FROM backups')
            (latest,) = self.cur.fetchone()
            self.new = True
            self.id = 0 if latest is None else latest + 1
        else:
            self.cur.execute('SELECT id FROM backups WHERE id=?', [id])
            # fetchone() returns None (not a row containing None) when
            # nothing matches, so test the row itself.
            if self.cur.fetchone() is None:
                self.con.close()
                raise ValueError('id not found')
            self.new = False
            self.id = id

    def close(self):
        """Close the underlying database connection."""
        self.con.close()

    def write_to_db(self):
        '''Store self.hash_files in the database, along with the backup id,
        hash function parameters and top-level directory.

        Path rows already stored for this id are replaced, so calling the
        method repeatedly does not duplicate rows.  Everything happens in
        one transaction: it is committed on success and rolled back if any
        statement fails.
        '''
        params = {
            'id': self.id,
            'hash_threshold': self.hash_threshold,
            'sample_size': self.sample_size,
            'top': self.top,
        }
        rows = [
            (path, digest, self.id)
            for digest, paths in self.hash_files.items()
            for path in paths
        ]

        with self.con:
            self.cur.execute('DELETE FROM paths WHERE id=?', [self.id])
            self.cur.executemany('INSERT INTO paths VALUES (?,?,?)', rows)
            if self.new:
                self.cur.execute(
                    'INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',
                    params,
                )
            else:
                self.cur.execute(
                    'UPDATE backups SET hash_threshold=:hash_threshold,'
                    ' sample_size=:sample_size, top=:top WHERE id=:id',
                    params,
                )

        # The backups row now exists; later writes must UPDATE it.
        self.new = False

    def read_from_db(self):
        """Add the stored ``(path, hash)`` rows for ``self.id`` to
        ``self.hash_files``.

        Rows are appended to whatever is already in ``self.hash_files``.
        """
        self.cur.execute('SELECT path,hash FROM paths WHERE id=?', [self.id])
        for path, digest in self.cur.fetchall():
            self.hash_files.setdefault(digest, []).append(path)

    def id_from_top(self):
        """Bind this instance to the stored backup whose ``top`` matches.

        With exactly one match its id is adopted.  With several, the
        candidate ids are printed and the user is prompted to pick one.
        With no match, or a choice that is not among the candidates,
        ``'Not found'`` is printed and the instance is left unchanged.

        Raises:
            ValueError: the user's answer is not an integer.
        """
        self.cur.execute('SELECT id FROM backups WHERE top=?', [self.top])
        candidates = [row[0] for row in self.cur.fetchall()]
        if not candidates:
            print('Not found')
            return

        if len(candidates) == 1:
            chosen = candidates[0]
        else:
            print('\n'.join(map(str, candidates)))
            chosen = int(input('Which one?: '))
            if chosen not in candidates:
                print('Not found')
                return

        self.id = chosen
        # The id refers to a stored backup, so writes must update it.
        self.new = False

    def restore(self):
        '''Renames/copies all files in self.top to the order prescribed in
        the database.

        NOTE: only the loading step exists so far; the mapping is read into
        self.hash_files but no files are renamed or copied yet.
        '''
        self.read_from_db()

    def backup(self):
        '''Fill in self.hash_files as {hash: [filepaths]} by hashing every
        file under self.top with self.hashfunc.'''
        for filepath, digest in hashify(self.top, self.hashfunc):
            self.hash_files.setdefault(digest, []).append(filepath)
||||
|
|
||||
|
if __name__ == "__main__":
    # Manual smoke test: build an fs object for the local .git directory.
    # Constructing it opens (and creates if needed) the default database.
    import os  # binds `os` as a module global when run as a script

    test = fs(top='.git')
    # test.backup()
    # test.write_to_db()
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue