You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
134 lines
4.1 KiB
134 lines
4.1 KiB
#!/usr/bin/python
|
|
import sqlite3
|
|
import functools
|
|
from imohash import hashfile as _imohash
|
|
def hash_file(path,hash_if_less,sample):
    '''Return the imohash digest of the file at ``path``.

    Thin wrapper around ``imohash.hashfile`` (imported as ``_imohash``);
    all three arguments are forwarded positionally and unchanged.

    path: file to hash.
    hash_if_less: second positional argument of ``hashfile`` -- presumably
        the size threshold below which the whole file is hashed; confirm
        against the imohash documentation.
    sample: third positional argument of ``hashfile`` -- presumably the
        sample size in bytes; confirm against the imohash documentation.
    '''
    digest = _imohash(path, hash_if_less, sample)
    return digest
|
|
|
|
def hashify(top,hashfunc):
    '''Hash every file below ``top`` and return ``[(path, hash), ...]``.

    The walk is done with ``top`` as the working directory, so every
    returned path is relative to ``top`` (it starts with ``.``).

    top: directory to walk.
    hashfunc: callable taking a file path and returning its hash.

    Returns a list of ``(filepath, hash)`` tuples, one per file that was
    hashed successfully. A file whose hashing raises is reported on stdout
    and skipped; the remaining files are still processed.
    '''
    # Imported locally so the function also works when this module is
    # imported rather than run as a script.
    import os

    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    try:
        for root, _dirs, files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root, name)
                # Handle errors per file: one unreadable file must not
                # drop the rest of its directory from the result.
                try:
                    digest = hashfunc(filepath)
                except PermissionError:
                    print('Access denied:', filepath)
                except Exception as e:
                    print(e, filepath)
                else:
                    ret.append((filepath, digest))
    finally:
        # Always restore the caller's working directory, even if the walk
        # is interrupted (e.g. KeyboardInterrupt).
        os.chdir(old_dir)
    return ret
|
|
|
|
def __init_database__(con):
    '''Create the ``paths`` and ``backups`` tables on ``con`` if they are missing.

    con: open sqlite3 connection. The statements run on a fresh cursor and
        the connection is committed before returning.
    '''
    schema = (
        '''CREATE TABLE IF NOT EXISTS `paths` (
            `path` TEXT,
            `hash` BLOB,
            `id` INTEGER)''',
        '''CREATE TABLE IF NOT EXISTS `backups` (
            `id` INTEGER,
            `hash_threshold` INTEGER,
            `sample_size` INTEGER,
            `top` TEXT,
            PRIMARY KEY(`id`)
            );''',
    )
    cursor = con.cursor()
    for statement in schema:
        cursor.execute(statement)
    con.commit()
|
|
|
|
class fs:
    '''Hash snapshot of a directory tree, persisted in an SQLite database.

    ``hash_files`` maps each file hash to the list of paths that have that
    hash. A snapshot ("backup") is identified by an integer ``id``; its
    hashing parameters and top-level directory live in the ``backups``
    table and its files in the ``paths`` table.
    '''

    def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None):
        '''Open (and if needed initialise) the database and pick a backup id.

        top: top-level directory of the snapshot.
        db_path: SQLite database file; ``'fs.db'`` when None.
        hash_threshold, sample_size: bound into ``self.hashfunc`` as the
            ``hash_if_less`` and ``sample`` arguments of ``hash_file``.
        id: existing backup id to attach to. When None a new id is
            allocated (0 for an empty table, otherwise ``MAX(id) + 1``).

        Raises ValueError('id not found') when ``id`` is given but has no
        row in ``backups``.
        '''
        self.top = top
        self.hash_threshold = hash_threshold
        self.sample_size = sample_size
        self.hash_files = {}
        self.hashfunc = functools.partial(
            hash_file,
            hash_if_less=self.hash_threshold,
            sample=self.sample_size,
        )
        self.db_path = 'fs.db' if db_path is None else db_path
        self.con = sqlite3.connect(self.db_path)
        __init_database__(self.con)
        self.cur = self.con.cursor()

        if id is None:
            self.new = True
            self.cur.execute('SELECT MAX(id) FROM backups')
            # MAX() over an empty table yields a single row holding NULL.
            max_id = self.cur.fetchone()[0]
            self.id = 0 if max_id is None else max_id + 1
        else:
            self.new = False
            self.cur.execute('SELECT id FROM backups WHERE id=?', [id])
            # fetchone() returns None (not a row) when nothing matched.
            if self.cur.fetchone() is None:
                raise ValueError('id not found')
            self.id = id

    def write_to_db(self):
        '''stores self.hash_files in database, along with a backup id, hash_func parameters, and top level dir

        For an existing backup the previously stored paths are replaced and
        its ``backups`` row is updated; for a new one both are inserted.
        Everything is committed together; on failure the transaction is
        rolled back and the exception re-raised.
        '''
        rows = [
            (path, digest, self.id)
            for digest, paths in self.hash_files.items()
            for path in paths
        ]
        params = {
            'id': self.id,
            'hash_threshold': self.hash_threshold,
            'sample_size': self.sample_size,
            'top': self.top,
        }
        try:
            if not self.new:
                # Replace rather than append, otherwise every rewrite of
                # an existing backup would duplicate its path rows.
                self.cur.execute('DELETE FROM paths WHERE id=?', [self.id])
            self.cur.executemany('INSERT INTO paths VALUES (?,?,?)', rows)
            if self.new:
                self.cur.execute(
                    'INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',
                    params,
                )
            else:
                self.cur.execute(
                    'UPDATE backups SET hash_threshold=:hash_threshold,'
                    ' sample_size=:sample_size, top=:top WHERE id=:id',
                    params,
                )
            self.con.commit()
        except Exception:
            self.con.rollback()
            raise
        # The backups row now exists, so later writes must update it.
        self.new = False

    def read_from_db(self):
        '''Load the stored paths of backup ``self.id`` into ``self.hash_files``.

        Entries are appended to whatever ``self.hash_files`` already holds.
        '''
        self.cur.execute('SELECT path,hash FROM paths WHERE id=?', [self.id])
        for path, digest in self.cur.fetchall():
            self.hash_files.setdefault(digest, []).append(path)

    def id_from_top(self):
        '''Set ``self.id`` to the stored backup whose ``top`` equals ``self.top``.

        With exactly one match it is selected directly. With several, the
        ids are printed and the user is prompted to choose one. With none,
        'Not found' is printed and the object is left unchanged.

        Raises ValueError when the typed answer is not an integer or is not
        one of the listed ids.
        '''
        self.cur.execute('SELECT id FROM backups WHERE top=?', [self.top])
        candidates = [row[0] for row in self.cur.fetchall()]
        if not candidates:
            print('Not found')
            return
        if len(candidates) == 1:
            chosen = candidates[0]
        else:
            print('\n'.join(map(str, candidates)))
            chosen = int(input('Which one?: '))
            if chosen not in candidates:
                raise ValueError('id not found')
        self.id = chosen
        # The selected backup already has a row in ``backups``.
        self.new = False

    def restore(self):
        '''renames/copies all files in self.top to the order prescribed in database

        NOTE: only the loading step exists so far; no file is renamed or
        copied yet.
        '''
        self.read_from_db()

    def backup(self):
        '''fills in self.hash_file {hash:[filepaths]}'''
        for filepath, digest in hashify(self.top, self.hashfunc):
            self.hash_files.setdefault(digest, []).append(filepath)
|
|
|
|
if __name__ == "__main__":
    # Binds ``os`` at module scope when the file is run as a script.
    import os

    # Manual smoke test against the local ``.git`` directory: constructing
    # the object opens the database and allocates a backup id.
    test = fs('.git')
    # Disabled follow-up steps:
    # test.backup()
    # test.write_to_db()
|