You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

134 lines
4.1 KiB

#!/usr/bin/python
import functools
import os
import sqlite3

from imohash import hashfile as _imohash
def hash_file(path,hash_if_less,sample):
    """Hash the file at *path* using ``imohash.hashfile``.

    ``hash_if_less`` and ``sample`` are forwarded positionally as the second
    and third arguments of ``imohash.hashfile``.
    NOTE(review): presumably these are the "hash the whole file below this
    size" threshold and the sample size in bytes -- confirm against imohash.

    Returns whatever ``imohash.hashfile`` returns for these arguments.
    """
    digest = _imohash(path, hash_if_less, sample)
    return digest
def hashify(top,hashfunc):
    """Walk *top* and hash every file found beneath it.

    The working directory is switched to *top* for the duration of the walk,
    so the returned paths are relative to *top* (they start with ``.``). The
    previous working directory is always restored, even if the walk is
    interrupted by an exception that is not handled here.

    Args:
        top: Directory to walk.
        hashfunc: Callable that takes a file path and returns its hash.

    Returns:
        A list of ``(filepath, hash)`` tuples, one per successfully hashed
        file. Files for which ``hashfunc`` raises are reported on stdout and
        skipped.

    Raises:
        OSError: if *top* cannot be entered with ``os.chdir``.
    """
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    try:
        for root, _dirs, files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root, name)
                # Failures are handled per file so that one unreadable file
                # does not drop the remaining files of the same directory.
                try:
                    ret.append((filepath, hashfunc(filepath)))
                except PermissionError:
                    print('Access denied:', filepath)
                except Exception as e:
                    # Deliberate best effort: report and keep walking.
                    print(e, name)
    finally:
        # chdir is process-global; never leave the caller in `top`.
        os.chdir(old_dir)
    return ret
def __init_database__(con):
    """Ensure the ``paths`` and ``backups`` tables exist on *con*, then commit.

    ``paths`` holds one ``(path, hash, id)`` row per file; ``backups`` holds
    one ``(id, hash_threshold, sample_size, top)`` row per backup, keyed by
    ``id``. Existing tables are left untouched (``IF NOT EXISTS``).
    """
    paths_ddl = '''CREATE TABLE IF NOT EXISTS `paths` (
`path` TEXT,
`hash` BLOB,
`id` INTEGER)'''
    backups_ddl = '''CREATE TABLE IF NOT EXISTS `backups` (
`id` INTEGER,
`hash_threshold` INTEGER,
`sample_size` INTEGER,
`top` TEXT,
PRIMARY KEY(`id`)
);'''
    cursor = con.cursor()
    for ddl in (paths_ddl, backups_ddl):
        cursor.execute(ddl)
    con.commit()
class fs:
    """A directory snapshot stored as ``{hash: [paths]}`` and persisted in SQLite.

    Every snapshot ("backup") has an integer id. The ``backups`` table keeps
    one row per id with the hashing parameters and the top-level directory;
    the ``paths`` table keeps one ``(path, hash, id)`` row per file.

    Attributes set by ``__init__``: ``top``, ``hash_threshold``,
    ``sample_size``, ``hash_files``, ``hashfunc``, ``db_path``, ``con``,
    ``cur``, ``id`` and ``new`` (True while no ``backups`` row exists yet for
    ``id``).
    """

    def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None):
        """Open the database and pick or validate the backup id.

        Args:
            top: Top-level directory this snapshot describes.
            db_path: SQLite database file; defaults to ``'fs.db'``.
            hash_threshold: Passed to ``hash_file`` as ``hash_if_less``.
            sample_size: Passed to ``hash_file`` as ``sample``.
            id: Existing backup id to attach to. When ``None`` a fresh id
                (``MAX(id) + 1``, or 0 for an empty table) is chosen.

        Raises:
            ValueError: if *id* is given but is not present in ``backups``.
        """
        self.top = top
        self.hash_threshold = hash_threshold
        self.sample_size = sample_size
        self.hash_files = {}
        self.hashfunc = functools.partial(
            hash_file,
            hash_if_less=self.hash_threshold,
            sample=self.sample_size,
        )
        self.db_path = 'fs.db' if db_path is None else db_path
        self.con = sqlite3.connect(self.db_path)
        __init_database__(self.con)
        self.cur = self.con.cursor()
        if id is None:
            # MAX() over an empty table yields a single row holding NULL.
            self.cur.execute('SELECT MAX(id) FROM backups')
            max_id = self.cur.fetchone()[0]
            self.new = True
            self.id = 0 if max_id is None else max_id + 1
        else:
            self.cur.execute('SELECT id FROM backups WHERE id=?', [id])
            # fetchone() returns None (not a row) when nothing matched.
            if self.cur.fetchone() is None:
                self.con.close()
                raise ValueError('id not found')
            self.new = False
            self.id = id

    def close(self):
        """Close the underlying SQLite connection."""
        self.con.close()

    def write_to_db(self):
        """Store ``self.hash_files`` and this backup's parameters in the database.

        One ``paths`` row is inserted per file. The ``backups`` row is
        inserted when this backup is new and updated otherwise. Everything is
        written in a single transaction that is rolled back on error.

        Note that ``paths`` rows are always inserted, so calling this twice
        with the same ``hash_files`` stores the paths twice.
        """
        rows = [
            (path, digest, self.id)
            for digest, paths in self.hash_files.items()
            for path in paths
        ]
        params = {
            'id': self.id,
            'hash_threshold': self.hash_threshold,
            'sample_size': self.sample_size,
            'top': self.top,
        }
        # The connection context manager commits on success and rolls back
        # if any statement raises.
        with self.con:
            self.cur.executemany('INSERT INTO paths VALUES (?,?,?)', rows)
            if self.new:
                self.cur.execute(
                    'INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',
                    params,
                )
            else:
                self.cur.execute(
                    'UPDATE backups SET hash_threshold=:hash_threshold, '
                    'sample_size=:sample_size, top=:top WHERE id=:id',
                    params,
                )
        # The backups row exists now; later writes must UPDATE, not INSERT.
        self.new = False

    def read_from_db(self):
        """Add every ``paths`` row stored for ``self.id`` to ``self.hash_files``."""
        self.cur.execute('SELECT path,hash FROM paths WHERE id=?', [self.id])
        for path, digest in self.cur.fetchall():
            self.hash_files.setdefault(digest, []).append(path)

    def id_from_top(self):
        """Set ``self.id`` to the stored backup whose ``top`` equals ``self.top``.

        With exactly one match that id is adopted. With several matches the
        ids are printed and the user is asked to choose one. With no match
        ``'Not found'`` is printed and ``self.id`` is left unchanged.

        Raises:
            ValueError: if the user's answer is not an integer or is not one
                of the listed ids.
        """
        self.cur.execute('SELECT id FROM backups WHERE top=?', [self.top])
        candidates = [row[0] for row in self.cur.fetchall()]
        if not candidates:
            print('Not found')
            return
        if len(candidates) == 1:
            chosen = candidates[0]
        else:
            print('\n'.join(map(str, candidates)))
            chosen = int(input('Which one?: '))
            if chosen not in candidates:
                raise ValueError('id not found')
        self.id = chosen
        # The adopted id already has a backups row.
        self.new = False

    def restore(self):
        """Load the stored paths for ``self.id`` into ``self.hash_files``.

        TODO: renaming/copying the files under ``self.top`` back to the stored
        layout is not implemented; this only reads the database.
        """
        self.read_from_db()

    def backup(self):
        """Hash every file under ``self.top`` into ``self.hash_files`` as ``{hash: [filepaths]}``."""
        for filepath, digest in hashify(self.top, self.hashfunc):
            self.hash_files.setdefault(digest, []).append(filepath)
if __name__ == "__main__":
    # Manual smoke test: opens (creating it if needed) the default 'fs.db'
    # in the current directory and reserves the next backup id for '.git'.
    # `os` is imported at the top of the file rather than here, so that
    # hashify() also works when this module is imported.
    test = fs('.git')
    # Next steps of the smoke test, left disabled:
    # test.backup()
    # test.write_to_db()