Browse Source

Merge branch 'fs_class'

master
Raphael Roberts 7 years ago
parent
commit
de319b13db
  1. BIN
      .sql.swp
  2. 38
      backup.py
  3. 134
      fs_class.py
  4. 24
      restore.py

BIN
.sql.swp

38
backup.py

@ -27,44 +27,6 @@ def file_hash(path, block_size=4096*8):
_hash.update(bytes)
return _hash.digest()
def hashify(top):
    '''Walk ``top`` and hash every file found beneath it.

    The working directory is switched to ``top`` for the duration of the
    walk, so the returned paths are relative to ``top`` (they start with
    ``'.'``). The previous working directory is always restored.

    Each file is first hashed with ``_imohash``. ``file_hash`` is only
    computed when that value has already been seen earlier in the walk;
    otherwise the third tuple element is ``None``.

    A file that cannot be hashed is reported on stdout and skipped; the
    remaining files in the same directory are still processed.

    Returns a list of ``(filepath, imohash, real_hash)`` tuples.
    '''
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    imo_hashes = set()
    try:
        for root,dirs,files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root,name)
                # Handle errors per file so one bad file does not drop the
                # rest of its directory.
                try:
                    imohash = _imohash(filepath)
                    real_hash = None
                    if imohash in imo_hashes:
                        # Sampled hash collided with an earlier file:
                        # fall back to the full-content hash.
                        print(filepath)
                        real_hash = file_hash(filepath)
                    else:
                        imo_hashes.add(imohash)
                except PermissionError:
                    print('Access denied:',filepath)
                except Exception as e:
                    print(e,name)
                else:
                    ret.append((filepath,imohash,real_hash))
    finally:
        # Restore the caller's cwd even if something unexpected escapes.
        os.chdir(old_dir)
    return ret
def __init_database__(path):
con = sqlite3.connect(path)
cur = con.cursor()
cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
con.commit()
return con
def backup(top,db_path):
paths = hashify(top)
con = __init_database__(db_path)

134
fs_class.py

@ -0,0 +1,134 @@
#!/usr/bin/python
import functools
import os
import sqlite3

from imohash import hashfile as _imohash
def hash_file(path,hash_if_less,sample):
    '''Hash ``path`` with imohash's ``hashfile``.

    ``hash_if_less`` and ``sample`` are forwarded positionally as the second
    and third arguments. NOTE(review): presumably these are imohash's sample
    threshold and sample size -- confirm against the imohash API.
    '''
    digest = _imohash(path,hash_if_less,sample)
    return digest
def hashify(top,hashfunc):
    '''Walk ``top`` and apply ``hashfunc`` to every file beneath it.

    The working directory is switched to ``top`` for the duration of the
    walk, so the returned paths are relative to ``top`` (they start with
    ``'.'``). The previous working directory is always restored.

    ``hashfunc`` is called with the relative file path. A file for which it
    raises is reported on stdout and skipped; the remaining files in the
    same directory are still processed.

    Returns a list of ``(filepath, hash)`` tuples.
    '''
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    try:
        for root,dirs,files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root,name)
                # Handle errors per file so one bad file does not drop the
                # rest of its directory.
                try:
                    digest = hashfunc(filepath)
                except PermissionError:
                    print('Access denied:',filepath)
                except Exception as e:
                    print(e,name)
                else:
                    ret.append((filepath,digest))
    finally:
        # Restore the caller's cwd even if something unexpected escapes.
        os.chdir(old_dir)
    return ret
def __init_database__(con):
cur = con.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS `paths` (
`path` TEXT,
`hash` BLOB,
`id` INTEGER)''')
cur.execute('''CREATE TABLE IF NOT EXISTS `backups` (
`id` INTEGER,
`hash_threshold` INTEGER,
`sample_size` INTEGER,
`top` TEXT,
PRIMARY KEY(`id`)
);''')
con.commit()
class fs:
    '''A directory tree indexed by file hash, persisted as a numbered backup.

    ``hash_files`` maps each hash to the list of file paths that produced
    it. ``backup()`` fills it from disk, ``read_from_db()`` fills it from
    the database and ``write_to_db()`` stores it.
    '''

    def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None):
        '''Open the database and select a backup id.

        top            -- top-level directory of the tree.
        db_path        -- SQLite file to use; defaults to ``'fs.db'``.
        hash_threshold -- passed to ``hash_file`` as ``hash_if_less``.
        sample_size    -- passed to ``hash_file`` as ``sample``.
        id             -- existing backup id to attach to. When ``None`` a
                          fresh id (highest stored id + 1, or 0) is used.

        Raises ValueError if ``id`` is given but not present in ``backups``.
        '''
        self.top = top
        self.hash_threshold = hash_threshold
        self.sample_size = sample_size
        self.hash_files = {}
        self.hashfunc = functools.partial(
            hash_file,
            hash_if_less=self.hash_threshold,
            sample=self.sample_size,
        )
        self.db_path = 'fs.db' if db_path is None else db_path
        self.con = sqlite3.connect(self.db_path)
        __init_database__(self.con)
        self.cur = self.con.cursor()
        if id is None:
            self.new = True
            self.cur.execute('SELECT MAX(id) FROM backups')
            # MAX() over an empty table yields a single NULL row.
            (max_id,) = self.cur.fetchone()
            self.id = 0 if max_id is None else max_id + 1
        else:
            self.new = False
            self.cur.execute('SELECT id FROM backups WHERE id=?',[id])
            # fetchone() returns None (not a row) when nothing matches.
            if self.cur.fetchone() is None:
                raise ValueError('id not found')
            self.id = id

    def write_to_db(self):
        '''stores self.hash_files in database, along with a backup id, hash_func parameters, and top level dir'''
        rows = [
            (path,digest,self.id)
            for digest,paths in self.hash_files.items()
            for path in paths
        ]
        self.cur.executemany('INSERT INTO paths VALUES (?,?,?)',rows)
        params = {
            'id':self.id,
            'hash_threshold':self.hash_threshold,
            'sample_size':self.sample_size,
            'top':self.top,
        }
        if self.new:
            self.cur.execute('INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',params)
        else:
            # UPDATE needs explicit column=value assignments.
            self.cur.execute(
                'UPDATE backups SET hash_threshold=:hash_threshold,'
                ' sample_size=:sample_size, top=:top WHERE id=:id',
                params,
            )
        self.con.commit()

    def read_from_db(self):
        '''adds every (path, hash) row stored for self.id to self.hash_files'''
        self.cur.execute('SELECT path,hash FROM paths WHERE id=?',[self.id])
        for path,digest in self.cur.fetchall():
            self.hash_files.setdefault(digest,[]).append(path)

    def id_from_top(self):
        '''sets self.id to the stored backup whose top equals self.top

        With several matches the ids are printed and the user is prompted to
        pick one. With no match 'Not found' is printed and self.id is kept.
        '''
        self.cur.execute('SELECT id FROM backups WHERE top=?',[self.top])
        ids = [row[0] for row in self.cur.fetchall()]
        if not ids:
            print('Not found')
            return
        if len(ids) == 1:
            self.id = ids[0]
        else:
            print('\n'.join(map(str,ids)))
            which = input('Which one?: ')
            self.id = int(which)
        # The id now refers to a stored backup, so later writes must update
        # its row rather than insert a conflicting one.
        self.new = False

    def restore(self):
        '''renames/copies all files in self.top to the order prescribed in database'''
        # Only the database read is implemented so far.
        self.read_from_db()

    def backup(self):
        '''fills in self.hash_file {hash:[filepaths]}'''
        for filepath,digest in hashify(self.top,self.hashfunc):
            self.hash_files.setdefault(digest,[]).append(filepath)
# Manual smoke test, run only when this file is executed as a script.
if __name__ == "__main__":
import os
# No db_path is given, so the instance uses the default 'fs.db' database.
test = fs('.git')
# The remaining steps are left disabled:
# test.backup()
# test.write_to_db()

24
restore.py

@ -7,11 +7,27 @@ import sqlite3
import time
import time
def create_temp():
def create_temp_dir():
    '''Create a directory in the current working directory and return its name.

    The name is ``fsb`` followed by the current ``time.time_ns()`` value in
    hexadecimal (without the ``0x`` prefix).
    '''
    stamp = hex(time.time_ns())[2:]
    name = 'fsb{}'.format(stamp)
    os.mkdir(name)
    return name
def copy_file_temp(path):
    '''Copy ``path`` to a new file in the current directory; return its name.

    The name is ``fsb`` followed by the current ``time.time_ns()`` value in
    hexadecimal (without the ``0x`` prefix).
    '''
    # The os module has no copy(); shutil provides file copying.
    import shutil

    name = 'fsb{}'.format(hex(time.time_ns())[2:])
    shutil.copy(path,name)
    return name
# NOTE(review): unfinished definition -- the assignment to `parent` has no
# right-hand side and the `if` condition is cut off, so this does not parse.
# The intended behaviour cannot be determined from the visible code; complete
# or remove it before use.
def super_rename(src,dst):
parent =
if not o
def create_hash_lookup(top):
    '''Group the files under ``top`` by their hashes.

    Each entry returned by ``hashify(top)`` is split into its path (first
    element) and the remaining elements. Those remaining elements, as a
    tuple, become the dictionary key.

    Returns ``{hashes_tuple: [path, ...]}``.
    '''
    ret = {}
    for path,*two_hashes in hashify(top):
        # Star-unpacking yields a list, which is unhashable; dict keys must
        # be tuples.
        ret.setdefault(tuple(two_hashes),[]).append(path)
    return ret
def lookup(cur,imohash,realhash=None):
if realhash is not None:
cur.execute('SELECT path FROM paths where blake=?',[realhash])
@ -23,9 +39,11 @@ def lookup(cur,imohash,realhash=None):
def restore(database,source,destination):
con=sqlite3.connect(database)
cur=con.cursor()
temp = create_temp()
temp = create_hash_lookup(source)
hashes = hashify(source)
for path,imohash,realhash in hashes:
for _hashes,paths in hashes.items():
search_results = lookup(cur,*_hashes)
ppath = os.path.join(source,path)
qpath=lookup(cur,imohash,realhash)
qpath=os.path.join(temp,qpath)

Loading…
Cancel
Save