You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
2.7 KiB

#!/usr/bin/python
import functools
import os
import sqlite3

from imohash import hashfile as _imohash
def hash_file(path, hash_if_less, sample):
    '''Return the imohash digest of the file at ``path``.

    ``hash_if_less`` and ``sample`` are forwarded positionally as the second
    and third arguments of ``imohash.hashfile``.
    NOTE(review): presumably these are imohash's sampling threshold and
    sample size -- confirm against the installed imohash version.
    '''
    digest = _imohash(path, hash_if_less, sample)
    return digest
def hashify(top, hashfunc):
    '''Walk ``top`` and hash every file found beneath it.

    The process working directory is switched to ``top`` while walking so
    that both the returned paths and the paths handed to ``hashfunc`` are
    relative (they start with ``.``).  The previous working directory is
    always restored, even if the walk is interrupted.

    Args:
        top: Directory to walk.
        hashfunc: Callable taking a file path and returning its hash.

    Returns:
        A list of ``(filepath, hash)`` tuples, one per file that was hashed
        successfully.  A file whose hashing raises is reported on stdout and
        skipped; the remaining files are still processed.

    Raises:
        OSError: If ``top`` cannot be entered with ``os.chdir``.
    '''
    old_dir = os.getcwd()
    os.chdir(top)
    ret = []
    try:
        for root, _dirs, files in os.walk('.'):
            for name in files:
                filepath = os.path.join(root, name)
                # Errors are handled per file: one unreadable file must not
                # drop every remaining file of the same directory.
                try:
                    digest = hashfunc(filepath)
                except PermissionError:
                    print('Access denied:', filepath)
                except Exception as e:
                    # Deliberate best effort: report and keep walking.
                    print(e, name)
                else:
                    ret.append((filepath, digest))
    finally:
        # chdir is process-global, so never leave the caller stranded in top.
        os.chdir(old_dir)
    return ret
def __init_database__(con):
    '''Create the PATHS and BACKUPS tables on ``con`` if missing, then commit.

    Args:
        con: An open ``sqlite3`` connection.
    '''
    # The continuation lines of the SQL are kept flush-left so the statement
    # text handed to SQLite is unchanged.
    paths_ddl = '''CREATE TABLE IF NOT EXISTS `PATHS` (
`PATH` TEXT,
`HASH` BLOB,
`ID` INTEGER)'''
    backups_ddl = '''CREATE TABLE IF NOT EXISTS `BACKUPS` (
`ID` INTEGER,
`HASH_THRESHOLD` INTEGER,
`SAMPLE_SIZE` INTEGER)'''
    cursor = con.cursor()
    for statement in (paths_ddl, backups_ddl):
        cursor.execute(statement)
    con.commit()
class fs:
    '''Snapshot of a directory tree, grouped by file hash and stored in SQLite.

    Typical use: construct, call ``backup()`` to hash the tree, then
    ``write_to_db()`` to persist the result.  Instances can be used as
    context managers; leaving the ``with`` block closes the connection.
    '''

    def __init__(self, top, db_path=None, hash_threshold=1024**2, sample_size=128*1024, id=None):
        '''Open the database (creating the tables if needed) and pick a backup id.

        Args:
            top: Root directory that ``backup`` walks.
            db_path: SQLite database file; ``'fs.db'`` when ``None``.
            hash_threshold: Forwarded to ``hash_file`` as ``hash_if_less``.
            sample_size: Forwarded to ``hash_file`` as ``sample``.
            id: Backup id to use.  When ``None``, one more than the largest
                id in the ``backups`` table, or 0 if that table is empty.
        '''
        self.top = top
        self.hash_threshold = hash_threshold
        self.sample_size = sample_size
        # {hash: [filepaths]}, filled by backup().
        self.hash_files = {}
        self.hashfunc = functools.partial(
            hash_file,
            hash_if_less=self.hash_threshold,
            sample=self.sample_size,
        )
        self.db_path = 'fs.db' if db_path is None else db_path
        self.con = sqlite3.connect(self.db_path)
        __init_database__(self.con)
        self.cur = self.con.cursor()
        if id is None:
            self.cur.execute('SELECT MAX(id) FROM backups')
            row = self.cur.fetchone()
            # MAX() over an empty table yields a single NULL row.
            self.id = 0 if row[0] is None else row[0] + 1
        else:
            self.id = id

    def __enter__(self):
        '''Return ``self`` so the instance can be used in a ``with`` block.'''
        return self

    def __exit__(self, exc_type, exc, tb):
        '''Close the database connection when leaving a ``with`` block.'''
        self.close()

    def close(self):
        '''Close the cursor and the database connection.'''
        self.cur.close()
        self.con.close()

    def write_to_db(self):
        '''Store ``self.hash_files`` in the database under this backup id.

        One row per file goes into ``paths``.  A ``backups`` row recording
        the id and the hashing parameters is added unless one with this id
        already exists.  Everything is written in a single transaction.
        '''
        rows = [
            (path, digest, self.id)
            for digest, paths in self.hash_files.items()
            for path in paths
        ]
        # The connection context manager commits on success and rolls back
        # if any statement raises.
        with self.con:
            self.cur.execute('SELECT 1 FROM backups WHERE id = ?', (self.id,))
            if self.cur.fetchone() is None:
                # Without this row MAX(id) never advances and every new
                # backup would be assigned id 0.
                self.cur.execute(
                    'INSERT INTO backups (id, hash_threshold, sample_size) VALUES (?,?,?)',
                    (self.id, self.hash_threshold, self.sample_size),
                )
            # Columns are named explicitly: the table is declared as
            # (PATH, HASH, ID), so a positional (hash, path, id) insert would
            # put the values in the wrong columns.
            self.cur.executemany(
                'INSERT INTO paths (path, hash, id) VALUES (?,?,?)',
                rows,
            )

    def morph(self, other):
        '''Rename/copy all files in ``self.top`` to match ``other``.

        Not implemented yet: currently does nothing and returns ``None``.
        '''
        pass

    def backup(self):
        '''Hash every file under ``self.top`` and group the paths by hash.

        Fills ``self.hash_files`` as ``{hash: [filepaths]}``; entries are
        appended to whatever the mapping already holds.
        '''
        for filepath, digest in hashify(self.top, self.hashfunc):
            self.hash_files.setdefault(digest, []).append(filepath)
if __name__ == "__main__":
    import os

    # Script entry point: snapshot the current working directory and persist
    # it using the class defaults.
    snapshot = fs(top=os.getcwd())
    snapshot.backup()
    snapshot.write_to_db()