You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.6 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. #!/usr/bin/python
  2. import sqlite3
  3. import functools
  4. def hash_file(path,hash_if_less,sample):
  5. return _imohash(path,hash_if_less,sample)
  6. def hashify(top,hashfunc):
  7. old_dir = os.getcwd()
  8. os.chdir(top)
  9. ret = []
  10. for root,dirs,files in os.walk('.'):
  11. try:
  12. for file in files:
  13. filepath = os.path.join(root,file)
  14. hash = hashfunc(filepath)
  15. ret.append(
  16. (
  17. filepath,
  18. hash
  19. )
  20. )
  21. except PermissionError:
  22. print('Access denied:',root)
  23. except Exception as e:
  24. print(e,file)
  25. os.chdir(old_dir)
  26. return ret
  27. def __init_database__(con):
  28. cur = con.cursor()
  29. cur.execute('''CREATE TABLE IF NOT EXISTS `PATHS` (
  30. `PATH` TEXT,
  31. `HASH` BLOB,
  32. `ID` INTEGER)''')
  33. cur.execute('''CREATE TABLE IF NOT EXISTS `BACKUPS` (
  34. `ID` INTEGER,
  35. `HASH_THRESHOLD` INTEGER,
  36. `SAMPLE_SIZE` INTEGER)''')
  37. con.commit()
  38. class fs:
  39. def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None):
  40. self.top=top
  41. self.hash_threshold=hash_threshold
  42. self.sample_size=sample_size
  43. self.hashfunc = functools.partial(
  44. hashfile,
  45. hash_if_less=self.hash_threshold,
  46. sample=self.sample
  47. )
  48. if db_path is None:
  49. self.db_path = 'fs.db'
  50. else:
  51. self.db_path=db_path
  52. self.con = sqlite3.connect(self.db_path)
  53. __init_database__(self.con)
  54. self.cur = self.con.cursor()
  55. self.hash_files = {}
  56. if id is None:
  57. self.cur.execute('SELECT MAX(id) FROM backups')
  58. id=self.cur.fetchone()
  59. if id[0] is None:
  60. self.id = 0
  61. else:
  62. self.id = id[0]+1
  63. else:
  64. self.id = id
  65. def write_to_db(self):
  66. '''stores self.hash_files in database, along with a backup id and hash_func parameters'''
  67. for hash,files in self.hash_files.item()
  68. for file in files:
  69. self.cur.execue('INSERT INTO paths VALUES (?,?)',[hash,file])
  70. self.con.commit()
  71. def morph(self,other):
  72. '''renames/copies all files in self.top to match other'''
  73. pass
  74. def backup(self,other):
  75. '''fills in self.hash_file {hash:[filepaths]}'''
  76. paths = hashify(self.top,self.hashfunc)
  77. for filepath,hash in paths:
  78. self.hash_file.setdefault(hash,[]).append(filepath)
if __name__ == "__main__":
    # Script entry point.  This is the only module-level `import os`
    # visible in this file; it binds `os` as a module global, and only
    # when the file is run directly.
    import os
    # Build an fs rooted at the current working directory with all other
    # arguments left at their defaults (database file 'fs.db').
    test = fs(os.getcwd())