You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

134 lines
4.1 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. #!/usr/bin/python
  2. import sqlite3
  3. import functools
  4. from imohash import hashfile as _imohash
  5. def hash_file(path,hash_if_less,sample):
  6. return _imohash(path,hash_if_less,sample)
  7. def hashify(top,hashfunc):
  8. old_dir = os.getcwd()
  9. os.chdir(top)
  10. ret = []
  11. for root,dirs,files in os.walk('.'):
  12. try:
  13. for file in files:
  14. filepath = os.path.join(root,file)
  15. hash = hashfunc(filepath)
  16. ret.append(
  17. (
  18. filepath,
  19. hash
  20. )
  21. )
  22. except PermissionError:
  23. print('Access denied:',root)
  24. except Exception as e:
  25. print(e,file)
  26. os.chdir(old_dir)
  27. return ret
  28. def __init_database__(con):
  29. cur = con.cursor()
  30. cur.execute('''CREATE TABLE IF NOT EXISTS `paths` (
  31. `path` TEXT,
  32. `hash` BLOB,
  33. `id` INTEGER)''')
  34. cur.execute('''CREATE TABLE IF NOT EXISTS `backups` (
  35. `id` INTEGER,
  36. `hash_threshold` INTEGER,
  37. `sample_size` INTEGER,
  38. `top` TEXT,
  39. PRIMARY KEY(`id`)
  40. );''')
  41. con.commit()
  42. class fs:
  43. def __init__(self,top,db_path=None,hash_threshold=1024**2,sample_size=128*1024,id=None):
  44. self.top=top
  45. self.hash_threshold=hash_threshold
  46. self.sample_size=sample_size
  47. self.hash_files = {}
  48. self.hashfunc = functools.partial(
  49. hash_file,
  50. hash_if_less=self.hash_threshold,
  51. sample=self.sample_size
  52. )
  53. if db_path is None:
  54. self.db_path = 'fs.db'
  55. else:
  56. self.db_path=db_path
  57. self.con = sqlite3.connect(self.db_path)
  58. __init_database__(self.con)
  59. self.cur = self.con.cursor()
  60. if id is None:
  61. self.cur.execute('SELECT MAX(id) FROM backups')
  62. id=self.cur.fetchone()
  63. self.new = True
  64. if id[0] is None:
  65. self.id = 0
  66. else:
  67. self.id = id[0]+1
  68. else:
  69. self.new = False
  70. self.cur.execute('SELECT id FROM backups WHERE id=?',[id])
  71. res = self.cur.fetchone()
  72. if res[0] is None:
  73. raise ValueError('id not found')
  74. self.id = id
  75. def write_to_db(self):
  76. '''stores self.hash_files in database, along with a backup id, hash_func parameters, and top level dir'''
  77. for hash,files in self.hash_files.items():
  78. for file in files:
  79. self.cur.execute('INSERT INTO paths VALUES (?,?,?)',[file,hash,self.id])
  80. params = {
  81. 'id':self.id,
  82. 'hash_threshold':self.hash_threshold,
  83. 'sample_size':self.sample_size,
  84. 'top':self.top,
  85. }
  86. if self.new:
  87. self.cur.execute('INSERT INTO backups VALUES (:id,:hash_threshold,:sample_size,:top)',params)
  88. else:
  89. self.cur.execute('UPDATE backups SET (:hash_threshold,:sample_size,:top) WHERE id=:id',params)
  90. self.con.commit()
  91. def read_from_db(self):
  92. self.cur.execute('SELECT path,hash FROM paths WHERE id=?',[self.id])
  93. for path,hash in self.cur.fetchall():
  94. self.hash_files.setdefault(hash,[]).append(path)
  95. def id_from_top(self):
  96. self.cur.execute('SELECT id FROM backups WHERE top=?',[self.top])
  97. res = self.cur.fetchall()
  98. if len(res) > 0:
  99. if len(res) == 1:
  100. self.id = res[0][0]
  101. else:
  102. print('\n'.join(map(str,(row[0] for row in res))))
  103. which = input('Which one?: ')
  104. id = int(which)
  105. else:
  106. print('Not found')
  107. def restore(self):
  108. '''renames/copies all files in self.top to the order prescribed in database'''
  109. self.read_from_db()
  110. def backup(self):
  111. '''fills in self.hash_file {hash:[filepaths]}'''
  112. paths = hashify(self.top,self.hashfunc)
  113. for filepath,hash in paths:
  114. self.hash_files.setdefault(hash,[]).append(filepath)
  115. if __name__ == "__main__":
  116. import os
  117. test = fs('.git')
  118. # test.backup()
  119. # test.write_to_db()