You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

81 lines
2.3 KiB

  1. #!/usr/bin/python
  2. from imohash.imohash import hashfile as _imohash
  3. import argparse
  4. import hashlib
  5. import os
  6. import psutil
  7. import sqlite3
  8. # https://stackoverflow.com/a/17782753
  9. def file_hash(path, block_size=4096*8):
  10. '''
  11. Block size directly depends on the block size of your filesystem
  12. to avoid performances issues
  13. Here I have blocks of 4096 octets (Default NTFS)
  14. '''
  15. _hash = hashlib.blake2b()
  16. if os.path.getsize(path) < psutil.virtual_memory().available:
  17. split = False
  18. else:
  19. split = True
  20. with open(path,'rb') as f:
  21. if split:
  22. for chunk in iter(lambda: f.read(block_size), b''):
  23. _hash.update(chunk)
  24. else:
  25. bytes = f.read()
  26. _hash.update(bytes)
  27. return _hash.digest()
  28. def hashify(top):
  29. old_dir = os.getcwd()
  30. os.chdir(top)
  31. ret = []
  32. imo_hashes = set()
  33. for root,dirs,files in os.walk('.'):
  34. try:
  35. for file in files:
  36. filepath = os.path.join(root,file)
  37. imohash = _imohash(filepath)
  38. real_hash = None
  39. if imohash in imo_hashes:
  40. print(filepath)
  41. real_hash = file_hash(filepath)
  42. else:
  43. imo_hashes.add(imohash)
  44. ret.append(
  45. (
  46. filepath,
  47. imohash,
  48. real_hash
  49. )
  50. )
  51. except PermissionError:
  52. print('Access denied:',root)
  53. except Exception as e:
  54. print(e,file)
  55. os.chdir(old_dir)
  56. #print(imo_hashes)
  57. return ret
  58. def __init_database__(path):
  59. con = sqlite3.connect(path)
  60. cur = con.cursor()
  61. cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
  62. con.commit()
  63. return con
  64. def backup(top,db_path):
  65. paths = hashify(top)
  66. con = __init_database__(db_path)
  67. cur = con.cursor()
  68. cur.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);',paths)
  69. con.commit()
  70. if __name__ == "__main__":
  71. parser = argparse.ArgumentParser()
  72. parser.add_argument('dir')
  73. parser.add_argument('-d','--database',default='fs.db')
  74. args = parser.parse_args()
  75. __init_database__(args.database)
  76. backup(args.dir,args.database)