You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.3 KiB

  1. #!/usr/bin/python
  2. from imohash.imohash import hashfile as _imohash
  3. import hashlib
  4. import multiprocessing
  5. import os
  6. import psutil
  7. import sqlite3
  8. import argparse
  9. # https://stackoverflow.com/a/17782753
  10. def file_hash(path, block_size=4096*8):
  11. '''
  12. Block size directly depends on the block size of your filesystem
  13. to avoid performances issues
  14. Here I have blocks of 4096 octets (Default NTFS)
  15. '''
  16. _hash = hashlib.blake2b()
  17. if os.path.getsize(path) < psutil.virtual_memory().available:
  18. split = False
  19. else:
  20. split = True
  21. with open(path,'rb') as f:
  22. if split:
  23. for chunk in iter(lambda: f.read(block_size), b''):
  24. _hash.update(chunk)
  25. else:
  26. bytes = f.read()
  27. _hash.update(bytes)
  28. return path,_hash.digest()
  29. def hashify(top):
  30. old_dir = os.getcwd()
  31. os.chdir(top)
  32. ret = []
  33. imo_hashes = set()
  34. for root,dirs,files in os.walk('.'):
  35. try:
  36. for file in files:
  37. filepath = os.path.join(root,file)
  38. imohash = _imohash(filepath)
  39. real_hash = None
  40. if imohash in imo_hashes:
  41. real_hash = file_hash(filepath)
  42. print(file)
  43. ret.append(
  44. (
  45. filepath,
  46. imohash,
  47. real_hash
  48. )
  49. )
  50. except PermissionError:
  51. print('Access denied:',root)
  52. except Exception as e:
  53. print(e,file)
  54. os.chdir(old_dir)
  55. return ret
  56. def __init_database__(path):
  57. con = sqlite3.connect(path)
  58. cur = con.cursor()
  59. cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
  60. con.commit()
  61. return con
  62. def backup(top,db_path):
  63. paths = hashify(top)
  64. con = __init_database__(db_path)
  65. cur = con.cursor()
  66. cur.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);',paths)
  67. con.commit()
  68. if __name__ == "__main__":
  69. parser = argparse.ArgumentParser()
  70. parser.add_argument('dir')
  71. parser.add_argument('-d','--database',default='fs.db')
  72. args = parser.parse_args()
  73. __init_database__(args.database)
  74. backup(args.dir,args.database)