You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

73 lines
2.1 KiB

  1. #!/usr/bin/python
  2. from imohash.imohash import hashfile as _imohash
  3. import argparse
  4. import hashlib
  5. import os
  6. import psutil
  7. import sqlite3
  8. # https://stackoverflow.com/a/17782753
  9. def file_hash(path, block_size=4096*8):
  10. '''
  11. Block size directly depends on the block size of your filesystem
  12. to avoid performances issues
  13. Here I have blocks of 4096 octets (Default NTFS)
  14. '''
  15. _hash = hashlib.blake2b()
  16. if os.path.getsize(path) < psutil.virtual_memory().available:
  17. split = False
  18. else:
  19. split = True
  20. with open(path,'rb') as f:
  21. if split:
  22. for chunk in iter(lambda: f.read(block_size), b''):
  23. _hash.update(chunk)
  24. else:
  25. bytes = f.read()
  26. _hash.update(bytes)
  27. return _hash.digest()
  28. def hashify(top):
  29. old_dir = os.getcwd()
  30. os.chdir(top)
  31. ret = []
  32. imo_hashes = set()
  33. for root,dirs,files in os.walk('.'):
  34. try:
  35. for file in files:
  36. filepath = os.path.join(root,file)
  37. imohash = _imohash(filepath)
  38. real_hash = None
  39. if imohash in imo_hashes:
  40. real_hash = file_hash(filepath)
  41. else:
  42. imo_hashes.add(imohash)
  43. ret.append(
  44. (
  45. filepath,
  46. imohash,
  47. real_hash
  48. )
  49. )
  50. except PermissionError:
  51. print('Access denied:',root)
  52. except Exception as e:
  53. print(e,file)
  54. os.chdir(old_dir)
  55. #print(imo_hashes)
  56. return ret
  57. def backup(top,db_path):
  58. paths = hashify(top)
  59. con = __init_database__(db_path)
  60. cur = con.cursor()
  61. cur.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);',paths)
  62. con.commit()
  63. if __name__ == "__main__":
  64. parser = argparse.ArgumentParser()
  65. parser.add_argument('dir')
  66. parser.add_argument('-d','--database',default='fs.db')
  67. args = parser.parse_args()
  68. __init_database__(args.database)
  69. backup(args.dir,args.database)