You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
2.1 KiB

  1. from imohash.imohash import hashfile as _imohash
  2. import hashlib
  3. import hashlib
  4. import multiprocessing
  5. import os
  6. import psutil
  7. import sqlite3
  8. # https://stackoverflow.com/a/17782753
  9. def file_hash(path, block_size=4096*8):
  10. '''
  11. Block size directly depends on the block size of your filesystem
  12. to avoid performances issues
  13. Here I have blocks of 4096 octets (Default NTFS)
  14. '''
  15. _hash = hashlib.blake2b()
  16. if os.path.getsize(path) < psutil.virtual_memory().available:
  17. split = False
  18. else:
  19. split = True
  20. with open(path,'rb') as f:
  21. if split:
  22. for chunk in iter(lambda: f.read(block_size), b''):
  23. _hash.update(chunk)
  24. else:
  25. bytes = f.read()
  26. _hash.update(bytes)
  27. return path,_hash.digest()
  28. def hashify(top):
  29. old_dir = os.getcwd()
  30. os.chdir(top)
  31. ret = []
  32. imo_hashes = set()
  33. for root,dirs,files in os.walk('.'):
  34. try:
  35. for file in files:
  36. filepath = os.path.join(root,file)
  37. imohash = _imohash(filepath)
  38. real_hash = None
  39. if imohash in imo_hashes:
  40. real_hash = file_hash(filepath)
  41. print(file)
  42. ret.append(
  43. (
  44. filepath,
  45. imohash,
  46. real_hash
  47. )
  48. )
  49. except PermissionError:
  50. print('Access denied:',root)
  51. except Exception as e:
  52. print(e,file)
  53. os.chdir(old_dir)
  54. return ret
  55. def __init_database__(path):
  56. con = sqlite3.connect(path)
  57. cur = con.cursor()
  58. cur.execute('CREATE TABLE IF NOT EXISTS `paths` (`path` TEXT, `imohash` BLOB, `blake` BLOB, UNIQUE(`path`, `imohash`, `blake`));')
  59. con.commit()
  60. return con
  61. def backup(top,db_path):
  62. paths = hashify(top)
  63. con = __init_database__(db_path)
  64. cur = con.cursor()
  65. cur.executemany('INSERT OR IGNORE INTO `paths` VALUES (?,?,?);',paths)
  66. con.commit()
  67. if __name__ == "__main__":
  68. backup(os.path.expandvars('%userprofile%'),'test.db')