Sorry, wasn't expecting guests
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

131 lines
5.8 KiB

  1. import time
  2. import binascii
  3. uncompressed = input("Binary input: ") # BINARY MUST BE SEPARATED BY COMMA
  4. if (uncompressed == 'file'):
  5. try:
  6. #with open('C://Users//Ob//Desktop//binary-output2.txt', 'r') as file:
  7. with open(input("path to file: "), 'r') as file:
  8. uncompressed = file.read().replace('\n', '')
  9. except:
  10. print("that wasn't a valid file.")
  11. time.sleep(2)
  12. quit()
  13. file.close()
  14. print("file length:", len(uncompressed))
  15. time.sleep(2)
  16. def string2bits (string):
  17. return str(bin(int.from_bytes(string.encode(), 'big'))).replace("0b", "")
  18. def stl (lst, n):
  19. return [lst[i:i + n] for i in range(0, len(lst), n)] # Converts a string into a list with each item being n long.
  20. def lts(s):
  21. str1 = ""
  22. for ele in s:
  23. str1 += ele
  24. return str1
  25. def bits2string(b=None):
  26. #return ''.join([chr(int(x, 2)) for x in b])
  27. #[line[i:i+n] for i in range(0, len(line), n)]
  28. return ''.join([chr(int(x, 2)) for x in [b[i:i+8] for i in range(0, len(b), 8)]]) # Black magic. Specific to 8-bit.
  29. def toBinary(n, bits):
  30. return ''.join(str(1 & int(n) >> i) for i in range(bits)[::-1]) # Also black magic. Do not question the Python gods. Also specific to 8-bit.
  31. uncompressed = uncompressed.split(',')
  32. print("Transferring infromation...")
  33. compressed = []
  34. for i in uncompressed:
  35. compressed.append(i)
  36. def bytecount (quotes,data):
  37. if (len(lts(data))/8 < 1000):
  38. print(quotes, len(lts(data))/8, "bytes")
  39. elif (len(lts(data))/8 < 1000000):
  40. print(quotes, len(lts(data))/8000, "kilobytes")
  41. else:
  42. print(quotes, len(lts(data))/8000000, "megabytes")
  43. bytecount("uncompressed file size:", uncompressed)
  44. indexvars = []
  45. indexID = 0
  46. errortest = 0
  47. skiploop = False
  48. matchstore = []
  49. maxscan = 0
# Pattern search. Walk the input chunks, grow a candidate pattern from each
# eligible starting chunk, and replace worthwhile patterns in `compressed`
# with a short reference token.
# NOTE(review): this copy of the file has lost its indentation; the nesting
# below is reconstructed from the control flow and should be confirmed.
for i in range(0, len(uncompressed)):
    # Start a scan only past the end of the previous one, and only on a chunk
    # that occurs more than once in the input.
    # NOTE(review): maxscan starts at 0, so the chunk at index 0 never starts a scan.
    if (i > maxscan and uncompressed.count(str(uncompressed[i])) > 1): # Checks to see if the chunk has been used more than once
        matchcount = str(uncompressed[i]) # Candidate pattern, grown chunk by chunk below
        io = 0 # Number of chunks appended to the candidate so far
        try:
            errortest = uncompressed[i+1] # Checks to make sure there won't be an 'index out of range' error
        except:
            skiploop = True
        else:
            skiploop = False
        # Disabled alternative guard, kept for reference:
        #if (skiploop == False and uncompressed.count(str(uncompressed[i+1])) > 1):
        # Keep extending while the candidate plus the next chunk still occurs
        # more than once in the joined input. The count is taken on the joined
        # string, so occurrences are not restricted to chunk boundaries.
        while ((not skiploop == True) and lts(uncompressed).count(matchcount + str(uncompressed[i+io+1])) > 1):
            try:
                errortest = uncompressed[i+io+2] # Checks to make sure there won't be an 'index out of range' error next loop cycle
            except:
                skiploop = True # If there's going to be an 'index out of range' error next loop cycle, bail. Exit the loop after this cycle.
            io += 1
            matchcount = matchcount + str(uncompressed[i+io])
        # Disabled normalisation step, kept for reference:
        #matchcount = string2bits(bits2string(matchcount).strip()) # The reason why so many patterns are being ignored in the replacement process is because other patterns are colliding with them.
        matchcount = str(matchcount)
        maxscan = i + io # The next scan may only start after the last chunk consumed here
        skipif = False
        # Disabled overlap check, kept for reference:
        #if (matchstore in str(matchcount)+str(matchcount)+str(matchcount)): skipif = True # We don't want 'egg ', ' egg', 'g eg', and 'gg e' to be separate occurences.
        # Reject candidates already contained in the joined index, or that
        # contain a previously accepted pattern.
        if (matchcount in lts(indexvars)): skipif = True # We don't want 'egg ', ' egg', 'g eg', and 'gg e' to be separate occurences.
        for x in matchstore:
            if x in matchcount:
                skipif = True
        # Accept only if it pays off: 40 is the length of the replacement token
        # ('11111111' + 32-bit id) and len + 8 is the size of the index entry
        # appended below ('00000000' + pattern).
        if (skipif == False and lts(uncompressed).count(matchcount) * (len(matchcount) - 40) > len(matchcount) + 8):# and len(matchcount) > 40 and lts(uncompressed).count(matchcount) > 2
            matchstore.append(str(matchcount))
            indexvars.append('00000000') # Entry separator in the index
            indexvars.append(matchcount)
            compressed = lts(compressed) # Join so the pattern can be replaced wherever it occurs
            compressed = compressed.replace(matchcount, '11111111' + str(toBinary(indexID, 32)))
            compressed = stl(compressed, 8) # Makes it back into a list
            indexID += 1
            # NOTE(review): i indexes chunks while the denominator is the joined
            # character count, and the scale is 1000 rather than 100, so the
            # figure printed as a percentage is probably mis-scaled; confirm.
            print('[' + bits2string(matchcount) + ']', '(' + str(lts(uncompressed).count(matchcount)), 'instances)', 'progress:', str(int(i/len(lts(uncompressed))*10000)/10) + '% done')
            # NOTE(review): the original nesting level of this check is uncertain.
            if (indexID > 4294967295): # Maximum 32-bit integer... Can guarantee that no one will need over 4 billion patterns
                print("Pattern index overflow. More memory required.")
  88. if (len(indexvars) > 0): indexvars.append('0000000000000000') # Two NULL characters separate indexvars from compressed.
  89. compressed[0:0] = indexvars
  90. bytecount("uncompressed file size:", uncompressed)
  91. bytecount("compressed file size:", compressed)
  92. quit()
# NOTE(review): everything in this section is unreachable as written, because
# the script exits unconditionally immediately before it.
# Debug dump: the pattern index, the compressed chunks, and the compressed
# stream decoded as text.
print(indexvars)
print()
print(compressed)
print()
print(bits2string(lts(compressed)))
print()
# NOTE(review): this repeats the separator append and index insertion that
# already happen before the exit; if the exit were removed, the index would be
# inserted into `compressed` twice.
if (len(indexvars) > 0): indexvars.append('0000000000000000') # Two NULL characters separate indexvars from compressed.
compressed[0:0] = indexvars
print()
bytecount("uncompressed file size:", uncompressed)
bytecount("compressed file size:", compressed)
print() # Note: indexID was made 32-bit. 4 null characters after 11111111 do not separate indexvars from compressed; they only do that if 0000000000000000 is not after 11111111.
# Ask for the output file name; an empty answer means "do not save".
filename = str(input("name output file (leave blank to not save and don't include file extensions): "))
  106. def saveExport ():
  107. filepath = str(input("path to export folder (use '//' to separate): "))
  108. try:
  109. global f
  110. f= open(filepath + filename + ".txt","w+")
  111. except:
  112. print("mistakes have been made.")
  113. saveExport()
  114. if (len(filename) > 0):
  115. saveExport()
  116. print("saving... (This could take a while.)")
  117. f.write(','.join(lts(compressed)[i:i + 8] for i in range(0, len(lts(compressed)), 8)))
  118. f.close()
  119. print("done saving!")