Terribly_Bodged_Compression.../compression.py


								import time

								import binascii

								# The input is a string of binary chunks separated by commas, or the literal
								# word 'file' to load that string from a text file instead.
								uncompressed = input("Binary input: ")

								if uncompressed == 'file':
								    try:
								        # The `with` block closes the handle on exit, so no explicit close() is needed.
								        with open(input("path to file: "), 'r') as file:
								            # Newlines are dropped so the data may be wrapped across several lines.
								            uncompressed = file.read().replace('\n', '')
								    except (OSError, ValueError, EOFError):
								        # OSError: the path is missing or unreadable.
								        # ValueError: malformed path or undecodable contents.
								        # EOFError: no path could be read from the prompt.
								        print("that wasn't a valid file.")
								        time.sleep(2)
								        quit()
								    print("file length:", len(uncompressed))
								    time.sleep(2)


								def string2bits (string):
								    """Return the encoded bytes of `string` as one string of '0'/'1' characters.

								    Every byte contributes exactly eight digits, leading zeros included, so
								    the result can be cut into 8-character groups and fed to bits2string().
								    The previous int/bin based version dropped leading zero bits, which
								    shifted every following group. An empty string yields ''.
								    """
								    return ''.join(format(byte, '08b') for byte in string.encode())

								def stl (lst, n):
								    """Cut `lst` into consecutive slices of length `n`.

								    Works on anything sliceable (strings as well as lists). The final slice
								    is shorter when len(lst) is not a multiple of n; empty input gives [].
								    """
								    pieces = []
								    for start in range(0, len(lst), n):
								        pieces.append(lst[start:start + n])
								    return pieces

								def lts(s):
								    """Concatenate the string items of `s` into one string.

								    `s` may be any iterable of strings (a plain string is returned
								    unchanged). Uses str.join, which builds the result in a single pass
								    instead of growing it with repeated `+=`.
								    """
								    return "".join(s)

								def bits2string(b=None):
								    """Decode a string of '0'/'1' characters into text, eight bits per character.

								    `b` is read in consecutive 8-character groups; each group is parsed as a
								    base-2 integer and mapped through chr(). A trailing group shorter than
								    eight characters is parsed as it stands. Omitting `b` (or passing None)
								    is treated as empty input and returns '' instead of failing on len(None).
								    """
								    if b is None:
								        return ''
								    return ''.join(chr(int(b[i:i + 8], 2)) for i in range(0, len(b), 8))

								def toBinary(n, bits):
								    """Return the lowest `bits` bits of int(n) as a '0'/'1' string, MSB first.

								    The width is whatever `bits` says (the script calls it with 32); values
								    too large for the width are truncated to their low bits, and a width of
								    zero yields ''.
								    """
								    digits = []
								    # Walk from the most significant requested bit down to bit 0.
								    for shift in range(bits - 1, -1, -1):
								        digits.append(str((int(n) >> shift) & 1))
								    return ''.join(digits)

								# Break the comma-separated input into its individual binary chunks.
								uncompressed = uncompressed.split(',')

								print("Transferring infromation...")

								# Start the output as a separate copy of the chunk list; `uncompressed`
								# itself is kept as the reference for pattern searching and size reports.
								compressed = list(uncompressed)


								def bytecount (quotes,data):
								    """Print the size of `data` with the label `quotes` in front.

								    `data` is an iterable of bit strings; its size is the total number of
								    characters divided by 8. Sizes below 1000 bytes are printed in bytes,
								    below 1000000 bytes in kilobytes (bytes / 1000), otherwise in megabytes
								    (bytes / 1000000). The joined length is computed once rather than again
								    for every comparison and print.
								    """
								    bit_total = len(lts(data))
								    size_bytes = bit_total / 8
								    if size_bytes < 1000:
								        print(quotes, size_bytes, "bytes")
								    elif size_bytes < 1000000:
								        print(quotes, bit_total / 8000, "kilobytes")
								    else:
								        print(quotes, bit_total / 8000000, "megabytes")

								# Report the size of the input before the pattern search starts.
								bytecount("uncompressed file size:", uncompressed)


								indexvars = []   # Pattern table: a '00000000' entry followed by the pattern's bits, per stored pattern.
								indexID = 0      # Id given to the next stored pattern; written as a 32-bit field after a '11111111' marker.
								matchstore = []  # Patterns accepted so far; later candidates containing one of them are rejected.
								maxscan = 0      # Index of the last chunk covered by the most recent candidate.

								# `uncompressed` is never modified in this loop, so its joined bit string and
								# its length are computed once instead of on every iteration.
								uncompressed_bits = lts(uncompressed)
								total_chunks = len(uncompressed)

								for i in range(total_chunks):
								    # A candidate only starts past the previous candidate, on a chunk that
								    # occurs more than once in the input.
								    # NOTE(review): maxscan starts at 0, so chunk 0 is never used as a
								    # starting point — confirm this is intended.
								    if not (i > maxscan and uncompressed.count(str(uncompressed[i])) > 1):
								        continue

								    matchcount = str(uncompressed[i])
								    io = 0  # Number of chunks appended to the candidate after chunk i.

								    # Grow the candidate one chunk at a time for as long as the extended bit
								    # string still occurs more than once. The explicit bounds test stops the
								    # growth at the last chunk (previously done by probing the index inside
								    # a bare try/except).
								    while (i + io + 1 < total_chunks
								            and uncompressed_bits.count(matchcount + str(uncompressed[i + io + 1])) > 1):
								        io += 1
								        matchcount += str(uncompressed[i + io])

								    maxscan = i + io

								    # Skip candidates that already appear inside the pattern table or that
								    # contain a previously accepted pattern, so overlapping variants of the
								    # same text ('egg ', ' egg', 'g eg', ...) are not stored separately.
								    if matchcount in lts(indexvars) or any(x in matchcount for x in matchstore):
								        continue

								    occurrences = uncompressed_bits.count(matchcount)

								    # Every occurrence is replaced by a 40-character token (the 8-character
								    # marker plus a 32-character id), while storing the pattern costs its own
								    # length plus the 8-character table entry in front of it. Keep the
								    # pattern only when the total saving exceeds that cost.
								    if not occurrences * (len(matchcount) - 40) > len(matchcount) + 8:
								        continue

								    matchstore.append(matchcount)
								    indexvars.append('00000000')
								    indexvars.append(matchcount)

								    # Substitute on the joined bit string, then cut it back into 8-character chunks.
								    compressed = lts(compressed).replace(matchcount, '11111111' + toBinary(indexID, 32))
								    compressed = stl(compressed, 8)
								    indexID += 1

								    # Percentage of chunks scanned, to one decimal place. The old formula
								    # divided the chunk index by the bit count and used a 10000/10 scale, so
								    # it did not produce a percentage.
								    progress = int(i / total_chunks * 1000) / 10
								    print('[' + bits2string(matchcount) + ']', '(' + str(occurrences), 'instances)', 'progress:', str(progress) + '% done')

								    if indexID > 4294967295:  # Largest id that fits in the 32-bit field.
								        print("Pattern index overflow. More memory required.")
								        # Two NULL characters separate indexvars from compressed.
								        if len(indexvars) > 0:
								            indexvars.append('0000000000000000')
								        compressed[0:0] = indexvars
								        bytecount("uncompressed file size:", uncompressed)
								        bytecount("compressed file size:", compressed)
								        quit()


								# Dump the pattern table, the substituted chunk list and its decoded text,
								# each followed by an empty line.
								print(indexvars, end='\n\n')
								print(compressed, end='\n\n')
								print(bits2string(lts(compressed)), end='\n\n')

								# Put the pattern table in front of the data. When the table is not empty it
								# is closed with two NULL characters, which separate it from the data.
								# Author's note: indexID is 32-bit, so NULL characters directly after a
								# 11111111 marker belong to a pattern id and do not act as this separator.
								if indexvars:
								    indexvars.append('0000000000000000')
								compressed[:0] = indexvars

								# Size summary, framed by empty lines.
								print()
								bytecount("uncompressed file size:", uncompressed)
								bytecount("compressed file size:", compressed)
								print()


								# Base name for the export file; an empty answer means nothing is saved.
								# input() already returns a string, so no conversion is applied.
								filename = input("name output file (leave blank to not save and don't include file extensions): ")

								def saveExport ():
								    """Ask for an export folder until the output file can be opened.

								    The file path is the entered folder text + the module-level `filename`
								    + ".txt", opened in "w+" mode. The open handle is left in the global `f`
								    for the caller to write to and close. A failed open prints a message and
								    prompts again; retrying is a loop, so repeated failures cannot exhaust
								    the recursion limit.
								    """
								    global f
								    while True:
								        filepath = str(input("path to export folder (use '//' to separate): "))
								        try:
								            f = open(filepath + filename + ".txt", "w+")
								        except (OSError, ValueError):
								            # OSError: folder missing or not writable; ValueError: malformed path.
								            print("mistakes have been made.")
								        else:
								            return


								# Save only when a file name was given.
								if len(filename) > 0:
								    saveExport()  # Opens the output file and stores the handle in the global `f`.
								    print("saving... (This could take a while.)")
								    # Join the chunks once; the previous generator re-joined the whole list
								    # for every 8-character slice it produced.
								    output_bits = lts(compressed)
								    # `with` closes the file even if the write fails.
								    with f:
								        f.write(','.join(output_bits[i:i + 8] for i in range(0, len(output_bits), 8)))
								    print("done saving!")