# npp_settings/plugins/config/PythonScript/scripts/regex_group_expand.py

def all_parens(s, pairs=('()', '{}', '[]')):
    """Map the index of every opening bracket in *s* to its matching closer.

    Args:
        s: The string to scan (typically a regular-expression pattern).
        pairs: Iterable of bracket pairs, each a string whose first character
            is the opener and whose remaining character(s) act as closers.
            Each pair is tracked independently of the others. When a
            character belongs to several pairs, the first pair listed wins.

    Returns:
        A dict ``{opener_index: closer_index}`` containing one entry per
        matched pair. Unmatched closers are ignored and unmatched openers
        produce no entry.

    A bracket preceded by an odd number of consecutive backslashes is treated
    as escaped and skipped; an even-length run (e.g. ``\\\\(`` in the pattern
    text) is a sequence of escaped backslashes and leaves the bracket active.
    """
    # One stack of still-open opener positions per pair type.
    open_stacks = {pair: [] for pair in pairs}
    ret = {}
    # Length of the backslash run immediately before the current character.
    # Tracking the run (instead of peeking at s[i - 1]) avoids wrapping round
    # to s[-1] for the first character and handles escaped backslashes.
    backslash_run = 0
    for char_index, char in enumerate(s):
        escaped = backslash_run % 2 == 1
        backslash_run = backslash_run + 1 if char == '\\' else 0
        if escaped:
            continue
        pair_type = next((pair for pair in pairs if char in pair), None)
        if pair_type is None:
            continue
        if pair_type.index(char) == 0:
            open_stacks[pair_type].append(char_index)
        elif open_stacks[pair_type]:
            ret[open_stacks[pair_type].pop()] = char_index
    return ret
    
def string_levels(s, *pairs):
    """Yield every bracketed substring of *s*, ordered by opening position.

    *pairs* are the bracket pairs to look for (e.g. ``'()'``, ``'[]'``); when
    none are given, round, curly and square brackets are all used. Each
    yielded substring includes its opening and closing bracket. The input
    string is printed once, when iteration starts.
    """
    spans = all_parens(s, pairs or ['()', '{}', '[]'])
    print(s)
    for opener, closer in sorted(spans.items()):
        yield s[opener:closer + 1]
        
class base_regex_blob(object):
    """A contiguous span ``string[start:end + 1]`` of a larger string.

    Instances do not copy the text; they keep a reference to the whole string
    plus inclusive ``start``/``end`` indices into it.
    """

    def __init__(self, string, start, end):
        self.start = start    # index of the first character of the span
        self.end = end        # index of the last character (inclusive)
        self.string = string  # the full string the indices refer to
        self.parent = None    # enclosing node; left unset here

    def __str__(self):
        """Return the text covered by the span."""
        return self.string[self.start:self.end + 1]

    def __repr__(self):
        return repr(str(self))

    def wrap(self):
        """Return the span's text written as a raw-string literal ``r"..."``."""
        return 'r"{}"'.format(self)

    def __lt__(self, other):
        """Order spans by position so ``sorted()`` lists them left to right.

        Spans are compared by ``start``; on a tie the wider span sorts first,
        so an enclosing span precedes the spans it contains.
        """
        if self.start != other.start:
            return self.start < other.start
        return self.end > other.end

    def to_dict(self):
        """Return a plain-dict description of this span."""
        return {
            'single': str(self),
            'isblob': True,
        }
class regex_group(base_regex_blob):
    """A bracketed span of a pattern together with the spans nested inside it.

    ``children`` holds the directly nested groups and ``blobs`` holds the
    stretches of text lying strictly between two consecutive children. Text
    before the first child and after the last child is not turned into a blob.
    """

    @classmethod
    def from_string(cls, string, *pairs):
        """Build the tree of bracketed groups found in *string*.

        Args:
            string: The pattern text to analyse.
            *pairs: Bracket pairs to recognise (e.g. ``'()'``, ``'[]'``).
                Defaults to round, curly and square brackets.

        Returns:
            A root group spanning the whole string, with every matched
            bracket pair attached beneath it and blobs already computed.
        """
        if not pairs:
            pairs = ['()', '{}', '[]']
        spans = all_parens(string, pairs)
        root = cls(string, 0, len(string) - 1)
        # Visit groups left to right so that an enclosing group is always
        # inserted before the groups it contains.
        for start in sorted(spans):
            root._handover_(cls(string, start, spans[start]))
        root.blobify()
        return root

    def __init__(self, string, start, end):
        super().__init__(string, start, end)
        self.children = []  # directly nested groups, in insertion order
        self.blobs = []     # text spans between consecutive children

    def __contains__(self, other):
        """Return True when *other*'s span lies within this group's span."""
        return self.start <= other.start and self.end >= other.end

    def __getitem__(self, key):
        return self.children[key]

    def __len__(self):
        """Return the total number of groups nested below this one."""
        return len(self.children) + sum(map(len, self.children))

    def _handover_(self, group):
        """Attach *group* to the deepest group under ``self`` that contains it."""
        if group not in self:
            return
        for child in self.children:
            child._handover_(group)
        # Compare against None explicitly: a group defines __len__, so a
        # childless parent would otherwise evaluate as false.
        if group.parent is None:
            group.parent = self
            self.children.append(group)

    def _make_blobs_(self):
        """Record the text between each pair of consecutive children."""
        for left, right in zip(self.children, self.children[1:]):
            # Only create a blob when at least one character separates them.
            if right.start - left.end > 1:
                blob = base_regex_blob(self.string, left.end + 1, right.start - 1)
                blob.parent = self
                self.blobs.append(blob)

    def to_dict(self):
        """Return a nested plain-dict description of this group.

        A group without children reports its own text under ``"single"``.
        Otherwise ``"single"`` is empty and ``"children"`` lists the nested
        groups and blobs in left-to-right order.
        """
        if self.children:
            single = ''
            # Sort by position explicitly so blobs and groups interleave
            # correctly regardless of how the items compare to each other.
            parts = sorted(self.blobs + self.children, key=lambda part: part.start)
        else:
            single = str(self)
            parts = []
        return {
            "single": single,
            "children": [part.to_dict() for part in parts],
            "isblob": False,
        }

    def blobify(self):
        """Compute blobs for this group and, recursively, for all descendants."""
        self._make_blobs_()
        for child in self.children:
            child.blobify()
if __name__ == "__main__":
    import pprint

    # Demo: parse a sample episode-filename pattern, tracking only round and
    # square brackets, and pretty-print the resulting tree.
    test = r"(?P<show>(\w+)(\W\w+)*)\W*[Ss](eason|(?=\d))\D*(?P<season>\d+)\W*[Ee](pisode|(?=\d))\D*(?P<episode>\d+\w?([^\d\w&]*&[^\d\w&]\d+\w?)?)\W*(?P<other>.*)(?P<ext>\.\w+)$"
    root = regex_group.from_string(test, '()', '[]')
    pprint.pprint(root.to_dict())