You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

128 lines
4.0 KiB

def all_parens(s, pairs=('()', '{}', '[]')):
    """Map the index of each matched opening bracket in *s* to its closer.

    Each entry of *pairs* is a string whose first character opens a bracket
    and whose remaining character(s) close it.  Every pair type is matched
    independently of the others.  A character preceded by an unescaped
    backslash is skipped, so ``\\(`` is ignored while the bracket in
    ``\\\\(`` (an escaped backslash followed by a bracket) still counts.

    Args:
        s: The text to scan.
        pairs: Re-iterable collection of opener/closer strings.

    Returns:
        A dict ``{open_index: close_index}``.  Unmatched openers and
        unmatched closers are left out.
    """
    # One stack of pending opener positions per bracket type.
    pending = {pair: [] for pair in pairs}
    matches = {}
    escaped = False
    for index, char in enumerate(s):
        if escaped:
            # This character is consumed by the backslash before it.
            escaped = False
            continue
        if char == '\\':
            escaped = True
            continue
        # The first pair that mentions the character decides its type.
        pair = next((candidate for candidate in pairs if char in candidate), None)
        if pair is None:
            continue
        if pair.index(char) == 0:
            pending[pair].append(index)
        elif pending[pair]:
            matches[pending[pair].pop()] = index
        # A closer with nothing pending is unmatched and simply ignored.
    return matches
def string_levels(s, *pairs):
    """Yield each bracketed substring of *s*, ordered by opening position.

    *pairs* are opener/closer strings such as ``'()'``; when none are given
    the round, curly and square brackets are used.  Each yielded substring
    includes its opening and closing bracket.  As a side effect, *s* is
    printed to stdout when iteration starts.
    """
    bracket_types = pairs or ['()', '{}', '[]']
    spans = all_parens(s, bracket_types)
    print(s)
    for opener in sorted(spans):
        closer = spans[opener]
        yield s[opener:closer + 1]
class base_regex_blob(object):
    """A contiguous slice ``string[start:end + 1]`` of a source string.

    Instances carry four plain attributes: ``string`` (the full source
    text), ``start`` and ``end`` (inclusive indices into it) and ``parent``
    (``None`` until an owner is assigned from outside).
    """

    def __init__(self, string, start, end):
        """Store the source *string* and the inclusive *start*/*end* indices."""
        self.string = string
        self.start = start
        self.end = end
        self.parent = None

    def __str__(self):
        """Return the covered text, including the character at ``end``."""
        return self.string[self.start:self.end + 1]

    def __repr__(self):
        """Return the ``repr`` of the covered text."""
        return repr(str(self))

    def wrap(self):
        """Return the covered text rendered as ``r"..."``."""
        return 'r"{}"'.format(self)

    def __lt__(self, other):
        """Order spans by start index, then by end index.

        ``sorted`` relies on this to put spans in reading order; comparing
        on position alone keeps the result well defined for disjoint spans
        as well as for overlapping ones.
        """
        return (self.start, self.end) < (other.start, other.end)

    def to_dict(self):
        """Return a dict with the covered text and ``isblob`` set to True."""
        return {
            'single': str(self),
            'isblob': True,
        }
class regex_group(base_regex_blob):
    """A bracketed span of a string plus the groups nested inside it.

    ``children`` holds the directly nested groups.  ``blobs`` holds the
    plain-text runs lying between consecutive children; it is filled in by
    ``blobify``.
    """

    @staticmethod
    def from_string(string, *pairs):
        """Build the group tree for *string* and return its root.

        The root spans the whole string and every bracket pair reported by
        ``all_parens`` becomes a descendant group.

        Args:
            string: The text to analyse.
            *pairs: Opener/closer strings such as ``'()'``.  When omitted,
                ``'()'``, ``'{}'`` and ``'[]'`` are used.

        Returns:
            The root ``regex_group`` with blobs already computed.
        """
        if not pairs:
            pairs = ['()', '{}', '[]']
        spans = all_parens(string, pairs)
        # The root must cover the argument itself, not a module-level name.
        root = regex_group(string, 0, len(string) - 1)
        # Insert left to right so an enclosing group is always in the tree
        # before the groups nested inside it.
        for start in sorted(spans):
            root._handover_(regex_group(string, start, spans[start]))
        root.blobify()
        return root

    def __init__(self, string, start, end):
        """Create a group over ``string[start:end + 1]`` with no children."""
        super().__init__(string, start, end)
        self.children = []
        self.blobs = []

    def __contains__(self, other):
        """Return True when *other*'s span lies within this group's span."""
        return self.start <= other.start and self.end >= other.end

    def __getitem__(self, key):
        """Return the direct child (or slice of children) at *key*."""
        return self.children[key]

    def __len__(self):
        """Return the number of groups nested at any depth below this one."""
        return len(self.children) + sum(len(child) for child in self.children)

    def _handover_(self, group):
        """Attach *group* below this group if this group's span contains it.

        Children are offered the group first, so it ends up under the
        innermost containing group; this group adopts it only when no
        descendant has.
        """
        if group not in self:
            return
        for child in self.children:
            child._handover_(group)
        # Compare against None explicitly: truth-testing a group goes
        # through __len__, which counts descendants.
        if group.parent is None:
            group.parent = self
            self.children.append(group)

    def _make_blobs_(self):
        """Create a blob for each non-empty gap between neighbouring children.

        Only text between two consecutive children is captured; text before
        the first child or after the last child produces no blob.
        """
        for left, right in zip(self.children, self.children[1:]):
            if right.start - left.end > 1:
                blob = base_regex_blob(self.string, left.end + 1, right.start - 1)
                blob.parent = self
                self.blobs.append(blob)

    def to_dict(self):
        """Return a nested dict describing this group.

        A group without children reports its own text under ``"single"``.
        A group with children leaves ``"single"`` empty and lists its blobs
        and children, in order of position, under ``"children"``.
        """
        if self.children:
            single = ''
            # Sort on start position explicitly so blobs and child groups
            # interleave in reading order.
            parts = sorted(self.blobs + self.children, key=lambda part: part.start)
        else:
            single = str(self)
            parts = []
        return {
            "single": single,
            "children": [part.to_dict() for part in parts],
            "isblob": False,
        }

    def blobify(self):
        """Compute blobs for this group and, recursively, for every child."""
        self._make_blobs_()
        for child in self.children:
            child.blobify()
if __name__ == "__main__":
    # Demo: parse a sample episode-filename pattern into its group tree,
    # tracking round and square brackets only, and pretty-print the result.
    import pprint

    test = r"(?P<show>(\w+)(\W\w+)*)\W*[Ss](eason|(?=\d))\D*(?P<season>\d+)\W*[Ee](pisode|(?=\d))\D*(?P<episode>\d+\w?([^\d\w&]*&[^\d\w&]\d+\w?)?)\W*(?P<other>.*)(?P<ext>\.\w+)$"
    root = regex_group.from_string(test, '()', '[]')
    tree_as_dict = root.to_dict()
    pprint.pprint(tree_as_dict)