You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
128 lines
4.0 KiB
128 lines
4.0 KiB
def all_parens(s, pairs=('()', '{}', '[]')):
    """Map the index of every matched opening bracket to its closing index.

    Each entry of ``pairs`` is a string whose first character opens a span
    and whose remaining character(s) close it.  Every pair type is tracked
    independently of the others, so ``"([)]"`` yields both ``{0: 2}`` and
    ``{1: 3}``.  Closing brackets with no pending opener are ignored, and
    openers that are never closed do not appear in the result.

    A bracket preceded by an odd number of consecutive backslashes is
    treated as escaped and skipped; an even run (e.g. ``\\\\(``) is a
    sequence of escaped backslashes and leaves the bracket active.

    Args:
        s: The string to scan.
        pairs: Iterable of bracket pair strings, e.g. ``'()'``.

    Returns:
        dict mapping ``open_index -> close_index`` for every matched span.
    """
    # One stack of pending opener positions per pair type.
    pending = {pair: [] for pair in pairs}
    spans = {}
    backslash_run = 0  # consecutive backslashes immediately before `char`

    for index, char in enumerate(s):
        # Decide escaping from the characters *before* this one only; this
        # never looks at s[-1], so the first character cannot be "escaped"
        # by a backslash at the end of the string.
        escaped = backslash_run % 2 == 1
        backslash_run = backslash_run + 1 if char == '\\' else 0
        if escaped:
            continue

        pair = next((candidate for candidate in pairs if char in candidate), None)
        if pair is None:
            continue

        if pair.index(char) == 0:
            pending[pair].append(index)
        elif pending[pair]:
            spans[pending[pair].pop()] = index
        # else: unmatched closer, deliberately ignored.

    return spans
|
|
|
|
def string_levels(s, *pairs):
    """Yield each bracketed substring of ``s``, ordered by opening index.

    ``pairs`` are the bracket pair strings to look for; when none are given
    the round, curly and square pairs are used.  Each yielded substring
    includes its opening and closing bracket.  ``s`` is printed to stdout
    once, when iteration begins.
    """
    bracket_pairs = pairs or ['()', '{}', '[]']
    spans = all_parens(s, bracket_pairs)
    print(s)
    for opening, closing in sorted(spans.items()):
        yield s[opening:closing + 1]
|
|
|
|
class base_regex_blob(object):
    """A contiguous slice of a source string, identified by its bounds.

    ``start`` and ``end`` are both inclusive indices into ``string``.
    ``parent`` is ``None`` until some owner assigns itself.
    """

    def __init__(self, string, start, end):
        self.start = start
        self.end = end
        self.string = string
        self.parent = None

    def __str__(self):
        """Return the covered text (``end`` is inclusive)."""
        return self.string[self.start:self.end + 1]

    def __repr__(self):
        return repr(str(self))

    def wrap(self):
        """Return the covered text rendered as a raw-string literal."""
        return 'r"{}"'.format(self)

    def __lt__(self, other):
        """Order spans by position in the source string.

        The previous test (``self.end > other.start``) was never true for
        two disjoint spans, so ``sorted()`` left siblings in their input
        order.  Comparing ``(start, end)`` gives a consistent positional
        ordering.
        """
        return (self.start, self.end) < (other.start, other.end)

    def to_dict(self):
        """Return a plain-dict description of this leaf span."""
        return {
            'single': str(self),
            'isblob': True
        }
|
|
class regex_group(base_regex_blob):
    """A bracketed span that can hold nested groups and the text between them.

    ``children`` holds the directly nested groups in the order they were
    handed over; ``blobs`` holds the plain-text spans lying between
    consecutive children.
    """

    @staticmethod
    def from_string(string, *pairs):
        """Build the tree of bracket groups found in ``string``.

        Args:
            string: The text to analyse.
            *pairs: Bracket pair strings such as ``'()'``; when omitted the
                round, curly and square pairs are used.

        Returns:
            A root ``regex_group`` spanning the whole of ``string``, with
            every matched bracket span attached beneath it and blobs built.
        """
        if not pairs:
            pairs = ['()', '{}', '[]']
        spans = all_parens(string, pairs)
        # The root must span the string being parsed, not a module global.
        root = regex_group(string, 0, len(string) - 1)
        # Hand groups over in order of opening index so that an enclosing
        # group is always attached before the groups nested inside it.
        for opening in sorted(spans):
            root._handover_(regex_group(string, opening, spans[opening]))
        root.blobify()
        return root

    def __init__(self, string, start, end):
        super().__init__(string, start, end)
        self.children = []
        self.blobs = []

    def __contains__(self, other):
        """Return True when ``other`` lies entirely within this span."""
        return self.start <= other.start and self.end >= other.end

    def __getitem__(self, key):
        return self.children[key]

    def __len__(self):
        """Return the number of groups nested at any depth below this one."""
        base = len(self.children)
        return base + sum(map(len, self.children))

    def _handover_(self, group):
        """Attach ``group`` to the deepest group in this subtree containing it."""
        if group not in self:
            return
        # Give descendants the first chance to claim the group.
        for child in self.children:
            child._handover_(group)
        # Compare against None explicitly: truth-testing a regex_group goes
        # through the recursive __len__.
        if group.parent is None:
            group.parent = self
            self.children.append(group)

    def _make_blobs_(self):
        """Rebuild ``blobs`` from the gaps between consecutive children.

        Only text strictly between one child's end and the next child's
        start becomes a blob; text before the first child or after the last
        child is not captured.  The list is rebuilt from scratch so calling
        this more than once does not duplicate blobs.
        """
        self.blobs = []
        for left, right in zip(self.children, self.children[1:]):
            if right.start - left.end > 1:
                blob = base_regex_blob(self.string, left.end + 1, right.start - 1)
                blob.parent = self
                self.blobs.append(blob)

    def to_dict(self):
        """Return a nested plain-dict description of this group.

        A group without children reports its own text under ``"single"``;
        otherwise ``"single"`` is empty and ``"children"`` lists the blobs
        and nested groups in order of their position in the string.
        """
        if self.children:
            single = ''
            # Sort on the start index explicitly so that blobs and groups
            # are interleaved by position.
            members = sorted(self.blobs + self.children,
                             key=lambda node: node.start)
        else:
            single = str(self)
            members = []
        return {
            "single": single,
            "children": [member.to_dict() for member in members],
            "isblob": False,
        }

    def blobify(self):
        """Build blobs for this group and, recursively, for every child."""
        self._make_blobs_()
        for child in self.children:
            child.blobify()
|
|
if __name__ == "__main__":
    import pprint

    # Demo: parse a sample pattern, tracking only round and square brackets,
    # and pretty-print the resulting tree as nested dicts.
    test = r"(?P<show>(\w+)(\W\w+)*)\W*[Ss](eason|(?=\d))\D*(?P<season>\d+)\W*[Ee](pisode|(?=\d))\D*(?P<episode>\d+\w?([^\d\w&]*&[^\d\w&]\d+\w?)?)\W*(?P<other>.*)(?P<ext>\.\w+)$"
    tree = regex_group.from_string(test, '()', '[]')
    pprint.pprint(tree.to_dict())
|