import re
import string


# Patterns are compiled once at import time because they are applied to
# every reason processed by aggregate_reason_fields().
_WHITESPACE_RUN = re.compile(r"\s+")
_DIGIT_RUN = re.compile(r"\d+")


class reason_counter:
    """Occurrence counter for one generalized reason.

    Holds the wording to display and how many times the reason was seen.
    A freshly created counter represents exactly one occurrence.
    """

    def __init__(self, wording):
        self.wording = wording
        self.num = 1

    def update(self, new_wording):
        """Record one more occurrence and replace the displayed wording."""
        self.num += 1
        self.wording = new_wording

    def html(self):
        """Return the display text.

        A reason seen once is rendered as its wording alone; a reason seen
        several times is rendered as ``"<wording> (<count>+)"``.
        """
        if self.num == 1:
            return self.wording
        return "%s (%d+)" % (self.wording, self.num)


def numbers_are_irrelevant(txt):
    """Tell whether digits in ``txt`` should be replaced with ``NN``.

    Currently this is always True.  If some categories of reasons ever
    need to keep their numbers, this function is the place to return
    False for them.
    """
    return True


def aggregate_reason_fields(reasons_list):
    """Collapse a list of reason strings into counted, generalized reasons.

    Each entry of ``reasons_list`` may itself hold several reasons
    separated by ``|``.  Every individual reason is normalized
    (surrounding whitespace stripped, whitespace runs collapsed to a
    single space and, when numbers_are_irrelevant() allows it, digit runs
    replaced by ``NN``) and reasons with the same normalized form are
    counted together.  Empty reasons are ignored.

    Returns a list of display strings (see reason_counter.html()) ordered
    by decreasing number of occurrences.  A reason that occurred once is
    shown with its original, non-normalized text; a reason that occurred
    more than once is shown in its normalized form.
    """
    # Expand the '|'-separated combinations into one flat list of reasons.
    reasons = '|'.join(reasons_list).split('|')

    reason_htable = {}
    for reason in reasons:
        reason_reduced = _WHITESPACE_RUN.sub(" ", reason.strip())
        if reason_reduced == '':
            continue  # ignore empty reasons

        if numbers_are_irrelevant(reason_reduced):
            # Generalize the reason by hiding the concrete numbers.
            reason_reduced = _DIGIT_RUN.sub("NN", reason_reduced)

        if reason_reduced not in reason_htable:
            # First occurrence: keep the original wording as it was given.
            reason_htable[reason_reduced] = reason_counter(reason)
        else:
            # Repeated occurrence: switch to the generalized wording.
            reason_htable[reason_reduced].update(reason_reduced)

    # sorted() and a list comprehension are used instead of
    # keys().sort() and map() so the function works, and returns a real
    # list, on Python 3 as well as Python 2.
    generic_reasons = sorted(reason_htable,
                             key=lambda k: reason_htable[k].num,
                             reverse=True)
    return [reason_htable[generic_reason].html()
            for generic_reason in generic_reasons]