Home | History | Annotate | Download | only in tko
      1 import re,string
      2 
      3 
      4 class reason_counter:
      5     def __init__(self, wording):
      6         self.wording = wording
      7         self.num = 1
      8 
      9     def update(self, new_wording):
     10         self.num += 1
     11         self.wording = new_wording
     12 
     13     def html(self):
     14         if self.num == 1:
     15             return self.wording
     16         else:
     17             return "%s (%d+)" % (self.wording, self.num)
     18 
     19 
     20 def numbers_are_irrelevant(txt):
     21     ## ? when do we replace numbers with NN ?
     22     ## By default is always, but
     23     ## if/when some categories of reasons choose to keep their numbers,
     24     ## then the function shall return False for such categories
     25     return True
     26 
     27 
     28 def aggregate_reason_fields(reasons_list):
     29     # each reason in the list may be a combination
     30     # of | - separated reasons.
     31     # expand into list
     32     reasons_txt = '|'.join(reasons_list)
     33     reasons = reasons_txt.split('|')
     34     reason_htable = {}
     35     for reason in reasons:
     36         reason_reduced = reason.strip()
     37         ## reduce whitespaces
     38         reason_reduced = re.sub(r"\s+"," ", reason_reduced)
     39 
     40         if reason_reduced == '':
     41             continue # ignore empty reasons
     42 
     43         if numbers_are_irrelevant(reason_reduced):
     44             # reduce numbers included into reason descriptor
     45             # by replacing them with generic NN
     46             reason_reduced = re.sub(r"\d+","NN", reason_reduced)
     47 
     48         if not reason_reduced in reason_htable:
     49             reason_htable[reason_reduced] = reason_counter(reason)
     50         else:
     51             ## reason_counter keeps original ( non reduced )
     52             ## reason if it occured once
     53             ## if reason occured more then once, reason_counter
     54             ## will keep it in reduced/generalized form
     55             reason_htable[reason_reduced].update(reason_reduced)
     56 
     57     generic_reasons = reason_htable.keys()
     58     generic_reasons.sort(key = (lambda k: reason_htable[k].num),
     59                          reverse = True)
     60     return map(lambda generic_reason: reason_htable[generic_reason].html(),
     61                             generic_reasons)
     62