Home | History | Annotate | Download | only in mem_tests
      1 #! /usr/bin/python
      2 """Cleans output from other scripts to eliminate duplicates.
      3 
      4 When frequently sampling data, we see that records occasionally will contain
      5 the same timestamp (due to perf recording twice in the same second).
      6 
      7 This removes all of the duplicate timestamps for every record. Order with
      8 respect to timestamps is not preserved. Also, the assumption is that the log
      9 file is a csv with the first value in each row being the time in seconds from a
     10 standard time.
     11 
     12 """
     13 
     14 import argparse
     15 
     16 parser = argparse.ArgumentParser()
     17 parser.add_argument('filename')
     18 args = parser.parse_args()
     19 
     20 my_file = open(args.filename)
     21 output_file = open('clean2.csv', 'a')
     22 dictionary = dict()
     23 
     24 for line in my_file:
     25   new_time = int(line.split(',')[0])
     26   dictionary[new_time] = line
     27 
     28 for key in dictionary.keys():
     29   output_file.write(dictionary[key])
     30