#!/usr/bin/python
"""Cleans output from other scripts to eliminate duplicate timestamps.

When sampling data frequently, records occasionally contain the same
timestamp (because perf recorded twice within the same second).

This script keeps only one record per timestamp. Output order with respect
to timestamps is not guaranteed. It assumes the log file is a CSV whose
first value in each row is the time in seconds from a standard reference
time.
"""

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()

# Keep the last record seen for each timestamp; using the timestamp as the
# dict key collapses duplicates.
records_by_time = dict()

with open(args.filename) as my_file:
    for line in my_file:
        timestamp = int(line.split(',')[0])
        records_by_time[timestamp] = line

# Append the de-duplicated records to the output file.
with open('clean2.csv', 'a') as output_file:
    for line in records_by_time.values():
        output_file.write(line)
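
# A minimal usage sketch. The script name (clean_duplicates.py), the input
# file name, and the sample rows below are hypothetical, invented only to
# illustrate the timestamp-in-the-first-column CSV format described above.
#
#   $ python clean_duplicates.py perf_log.csv
#
#   perf_log.csv:
#     1400000000,cycles,12345
#     1400000000,cycles,12399   <- same timestamp: only the last row is kept
#     1400000001,cycles,12410
#
#   clean2.csv (appended; row order is not guaranteed):
#     1400000000,cycles,12399
#     1400000001,cycles,12410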