#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# A Python library to read and store procfs (/proc) information on Linux.
#
# Each information storage class in this file stores original data as original
# as reasonably possible. Translation is done when requested. It is to make it
# always possible to probe the original data.


import collections
import logging
import os
import re
import struct
import sys


class _NullHandler(logging.Handler):
  """A logging handler that discards every record."""

  def emit(self, record):
    pass


_LOGGER = logging.getLogger('procfs')
_LOGGER.addHandler(_NullHandler())


class ProcStat(object):
  """Reads and stores information in /proc/pid/stat."""

  # Every fragment is a raw string so that '\(' is a regex escape and not an
  # (invalid) Python string escape.  PRIORITY and NICE accept a leading '-'
  # because both can be negative, and STATE accepts the additional state
  # letters (X, K, P, I) reported by newer kernels.
  _PATTERN = re.compile(r'^'
                        r'(?P<PID>-?[0-9]+) '
                        r'\((?P<COMM>.+)\) '
                        r'(?P<STATE>[RSDZTWXKPI]) '
                        r'(?P<PPID>-?[0-9]+) '
                        r'(?P<PGRP>-?[0-9]+) '
                        r'(?P<SESSION>-?[0-9]+) '
                        r'(?P<TTY_NR>-?[0-9]+) '
                        r'(?P<TPGID>-?[0-9]+) '
                        r'(?P<FLAGS>[0-9]+) '
                        r'(?P<MINFIT>[0-9]+) '
                        r'(?P<CMINFIT>[0-9]+) '
                        r'(?P<MAJFIT>[0-9]+) '
                        r'(?P<CMAJFIT>[0-9]+) '
                        r'(?P<UTIME>[0-9]+) '
                        r'(?P<STIME>[0-9]+) '
                        r'(?P<CUTIME>[0-9]+) '
                        r'(?P<CSTIME>[0-9]+) '
                        r'(?P<PRIORITY>-?[0-9]+) '
                        r'(?P<NICE>-?[0-9]+) '
                        r'(?P<NUM_THREADS>[0-9]+) '
                        r'(?P<ITREALVALUE>[0-9]+) '
                        r'(?P<STARTTIME>[0-9]+) '
                        r'(?P<VSIZE>[0-9]+) '
                        r'(?P<RSS>[0-9]+) '
                        r'(?P<RSSLIM>[0-9]+) '
                        r'(?P<STARTCODE>[0-9]+) '
                        r'(?P<ENDCODE>[0-9]+) '
                        r'(?P<STARTSTACK>[0-9]+) '
                        r'(?P<KSTKESP>[0-9]+) '
                        r'(?P<KSTKEIP>[0-9]+) '
                        r'(?P<SIGNAL>[0-9]+) '
                        r'(?P<BLOCKED>[0-9]+) '
                        r'(?P<SIGIGNORE>[0-9]+) '
                        r'(?P<SIGCATCH>[0-9]+) '
                        r'(?P<WCHAN>[0-9]+) '
                        r'(?P<NSWAP>[0-9]+) '
                        r'(?P<CNSWAP>[0-9]+) '
                        r'(?P<EXIT_SIGNAL>[0-9]+) '
                        r'(?P<PROCESSOR>[0-9]+) '
                        r'(?P<RT_PRIORITY>[0-9]+) '
                        r'(?P<POLICY>[0-9]+) '
                        r'(?P<DELAYACCT_BLKIO_TICKS>[0-9]+) '
                        r'(?P<GUEST_TIME>[0-9]+) '
                        r'(?P<CGUEST_TIME>[0-9]+)', re.IGNORECASE)

  def __init__(self, raw, pid, vsize, rss):
    """Stores the raw lines and the unconverted PID, VSIZE and RSS fields."""
    self._raw = raw
    self._pid = pid
    self._vsize = vsize
    self._rss = rss

  @staticmethod
  def load_file(stat_f):
    """Parses an opened stat file.

    Args:
        stat_f: A file-like object providing readlines().

    Returns:
        A ProcStat, or None if the file is empty or its first line does not
        match the expected format.
    """
    raw = stat_f.readlines()
    stat = ProcStat._PATTERN.match(raw[0]) if raw else None
    if stat is None:
      _LOGGER.warning('Unknown /proc/pid/stat format.')
      return None
    fields = stat.groupdict()
    return ProcStat(raw,
                    fields.get('PID'),
                    fields.get('VSIZE'),
                    fields.get('RSS'))

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/stat; returns None if it cannot be read or parsed."""
    try:
      with open(os.path.join('/proc', str(pid), 'stat'), 'r') as stat_f:
        return ProcStat.load_file(stat_f)
    except (IOError, OSError):
      return None

  @property
  def raw(self):
    return self._raw

  @property
  def pid(self):
    return int(self._pid)

  @property
  def vsize(self):
    return int(self._vsize)

  @property
  def rss(self):
    return int(self._rss)


class ProcStatm(object):
  """Reads and stores information in /proc/pid/statm."""
  _PATTERN = re.compile(r'^'
                        r'(?P<SIZE>[0-9]+) '
                        r'(?P<RESIDENT>[0-9]+) '
                        r'(?P<SHARE>[0-9]+) '
                        r'(?P<TEXT>[0-9]+) '
                        r'(?P<LIB>[0-9]+) '
                        r'(?P<DATA>[0-9]+) '
                        r'(?P<DT>[0-9]+)', re.IGNORECASE)

  def __init__(self, raw, size, resident, share, text, lib, data, dt):
    """Stores the raw lines and the seven unconverted statm fields."""
    self._raw = raw
    self._size = size
    self._resident = resident
    self._share = share
    self._text = text
    self._lib = lib
    self._data = data
    self._dt = dt

  @staticmethod
  def load_file(statm_f):
    """Parses an opened statm file.

    Args:
        statm_f: A file-like object providing readlines().

    Returns:
        A ProcStatm, or None if reading fails, the file is empty, or its
        first line does not match the expected format.
    """
    try:
      raw = statm_f.readlines()
    except (IOError, OSError):
      return None
    statm = ProcStatm._PATTERN.match(raw[0]) if raw else None
    if statm is None:
      _LOGGER.warning('Unknown /proc/pid/statm format.')
      return None
    fields = statm.groupdict()
    return ProcStatm(raw,
                     fields.get('SIZE'),
                     fields.get('RESIDENT'),
                     fields.get('SHARE'),
                     fields.get('TEXT'),
                     fields.get('LIB'),
                     fields.get('DATA'),
                     fields.get('DT'))

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/statm; returns None if it cannot be read or parsed."""
    try:
      with open(os.path.join('/proc', str(pid), 'statm'), 'r') as statm_f:
        return ProcStatm.load_file(statm_f)
    except (IOError, OSError):
      return None

  @property
  def raw(self):
    return self._raw

  @property
  def size(self):
    return int(self._size)

  @property
  def resident(self):
    return int(self._resident)

  @property
  def share(self):
    return int(self._share)

  @property
  def text(self):
    return int(self._text)

  @property
  def lib(self):
    return int(self._lib)

  @property
  def data(self):
    return int(self._data)

  @property
  def dt(self):
    return int(self._dt)


class ProcStatus(object):
  """Reads and stores information in /proc/pid/status."""
  _PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  def __init__(self, raw, dct):
    """Stores the raw lines and selected values from the name->value dict.

    Fields that are absent from |dct| are stored as None.
    """
    self._raw = raw
    self._pid = dct.get('Pid')
    self._name = dct.get('Name')
    self._vm_peak = dct.get('VmPeak')
    self._vm_size = dct.get('VmSize')
    self._vm_lck = dct.get('VmLck')
    self._vm_pin = dct.get('VmPin')
    self._vm_hwm = dct.get('VmHWM')
    self._vm_rss = dct.get('VmRSS')
    self._vm_data = dct.get('VmData')
    self._vm_stack = dct.get('VmStk')
    self._vm_exe = dct.get('VmExe')
    self._vm_lib = dct.get('VmLib')
    self._vm_pte = dct.get('VmPTE')
    self._vm_swap = dct.get('VmSwap')

  @staticmethod
  def load_file(status_f):
    """Parses an opened status file into a ProcStatus.

    Args:
        status_f: A file-like object providing readlines().

    Raises:
        SyntaxError: A line does not look like "Name:<whitespace>value".
    """
    raw = status_f.readlines()
    dct = {}
    for line in raw:
      status_match = ProcStatus._PATTERN.match(line)
      if not status_match:
        raise SyntaxError('Unknown /proc/pid/status format.')
      dct[status_match.group('NAME')] = status_match.group('VALUE')
    return ProcStatus(raw, dct)

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/status.  I/O errors from open() propagate."""
    with open(os.path.join('/proc', str(pid), 'status'), 'r') as status_f:
      return ProcStatus.load_file(status_f)

  @staticmethod
  def _kilobytes(value, field):
    """Returns the integer part of a "<number> kB" value.

    Raises:
        ValueError: |value| is missing (None) or does not end with 'kB'.
    """
    if value is not None and value.endswith('kB'):
      return int(value.split()[0])
    raise ValueError('%s is not in kB.' % field)

  @property
  def raw(self):
    return self._raw

  @property
  def pid(self):
    return int(self._pid)

  @property
  def vm_peak(self):
    """Returns a high-water (peak) virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_peak, 'VmPeak')

  @property
  def vm_size(self):
    """Returns a virtual memory size in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_size, 'VmSize')

  @property
  def vm_hwm(self):
    """Returns a high-water (peak) resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_hwm, 'VmHWM')

  @property
  def vm_rss(self):
    """Returns a resident set size (RSS) in kilo-bytes."""
    return ProcStatus._kilobytes(self._vm_rss, 'VmRSS')


class ProcMapsEntry(object):
  """A class representing one line in /proc/pid/maps."""

  def __init__(
      self, begin, end, readable, writable, executable, private, offset,
      major, minor, inode, name):
    self.begin = begin
    self.end = end
    self.readable = readable
    self.writable = writable
    self.executable = executable
    self.private = private
    self.offset = offset
    self.major = major
    self.minor = minor
    self.inode = inode
    self.name = name

  def as_dict(self):
    """Returns all fields of the entry as a plain dict."""
    return {
        'begin': self.begin,
        'end': self.end,
        'readable': self.readable,
        'writable': self.writable,
        'executable': self.executable,
        'private': self.private,
        'offset': self.offset,
        'major': self.major,
        'minor': self.minor,
        'inode': self.inode,
        'name': self.name,
    }


class ProcMaps(object):
  """Reads and stores information in /proc/pid/maps."""

  MAPS_PATTERN = re.compile(
      r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
      r'(\d+)\s*(.*)$', re.IGNORECASE)

  EXECUTABLE_PATTERN = re.compile(
      r'\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?')

  def __init__(self):
    # Begin addresses of all entries; sorted lazily before iteration.
    self._sorted_indexes = []
    # Maps a begin address to its ProcMapsEntry.
    self._dictionary = {}
    self._sorted = True

  def _sort_if_needed(self):
    """Sorts the begin-address index if entries were appended out of order."""
    if not self._sorted:
      self._sorted_indexes.sort()
      self._sorted = True

  def iter(self, condition):
    """Yields entries in address order for which |condition| holds.

    Args:
        condition: A callable taking a ProcMapsEntry, or a false value to
            yield every entry.
    """
    for entry in self:
      if not condition or condition(entry):
        yield entry

  def __iter__(self):
    """Yields every entry in ascending order of begin address."""
    self._sort_if_needed()
    for index in self._sorted_indexes:
      yield self._dictionary[index]

  @staticmethod
  def load_file(maps_f):
    """Builds a ProcMaps from an iterable of maps lines."""
    table = ProcMaps()
    for line in maps_f:
      table.append_line(line)
    return table

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/maps; returns None if it cannot be read."""
    try:
      with open(os.path.join('/proc', str(pid), 'maps'), 'r') as maps_f:
        return ProcMaps.load_file(maps_f)
    except (IOError, OSError):
      return None

  def append_line(self, line):
    """Parses |line| and stores it; returns the entry or None if unparsable."""
    entry = self.parse_line(line)
    if entry:
      self._append_entry(entry)
    return entry

  @staticmethod
  def parse_line(line):
    """Returns a ProcMapsEntry for a maps line, or None if it doesn't match."""
    matched = ProcMaps.MAPS_PATTERN.match(line)
    if not matched:
      return None
    return ProcMapsEntry(  # pylint: disable=W0212
        int(matched.group(1), 16),  # begin
        int(matched.group(2), 16),  # end
        matched.group(3),           # readable
        matched.group(4),           # writable
        matched.group(5),           # executable
        matched.group(6),           # private
        int(matched.group(7), 16),  # offset
        matched.group(8),           # major
        matched.group(9),           # minor
        int(matched.group(10), 10), # inode
        matched.group(11)           # name
        )

  @staticmethod
  def constants(entry):
    """True if the entry is neither writable nor executable."""
    return entry.writable == '-' and entry.executable == '-'

  @staticmethod
  def executable(entry):
    """True if the entry is executable."""
    return entry.executable == 'x'

  @staticmethod
  def executable_and_constants(entry):
    """True if the entry satisfies constants() or executable()."""
    return ((entry.writable == '-' and entry.executable == '-') or
            entry.executable == 'x')

  def _append_entry(self, entry):
    # Remember that a sort is needed if this entry arrives out of order.
    if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
      self._sorted = False
    self._sorted_indexes.append(entry.begin)
    self._dictionary[entry.begin] = entry


class ProcSmaps(object):
  """Reads and stores information in /proc/pid/smaps."""
  _SMAPS_PATTERN = re.compile(r'^(?P<NAME>[A-Za-z0-9_]+):\s+(?P<VALUE>.*)')

  class VMA(object):
    """Per-mapping smaps fields, stored as the original value strings."""

    # Maps an smaps field name to the attribute that stores its raw value.
    _ATTRIBUTES = {
        'Size': '_size',
        'Rss': '_rss',
        'Pss': '_pss',
        'Referenced': '_referenced',
        'Private_Clean': '_private_clean',
        'Shared_Clean': '_shared_clean',
        'KernelPageSize': '_kernel_page_size',
        'MMUPageSize': '_mmu_page_size',
    }

    def __init__(self):
      # Integer defaults; replaced by raw strings such as '4 kB' by append().
      self._size = 0
      self._rss = 0
      self._pss = 0

    def append(self, name, value):
      """Stores |value| if |name| is one of the known smaps fields."""
      attribute = ProcSmaps.VMA._ATTRIBUTES.get(name)
      if attribute is not None:
        setattr(self, attribute, value)

    @staticmethod
    def _to_int(value):
      """Converts a raw value ('12 kB', '12' or an int) to an int."""
      # The initial defaults are ints, which have no endswith().
      if hasattr(value, 'endswith') and value.endswith('kB'):
        return int(value.split()[0])
      return int(value)

    @property
    def size(self):
      return ProcSmaps.VMA._to_int(self._size)

    @property
    def rss(self):
      return ProcSmaps.VMA._to_int(self._rss)

    @property
    def pss(self):
      return ProcSmaps.VMA._to_int(self._pss)

  def __init__(self, raw, total_dct, maps, vma_internals):
    """Stores the raw lines, per-field totals, the maps and per-VMA data.

    Totals missing from |total_dct| default to 0.
    """
    self._raw = raw
    self._size = total_dct.get('Size', 0)
    self._rss = total_dct.get('Rss', 0)
    self._pss = total_dct.get('Pss', 0)
    self._referenced = total_dct.get('Referenced', 0)
    self._shared_clean = total_dct.get('Shared_Clean', 0)
    self._private_clean = total_dct.get('Private_Clean', 0)
    self._kernel_page_size = total_dct.get('KernelPageSize', 0)
    self._mmu_page_size = total_dct.get('MMUPageSize', 0)
    self._maps = maps
    self._vma_internals = vma_internals

  @staticmethod
  def load_file(smaps_f):
    """Parses smaps content into a ProcSmaps.

    Args:
        smaps_f: An iterable of lines (e.g. an opened smaps file).

    Returns:
        A ProcSmaps holding per-VMA values and totals summed over all VMAs.
    """
    raw = list(smaps_f)

    vma = None
    vma_internals = collections.OrderedDict()
    total_dct = collections.defaultdict(int)
    maps = ProcMaps()
    for line in raw:
      if ProcMaps.MAPS_PATTERN.match(line):
        # A mapping header line starts a new VMA.
        vma = maps.append_line(line.strip())
        vma_internals[vma] = ProcSmaps.VMA()
        continue
      smaps_match = ProcSmaps._SMAPS_PATTERN.match(line)
      if not smaps_match or vma is None:
        # Not a "Name: value" line, or a field before any mapping header.
        continue
      name = smaps_match.group('NAME')
      value = smaps_match.group('VALUE')
      vma_internals[vma].append(name, value)
      # Only numeric fields are totalled; e.g. "VmFlags: rd ex mr" is not.
      tokens = value.split()
      if tokens and tokens[0].isdigit():
        total_dct[name] += int(tokens[0])

    return ProcSmaps(raw, total_dct, maps, vma_internals)

  @staticmethod
  def load(pid):
    """Loads /proc/<pid>/smaps.  I/O errors from open() propagate."""
    with open(os.path.join('/proc', str(pid), 'smaps'), 'r') as smaps_f:
      return ProcSmaps.load_file(smaps_f)

  @property
  def size(self):
    return self._size

  @property
  def rss(self):
    return self._rss

  @property
  def referenced(self):
    return self._referenced

  @property
  def pss(self):
    return self._pss

  @property
  def private_clean(self):
    return self._private_clean

  @property
  def shared_clean(self):
    return self._shared_clean

  @property
  def kernel_page_size(self):
    return self._kernel_page_size

  @property
  def mmu_page_size(self):
    return self._mmu_page_size

  @property
  def vma_internals(self):
    return self._vma_internals


class ProcPagemap(object):
  """Reads and stores partial information in /proc/pid/pagemap.

  It picks up virtual addresses to read based on ProcMaps (/proc/pid/maps).
  See https://www.kernel.org/doc/Documentation/vm/pagemap.txt for details.
  """
  _BYTES_PER_PAGEMAP_VALUE = 8
  _BYTES_PER_OS_PAGE = 4096
  # Floor division keeps this an int on both Python 2 and Python 3.
  _VIRTUAL_TO_PAGEMAP_OFFSET = _BYTES_PER_OS_PAGE // _BYTES_PER_PAGEMAP_VALUE

  _MASK_PRESENT = 1 << 63
  _MASK_SWAPPED = 1 << 62
  _MASK_FILEPAGE_OR_SHAREDANON = 1 << 61
  _MASK_SOFTDIRTY = 1 << 55
  _MASK_PFN = (1 << 55) - 1

  class VMA(object):
    """Pagemap statistics of a single mapping."""

    def __init__(self, vsize, present, swapped, pageframes):
      self._vsize = vsize
      self._present = present
      self._swapped = swapped
      self._pageframes = pageframes

    @property
    def vsize(self):
      return int(self._vsize)

    @property
    def present(self):
      return int(self._present)

    @property
    def swapped(self):
      return int(self._swapped)

    @property
    def pageframes(self):
      return self._pageframes

  def __init__(self, vsize, present, swapped, vma_internals, in_process_dup):
    self._vsize = vsize
    self._present = present
    self._swapped = swapped
    self._vma_internals = vma_internals
    self._in_process_dup = in_process_dup

  @staticmethod
  def load(pid, maps):
    """Reads the pagemap entries of every mapping in |maps|.

    Args:
        pid: The process ID whose pagemap is read.
        maps: An iterable of entries with begin/end addresses (a ProcMaps),
            or None.

    Returns:
        A ProcPagemap, or None if |maps| is None or the pagemap file cannot
        be opened, read or closed.
    """
    if maps is None:
      return None
    try:
      pagemap_fd = os.open(
          os.path.join('/proc', str(pid), 'pagemap'), os.O_RDONLY)
    except (IOError, OSError):
      return None
    try:
      result = ProcPagemap._read_vmas(pagemap_fd, pid, maps)
    except (IOError, OSError):
      result = None
    finally:
      # Always release the descriptor, including on read failures.
      try:
        os.close(pagemap_fd)
      except OSError:
        result = None
    return result

  @staticmethod
  def _read_vmas(pagemap_fd, pid, maps):
    """Accumulates pagemap statistics for |maps| from an opened descriptor.

    Raises:
        OSError: Seeking or reading the descriptor failed.
    """
    value_size = ProcPagemap._BYTES_PER_PAGEMAP_VALUE
    page_size = ProcPagemap._BYTES_PER_OS_PAGE

    total_present = 0
    total_swapped = 0
    total_vsize = 0
    in_process_dup = 0
    vma_internals = collections.OrderedDict()
    process_pageframe_set = set()

    for vma in maps:
      present = 0
      swapped = 0
      vsize = 0
      pageframes = collections.defaultdict(int)
      begin_offset = ProcPagemap._offset(vma.begin)
      chunk_size = ProcPagemap._offset(vma.end) - begin_offset
      os.lseek(pagemap_fd, begin_offset, os.SEEK_SET)
      buf = os.read(pagemap_fd, chunk_size)
      if len(buf) < chunk_size:
        _LOGGER.warning('Failed to read pagemap at 0x%x in %d.',
                        vma.begin, pid)
      # Decode only whole 64-bit values; a short read may end mid-value.
      value_count = len(buf) // value_size
      pagemap_values = struct.unpack(
          '=%dQ' % value_count, buf[:value_count * value_size])
      for pagemap_value in pagemap_values:
        vsize += page_size
        if pagemap_value & ProcPagemap._MASK_PRESENT:
          pfn = pagemap_value & ProcPagemap._MASK_PFN
          # A frame already seen anywhere in this process is a duplicate.
          if pfn in process_pageframe_set:
            in_process_dup += page_size
          else:
            process_pageframe_set.add(pfn)
          # Within one VMA each distinct frame counts as present once.
          if pfn not in pageframes:
            present += page_size
          pageframes[pfn] += 1
        if pagemap_value & ProcPagemap._MASK_SWAPPED:
          swapped += page_size
      vma_internals[vma] = ProcPagemap.VMA(vsize, present, swapped, pageframes)
      total_present += present
      total_swapped += swapped
      total_vsize += vsize

    return ProcPagemap(total_vsize, total_present, total_swapped,
                       vma_internals, in_process_dup)

  @staticmethod
  def _offset(virtual_address):
    """Returns the byte offset in pagemap for |virtual_address|."""
    return virtual_address // ProcPagemap._VIRTUAL_TO_PAGEMAP_OFFSET

  @property
  def vsize(self):
    return int(self._vsize)

  @property
  def present(self):
    return int(self._present)

  @property
  def swapped(self):
    return int(self._swapped)

  @property
  def in_process_dup(self):
    return int(self._in_process_dup)

  @property
  def vma_internals(self):
    return self._vma_internals


class _ProcessMemory(object):
  """Aggregates process memory information from /proc for manual testing."""
  def __init__(self, pid):
    self._pid = pid
    self._maps = None
    self._pagemap = None
    self._stat = None
    self._status = None
    self._statm = None
    self._smaps = []

  def _read(self, proc_file):
    """Returns all lines of /proc/<pid>/<proc_file>."""
    with open(os.path.join('/proc', str(self._pid), proc_file), 'r') as proc_f:
      return proc_f.readlines()

  def read_all(self):
    """Reads every supported /proc file; maps is read before pagemap."""
    self.read_stat()
    self.read_statm()
    self.read_status()
    self.read_smaps()
    self.read_maps()
    self.read_pagemap(self._maps)

  def read_maps(self):
    self._maps = ProcMaps.load(self._pid)

  def read_pagemap(self, maps):
    self._pagemap = ProcPagemap.load(self._pid, maps)

  def read_smaps(self):
    self._smaps = ProcSmaps.load(self._pid)

  def read_stat(self):
    self._stat = ProcStat.load(self._pid)

  def read_statm(self):
    self._statm = ProcStatm.load(self._pid)

  def read_status(self):
    self._status = ProcStatus.load(self._pid)

  @property
  def pid(self):
    return self._pid

  @property
  def maps(self):
    return self._maps

  @property
  def pagemap(self):
    return self._pagemap

  @property
  def smaps(self):
    return self._smaps

  @property
  def stat(self):
    return self._stat

  @property
  def statm(self):
    return self._statm

  @property
  def status(self):
    return self._status


def main(argv):
  """The main function for manual testing."""
  _LOGGER.setLevel(logging.WARNING)
  handler = logging.StreamHandler()
  handler.setLevel(logging.WARNING)
  handler.setFormatter(logging.Formatter(
      '%(asctime)s:%(name)s:%(levelname)s:%(message)s'))
  _LOGGER.addHandler(handler)

  pids = []
  for arg in argv[1:]:
    try:
      pid = int(arg)
    except ValueError:
      raise SyntaxError("%s is not an integer." % arg)
    else:
      pids.append(pid)

  procs = {}
  for pid in pids:
    procs[pid] = _ProcessMemory(pid)
    procs[pid].read_all()

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('=== PID: %d ===' % pid)

    print('   stat: %d' % procs[pid].stat.vsize)
    print('  statm: %d' % (procs[pid].statm.size * 4096))
    print(' status: %d (Peak:%d)' % (procs[pid].status.vm_size * 1024,
                                     procs[pid].status.vm_peak * 1024))
    print('  smaps: %d' % (procs[pid].smaps.size * 1024))
    print('pagemap: %d' % procs[pid].pagemap.vsize)
    print('   stat: %d' % (procs[pid].stat.rss * 4096))
    print('  statm: %d' % (procs[pid].statm.resident * 4096))
    print(' status: %d (Peak:%d)' % (procs[pid].status.vm_rss * 1024,
                                     procs[pid].status.vm_hwm * 1024))
    print('  smaps: %d' % (procs[pid].smaps.rss * 1024))
    print('pagemap: %d' % procs[pid].pagemap.present)

  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))