Home | History | Annotate | Download | only in readwrite
      1 """
      2 **********
      3 Edge Lists
      4 **********
      5 Read and write NetworkX graphs as edge lists.
      6 
The edge list format is useful for graphs with nodes
      8 that can be meaningfully represented as strings.  With the edgelist
      9 format simple edge data can be stored but node or graph data is not.
     10 There is no way of representing isolated nodes unless the node has a
     11 self-loop edge.
     12 
     13 Format
     14 ------
     15 You can read or write three formats of edge lists with these functions.
     16 
     17 Node pairs with no data::
     18 
     19  1 2 
     20 
     21 Python dictionary as data::
     22 
     23  1 2 {'weight':7, 'color':'green'} 
     24 
     25 Arbitrary data::
     26 
     27  1 2 7 green
     28 """
     29 __author__ = """Aric Hagberg (hagberg (at] lanl.gov)\nDan Schult (dschult (at] colgate.edu)"""
     30 #    Copyright (C) 2004-2011 by 
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
     34 #    All rights reserved.
     35 #    BSD license.
     36 
     37 __all__ = ['generate_edgelist',
     38            'write_edgelist',
     39            'parse_edgelist',
     40            'read_edgelist',
     41            'read_weighted_edgelist',
     42            'write_weighted_edgelist']
     43 
     44 from networkx.utils import open_file, make_str
     45 import networkx as nx
     46 
     47 def generate_edgelist(G, delimiter=' ', data=True):
     48     """Generate a single line of the graph G in edge list format.
     49 
     50     Parameters
     51     ----------
     52     G : NetworkX graph
     53     
     54     delimiter : string, optional
     55        Separator for node labels 
     56 
     57     data : bool or list of keys       
     58        If False generate no edge data.  If True use a dictionary 
     59        representation of edge data.  If a list of keys use a list of data
     60        values corresponding to the keys.
     61 
     62     Returns
     63     -------
     64     lines : string
     65         Lines of data in adjlist format.
     66 
     67     Examples
     68     --------
     69     >>> G = nx.lollipop_graph(4, 3)
     70     >>> G[1][2]['weight'] = 3
     71     >>> G[3][4]['capacity'] = 12
     72     >>> for line in nx.generate_edgelist(G, data=False):
     73     ...     print(line)
     74     0 1
     75     0 2
     76     0 3
     77     1 2
     78     1 3
     79     2 3
     80     3 4
     81     4 5
     82     5 6
     83 
     84     >>> for line in nx.generate_edgelist(G):
     85     ...     print(line)
     86     0 1 {}
     87     0 2 {}
     88     0 3 {}
     89     1 2 {'weight': 3}
     90     1 3 {}
     91     2 3 {}
     92     3 4 {'capacity': 12}
     93     4 5 {}
     94     5 6 {}
     95 
     96     >>> for line in nx.generate_edgelist(G,data=['weight']):
     97     ...     print(line)
     98     0 1
     99     0 2
    100     0 3
    101     1 2 3
    102     1 3
    103     2 3
    104     3 4
    105     4 5
    106     5 6
    107 
    108     See Also
    109     --------
    110     write_adjlist, read_adjlist
    111     """
    112     if data is True or data is False:
    113         for e in G.edges(data=data):
    114             yield delimiter.join(map(make_str,e))
    115     else:
    116         for u,v,d in G.edges(data=True):
    117             e=[u,v]
    118             try:
    119                 e.extend(d[k] for k in data)
    120             except KeyError:
    121                 pass # missing data for this edge, should warn?
    122             yield delimiter.join(map(make_str,e))
    123 
    124 @open_file(1,mode='wb')
    125 def write_edgelist(G, path, comments="#", delimiter=' ', data=True,
    126                    encoding = 'utf-8'):
    127     """Write graph as a list of edges.
    128 
    129     Parameters
    130     ----------
    131     G : graph
    132        A NetworkX graph
    133     path : file or string
    134        File or filename to write. If a file is provided, it must be
    135        opened in 'wb' mode. Filenames ending in .gz or .bz2 will be compressed.
    136     comments : string, optional
    137        The character used to indicate the start of a comment 
    138     delimiter : string, optional
    139        The string used to separate values.  The default is whitespace.
    140     data : bool or list, optional
    141        If False write no edge data.
    142        If True write a string representation of the edge data dictionary..  
    143        If a list (or other iterable) is provided, write the  keys specified 
    144        in the list.
    145     encoding: string, optional
    146        Specify which encoding to use when writing file.
    147 
    148     Examples
    149     --------
    150     >>> G=nx.path_graph(4)
    151     >>> nx.write_edgelist(G, "test.edgelist")
    152     >>> G=nx.path_graph(4)
    153     >>> fh=open("test.edgelist",'wb')
    154     >>> nx.write_edgelist(G, fh)
    155     >>> nx.write_edgelist(G, "test.edgelist.gz")
    156     >>> nx.write_edgelist(G, "test.edgelist.gz", data=False)
    157 
    158     >>> G=nx.Graph()
    159     >>> G.add_edge(1,2,weight=7,color='red')
    160     >>> nx.write_edgelist(G,'test.edgelist',data=False)
    161     >>> nx.write_edgelist(G,'test.edgelist',data=['color'])
    162     >>> nx.write_edgelist(G,'test.edgelist',data=['color','weight'])
    163     
    164     See Also
    165     --------
    166     write_edgelist()
    167     write_weighted_edgelist()
    168     """
    169 
    170     for line in generate_edgelist(G, delimiter, data):
    171         line+='\n'
    172         path.write(line.encode(encoding))
    173 
    174 def parse_edgelist(lines, comments='#', delimiter=None,
    175                    create_using=None, nodetype=None, data=True):
    176     """Parse lines of an edge list representation of a graph.
    177 
    178 
    179     Returns
    180     -------
    181     G: NetworkX Graph
    182         The graph corresponding to lines
    183     data : bool or list of (label,type) tuples
    184        If False generate no edge data or if True use a dictionary 
    185        representation of edge data or a list tuples specifying dictionary 
    186        key names and types for edge data.
    187     create_using: NetworkX graph container, optional
    188        Use given NetworkX graph for holding nodes or edges.
    189     nodetype : Python type, optional
    190        Convert nodes to this type.  
    191     comments : string, optional
    192        Marker for comment lines
    193     delimiter : string, optional
    194        Separator for node labels 
    195     create_using: NetworkX graph container       
    196        Use given NetworkX graph for holding nodes or edges.
    197 
    198     Examples
    199     --------
    200     Edgelist with no data:
    201 
    202     >>> lines = ["1 2",
    203     ...          "2 3",
    204     ...          "3 4"]
    205     >>> G = nx.parse_edgelist(lines, nodetype = int)
    206     >>> G.nodes()
    207     [1, 2, 3, 4]
    208     >>> G.edges()
    209     [(1, 2), (2, 3), (3, 4)]
    210 
    211     Edgelist with data in Python dictionary representation:
    212     
    213     >>> lines = ["1 2 {'weight':3}",
    214     ...          "2 3 {'weight':27}",
    215     ...          "3 4 {'weight':3.0}"]
    216     >>> G = nx.parse_edgelist(lines, nodetype = int)
    217     >>> G.nodes()
    218     [1, 2, 3, 4]
    219     >>> G.edges(data = True)
    220     [(1, 2, {'weight': 3}), (2, 3, {'weight': 27}), (3, 4, {'weight': 3.0})]
    221 
    222     Edgelist with data in a list:
    223 
    224     >>> lines = ["1 2 3",
    225     ...          "2 3 27",
    226     ...          "3 4 3.0"]
    227     >>> G = nx.parse_edgelist(lines, nodetype = int, data=(('weight',float),))
    228     >>> G.nodes()
    229     [1, 2, 3, 4]
    230     >>> G.edges(data = True)
    231     [(1, 2, {'weight': 3.0}), (2, 3, {'weight': 27.0}), (3, 4, {'weight': 3.0})]
    232 
    233     See Also
    234     --------
    235     read_weighted_edgelist
    236 
    237     """
    238     from ast import literal_eval
    239     if create_using is None:
    240         G=nx.Graph()
    241     else:
    242         try:
    243             G=create_using
    244             G.clear()
    245         except:
    246             raise TypeError("create_using input is not a NetworkX graph type")
    247 
    248     for line in lines:
    249         p=line.find(comments)
    250         if p>=0:
    251             line = line[:p]
    252         if not len(line):
    253             continue
    254         # split line, should have 2 or more
    255         s=line.strip().split(delimiter)
    256         if len(s)<2:
    257             continue
    258         u=s.pop(0)
    259         v=s.pop(0)
    260         d=s
    261         if nodetype is not None:
    262             try:
    263                 u=nodetype(u)
    264                 v=nodetype(v)
    265             except:
    266                 raise TypeError("Failed to convert nodes %s,%s to type %s."
    267                                 %(u,v,nodetype))
    268 
    269         if len(d)==0 or data is False:
    270             # no data or data type specified
    271             edgedata={}
    272         elif data is True:
    273             # no edge types specified
    274             try: # try to evaluate as dictionary
    275                 edgedata=dict(literal_eval(' '.join(d)))
    276             except:
    277                 raise TypeError(
    278                     "Failed to convert edge data (%s) to dictionary."%(d))
    279         else:
    280             # convert edge data to dictionary with specified keys and type
    281             if len(d)!=len(data):
    282                 raise IndexError(
    283                     "Edge data %s and data_keys %s are not the same length"%
    284                     (d, data))
    285             edgedata={}
    286             for (edge_key,edge_type),edge_value in zip(data,d):
    287                 try:
    288                     edge_value=edge_type(edge_value)
    289                 except:
    290                     raise TypeError(
    291                         "Failed to convert %s data %s to type %s."
    292                         %(edge_key, edge_value, edge_type))
    293                 edgedata.update({edge_key:edge_value})
    294         G.add_edge(u, v, attr_dict=edgedata)
    295     return G
    296 
    297 @open_file(0,mode='rb')
    298 def read_edgelist(path, comments="#", delimiter=None, create_using=None, 
    299                   nodetype=None, data=True, edgetype=None, encoding='utf-8'):
    300     """Read a graph from a list of edges.
    301 
    302     Parameters
    303     ----------
    304     path : file or string
    305        File or filename to write. If a file is provided, it must be
    306        opened in 'rb' mode.
    307        Filenames ending in .gz or .bz2 will be uncompressed.
    308     comments : string, optional
    309        The character used to indicate the start of a comment. 
    310     delimiter : string, optional
    311        The string used to separate values.  The default is whitespace.
    312     create_using : Graph container, optional, 
    313        Use specified container to build graph.  The default is networkx.Graph,
    314        an undirected graph.
    315     nodetype : int, float, str, Python type, optional
    316        Convert node data from strings to specified type
    317     data : bool or list of (label,type) tuples
    318        Tuples specifying dictionary key names and types for edge data
    319     edgetype : int, float, str, Python type, optional OBSOLETE
    320        Convert edge data from strings to specified type and use as 'weight'
    321     encoding: string, optional
    322        Specify which encoding to use when reading file.
    323 
    324     Returns
    325     -------
    326     G : graph
    327        A networkx Graph or other type specified with create_using
    328 
    329     Examples
    330     --------
    331     >>> nx.write_edgelist(nx.path_graph(4), "test.edgelist")
    332     >>> G=nx.read_edgelist("test.edgelist")
    333 
    334     >>> fh=open("test.edgelist", 'rb')
    335     >>> G=nx.read_edgelist(fh)
    336     >>> fh.close()
    337 
    338     >>> G=nx.read_edgelist("test.edgelist", nodetype=int)
    339     >>> G=nx.read_edgelist("test.edgelist",create_using=nx.DiGraph())
    340 
    341     Edgelist with data in a list:
    342 
    343     >>> textline = '1 2 3'
    344     >>> fh = open('test.edgelist','w')
    345     >>> d = fh.write(textline)
    346     >>> fh.close()
    347     >>> G = nx.read_edgelist('test.edgelist', nodetype=int, data=(('weight',float),))
    348     >>> G.nodes()
    349     [1, 2]
    350     >>> G.edges(data = True)
    351     [(1, 2, {'weight': 3.0})]
    352 
    353     See parse_edgelist() for more examples of formatting.
    354 
    355     See Also
    356     --------
    357     parse_edgelist
    358 
    359     Notes
    360     -----
    361     Since nodes must be hashable, the function nodetype must return hashable
    362     types (e.g. int, float, str, frozenset - or tuples of those, etc.) 
    363     """
    364     lines = (line.decode(encoding) for line in path)
    365     return parse_edgelist(lines,comments=comments, delimiter=delimiter,
    366                           create_using=create_using, nodetype=nodetype,
    367                           data=data)
    368 
    369 
    370 def write_weighted_edgelist(G, path, comments="#", 
    371                             delimiter=' ', encoding='utf-8'):
    372     """Write graph G as a list of edges with numeric weights.
    373 
    374     Parameters
    375     ----------
    376     G : graph
    377        A NetworkX graph
    378     path : file or string
    379        File or filename to write. If a file is provided, it must be
    380        opened in 'wb' mode.
    381        Filenames ending in .gz or .bz2 will be compressed.
    382     comments : string, optional
    383        The character used to indicate the start of a comment 
    384     delimiter : string, optional
    385        The string used to separate values.  The default is whitespace.
    386     encoding: string, optional
    387        Specify which encoding to use when writing file.
    388 
    389     Examples
    390     --------
    391     >>> G=nx.Graph()
    392     >>> G.add_edge(1,2,weight=7)
    393     >>> nx.write_weighted_edgelist(G, 'test.weighted.edgelist')
    394 
    395     See Also
    396     --------
    397     read_edgelist()
    398     write_edgelist()
    399     write_weighted_edgelist()
    400 
    401     """
    402     write_edgelist(G,path, comments=comments, delimiter=delimiter,
    403                    data=('weight',), encoding = encoding)
    404     
    405 def read_weighted_edgelist(path, comments="#", delimiter=None,
    406                            create_using=None, nodetype=None, encoding='utf-8'):
    407 
    408     """Read a graph as list of edges with numeric weights.
    409 
    410     Parameters
    411     ----------
    412     path : file or string
    413        File or filename to write. If a file is provided, it must be
    414        opened in 'rb' mode.
    415        Filenames ending in .gz or .bz2 will be uncompressed.
    416     comments : string, optional
    417        The character used to indicate the start of a comment. 
    418     delimiter : string, optional
    419        The string used to separate values.  The default is whitespace.
    420     create_using : Graph container, optional, 
    421        Use specified container to build graph.  The default is networkx.Graph,
    422        an undirected graph.
    423     nodetype : int, float, str, Python type, optional
    424        Convert node data from strings to specified type
    425     encoding: string, optional
    426        Specify which encoding to use when reading file.
    427 
    428     Returns
    429     -------
    430     G : graph
    431        A networkx Graph or other type specified with create_using
    432 
    433     Notes
    434     -----
    435     Since nodes must be hashable, the function nodetype must return hashable
    436     types (e.g. int, float, str, frozenset - or tuples of those, etc.) 
    437 
    438     Example edgelist file format.
    439 
    440     With numeric edge data:: 
    441 
    442      # read with 
    443      # >>> G=nx.read_weighted_edgelist(fh)
    444      # source target data  
    445      a b 1
    446      a c 3.14159
    447      d e 42
    448     """
    449     return read_edgelist(path,
    450                          comments=comments,
    451                          delimiter=delimiter,
    452                          create_using=create_using,
    453                          nodetype=nodetype,
    454                          data=(('weight',float),),
    455                          encoding = encoding
    456                          )
    457 
    458 
    459 # fixture for nose tests
    460 def teardown_module(module):
    461     import os
    462     os.unlink('test.edgelist')
    463     os.unlink('test.edgelist.gz')
    464     os.unlink('test.weighted.edgelist')
    465