Home | History | Annotate | Download | only in proto
      1 /*
      2  * Copyright 2014 Google Inc. All rights reserved.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *   http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 syntax = "proto3";
     18 
     19 package kythe.proto;
     20 option java_package = "com.google.devtools.kythe.proto";
     21 
     22 // Persistent storage server for Kythe analysis data.
     23 // See: http://www.kythe.io/docs/kythe-storage.html
     24 service GraphStore {
     25   // Read responds with all Entry messages that match the given ReadRequest.
     26   // The Read operation should be implemented with time complexity proportional
     27   // to the size of the return set.
     28   rpc Read(ReadRequest) returns (stream Entry) {}
     29 
     30   // Scan responds with all Entry messages matching the given ScanRequest.  If a
     31   // ScanRequest field is empty, any entry value for that field matches and will
     32   // be returned.  Scan is similar to Read, but with no time complexity
     33   // restrictions.
     34   rpc Scan(ScanRequest) returns (stream Entry) {}
     35 
     36   // Write atomically inserts or updates a collection of entries into the store.
     37   // Each update is a tuple of the form (kind, target, fact, value).  For each
     38   // such update, an entry (source, kind, target, fact, value) is written into
     39   // the store, replacing any existing entry (source, kind, target, fact,
     40   // value') that may exist.  Note that this operation cannot delete any data
     41   // from the store; entries are only ever inserted or updated.  Apart from
     42   // acting atomically, no other constraints are placed on the implementation.
     43   rpc Write(WriteRequest) returns (WriteReply) {}
     44 }
     45 
     46 // ShardedGraphStores can be arbitrarily sharded for parallel processing.
     47 // Depending on the implementation, these methods may not return consistent
     48 // results when the store is being written to.  Shards are indexed from 0.
     49 service ShardedGraphStore {
     50   // Count returns the number of entries in the given shard.
     51   rpc Count(CountRequest) returns (CountReply) {}
     52 
     53   // Shard responds with each Entry in the given shard.
     54   rpc Shard(ShardRequest) returns (stream Entry) {}
     55 }
     56 
     57 // VName is a proto representation of a vector name.
     58 //
     59 // Rules:
     60 //  - All fields must be optional, and must have default values.
     61 //  - No field may ever be removed.  If a field is deprecated, it may be
     62 //    renamed or marked with a comment, but must not be deleted.
     63 //  - New fields are always added to the end of the message.
     64 //  - All fields must be strings, not messages.
     65 //
     66 // One of the key principles is that we want as few fields as possible in a
     67 // vname.  We're not trying to exhaust the possible dimensions along which a
     68 // name could vary, but to find a minimal basis. Be conservative.
     69 message VName {
     70   // A language-specific signature assigned by the analyzer.
     71   // e.g., "com.google.common.collect.Lists.newLinkedList<#1>()"
     72   string signature = 1;
     73 
     74   // The corpus this name belongs to.
     75   // e.g., "kythe", "chromium", "github.com/creachadair/imath", "aosp"
     76   // The corpus label "kythe" is reserved for internal use.
     77   string corpus = 2;
     78 
     79   // A corpus-specific root label, designating a subordinate collection within
     80   // the corpus.  If a corpus stores files in unrelated directory structures,
     81   // for example, the root can be used to distinguish them.  Or, of a corpus
     82   // incorporates subprojects, the root can be a project ID that it governs.
     83   // This may also be used to distinguish virtual subgroups of a corpus such as
     84   // generated files.
     85   string root = 3;
     86 
     87   // A path-structured label describing the location of this object relative to
     88   // the corpus and the root.  For code, this will generally be the relative
     89   // path to the file containing the code, e.g., "storage/service.go" in kythe.
     90   //
     91   // However, this need not be a true file path; virtual objects like figments
     92   // can assign an ad-hoc abstract ID, or omit it entirely.
     93   //
     94   // Examples:
     95   //   "devools/kythe/platform/go/datastore.go" (a file)
     96   //   "type/cpp/void.cc" (a type figment)
     97   string path = 4;
     98 
     99   // The language this name belongs to.
    100   // e.g., "c++", "python", "elisp", "haskell", "java"
    101   //
    102   // The schema will define specific labels for each supported language, so we
    103   // don't wind up with a confusion of names like "cxx", "cpp", "C++", etc.
    104   // Prototype: Official language name converted to lowercase.  If a version
    105   // number is necessary, include it, e.g., "python3".
    106   string language = 5;
    107 
    108   // Other fields we may need in the future, but do not currently use:
    109   // branch -- a branch name within the corpus depot, e.g., "gslb_branch".
    110   // client -- a source-control client ID, e.g., "sergey:googlex:8:citc".
    111 
    112   // Note: We have intentionally NOT included a revision or timestamp here.
    113   // Time should be recorded as facts belonging to the appropriate Nodes and
    114   // Edges.  Having records of when something existed may be important, but time
    115   // is not a good axis for a name -- a name should say "what" something is, not
    116   // "when".  So we will store timestamps, revisions, and other markers of this
    117   // kind as facts inside the graph.
    118 }
    119 
    120 message VNameMask {
    121   bool signature = 1;
    122   bool corpus = 2;
    123   bool root = 3;
    124   bool path = 4;
    125   bool language = 5;
    126 }
    127 
    128 // An Entry associates a fact with a graph object (node or edge).  This is the
    129 // the primary unit of storage.
    130 message Entry {
    131   VName source = 1;
    132 
    133   // The following two fields must either be both empty, or both nonempty.
    134   string edge_kind = 2;
    135   VName target = 3;
    136 
    137   // The grammar for fact_name:
    138   //  name   = "/" | 1*path
    139   //  path   = "/" word
    140   //  word   = 1*{LETTER|DIGIT|PUNCT}
    141   //  LETTER = [A-Za-z]
    142   //  DIGIT  = [0-9]
    143   //  PUNCT  = [-.@#$%&_+:()]
    144   string fact_name = 4;
    145   bytes  fact_value = 5;
    146 }
    147 
    148 // A collection of Entry instances.
    149 message Entries {
    150   repeated Entry entries = 1;
    151 }
    152 
    153 // Request for a stream of Entry objects from a GraphStore.  Read operations
    154 // should be implemented with time complexity proportional to the size of the
    155 // return set.
    156 message ReadRequest {
    157   // Return entries having this source VName, which may not be empty.
    158   VName source = 1;
    159 
    160   // Return entries having this edge kind; if empty, only entries with an empty
    161   // edge kind are returned; if "*", entries of any edge kind are returned.
    162   string edge_kind = 2;
    163 }
    164 
    165 // Request to write Entry objects to a GraphStore
    166 message WriteRequest {
    167   message Update {
    168     string edge_kind = 1;
    169     VName target = 2;
    170     string fact_name = 3;
    171     bytes fact_value = 4;
    172   }
    173 
    174   VName source = 1;
    175   repeated Update update = 2;
    176 }
    177 
    178 // Response to a WriteRequest
    179 message WriteReply {}
    180 
    181 // Request for a stream of Entry objects resulting from a full scan of a
    182 // GraphStore.
    183 message ScanRequest {
    184   // Return entries having this target VName; if empty, any target field is
    185   // matched, including empty.
    186   VName target = 1;
    187 
    188   // Return entries having this kind; if empty, any kind is matched, including
    189   // empty.
    190   string edge_kind = 2;
    191 
    192   // Return entries having fact labels with this prefix; if empty, any fact
    193   // label is matched,
    194   string fact_prefix = 3;
    195 }
    196 
    197 // Request for the size of the shard at the given index.
    198 message CountRequest {
    199   int64 index = 1;
    200   int64 shards = 2;
    201 }
    202 
    203 // Response for a CountRequest
    204 message CountReply {
    205   // Total number of entries in the specified shard.
    206   int64 entries = 1;
    207 }
    208 
    209 // Request for a stream of Entry objects in the given shard.
    210 message ShardRequest {
    211   int64 index = 1;
    212   int64 shards = 2;
    213 }
    214