      1 <html><body>
      2 <style>
      3 
      4 body, h1, h2, h3, div, span, p, pre, a {
      5   margin: 0;
      6   padding: 0;
      7   border: 0;
      8   font-weight: inherit;
      9   font-style: inherit;
     10   font-size: 100%;
     11   font-family: inherit;
     12   vertical-align: baseline;
     13 }
     14 
     15 body {
     16   font-size: 13px;
     17   padding: 1em;
     18 }
     19 
     20 h1 {
     21   font-size: 26px;
     22   margin-bottom: 1em;
     23 }
     24 
     25 h2 {
     26   font-size: 24px;
     27   margin-bottom: 1em;
     28 }
     29 
     30 h3 {
     31   font-size: 20px;
     32   margin-bottom: 1em;
     33   margin-top: 1em;
     34 }
     35 
     36 pre, code {
     37   line-height: 1.5;
     38   font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
     39 }
     40 
     41 pre {
     42   margin-top: 0.5em;
     43 }
     44 
     45 h1, h2, h3, p {
     46   font-family: Arial, sans-serif;
     47 }
     48 
     49 h1, h2, h3 {
     50   border-bottom: solid #CCC 1px;
     51 }
     52 
     53 .toc_element {
     54   margin-top: 0.5em;
     55 }
     56 
     57 .firstline {
     58   margin-left: 2em;
     59 }
     60 
     61 .method  {
     62   margin-top: 1em;
     63   border: solid 1px #CCC;
     64   padding: 1em;
     65   background: #EEE;
     66 }
     67 
     68 .details {
     69   font-weight: bold;
     70   font-size: 14px;
     71 }
     72 
     73 </style>
     74 
     75 <h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
     76 <h2>Instance Methods</h2>
     77 <p class="toc_element">
     78   <code><a href="#create">create(projectId, body, x__xgafv=None)</a></code></p>
     79 <p class="firstline">Creates a Cloud Dataflow job from a template.</p>
     80 <p class="toc_element">
     81   <code><a href="#get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</a></code></p>
     82 <p class="firstline">Get the metadata and status of the template at the given Cloud Storage path.</p>
     83 <p class="toc_element">
     84   <code><a href="#launch">launch(projectId, body, gcsPath=None, location=None, validateOnly=None, x__xgafv=None)</a></code></p>
     85 <p class="firstline">Launch a template.</p>
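<p>For orientation, the sketch below shows one way to obtain the <code>templates</code> resource on which the methods above are called, using the google-api-python-client library. This is an illustrative assumption rather than part of the generated reference: it presumes Application Default Credentials are available, and the project and bucket names used in the later sketches are placeholders.</p>
<pre>
# Hypothetical setup sketch; assumes google-api-python-client and google-auth are installed.
import google.auth
from googleapiclient.discovery import build

# Application Default Credentials, e.g. from `gcloud auth application-default login`.
credentials, default_project = google.auth.default()

# Build the Dataflow v1b3 client; `templates` exposes create(), get() and launch().
service = build('dataflow', 'v1b3', credentials=credentials)
templates = service.projects().templates()
</pre>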
     86 <h3>Method Details</h3>
     87 <div class="method">
     88     <code class="details" id="create">create(projectId, body, x__xgafv=None)</code>
     89   <pre>Creates a Cloud Dataflow job from a template.
     90 
     91 Args:
     92   projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
     93   body: object, The request body. (required)
     94     The object takes the form of:
     95 
     96 { # A request to create a Cloud Dataflow job from a template.
     97     "environment": { # The environment values to set at runtime. # The runtime environment for the job.
     98       "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
     99           # template if not specified.
    100       "zone": "A String", # The Compute Engine [availability
    101           # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
    102           # for launching worker instances to run your pipeline.
    103       "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
    104           # Use with caution.
    105       "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
    106           # Must be a valid Cloud Storage URL, beginning with `gs://`.
    107       "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
    108       "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
    109           # available to your pipeline during execution, from 1 to 1000.
    110     },
    111     "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
    112         # create the job.
    113         # Must be a valid Cloud Storage URL, beginning with `gs://`.
    114     "location": "A String", # The location to which to direct the request.
    115     "parameters": { # The runtime parameters to pass to the job.
    116       "a_key": "A String",
    117     },
    118     "jobName": "A String", # Required. The job name to use for the created job.
    119   }
    120 
    121   x__xgafv: string, V1 error format.
    122     Allowed values
    123       1 - v1 error format
    124       2 - v2 error format
    125 
    126 Returns:
    127   An object of the form:
    128 
    129     { # Defines a job to be run by the Cloud Dataflow service.
    130       "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
    131           # If this field is set, the service will ensure its uniqueness.
    132           # The request to create a job will fail if the service has knowledge of a
    133           # previously submitted job with the same client's ID and job name.
    134           # The caller may use this field to ensure idempotence of job
    135           # creation across retried attempts to create a job.
    136           # By default, the field is empty and, in that case, the service ignores it.
    137       "requestedState": "A String", # The job's requested state.
    138           #
    139           # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
    140           # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
    141           # also be used to directly set a job's requested state to
    142           # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
    143           # job if it has not already reached a terminal state.
    144       "name": "A String", # The user-specified Cloud Dataflow job name.
    145           #
    146           # Only one Job with a given name may exist in a project at any
    147           # given time. If a caller attempts to create a Job with the same
    148           # name as an already-existing Job, the attempt returns the
    149           # existing Job.
    150           #
    151           # The name must match the regular expression
    152           # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    153       "location": "A String", # The location that contains this job.
    154       "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
    155           # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    156       "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    157       "currentState": "A String", # The current state of the job.
    158           #
    159           # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
    160           # specified.
    161           #
    162           # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
    163           # terminal state. After a job has reached a terminal state, no
    164           # further state updates may be made.
    165           #
    166           # This field may be mutated by the Cloud Dataflow service;
    167           # callers cannot mutate it.
    168       "labels": { # User-defined labels for this job.
    169           #
    170           # The labels map can contain no more than 64 entries.  Entries of the labels
    171           # map are UTF8 strings that comply with the following restrictions:
    172           #
    173           # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
    174           # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
    175           # * Both keys and values are additionally constrained to be <= 128 bytes in
    176           # size.
    177         "a_key": "A String",
    178       },
    179       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
    180           # corresponding name prefixes of the new job.
    181         "a_key": "A String",
    182       },
    183       "id": "A String", # The unique ID of this job.
    184           #
    185           # This field is set by the Cloud Dataflow service when the Job is
    186           # created, and is immutable for the life of the job.
    187       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    188         "version": { # A structure describing which components and their versions of the service
    189             # are required in order to run the job.
    190           "a_key": "", # Properties of the object.
    191         },
    192         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    193             # storage.  The system will append the suffix "/temp-{JOBNAME}" to
    194             # this resource prefix, where {JOBNAME} is the value of the
    195             # job_name field.  The resulting bucket and object prefix is used
    196             # as the prefix of the resources used to store temporary data
    197             # needed during the job execution.  NOTE: This will override the
    198             # value in taskrunner_settings.
    199             # The supported resource type is:
    200             #
    201             # Google Cloud Storage:
    202             #
    203             #   storage.googleapis.com/{bucket}/{object}
    204             #   bucket.storage.googleapis.com/{object}
    205         "internalExperiments": { # Experimental settings.
    206           "a_key": "", # Properties of the object. Contains field @type with type URL.
    207         },
    208         "dataset": "A String", # The dataset for the current project where various workflow
    209             # related tables are stored.
    210             #
    211             # The supported resource type is:
    212             #
    213             # Google BigQuery:
    214             #   bigquery.googleapis.com/{dataset}
    215         "experiments": [ # The list of experiments to enable.
    216           "A String",
    217         ],
    218         "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    219         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
    220             # options are passed through the service and are used to recreate the
    221             # SDK pipeline options on the worker in a language agnostic and platform
    222             # independent way.
    223           "a_key": "", # Properties of the object.
    224         },
    225         "userAgent": { # A description of the process that generated the request.
    226           "a_key": "", # Properties of the object.
    227         },
    228         "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
    229             # unspecified, the service will attempt to choose a reasonable
    230             # default.  This should be in the form of the API service name,
    231             # e.g. "compute.googleapis.com".
    232         "workerPools": [ # The worker pools. At least one "harness" worker pool must be
    233             # specified in order for the job to have workers.
    234           { # Describes one particular pool of Cloud Dataflow workers to be
    235               # instantiated by the Cloud Dataflow service in order to perform the
    236               # computations required by a job.  Note that a workflow job may use
    237               # multiple pools, in order to match the various computational
    238               # requirements of the various stages of the job.
    239             "diskSourceImage": "A String", # Fully qualified source image for disks.
    240             "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
    241                 # using the standard Dataflow task runner.  Users should ignore
    242                 # this field.
    243               "workflowFileName": "A String", # The file to store the workflow in.
    244               "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
    245                   # will not be uploaded.
    246                   #
    247                   # The supported resource type is:
    248                   #
    249                   # Google Cloud Storage:
    250                   #   storage.googleapis.com/{bucket}/{object}
    251                   #   bucket.storage.googleapis.com/{object}
    252               "commandlinesFileName": "A String", # The file to store preprocessing commands in.
    253               "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
    254                 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
    255                 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
    256                     # "shuffle/v1beta1".
    257                 "workerId": "A String", # The ID of the worker running this pipeline.
    258                 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
    259                     #
    260                     # When workers access Google Cloud APIs, they logically do so via
    261                     # relative URLs.  If this field is specified, it supplies the base
    262                     # URL to use for resolving these relative URLs.  The normative
    263                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    264                     # Locators".
    265                     #
    266                     # If not specified, the default value is "http://www.googleapis.com/"
    267                 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
    268                     # "dataflow/v1b3/projects".
    269                 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    270                     # storage.
    271                     #
    272                     # The supported resource type is:
    273                     #
    274                     # Google Cloud Storage:
    275                     #
    276                     #   storage.googleapis.com/{bucket}/{object}
    277                     #   bucket.storage.googleapis.com/{object}
    278               },
    279               "vmId": "A String", # The ID string of the VM.
    280               "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
    281               "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
    282               "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
    283                   # access the Cloud Dataflow API.
    284                 "A String",
    285               ],
    286               "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
    287                   # taskrunner; e.g. "root".
    288               "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
    289                   #
    290                   # When workers access Google Cloud APIs, they logically do so via
    291                   # relative URLs.  If this field is specified, it supplies the base
    292                   # URL to use for resolving these relative URLs.  The normative
    293                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    294                   # Locators".
    295                   #
    296                   # If not specified, the default value is "http://www.googleapis.com/"
    297               "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
    298                   # taskrunner; e.g. "wheel".
    299               "languageHint": "A String", # The suggested backend language.
    300               "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
    301                   # console.
    302               "streamingWorkerMainClass": "A String", # The streaming worker main class name.
    303               "logDir": "A String", # The directory on the VM to store logs.
    304               "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
    305               "harnessCommand": "A String", # The command to launch the worker harness.
    306               "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
    307                   # temporary storage.
    308                   #
    309                   # The supported resource type is:
    310                   #
    311                   # Google Cloud Storage:
    312                   #   storage.googleapis.com/{bucket}/{object}
    313                   #   bucket.storage.googleapis.com/{object}
    314               "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
    315             },
    316             "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
    317                 # are supported.
    318             "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
    319                 # service will attempt to choose a reasonable default.
    320             "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
    321                 # the service will use the network "default".
    322             "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
    323                 # will attempt to choose a reasonable default.
    324             "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
    325                 # attempt to choose a reasonable default.
    326             "dataDisks": [ # Data disks that are used by a VM in this workflow.
    327               { # Describes the data disk used by a workflow job.
    328                 "mountPoint": "A String", # Directory in a VM where disk is mounted.
    329                 "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
    330                     # attempt to choose a reasonable default.
    331                 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
    332                     # must be a disk type appropriate to the project and zone in which
    333                     # the workers will run.  If unknown or unspecified, the service
    334                     # will attempt to choose a reasonable default.
    335                     #
    336                     # For example, the standard persistent disk type is a resource name
    337                     # typically ending in "pd-standard".  If SSD persistent disks are
    338                     # available, the resource name typically ends with "pd-ssd".  The
    339                     # actual valid values are defined by the Google Compute Engine API,
    340                     # not by the Cloud Dataflow API; consult the Google Compute Engine
    341                     # documentation for more information about determining the set of
    342                     # available disk types for a particular project and zone.
    343                     #
    344                     # Google Compute Engine Disk types are local to a particular
    345                     # project in a particular zone, and so the resource name will
    346                     # typically look something like this:
    347                     #
    348                     # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
    349               },
    350             ],
    351             "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
    352                 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
    353                 # `TEARDOWN_NEVER`.
    354                 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
    355                 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
    356                 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
    357                 # down.
    358                 #
    359                 # If the workers are not torn down by the service, they will
    360                 # continue to run and use Google Compute Engine VM resources in the
    361                 # user's project until they are explicitly terminated by the user.
    362                 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
    363                 # policy except for small, manually supervised test jobs.
    364                 #
    365                 # If unknown or unspecified, the service will attempt to choose a reasonable
    366                 # default.
    367             "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
    368                 # Compute Engine API.
    369             "ipConfiguration": "A String", # Configuration for VM IPs.
    370             "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
    371                 # service will choose a number of threads (according to the number of cores
    372                 # on the selected machine type for batch, or 1 by convention for streaming).
    373             "poolArgs": { # Extra arguments for this worker pool.
    374               "a_key": "", # Properties of the object. Contains field @type with type URL.
    375             },
    376             "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
    377                 # execute the job.  If zero or unspecified, the service will
    378                 # attempt to choose a reasonable default.
    379             "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
    380                 # harness, residing in Google Container Registry.
    381             "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
    382                 # the form "regions/REGION/subnetworks/SUBNETWORK".
    383             "packages": [ # Packages to be installed on workers.
    384               { # The packages that must be installed in order for a worker to run the
    385                   # steps of the Cloud Dataflow job that will be assigned to its worker
    386                   # pool.
    387                   #
    388                   # This is the mechanism by which the Cloud Dataflow SDK causes code to
    389                   # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
    390                   # might use this to install jars containing the user's code and all of the
    391                   # various dependencies (libraries, data files, etc.) required in order
    392                   # for that code to run.
    393                 "location": "A String", # The resource to read the package from. The supported resource type is:
    394                     #
    395                     # Google Cloud Storage:
    396                     #
    397                     #   storage.googleapis.com/{bucket}
    398                     #   bucket.storage.googleapis.com/
    399                 "name": "A String", # The name of the package.
    400               },
    401             ],
    402             "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
    403               "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
    404               "algorithm": "A String", # The algorithm to use for autoscaling.
    405             },
    406             "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
    407                 # select a default set of packages which are useful to worker
    408                 # harnesses written in a particular language.
    409             "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
    410                 # attempt to choose a reasonable default.
    411             "metadata": { # Metadata to set on the Google Compute Engine VMs.
    412               "a_key": "A String",
    413             },
    414           },
    415         ],
    416       },
    417       "pipelineDescription": { # Preliminary field: The format of this data may change at any time.
    418           # A descriptive representation of the submitted pipeline as well as its
    419           # executed form: the user pipeline and the stages through which it is
    420           # executed.  Created by the Cloud Dataflow service; only retrieved with
    421           # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.  This data is provided for ease
    422           # of visualizing the pipeline and interpreting Dataflow-provided metrics.
    423         "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
    424           { # Description of the type, names/ids, and input/outputs for a transform.
    425             "kind": "A String", # Type of transform.
    426             "name": "A String", # User provided name for this transform instance.
    427             "inputCollectionName": [ # User names for all collection inputs to this transform.
    428               "A String",
    429             ],
    430             "displayData": [ # Transform-specific display data.
    431               { # Data provided with a pipeline or transform to provide descriptive info.
    432                 "shortStrValue": "A String", # A possible additional shorter value to display.
    433                     # For example a java_class_name_value of com.mypackage.MyDoFn
    434                     # will be stored with MyDoFn as the short_str_value and
    435                     # com.mypackage.MyDoFn as the java_class_name value.
    436                     # short_str_value can be displayed and java_class_name_value
    437                     # will be displayed as a tooltip.
    438                 "durationValue": "A String", # Contains value if the data is of duration type.
    439                 "url": "A String", # An optional full URL.
    440                 "floatValue": 3.14, # Contains value if the data is of float type.
    441                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    442                     # language namespace (i.e. python module) which defines the display data.
    443                     # This allows a dax monitoring system to specially handle the data
    444                     # and perform custom rendering.
    445                 "javaClassValue": "A String", # Contains value if the data is of java class type.
    446                 "label": "A String", # An optional label to display in a dax UI for the element.
    447                 "boolValue": True or False, # Contains value if the data is of a boolean type.
    448                 "strValue": "A String", # Contains value if the data is of string type.
    449                 "key": "A String", # The key identifying the display data.
    450                     # This is intended to be used as a label for the display data
    451                     # when viewed in a dax monitoring system.
    452                 "int64Value": "A String", # Contains value if the data is of int64 type.
    453                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
    454               },
    455             ],
    456             "outputCollectionName": [ # User  names for all collection outputs to this transform.
    457               "A String",
    458             ],
    459             "id": "A String", # SDK generated id of this transform instance.
    460           },
    461         ],
    462         "displayData": [ # Pipeline level display data.
    463           { # Data provided with a pipeline or transform to provide descriptive info.
    464             "shortStrValue": "A String", # A possible additional shorter value to display.
    465                 # For example a java_class_name_value of com.mypackage.MyDoFn
    466                 # will be stored with MyDoFn as the short_str_value and
    467                 # com.mypackage.MyDoFn as the java_class_name value.
    468                 # short_str_value can be displayed and java_class_name_value
    469                 # will be displayed as a tooltip.
    470             "durationValue": "A String", # Contains value if the data is of duration type.
    471             "url": "A String", # An optional full URL.
    472             "floatValue": 3.14, # Contains value if the data is of float type.
    473             "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    474                 # language namespace (i.e. python module) which defines the display data.
    475                 # This allows a dax monitoring system to specially handle the data
    476                 # and perform custom rendering.
    477             "javaClassValue": "A String", # Contains value if the data is of java class type.
    478             "label": "A String", # An optional label to display in a dax UI for the element.
    479             "boolValue": True or False, # Contains value if the data is of a boolean type.
    480             "strValue": "A String", # Contains value if the data is of string type.
    481             "key": "A String", # The key identifying the display data.
    482                 # This is intended to be used as a label for the display data
    483                 # when viewed in a dax monitoring system.
    484             "int64Value": "A String", # Contains value if the data is of int64 type.
    485             "timestampValue": "A String", # Contains value if the data is of timestamp type.
    486           },
    487         ],
    488         "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
    489           { # Description of the composing transforms, names/ids, and input/outputs of a
    490               # stage of execution.  Some composing transforms and sources may have been
    491               # generated by the Dataflow service during execution planning.
    492             "componentSource": [ # Collections produced and consumed by component transforms of this stage.
    493               { # Description of an interstitial value between transforms in an execution
    494                   # stage.
    495                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    496                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    497                     # source is most closely associated.
    498                 "name": "A String", # Dataflow service generated name for this source.
    499               },
    500             ],
    501             "kind": "A String", # Type of transform this stage is executing.
    502             "name": "A String", # Dataflow service generated name for this stage.
    503             "outputSource": [ # Output sources for this stage.
    504               { # Description of an input or output of an execution stage.
    505                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
    506                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    507                     # source is most closely associated.
    508                 "name": "A String", # Dataflow service generated name for this source.
    509                 "sizeBytes": "A String", # Size of the source, if measurable.
    510               },
    511             ],
    512             "inputSource": [ # Input sources for this stage.
    513               { # Description of an input or output of an execution stage.
    514                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
    515                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    516                     # source is most closely associated.
    517                 "name": "A String", # Dataflow service generated name for this source.
    518                 "sizeBytes": "A String", # Size of the source, if measurable.
    519               },
    520             ],
    521             "componentTransform": [ # Transforms that comprise this execution stage.
    522               { # Description of a transform executed as part of an execution stage.
    523                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    524                 "originalTransform": "A String", # User name for the original user transform with which this transform is
    525                     # most closely associated.
    526                 "name": "A String", # Dataflow service generated name for this source.
    527               },
    528             ],
    529             "id": "A String", # Dataflow service generated id for this stage.
    530           },
    531         ],
    532       },
    533       "steps": [ # The top-level steps that constitute the entire job.
    534         { # Defines a particular step within a Cloud Dataflow job.
    535             #
    536             # A job consists of multiple steps, each of which performs some
    537             # specific operation as part of the overall job.  Data is typically
    538             # passed from one step to another as part of the job.
    539             #
    540             # Here's an example of a sequence of steps which together implement a
    541             # Map-Reduce job:
    542             #
    543             #   * Read a collection of data from some source, parsing the
    544             #     collection's elements.
    545             #
    546             #   * Validate the elements.
    547             #
    548             #   * Apply a user-defined function to map each element to some value
    549             #     and extract an element-specific key value.
    550             #
    551             #   * Group elements with the same key into a single element with
    552             #     that key, transforming a multiply-keyed collection into a
    553             #     uniquely-keyed collection.
    554             #
    555             #   * Write the elements out to some data sink.
    556             #
    557             # Note that the Cloud Dataflow service may be used to run many different
    558             # types of jobs, not just Map-Reduce.
    559           "kind": "A String", # The kind of step in the Cloud Dataflow job.
    560           "properties": { # Named properties associated with the step. Each kind of
    561               # predefined step has its own required set of properties.
    562               # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
    563             "a_key": "", # Properties of the object.
    564           },
    565           "name": "A String", # The name that identifies the step. This must be unique for each
    566               # step with respect to all other steps in the Cloud Dataflow job.
    567         },
    568       ],
    569       "currentStateTime": "A String", # The timestamp associated with the current state.
    570       "tempFiles": [ # A set of files the system should be aware of that are used
    571           # for temporary storage. These temporary files will be
    572           # removed on job completion.
    573           # No duplicates are allowed.
    574           # No file patterns are supported.
    575           #
    576           # The supported files are:
    577           #
    578           # Google Cloud Storage:
    579           #
    580           #    storage.googleapis.com/{bucket}/{object}
    581           #    bucket.storage.googleapis.com/{object}
    582         "A String",
    583       ],
    584       "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
    585           # callers cannot mutate it.
    586         { # A message describing the state of a particular execution stage.
    587           "executionStageName": "A String", # The name of the execution stage.
    588           "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
    589           "currentStateTime": "A String", # The time at which the stage transitioned to this state.
    590         },
    591       ],
    592       "type": "A String", # The type of Cloud Dataflow job.
    593       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
    594           # Cloud Dataflow service.
    595       "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
    596           # of the job it replaced.
    597           #
    598           # When sending a `CreateJobRequest`, you can update a job by specifying it
    599           # here. The job named here is stopped, and its intermediate state is
    600           # transferred to this job.
    601       "executionInfo": { # Deprecated.  Additional information about how a Cloud Dataflow job
    602           # will be executed that isn't contained in the submitted job.
    603         "stages": { # A mapping from each stage to the information about that stage.
    604           "a_key": { # Contains information about how a particular
    605               # google.dataflow.v1beta3.Step will be executed.
    606             "stepName": [ # The steps associated with the execution stage.
    607                 # Note that stages may have several steps, and that a given step
    608                 # might be run by more than one stage.
    609               "A String",
    610             ],
    611           },
    612         },
    613       },
    614     }</pre>
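<p>Below is a minimal, hypothetical usage sketch for <code>create</code>. It assumes the <code>templates</code> object from the setup sketch near the top of this page; the project ID, bucket, template path, and parameter names are placeholders, since the accepted runtime parameters depend entirely on the template being instantiated.</p>
<pre>
# Hypothetical example: create a Cloud Dataflow job from a template in Cloud Storage.
body = {
    'jobName': 'example-template-job',                       # placeholder job name
    'gcsPath': 'gs://example-bucket/templates/my-template',  # placeholder template path
    'parameters': {                                          # template-defined runtime parameters
        'inputFile': 'gs://example-bucket/input.txt',
        'output': 'gs://example-bucket/output/results',
    },
    'environment': {
        'tempLocation': 'gs://example-bucket/temp',          # must begin with gs://
        'maxWorkers': 5,
    },
}
response = templates.create(projectId='example-project', body=body).execute()
print(response['id'], response.get('currentState'))
</pre>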
    615 </div>
    616 
    617 <div class="method">
    618     <code class="details" id="get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</code>
    619   <pre>Get the metadata and status of the template at the given Cloud Storage path.
    620 
    621 Args:
    622   projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
    623   gcsPath: string, Required. A Cloud Storage path to the template from which to
    624 create the job.
    625 Must be a valid Cloud Storage URL, beginning with `gs://`.
    626   location: string, The location to which to direct the request.
    627   x__xgafv: string, V1 error format.
    628     Allowed values
    629       1 - v1 error format
    630       2 - v2 error format
    631   view: string, The view to retrieve. Defaults to METADATA_ONLY.
    632 
    633 Returns:
    634   An object of the form:
    635 
    636     { # The response to a GetTemplate request.
    637     "status": { # The status of the get template request.  Any problems with the
    638         # request will be indicated in the error_details.  The `Status` type
    639         # defines a logical error model that is suitable for different programming
    640         # environments, including REST APIs and RPC APIs. It is used by
    641         #
    642         # - Simple to use and understand for most users
    643         # - Flexible enough to meet unexpected needs
    644         #
    645         # # Overview
    646         #
    647         # The `Status` message contains three pieces of data: error code, error message,
    648         # and error details. The error code should be an enum value of
    649         # google.rpc.Code, but it may accept additional error codes if needed.  The
    650         # error message should be a developer-facing English message that helps
    651         # developers *understand* and *resolve* the error. If a localized user-facing
    652         # error message is needed, put the localized message in the error details or
    653         # localize it in the client. The optional error details may contain arbitrary
    654         # information about the error. There is a predefined set of error detail types
    655         # in the package `google.rpc` that can be used for common error conditions.
    656         #
    657         # # Language mapping
    658         #
    659         # The `Status` message is the logical representation of the error model, but it
    660         # is not necessarily the actual wire format. When the `Status` message is
    661         # exposed in different client libraries and different wire protocols, it can be
    662         # mapped differently. For example, it will likely be mapped to some exceptions
    663         # in Java, but more likely mapped to some error codes in C.
    664         #
    665         # # Other uses
    666         #
    667         # The error model and the `Status` message can be used in a variety of
    668         # environments, either with or without APIs, to provide a
    669         # consistent developer experience across different environments.
    670         #
    671         # Example uses of this error model include:
    672         #
    673         # - Partial errors. If a service needs to return partial errors to the client,
    674         #     it may embed the `Status` in the normal response to indicate the partial
    675         #     errors.
    676         #
    677         # - Workflow errors. A typical workflow has multiple steps. Each step may
    678         #     have a `Status` message for error reporting.
    679         #
    680         # - Batch operations. If a client uses batch request and batch response, the
    681         #     `Status` message should be used directly inside batch response, one for
    682         #     each error sub-response.
    683         #
    684         # - Asynchronous operations. If an API call embeds asynchronous operation
    685         #     results in its response, the status of those operations should be
    686         #     represented directly using the `Status` message.
    687         #
    688         # - Logging. If some API errors are stored in logs, the message `Status` could
    689         #     be used directly after any stripping needed for security/privacy reasons.
    690       "message": "A String", # A developer-facing error message, which should be in English. Any
    691           # user-facing error message should be localized and sent in the
    692           # google.rpc.Status.details field, or localized by the client.
    693       "code": 42, # The status code, which should be an enum value of google.rpc.Code.
    694       "details": [ # A list of messages that carry the error details.  There will be a
    695           # common set of message types for APIs to use.
    696         {
    697           "a_key": "", # Properties of the object. Contains field @type with type URL.
    698         },
    699       ],
    700     },
    701     "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
    702         # parameters, etc.
    703       "name": "A String", # Required. The name of the template.
    704       "parameters": [ # The parameters for the template.
    705         { # Metadata for a specific parameter.
    706           "regexes": [ # Optional. Regexes that the parameter must match.
    707             "A String",
    708           ],
    709           "helpText": "A String", # Required. The help text to display for the parameter.
    710           "name": "A String", # Required. The name of the parameter.
    711           "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
    712           "label": "A String", # Required. The label to display for the parameter.
    713         },
    714       ],
    715       "description": "A String", # Optional. A description of the template.
    716     },
    717   }</pre>
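<p>A minimal, hypothetical usage sketch for <code>get</code>, again assuming the <code>templates</code> object from the setup sketch; the project ID and Cloud Storage path are placeholders. With the default <code>METADATA_ONLY</code> view, the response carries the template's declared parameters.</p>
<pre>
# Hypothetical example: inspect a template's metadata before launching it.
response = templates.get(
    projectId='example-project',
    gcsPath='gs://example-bucket/templates/my-template',
    view='METADATA_ONLY').execute()

for param in response.get('metadata', {}).get('parameters', []):
    required = 'optional' if param.get('isOptional') else 'required'
    print(param['name'], '(' + required + '):', param.get('helpText', ''))
</pre>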
    718 </div>
    719 
    720 <div class="method">
    721     <code class="details" id="launch">launch(projectId, body, gcsPath=None, location=None, validateOnly=None, x__xgafv=None)</code>
    722   <pre>Launch a template.
    723 
    724 Args:
    725   projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
    726   body: object, The request body. (required)
    727     The object takes the form of:
    728 
    729 { # Parameters to provide to the template being launched.
    730     "environment": { # The environment values to set at runtime. # The runtime environment for the job.
    731       "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
    732           # template if not specified.
    733       "zone": "A String", # The Compute Engine [availability
    734           # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
    735           # for launching worker instances to run your pipeline.
    736       "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
    737           # Use with caution.
    738       "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
    739           # Must be a valid Cloud Storage URL, beginning with `gs://`.
    740       "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
    741       "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
    742           # available to your pipeline during execution, from 1 to 1000.
    743     },
    744     "parameters": { # The runtime parameters to pass to the job.
    745       "a_key": "A String",
    746     },
    747     "jobName": "A String", # Required. The job name to use for the created job.
    748   }
    749 
    750   gcsPath: string, Required. A Cloud Storage path to the template from which to create
    751 the job.
    752 Must be a valid Cloud Storage URL, beginning with `gs://`.
    753   location: string, The location to which to direct the request.
    754   validateOnly: boolean, If true, the request is validated but not actually executed.
    755 Defaults to false.
    756   x__xgafv: string, V1 error format.
    757     Allowed values
    758       1 - v1 error format
    759       2 - v2 error format
    760 
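  Example (hypothetical, not part of the API schema): a dry-run launch using the
  `templates` object from the setup sketch near the top of this page, with
  placeholder project, bucket and parameter names:

    body = {
        'jobName': 'example-launch-job',
        'parameters': {'inputFile': 'gs://example-bucket/input.txt'},
    }
    response = templates.launch(
        projectId='example-project',
        gcsPath='gs://example-bucket/templates/my-template',
        body=body,
        validateOnly=True).execute()
    launched_job_id = response.get('job', {}).get('id')  # absent for a pure dry run
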
    761 Returns:
    762   An object of the form:
    763 
    764     { # Response to the request to launch a template.
    765     "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
    766         # the job was successfully launched.
    767         "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
    768             # If this field is set, the service will ensure its uniqueness.
    769             # The request to create a job will fail if the service has knowledge of a
    770             # previously submitted job with the same client's ID and job name.
    771             # The caller may use this field to ensure idempotence of job
    772             # creation across retried attempts to create a job.
    773             # By default, the field is empty and, in that case, the service ignores it.
    774         "requestedState": "A String", # The job's requested state.
    775             #
    776             # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
    777             # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
    778             # also be used to directly set a job's requested state to
    779             # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
    780             # job if it has not already reached a terminal state.
    781         "name": "A String", # The user-specified Cloud Dataflow job name.
    782             #
    783             # Only one Job with a given name may exist in a project at any
    784             # given time. If a caller attempts to create a Job with the same
    785             # name as an already-existing Job, the attempt returns the
    786             # existing Job.
    787             #
    788             # The name must match the regular expression
    789             # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    790         "location": "A String", # The location that contains this job.
    791         "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
    792             # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    793         "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    794         "currentState": "A String", # The current state of the job.
    795             #
    796             # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
    797             # specified.
    798             #
    799             # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
    800             # terminal state. After a job has reached a terminal state, no
    801             # further state updates may be made.
    802             #
    803             # This field may be mutated by the Cloud Dataflow service;
    804             # callers cannot mutate it.
    805         "labels": { # User-defined labels for this job.
    806             #
    807             # The labels map can contain no more than 64 entries.  Entries of the labels
    808             # map are UTF8 strings that comply with the following restrictions:
    809             #
    810             # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
    811             # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
    812             # * Both keys and values are additionally constrained to be <= 128 bytes in
    813             # size.
    814           "a_key": "A String",
    815         },
    816         "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
    817             # corresponding name prefixes of the new job.
    818           "a_key": "A String",
    819         },
    820         "id": "A String", # The unique ID of this job.
    821             #
    822             # This field is set by the Cloud Dataflow service when the Job is
    823             # created, and is immutable for the life of the job.
    824         "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    825           "version": { # A structure describing which components and their versions of the service
    826               # are required in order to run the job.
    827             "a_key": "", # Properties of the object.
    828           },
    829           "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    830               # storage.  The system will append the suffix "/temp-{JOBNAME}" to
    831               # this resource prefix, where {JOBNAME} is the value of the
    832               # job_name field.  The resulting bucket and object prefix is used
    833               # as the prefix of the resources used to store temporary data
    834               # needed during the job execution.  NOTE: This will override the
    835               # value in taskrunner_settings.
    836               # The supported resource type is:
    837               #
    838               # Google Cloud Storage:
    839               #
    840               #   storage.googleapis.com/{bucket}/{object}
    841               #   bucket.storage.googleapis.com/{object}
    842           "internalExperiments": { # Experimental settings.
    843             "a_key": "", # Properties of the object. Contains field @type with type URL.
    844           },
    845           "dataset": "A String", # The dataset for the current project where various workflow
    846               # related tables are stored.
    847               #
    848               # The supported resource type is:
    849               #
    850               # Google BigQuery:
    851               #   bigquery.googleapis.com/{dataset}
    852           "experiments": [ # The list of experiments to enable.
    853             "A String",
    854           ],
    855           "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    856           "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
    857               # options are passed through the service and are used to recreate the
    858               # SDK pipeline options on the worker in a language agnostic and platform
    859               # independent way.
    860             "a_key": "", # Properties of the object.
    861           },
    862           "userAgent": { # A description of the process that generated the request.
    863             "a_key": "", # Properties of the object.
    864           },
    865           "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
    866               # unspecified, the service will attempt to choose a reasonable
    867               # default.  This should be in the form of the API service name,
    868               # e.g. "compute.googleapis.com".
    869           "workerPools": [ # The worker pools. At least one "harness" worker pool must be
    870               # specified in order for the job to have workers.
    871             { # Describes one particular pool of Cloud Dataflow workers to be
    872                 # instantiated by the Cloud Dataflow service in order to perform the
    873                 # computations required by a job.  Note that a workflow job may use
    874                 # multiple pools, in order to match the various computational
    875                 # requirements of the various stages of the job.
    876               "diskSourceImage": "A String", # Fully qualified source image for disks.
    877               "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
    878                   # using the standard Dataflow task runner.  Users should ignore
    879                   # this field.
    880                 "workflowFileName": "A String", # The file to store the workflow in.
    881                 "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
    882                     # will not be uploaded.
    883                     #
    884                     # The supported resource type is:
    885                     #
    886                     # Google Cloud Storage:
    887                     #   storage.googleapis.com/{bucket}/{object}
    888                     #   bucket.storage.googleapis.com/{object}
    889                 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
    890                 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
    891                   "reportingEnabled": True or False, # Whether to send work progress updates to the service.
    892                   "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
    893                       # "shuffle/v1beta1".
    894                   "workerId": "A String", # The ID of the worker running this pipeline.
    895                   "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
    896                       #
    897                       # When workers access Google Cloud APIs, they logically do so via
    898                       # relative URLs.  If this field is specified, it supplies the base
    899                       # URL to use for resolving these relative URLs.  The normative
    900                       # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    901                       # Locators".
    902                       #
    903                       # If not specified, the default value is "http://www.googleapis.com/"
    904                   "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
    905                       # "dataflow/v1b3/projects".
    906                   "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    907                       # storage.
    908                       #
    909                       # The supported resource type is:
    910                       #
    911                       # Google Cloud Storage:
    912                       #
    913                       #   storage.googleapis.com/{bucket}/{object}
    914                       #   bucket.storage.googleapis.com/{object}
    915                 },
    916                 "vmId": "A String", # The ID string of the VM.
    917                 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
    918                 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
    919                 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
    920                     # access the Cloud Dataflow API.
    921                   "A String",
    922                 ],
    923                 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
    924                     # taskrunner; e.g. "root".
    925                 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
    926                     #
    927                     # When workers access Google Cloud APIs, they logically do so via
    928                     # relative URLs.  If this field is specified, it supplies the base
    929                     # URL to use for resolving these relative URLs.  The normative
    930                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    931                     # Locators".
    932                     #
    933                     # If not specified, the default value is "http://www.googleapis.com/"
    934                 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
    935                     # taskrunner; e.g. "wheel".
    936                 "languageHint": "A String", # The suggested backend language.
    937                 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
    938                     # console.
    939                 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
    940                 "logDir": "A String", # The directory on the VM to store logs.
    941                 "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
    942                 "harnessCommand": "A String", # The command to launch the worker harness.
    943                 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
    944                     # temporary storage.
    945                     #
    946                     # The supported resource type is:
    947                     #
    948                     # Google Cloud Storage:
    949                     #   storage.googleapis.com/{bucket}/{object}
    950                     #   bucket.storage.googleapis.com/{object}
    951                 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
    952               },
    953               "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
    954                   # are supported.
    955               "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
    956                   # service will attempt to choose a reasonable default.
    957               "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
    958                   # the service will use the network "default".
    959               "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
    960                   # will attempt to choose a reasonable default.
    961               "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
    962                   # attempt to choose a reasonable default.
    963               "dataDisks": [ # Data disks that are used by a VM in this workflow.
    964                 { # Describes the data disk used by a workflow job.
    965                   "mountPoint": "A String", # Directory in a VM where disk is mounted.
    966                   "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
    967                       # attempt to choose a reasonable default.
    968                   "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
    969                       # must be a disk type appropriate to the project and zone in which
    970                       # the workers will run.  If unknown or unspecified, the service
    971                       # will attempt to choose a reasonable default.
    972                       #
    973                       # For example, the standard persistent disk type is a resource name
    974                       # typically ending in "pd-standard".  If SSD persistent disks are
    975                       # available, the resource name typically ends with "pd-ssd".  The
    976                       # actual valid values are defined by the Google Compute Engine API,
    977                       # not by the Cloud Dataflow API; consult the Google Compute Engine
    978                       # documentation for more information about determining the set of
    979                       # available disk types for a particular project and zone.
    980                       #
    981                       # Google Compute Engine Disk types are local to a particular
    982                       # project in a particular zone, and so the resource name will
    983                       # typically look something like this:
    984                       #
    985                       # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
    986                 },
    987               ],
    988               "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
    989                   # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
    990                   # `TEARDOWN_NEVER`.
    991                   # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
    992                   # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
    993                   # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
    994                   # down.
    995                   #
    996                   # If the workers are not torn down by the service, they will
    997                   # continue to run and use Google Compute Engine VM resources in the
    998                   # user's project until they are explicitly terminated by the user.
    999                   # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   1000                   # policy except for small, manually supervised test jobs.
   1001                   #
   1002                   # If unknown or unspecified, the service will attempt to choose a reasonable
   1003                   # default.
   1004               "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   1005                   # Compute Engine API.
   1006               "ipConfiguration": "A String", # Configuration for VM IPs.
   1007               "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   1008                   # service will choose a number of threads (according to the number of cores
   1009                   # on the selected machine type for batch, or 1 by convention for streaming).
   1010               "poolArgs": { # Extra arguments for this worker pool.
   1011                 "a_key": "", # Properties of the object. Contains field @type with type URL.
   1012               },
   1013               "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   1014                   # execute the job.  If zero or unspecified, the service will
   1015                   # attempt to choose a reasonable default.
   1016               "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   1017                   # harness, residing in Google Container Registry.
   1018               "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   1019                   # the form "regions/REGION/subnetworks/SUBNETWORK".
   1020               "packages": [ # Packages to be installed on workers.
   1021                 { # The packages that must be installed in order for a worker to run the
   1022                     # steps of the Cloud Dataflow job that will be assigned to its worker
   1023                     # pool.
   1024                     #
   1025                     # This is the mechanism by which the Cloud Dataflow SDK causes code to
   1026                     # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   1027                     # might use this to install jars containing the user's code and all of the
   1028                     # various dependencies (libraries, data files, etc.) required in order
   1029                     # for that code to run.
   1030                   "location": "A String", # The resource to read the package from. The supported resource type is:
   1031                       #
   1032                       # Google Cloud Storage:
   1033                       #
   1034                       #   storage.googleapis.com/{bucket}
   1035                       #   bucket.storage.googleapis.com/
   1036                   "name": "A String", # The name of the package.
   1037                 },
   1038               ],
   1039               "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   1040                 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   1041                 "algorithm": "A String", # The algorithm to use for autoscaling.
   1042               },
   1043               "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   1044                   # select a default set of packages which are useful to worker
   1045                   # harnesses written in a particular language.
   1046               "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   1047                   # attempt to choose a reasonable default.
   1048               "metadata": { # Metadata to set on the Google Compute Engine VMs.
   1049                 "a_key": "A String",
   1050               },
   1051             },
   1052           ],
   1053         },
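
# Example (illustrative sketch, not part of the schema): one way to summarize
# the worker pools of a returned job.  Here `job` is assumed to be the
# dictionary returned by this method; which fields are actually populated
# depends on the job and on the requested view.
def summarize_worker_pools(job):
    for pool in job.get('environment', {}).get('workerPools', []):
        print(pool.get('kind'), pool.get('machineType'),
              pool.get('numWorkers'), pool.get('teardownPolicy'))
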
    1054         "pipelineDescription": { # A descriptive representation of the submitted pipeline and its executed form. # Preliminary field: The format of this data may change at any time.
    1055             # A description of the user pipeline and the stages through which it is executed.
    1056             # Created by the Cloud Dataflow service.  Only retrieved with
    1057             # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    1058             # This data is provided by the Dataflow service to make it easier to visualize
    1059             # the pipeline and to interpret Dataflow-provided metrics.
   1060           "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   1061             { # Description of the type, names/ids, and input/outputs for a transform.
   1062               "kind": "A String", # Type of transform.
   1063               "name": "A String", # User provided name for this transform instance.
   1064               "inputCollectionName": [ # User names for all collection inputs to this transform.
   1065                 "A String",
   1066               ],
   1067               "displayData": [ # Transform-specific display data.
   1068                 { # Data provided with a pipeline or transform to provide descriptive info.
   1069                   "shortStrValue": "A String", # A possible additional shorter value to display.
    1070                       # For example, a java_class_name_value of com.mypackage.MyDoFn
    1071                       # will be stored with MyDoFn as the short_str_value and
    1072                       # com.mypackage.MyDoFn as the java_class_name_value.
    1073                       # The short_str_value can be displayed directly, while the
    1074                       # java_class_name_value is displayed as a tooltip.
   1075                   "durationValue": "A String", # Contains value if the data is of duration type.
   1076                   "url": "A String", # An optional full URL.
   1077                   "floatValue": 3.14, # Contains value if the data is of float type.
   1078                   "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    1079                       # language namespace (e.g., a Python module) that defines the display data.
   1080                       # This allows a dax monitoring system to specially handle the data
   1081                       # and perform custom rendering.
   1082                   "javaClassValue": "A String", # Contains value if the data is of java class type.
   1083                   "label": "A String", # An optional label to display in a dax UI for the element.
   1084                   "boolValue": True or False, # Contains value if the data is of a boolean type.
   1085                   "strValue": "A String", # Contains value if the data is of string type.
   1086                   "key": "A String", # The key identifying the display data.
   1087                       # This is intended to be used as a label for the display data
   1088                       # when viewed in a dax monitoring system.
   1089                   "int64Value": "A String", # Contains value if the data is of int64 type.
   1090                   "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1091                 },
   1092               ],
    1093               "outputCollectionName": [ # User names for all collection outputs to this transform.
   1094                 "A String",
   1095               ],
   1096               "id": "A String", # SDK generated id of this transform instance.
   1097             },
   1098           ],
   1099           "displayData": [ # Pipeline level display data.
   1100             { # Data provided with a pipeline or transform to provide descriptive info.
   1101               "shortStrValue": "A String", # A possible additional shorter value to display.
    1102                   # For example, a java_class_name_value of com.mypackage.MyDoFn
    1103                   # will be stored with MyDoFn as the short_str_value and
    1104                   # com.mypackage.MyDoFn as the java_class_name_value.
    1105                   # The short_str_value can be displayed directly, while the
    1106                   # java_class_name_value is displayed as a tooltip.
   1107               "durationValue": "A String", # Contains value if the data is of duration type.
   1108               "url": "A String", # An optional full URL.
   1109               "floatValue": 3.14, # Contains value if the data is of float type.
   1110               "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    1111                   # language namespace (e.g., a Python module) that defines the display data.
   1112                   # This allows a dax monitoring system to specially handle the data
   1113                   # and perform custom rendering.
   1114               "javaClassValue": "A String", # Contains value if the data is of java class type.
   1115               "label": "A String", # An optional label to display in a dax UI for the element.
   1116               "boolValue": True or False, # Contains value if the data is of a boolean type.
   1117               "strValue": "A String", # Contains value if the data is of string type.
   1118               "key": "A String", # The key identifying the display data.
   1119                   # This is intended to be used as a label for the display data
   1120                   # when viewed in a dax monitoring system.
   1121               "int64Value": "A String", # Contains value if the data is of int64 type.
   1122               "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1123             },
   1124           ],
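
# Example (illustrative sketch, not part of the schema): a display data entry
# carries its value in the typed *Value field matching its type, as described
# above; this helper simply returns the first typed value present.
def display_data_value(entry):
    for field in ('strValue', 'shortStrValue', 'int64Value', 'floatValue',
                  'boolValue', 'timestampValue', 'durationValue',
                  'javaClassValue', 'url'):
        if field in entry:
            return entry[field]
    return None
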
   1125           "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   1126             { # Description of the composing transforms, names/ids, and input/outputs of a
   1127                 # stage of execution.  Some composing transforms and sources may have been
   1128                 # generated by the Dataflow service during execution planning.
   1129               "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   1130                 { # Description of an interstitial value between transforms in an execution
   1131                     # stage.
   1132                   "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1133                   "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1134                       # source is most closely associated.
   1135                   "name": "A String", # Dataflow service generated name for this source.
   1136                 },
   1137               ],
    1138               "kind": "A String", # Type of transform this stage is executing.
   1139               "name": "A String", # Dataflow service generated name for this stage.
   1140               "outputSource": [ # Output sources for this stage.
   1141                 { # Description of an input or output of an execution stage.
   1142                   "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1143                   "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1144                       # source is most closely associated.
   1145                   "name": "A String", # Dataflow service generated name for this source.
   1146                   "sizeBytes": "A String", # Size of the source, if measurable.
   1147                 },
   1148               ],
   1149               "inputSource": [ # Input sources for this stage.
   1150                 { # Description of an input or output of an execution stage.
   1151                   "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1152                   "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1153                       # source is most closely associated.
   1154                   "name": "A String", # Dataflow service generated name for this source.
   1155                   "sizeBytes": "A String", # Size of the source, if measurable.
   1156                 },
   1157               ],
   1158               "componentTransform": [ # Transforms that comprise this execution stage.
   1159                 { # Description of a transform executed as part of an execution stage.
   1160                   "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1161                   "originalTransform": "A String", # User name for the original user transform with which this transform is
   1162                       # most closely associated.
    1163                   "name": "A String", # Dataflow service generated name for this transform.
   1164                 },
   1165               ],
   1166               "id": "A String", # Dataflow service generated id for this stage.
   1167             },
   1168           ],
   1169         },
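
# Example (illustrative sketch, not part of the schema): walking the pipeline
# description of a job retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
# `job` is assumed to be the dictionary returned by this method.
def print_pipeline_description(job):
    description = job.get('pipelineDescription', {})
    for transform in description.get('originalPipelineTransform', []):
        print('transform', transform.get('id'), transform.get('kind'), transform.get('name'))
    for stage in description.get('executionPipelineStage', []):
        inputs = [src.get('userName') for src in stage.get('inputSource', [])]
        print('stage', stage.get('id'), stage.get('name'), 'inputs:', inputs)
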
   1170         "steps": [ # The top-level steps that constitute the entire job.
   1171           { # Defines a particular step within a Cloud Dataflow job.
   1172               #
   1173               # A job consists of multiple steps, each of which performs some
   1174               # specific operation as part of the overall job.  Data is typically
   1175               # passed from one step to another as part of the job.
   1176               #
   1177               # Here's an example of a sequence of steps which together implement a
   1178               # Map-Reduce job:
   1179               #
   1180               #   * Read a collection of data from some source, parsing the
   1181               #     collection's elements.
   1182               #
   1183               #   * Validate the elements.
   1184               #
   1185               #   * Apply a user-defined function to map each element to some value
   1186               #     and extract an element-specific key value.
   1187               #
   1188               #   * Group elements with the same key into a single element with
   1189               #     that key, transforming a multiply-keyed collection into a
   1190               #     uniquely-keyed collection.
   1191               #
   1192               #   * Write the elements out to some data sink.
   1193               #
   1194               # Note that the Cloud Dataflow service may be used to run many different
   1195               # types of jobs, not just Map-Reduce.
   1196             "kind": "A String", # The kind of step in the Cloud Dataflow job.
   1197             "properties": { # Named properties associated with the step. Each kind of
   1198                 # predefined step has its own required set of properties.
   1199                 # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   1200               "a_key": "", # Properties of the object.
   1201             },
   1202             "name": "A String", # The name that identifies the step. This must be unique for each
   1203                 # step with respect to all other steps in the Cloud Dataflow job.
   1204           },
   1205         ],
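
# Example (illustrative sketch, not part of the schema): listing the top-level
# steps of a returned job.  Step properties are only returned with JOB_VIEW_ALL.
def print_steps(job):
    for step in job.get('steps', []):
        print(step.get('name'), step.get('kind'), sorted(step.get('properties', {})))
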
   1206         "currentStateTime": "A String", # The timestamp associated with the current state.
   1207         "tempFiles": [ # A set of files the system should be aware of that are used
   1208             # for temporary storage. These temporary files will be
   1209             # removed on job completion.
   1210             # No duplicates are allowed.
   1211             # No file patterns are supported.
   1212             #
   1213             # The supported files are:
   1214             #
   1215             # Google Cloud Storage:
   1216             #
   1217             #    storage.googleapis.com/{bucket}/{object}
   1218             #    bucket.storage.googleapis.com/{object}
   1219           "A String",
   1220         ],
   1221         "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   1222             # callers cannot mutate it.
   1223           { # A message describing the state of a particular execution stage.
   1224             "executionStageName": "A String", # The name of the execution stage.
    1225             "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   1226             "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   1227           },
   1228         ],
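
# Example (illustrative sketch, not part of the schema): summarizing the
# per-stage execution states reported on a returned job.
def print_stage_states(job):
    for state in job.get('stageStates', []):
        print(state.get('executionStageName'),
              state.get('executionStageState'),
              state.get('currentStateTime'))
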
   1229         "type": "A String", # The type of Cloud Dataflow job.
   1230         "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   1231             # Cloud Dataflow service.
   1232         "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   1233             # of the job it replaced.
   1234             #
   1235             # When sending a `CreateJobRequest`, you can update a job by specifying it
   1236             # here. The job named here is stopped, and its intermediate state is
   1237             # transferred to this job.
    1238         "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed # Deprecated.
    1239             # beyond what is contained in the submitted job itself.
   1240           "stages": { # A mapping from each stage to the information about that stage.
   1241             "a_key": { # Contains information about how a particular
   1242                 # google.dataflow.v1beta3.Step will be executed.
   1243               "stepName": [ # The steps associated with the execution stage.
   1244                   # Note that stages may have several steps, and that a given step
   1245                   # might be run by more than one stage.
   1246                 "A String",
   1247               ],
   1248             },
   1249           },
   1250         },
   1251       },
   1252   }
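
# Example (illustrative sketch): creating a job from a template with this method
# and reading a few fields of the returned Job.  The bucket, template path, job
# name, and zone below are placeholders, not values documented by the API; the
# request-body fields follow the schema shown earlier in this document.
from googleapiclient.discovery import build

def create_job_from_template(project_id):
    dataflow = build('dataflow', 'v1b3')
    body = {
        'jobName': 'example-template-job',                             # placeholder job name
        'gcsPath': 'gs://example-bucket/templates/example-template',   # placeholder template path
        'parameters': {},                                              # template-specific parameters, if any
        'environment': {'zone': 'us-central1-f'},                      # optional runtime environment overrides
    }
    job = dataflow.projects().templates().create(projectId=project_id, body=body).execute()
    print(job.get('type'), job.get('createTime'), job.get('currentStateTime'))
    return job
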
1253
1254
1255