<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.templates.html">templates</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="#create">create(projectId, location, body, x__xgafv=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Gets the template metadata associated with a template stored in Cloud Storage.</p>
<p class="toc_element">
  <code><a href="#launch">launch(projectId, location, body, gcsPath=None, validateOnly=None, x__xgafv=None)</a></code></p>
<p class="firstline">Launches a template.</p>
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="create">create(projectId, location, body, x__xgafv=None)</code>
  <pre>Creates a Cloud Dataflow job from a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The location to which to direct the request. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # A request to create a Cloud Dataflow job from a template.
    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
          # template if not specified.
      "zone": "A String", # The Compute Engine [availability
          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
          # for launching worker instances to run your pipeline.
      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
          # Use with caution.
      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
          # Must be a valid Cloud Storage URL, beginning with `gs://`.
      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
          # available to your pipeline during execution, from 1 to 1000.
    },
    "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
        # create the job.
        # Must be a valid Cloud Storage URL, beginning with `gs://`.
    "location": "A String", # The location to which to direct the request.
    "parameters": { # The runtime parameters to pass to the job.
      "a_key": "A String",
    },
    "jobName": "A String", # Required. The job name to use for the created job.
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "location": "A String", # The location that contains this job.
      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
      "currentState": "A String", # The current state of the job.
          #
          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
          # specified.
          #
          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
          # terminal state. After a job has reached a terminal state, no
          # further state updates may be made.
          #
          # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries.  Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be <= 128 bytes in
          # size.
        "a_key": "A String",
      },
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "id": "A String", # The unique ID of this job.
          #
          # This field is set by the Cloud Dataflow service when the Job is
          # created, and is immutable for the life of the job.
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
            # this resource prefix, where {JOBNAME} is the value of the
            # job_name field.  The resulting bucket and object prefix is used
            # as the prefix of the resources used to store temporary data
            # needed during the job execution.  NOTE: This will override the
            # value in taskrunner_settings.
            # The supported resource type is:
            #
            # Google Cloud Storage:
            #
            #   storage.googleapis.com/{bucket}/{object}
            #   bucket.storage.googleapis.com/{object}
        "internalExperiments": { # Experimental settings.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "dataset": "A String", # The dataset for the current project where various workflow
            # related tables are stored.
            #
            # The supported resource type is:
            #
            # Google BigQuery:
            #   bigquery.googleapis.com/{dataset}
        "experiments": [ # The list of experiments to enable.
          "A String",
        ],
        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
            # options are passed through the service and are used to recreate the
            # SDK pipeline options on the worker in a language agnostic and platform
            # independent way.
          "a_key": "", # Properties of the object.
        },
        "userAgent": { # A description of the process that generated the request.
          "a_key": "", # Properties of the object.
        },
        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
            # unspecified, the service will attempt to choose a reasonable
            # default.  This should be in the form of the API service name,
            # e.g. "compute.googleapis.com".
        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
            # specified in order for the job to have workers.
          { # Describes one particular pool of Cloud Dataflow workers to be
              # instantiated by the Cloud Dataflow service in order to perform the
              # computations required by a job.  Note that a workflow job may use
              # multiple pools, in order to match the various computational
              # requirements of the various stages of the job.
            "diskSourceImage": "A String", # Fully qualified source image for disks.
            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
                # using the standard Dataflow task runner.  Users should ignore
                # this field.
              "workflowFileName": "A String", # The file to store the workflow in.
              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
                  # will not be uploaded.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                    # "shuffle/v1beta1".
                "workerId": "A String", # The ID of the worker running this pipeline.
                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                    #
                    # When workers access Google Cloud APIs, they logically do so via
                    # relative URLs.  If this field is specified, it supplies the base
                    # URL to use for resolving these relative URLs.  The normative
                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                    # Locators".
                    #
                    # If not specified, the default value is "http://www.googleapis.com/"
                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                    # "dataflow/v1b3/projects".
                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                    # storage.
                    #
                    # The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    #   storage.googleapis.com/{bucket}/{object}
                    #   bucket.storage.googleapis.com/{object}
              },
              "vmId": "A String", # The ID string of the VM.
              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                  # access the Cloud Dataflow API.
                "A String",
              ],
              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "root".
              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs.  If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs.  The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "wheel".
              "languageHint": "A String", # The suggested backend language.
              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                  # console.
              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
              "logDir": "A String", # The directory on the VM to store logs.
              "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
              "harnessCommand": "A String", # The command to launch the worker harness.
              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                  # temporary storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
            },
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                # are supported.
            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
                # service will attempt to choose a reasonable default.
            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
                # the service will use the network "default".
            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
                # will attempt to choose a reasonable default.
            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "dataDisks": [ # Data disks that are used by a VM in this workflow.
              { # Describes the data disk used by a workflow job.
                "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
                    # must be a disk type appropriate to the project and zone in which
                    # the workers will run.  If unknown or unspecified, the service
                    # will attempt to choose a reasonable default.
                    #
                    # For example, the standard persistent disk type is a resource name
                    # typically ending in "pd-standard".  If SSD persistent disks are
                    # available, the resource name typically ends with "pd-ssd".  The
                    # actual valid values are defined by the Google Compute Engine API,
                    # not by the Cloud Dataflow API; consult the Google Compute Engine
                    # documentation for more information about determining the set of
                    # available disk types for a particular project and zone.
                    #
                    # Google Compute Engine Disk types are local to a particular
                    # project in a particular zone, and so the resource name will
                    # typically look something like this:
                    #
                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
              },
            ],
            "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                # `TEARDOWN_NEVER`.
                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                # down.
                #
                # If the workers are not torn down by the service, they will
                # continue to run and use Google Compute Engine VM resources in the
                # user's project until they are explicitly terminated by the user.
                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                # policy except for small, manually supervised test jobs.
                #
                # If unknown or unspecified, the service will attempt to choose a reasonable
                # default.
            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                # Compute Engine API.
            "ipConfiguration": "A String", # Configuration for VM IPs.
            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                # service will choose a number of threads (according to the number of cores
                # on the selected machine type for batch, or 1 by convention for streaming).
            "poolArgs": { # Extra arguments for this worker pool.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                # execute the job.  If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                # harness, residing in Google Container Registry.
            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
                # the form "regions/REGION/subnetworks/SUBNETWORK".
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
                  # steps of the Cloud Dataflow job that will be assigned to its worker
                  # pool.
                  #
                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                  # might use this to install jars containing the user's code and all of the
                  # various dependencies (libraries, data files, etc.) required in order
                  # for that code to run.
                "location": "A String", # The resource to read the package from. The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    #   storage.googleapis.com/{bucket}
                    #   bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
              "algorithm": "A String", # The algorithm to use for autoscaling.
            },
            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
                # select a default set of packages which are useful to worker
                # harnesses written in a particular language.
            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
                # attempt to choose a reasonable default.
            "metadata": { # Metadata to set on the Google Compute Engine VMs.
              "a_key": "A String",
            },
          },
        ],
      },
      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
          # A description of the user pipeline and stages through which it is executed.
          # Created by Cloud Dataflow service.  Only retrieved with
          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
          # form.  This data is provided by the Dataflow service for ease of visualizing
          # the pipeline and interpreting Dataflow provided metrics.
        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
          { # Description of the type, names/ids, and input/outputs for a transform.
            "kind": "A String", # Type of transform.
            "name": "A String", # User provided name for this transform instance.
            "inputCollectionName": [ # User names for all collection inputs to this transform.
              "A String",
            ],
            "displayData": [ # Transform-specific display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (i.e. python module) which defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "int64Value": "A String", # Contains value if the data is of int64 type.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
              },
            ],
            "outputCollectionName": [ # User names for all collection outputs to this transform.
              "A String",
            ],
            "id": "A String", # SDK generated id of this transform instance.
          },
        ],
        "displayData": [ # Pipeline level display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (i.e. python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
          },
        ],
        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
          { # Description of the composing transforms, names/ids, and input/outputs of a
              # stage of execution.  Some composing transforms and sources may have been
              # generated by the Dataflow service during execution planning.
            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
              { # Description of an interstitial value between transforms in an execution
                  # stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "kind": "A String", # Type of transform this stage is executing.
            "name": "A String", # Dataflow service generated name for this stage.
            "outputSource": [ # Output sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
                "sizeBytes": "A String", # Size of the source, if measurable.
              },
            ],
            "inputSource": [ # Input sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
                "sizeBytes": "A String", # Size of the source, if measurable.
              },
            ],
            "componentTransform": [ # Transforms that comprise this execution stage.
              { # Description of a transform executed as part of an execution stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransform": "A String", # User name for the original user transform with which this transform is
                    # most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "id": "A String", # Dataflow service generated id for this stage.
          },
        ],
      },
      "steps": [ # The top-level steps that constitute the entire job.
        { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
            # specific operation as part of the overall job.  Data is typically
            # passed from one step to another as part of the job.
            #
            # Here's an example of a sequence of steps which together implement a
            # Map-Reduce job:
            #
            #   * Read a collection of data from some source, parsing the
            #     collection's elements.
            #
            #   * Validate the elements.
            #
            #   * Apply a user-defined function to map each element to some value
            #     and extract an element-specific key value.
            #
            #   * Group elements with the same key into a single element with
            #     that key, transforming a multiply-keyed collection into a
            #     uniquely-keyed collection.
            #
            #   * Write the elements out to some data sink.
            #
            # Note that the Cloud Dataflow service may be used to run many different
            # types of jobs, not just Map-Reduce.
          "kind": "A String", # The kind of step in the Cloud Dataflow job.
          "properties": { # Named properties associated with the step. Each kind of
              # predefined step has its own required set of properties.
              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
            "a_key": "", # Properties of the object.
          },
          "name": "A String", # The name that identifies the step. This must be unique for each
              # step with respect to all other steps in the Cloud Dataflow job.
        },
      ],
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "tempFiles": [ # A set of files the system should be aware of that are used
          # for temporary storage. These temporary files will be
          # removed on job completion.
          # No duplicates are allowed.
          # No file patterns are supported.
          #
          # The supported files are:
          #
          # Google Cloud Storage:
          #
          #    storage.googleapis.com/{bucket}/{object}
          #    bucket.storage.googleapis.com/{object}
        "A String",
      ],
      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
        { # A message describing the state of a particular execution stage.
          "executionStageName": "A String", # The name of the execution stage.
          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
        },
      ],
      "type": "A String", # The type of Cloud Dataflow job.
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
          # of the job it replaced.
          #
          # When sending a `CreateJobRequest`, you can update a job by specifying it
          # here. The job named here is stopped, and its intermediate state is
          # transferred to this job.
      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
          # isn't contained in the submitted job.
        "stages": { # A mapping from each stage to the information about that stage.
          "a_key": { # Contains information about how a particular
              # google.dataflow.v1beta3.Step will be executed.
            "stepName": [ # The steps associated with the execution stage.
                # Note that stages may have several steps, and that a given step
                # might be run by more than one stage.
              "A String",
            ],
          },
        },
      },
    }</pre>
</div>
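<p>For orientation, the sketch below shows one way to call this method from Python with the
google-api-python-client library. It is a minimal sketch, not a definitive recipe: the project
ID, region, bucket, template path, and the "inputFile" runtime parameter are all hypothetical
placeholders, and building the client assumes Application Default Credentials are available in
the environment.</p>
<pre>
# A minimal sketch of projects.locations.templates.create().
# my-project, us-central1, my-bucket, the template path, and the
# "inputFile" parameter name are hypothetical placeholders.
from googleapiclient.discovery import build

# Assumes Application Default Credentials are configured.
service = build('dataflow', 'v1b3')

body = {
    'jobName': 'example-template-job',                        # Required.
    'gcsPath': 'gs://my-bucket/templates/my-template',        # Required.
    'parameters': {'inputFile': 'gs://my-bucket/input.txt'},  # Template-specific.
    'environment': {
        'tempLocation': 'gs://my-bucket/temp',
        'maxWorkers': 3,
    },
}

response = service.projects().locations().templates().create(
    projectId='my-project',
    location='us-central1',
    body=body).execute()

# The response is a Job resource as described in the schema above.
print(response['id'], response.get('currentState'))
</pre>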

<div class="method">
    <code class="details" id="get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</code>
  <pre>Gets the template metadata associated with a template stored in Cloud Storage.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The location to which to direct the request. (required)
  gcsPath: string, Required. A Cloud Storage path to the template from which to
create the job.
Must be a valid Cloud Storage URL, beginning with `gs://`.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  view: string, The view to retrieve. Defaults to METADATA_ONLY.

Returns:
  An object of the form:

    { # The response to a GetTemplate request.
    "status": { # The `Status` type defines a logical error model that is suitable for different # The status of the get template request. Any problems with the
        # request will be indicated in the error_details.
        # programming environments, including REST APIs and RPC APIs. It is used by
        # [gRPC](https://github.com/grpc). The error model is designed to be:
        #
        # - Simple to use and understand for most users
        # - Flexible enough to meet unexpected needs
        #
        # # Overview
        #
        # The `Status` message contains three pieces of data: error code, error message,
        # and error details. The error code should be an enum value of
        # google.rpc.Code, but it may accept additional error codes if needed.  The
        # error message should be a developer-facing English message that helps
        # developers *understand* and *resolve* the error. If a localized user-facing
        # error message is needed, put the localized message in the error details or
        # localize it in the client. The optional error details may contain arbitrary
        # information about the error. There is a predefined set of error detail types
        # in the package `google.rpc` that can be used for common error conditions.
        #
        # # Language mapping
        #
        # The `Status` message is the logical representation of the error model, but it
        # is not necessarily the actual wire format. When the `Status` message is
        # exposed in different client libraries and different wire protocols, it can be
        # mapped differently. For example, it will likely be mapped to some exceptions
        # in Java, but more likely mapped to some error codes in C.
        #
        # # Other uses
        #
        # The error model and the `Status` message can be used in a variety of
        # environments, either with or without APIs, to provide a
        # consistent developer experience across different environments.
        #
        # Example uses of this error model include:
        #
        # - Partial errors. If a service needs to return partial errors to the client,
        #     it may embed the `Status` in the normal response to indicate the partial
        #     errors.
        #
        # - Workflow errors. A typical workflow has multiple steps. Each step may
        #     have a `Status` message for error reporting.
        #
        # - Batch operations. If a client uses batch request and batch response, the
        #     `Status` message should be used directly inside batch response, one for
        #     each error sub-response.
        #
        # - Asynchronous operations. If an API call embeds asynchronous operation
        #     results in its response, the status of those operations should be
        #     represented directly using the `Status` message.
        #
        # - Logging. If some API errors are stored in logs, the message `Status` could
        #     be used directly after any stripping needed for security/privacy reasons.
      "message": "A String", # A developer-facing error message, which should be in English. Any
          # user-facing error message should be localized and sent in the
          # google.rpc.Status.details field, or localized by the client.
      "code": 42, # The status code, which should be an enum value of google.rpc.Code.
      "details": [ # A list of messages that carry the error details.  There will be a
          # common set of message types for APIs to use.
        {
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
      ],
    },
    "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
        # parameters, etc.
      "name": "A String", # Required. The name of the template.
      "parameters": [ # The parameters for the template.
        { # Metadata for a specific parameter.
          "regexes": [ # Optional. Regexes that the parameter must match.
            "A String",
          ],
          "helpText": "A String", # Required. The help text to display for the parameter.
          "name": "A String", # Required. The name of the parameter.
          "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
          "label": "A String", # Required. The label to display for the parameter.
        },
      ],
      "description": "A String", # Optional. A description of the template.
    },
  }</pre>
</div>
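<p>The following hypothetical sketch shows how this method's response might be consumed from
Python. As before, the project ID, region, and gcsPath values are placeholders, and the client
build step assumes Application Default Credentials; the fields read at the end follow the
response schema above.</p>
<pre>
# A minimal sketch of projects.locations.templates.get().
# my-project, us-central1, and the gcsPath value are hypothetical placeholders.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')  # assumes Application Default Credentials

response = service.projects().locations().templates().get(
    projectId='my-project',
    location='us-central1',
    gcsPath='gs://my-bucket/templates/my-template',
    view='METADATA_ONLY').execute()

# On success, "metadata" describes the template's name and parameters.
for param in response.get('metadata', {}).get('parameters', []):
    print(param['name'], '(optional)' if param.get('isOptional') else '(required)')
</pre>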

<div class="method">
    <code class="details" id="launch">launch(projectId, location, body, gcsPath=None, validateOnly=None, x__xgafv=None)</code>
  <pre>Launches a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The location to which to direct the request. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Parameters to provide to the template being launched.
    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
          # template if not specified.
      "zone": "A String", # The Compute Engine [availability
          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
          # for launching worker instances to run your pipeline.
      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
          # Use with caution.
      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
          # Must be a valid Cloud Storage URL, beginning with `gs://`.
      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
          # available to your pipeline during execution, from 1 to 1000.
    },
    "parameters": { # The runtime parameters to pass to the job.
      "a_key": "A String",
    },
    "jobName": "A String", # Required. The job name to use for the created job.
  }

  gcsPath: string, Required. A Cloud Storage path to the template from which to create
the job.
Must be a valid Cloud Storage URL, beginning with `gs://`.
  validateOnly: boolean, If true, the request is validated but not actually executed.
Defaults to false.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Response to the request to launch a template.
    "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
        # the job was successfully launched.
        "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
            # If this field is set, the service will ensure its uniqueness.
            # The request to create a job will fail if the service has knowledge of a
            # previously submitted job with the same client's ID and job name.
            # The caller may use this field to ensure idempotence of job
            # creation across retried attempts to create a job.
            # By default, the field is empty and, in that case, the service ignores it.
        "requestedState": "A String", # The job's requested state.
            #
            # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
            # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
            # also be used to directly set a job's requested state to
            # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
            # job if it has not already reached a terminal state.
        "name": "A String", # The user-specified Cloud Dataflow job name.
            #
            # Only one Job with a given name may exist in a project at any
            # given time. If a caller attempts to create a Job with the same
            # name as an already-existing Job, the attempt returns the
            # existing Job.
            #
            # The name must match the regular expression
            # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
        "location": "A String", # The location that contains this job.
        "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
            # `JOB_STATE_UPDATED`), this field contains the ID of that job.
        "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
        "currentState": "A String", # The current state of the job.
            #
            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
            # specified.
            #
            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
            # terminal state. After a job has reached a terminal state, no
            # further state updates may be made.
            #
            # This field may be mutated by the Cloud Dataflow service;
            # callers cannot mutate it.
        "labels": { # User-defined labels for this job.
            #
            # The labels map can contain no more than 64 entries.  Entries of the labels
            # map are UTF8 strings that comply with the following restrictions:
            #
            # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
            # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
            # * Both keys and values are additionally constrained to be <= 128 bytes in
            # size.
          "a_key": "A String",
        },
        "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
            # corresponding name prefixes of the new job.
          "a_key": "A String",
        },
        "id": "A String", # The unique ID of this job.
            #
            # This field is set by the Cloud Dataflow service when the Job is
            # created, and is immutable for the life of the job.
        "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
          "version": { # A structure describing which components and their versions of the service
              # are required in order to run the job.
            "a_key": "", # Properties of the object.
          },
          "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
              # storage.  The system will append the suffix "/temp-{JOBNAME}" to
    832               # this resource prefix, where {JOBNAME} is the value of the
    833               # job_name field.  The resulting bucket and object prefix is used
    834               # as the prefix of the resources used to store temporary data
    835               # needed during the job execution.  NOTE: This will override the
    836               # value in taskrunner_settings.
    837               # The supported resource type is:
    838               #
    839               # Google Cloud Storage:
    840               #
    841               #   storage.googleapis.com/{bucket}/{object}
    842               #   bucket.storage.googleapis.com/{object}
    843           "internalExperiments": { # Experimental settings.
    844             "a_key": "", # Properties of the object. Contains field @type with type URL.
    845           },
    846           "dataset": "A String", # The dataset for the current project where various workflow
    847               # related tables are stored.
    848               #
    849               # The supported resource type is:
    850               #
    851               # Google BigQuery:
    852               #   bigquery.googleapis.com/{dataset}
    853           "experiments": [ # The list of experiments to enable.
    854             "A String",
    855           ],
    856           "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    857           "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
    858               # options are passed through the service and are used to recreate the
    859               # SDK pipeline options on the worker in a language agnostic and platform
    860               # independent way.
    861             "a_key": "", # Properties of the object.
    862           },
    863           "userAgent": { # A description of the process that generated the request.
    864             "a_key": "", # Properties of the object.
    865           },
    866           "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
    867               # unspecified, the service will attempt to choose a reasonable
    868               # default.  This should be in the form of the API service name,
    869               # e.g. "compute.googleapis.com".
    870           "workerPools": [ # The worker pools. At least one "harness" worker pool must be
    871               # specified in order for the job to have workers.
    872             { # Describes one particular pool of Cloud Dataflow workers to be
    873                 # instantiated by the Cloud Dataflow service in order to perform the
    874                 # computations required by a job.  Note that a workflow job may use
    875                 # multiple pools, in order to match the various computational
    876                 # requirements of the various stages of the job.
    877               "diskSourceImage": "A String", # Fully qualified source image for disks.
    878               "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
    879                   # using the standard Dataflow task runner.  Users should ignore
    880                   # this field.
    881                 "workflowFileName": "A String", # The file to store the workflow in.
    882                 "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
    883                     # will not be uploaded.
    884                     #
    885                     # The supported resource type is:
    886                     #
    887                     # Google Cloud Storage:
    888                     #   storage.googleapis.com/{bucket}/{object}
    889                     #   bucket.storage.googleapis.com/{object}
    890                 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
    891                 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
    892                   "reportingEnabled": True or False, # Whether to send work progress updates to the service.
    893                   "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
    894                       # "shuffle/v1beta1".
    895                   "workerId": "A String", # The ID of the worker running this pipeline.
    896                   "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
    897                       #
    898                       # When workers access Google Cloud APIs, they logically do so via
    899                       # relative URLs.  If this field is specified, it supplies the base
    900                       # URL to use for resolving these relative URLs.  The normative
    901                       # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    902                       # Locators".
    903                       #
    904                       # If not specified, the default value is "http://www.googleapis.com/"
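                              #
                              # Illustrative example (hypothetical values, not from this
                              # document): resolving the relative path
                              # "dataflow/v1b3/projects" against the default base yields
                              # "http://www.googleapis.com/dataflow/v1b3/projects".
                              # In Python, this resolution can be reproduced with
                              # urllib.parse.urljoin("http://www.googleapis.com/", "dataflow/v1b3/projects").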
    905                   "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
    906                       # "dataflow/v1b3/projects".
    907                   "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    908                       # storage.
    909                       #
    910                       # The supported resource type is:
    911                       #
    912                       # Google Cloud Storage:
    913                       #
    914                       #   storage.googleapis.com/{bucket}/{object}
    915                       #   bucket.storage.googleapis.com/{object}
    916                 },
    917                 "vmId": "A String", # The ID string of the VM.
    918                 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
    919                 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
    920                 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
    921                     # access the Cloud Dataflow API.
    922                   "A String",
    923                 ],
    924                 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
    925                     # taskrunner; e.g. "root".
    926                 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
    927                     #
    928                     # When workers access Google Cloud APIs, they logically do so via
    929                     # relative URLs.  If this field is specified, it supplies the base
    930                     # URL to use for resolving these relative URLs.  The normative
    931                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    932                     # Locators".
    933                     #
    934                     # If not specified, the default value is "http://www.googleapis.com/"
    935                 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
    936                     # taskrunner; e.g. "wheel".
    937                 "languageHint": "A String", # The suggested backend language.
    938                 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
    939                     # console.
    940                 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
    941                 "logDir": "A String", # The directory on the VM to store logs.
    942                 "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
    943                 "harnessCommand": "A String", # The command to launch the worker harness.
    944                 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
    945                     # temporary storage.
    946                     #
    947                     # The supported resource type is:
    948                     #
    949                     # Google Cloud Storage:
    950                     #   storage.googleapis.com/{bucket}/{object}
    951                     #   bucket.storage.googleapis.com/{object}
    952                 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
    953               },
    954               "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
    955                   # are supported.
    956               "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
    957                   # service will attempt to choose a reasonable default.
    958               "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
    959                   # the service will use the network "default".
    960               "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
    961                   # will attempt to choose a reasonable default.
    962               "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
    963                   # attempt to choose a reasonable default.
    964               "dataDisks": [ # Data disks that are used by a VM in this workflow.
    965                 { # Describes the data disk used by a workflow job.
    966                   "mountPoint": "A String", # Directory in a VM where disk is mounted.
    967                   "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
    968                       # attempt to choose a reasonable default.
    969                   "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
    970                       # must be a disk type appropriate to the project and zone in which
    971                       # the workers will run.  If unknown or unspecified, the service
    972                       # will attempt to choose a reasonable default.
    973                       #
    974                       # For example, the standard persistent disk type is a resource name
    975                       # typically ending in "pd-standard".  If SSD persistent disks are
    976                       # available, the resource name typically ends with "pd-ssd".  The
    977                       # actual valid values are defined by the Google Compute Engine API,
    978                       # not by the Cloud Dataflow API; consult the Google Compute Engine
    979                       # documentation for more information about determining the set of
    980                       # available disk types for a particular project and zone.
    981                       #
    982                       # Google Compute Engine Disk types are local to a particular
    983                       # project in a particular zone, and so the resource name will
    984                       # typically look something like this:
    985                       #
    986                       # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
    987                 },
    988               ],
    989               "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
    990                   # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
    991                   # `TEARDOWN_NEVER`.
    992                   # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
    993                   # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
    994                   # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
    995                   # down.
    996                   #
    997                   # If the workers are not torn down by the service, they will
    998                   # continue to run and use Google Compute Engine VM resources in the
    999                   # user's project until they are explicitly terminated by the user.
   1000                   # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   1001                   # policy except for small, manually supervised test jobs.
   1002                   #
   1003                   # If unknown or unspecified, the service will attempt to choose a reasonable
   1004                   # default.
   1005               "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   1006                   # Compute Engine API.
   1007               "ipConfiguration": "A String", # Configuration for VM IPs.
   1008               "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   1009                   # service will choose a number of threads (according to the number of cores
   1010                   # on the selected machine type for batch, or 1 by convention for streaming).
   1011               "poolArgs": { # Extra arguments for this worker pool.
   1012                 "a_key": "", # Properties of the object. Contains field @type with type URL.
   1013               },
   1014               "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   1015                   # execute the job.  If zero or unspecified, the service will
   1016                   # attempt to choose a reasonable default.
   1017               "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   1018                   # harness, residing in Google Container Registry.
   1019               "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   1020                   # the form "regions/REGION/subnetworks/SUBNETWORK".
   1021               "packages": [ # Packages to be installed on workers.
   1022                 { # The packages that must be installed in order for a worker to run the
   1023                     # steps of the Cloud Dataflow job that will be assigned to its worker
   1024                     # pool.
   1025                     #
   1026                     # This is the mechanism by which the Cloud Dataflow SDK causes code to
   1027                     # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   1028                     # might use this to install jars containing the user's code and all of the
   1029                     # various dependencies (libraries, data files, etc.) required in order
   1030                     # for that code to run.
   1031                   "location": "A String", # The resource to read the package from. The supported resource type is:
   1032                       #
   1033                       # Google Cloud Storage:
   1034                       #
   1035                       #   storage.googleapis.com/{bucket}
   1036                       #   bucket.storage.googleapis.com/
   1037                   "name": "A String", # The name of the package.
   1038                 },
   1039               ],
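                      # Illustrative only (hypothetical names, not values from this
                      # document): a Java pipeline's package list might contain an
                      # entry such as
                      #   { "name": "my-pipeline-bundled.jar",
                      #     "location": "storage.googleapis.com/example-bucket/my-pipeline-bundled.jar" }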
   1040               "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   1041                 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   1042                 "algorithm": "A String", # The algorithm to use for autoscaling.
   1043               },
   1044               "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   1045                   # select a default set of packages which are useful to worker
   1046                   # harnesses written in a particular language.
   1047               "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   1048                   # attempt to choose a reasonable default.
   1049               "metadata": { # Metadata to set on the Google Compute Engine VMs.
   1050                 "a_key": "A String",
   1051               },
   1052             },
   1053           ],
   1054         },
   1055         "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
   1056             # form.  This data is provided by the Dataflow service for ease of visualizing
   1057             # the pipeline and interpreting Dataflow provided metrics.
   1058             # A description of the user pipeline and stages through which it is executed.
   1059             # Created by Cloud Dataflow service.  Only retrieved with
   1060             # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   1061           "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   1062             { # Description of the type, names/ids, and input/outputs for a transform.
   1063               "kind": "A String", # Type of transform.
   1064               "name": "A String", # User provided name for this transform instance.
   1065               "inputCollectionName": [ # User names for all collection inputs to this transform.
   1066                 "A String",
   1067               ],
   1068               "displayData": [ # Transform-specific display data.
   1069                 { # Data provided with a pipeline or transform to provide descriptive info.
   1070                   "shortStrValue": "A String", # A possible additional shorter value to display.
   1071                       # For example, a java_class_name_value of com.mypackage.MyDoFn
   1072                       # will be stored with MyDoFn as the short_str_value and
   1073                       # com.mypackage.MyDoFn as the java_class_name value.
   1074                       # short_str_value can be displayed and java_class_name_value
   1075                       # will be displayed as a tooltip.
   1076                   "durationValue": "A String", # Contains value if the data is of duration type.
   1077                   "url": "A String", # An optional full URL.
   1078                   "floatValue": 3.14, # Contains value if the data is of float type.
   1079                   "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   1080                       # language namespace (e.g. a Python module) which defines the display data.
   1081                       # This allows a dax monitoring system to specially handle the data
   1082                       # and perform custom rendering.
   1083                   "javaClassValue": "A String", # Contains value if the data is of java class type.
   1084                   "label": "A String", # An optional label to display in a dax UI for the element.
   1085                   "boolValue": True or False, # Contains value if the data is of a boolean type.
   1086                   "strValue": "A String", # Contains value if the data is of string type.
   1087                   "key": "A String", # The key identifying the display data.
   1088                       # This is intended to be used as a label for the display data
   1089                       # when viewed in a dax monitoring system.
   1090                   "int64Value": "A String", # Contains value if the data is of int64 type.
   1091                   "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1092                 },
   1093               ],
   1094               "outputCollectionName": [ # User names for all collection outputs to this transform.
   1095                 "A String",
   1096               ],
   1097               "id": "A String", # SDK generated id of this transform instance.
   1098             },
   1099           ],
   1100           "displayData": [ # Pipeline level display data.
   1101             { # Data provided with a pipeline or transform to provide descriptive info.
   1102               "shortStrValue": "A String", # A possible additional shorter value to display.
   1103                   # For example, a java_class_name_value of com.mypackage.MyDoFn
   1104                   # will be stored with MyDoFn as the short_str_value and
   1105                   # com.mypackage.MyDoFn as the java_class_name value.
   1106                   # short_str_value can be displayed and java_class_name_value
   1107                   # will be displayed as a tooltip.
   1108               "durationValue": "A String", # Contains value if the data is of duration type.
   1109               "url": "A String", # An optional full URL.
   1110               "floatValue": 3.14, # Contains value if the data is of float type.
   1111               "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   1112                   # language namespace (e.g. a Python module) which defines the display data.
   1113                   # This allows a dax monitoring system to specially handle the data
   1114                   # and perform custom rendering.
   1115               "javaClassValue": "A String", # Contains value if the data is of java class type.
   1116               "label": "A String", # An optional label to display in a dax UI for the element.
   1117               "boolValue": True or False, # Contains value if the data is of a boolean type.
   1118               "strValue": "A String", # Contains value if the data is of string type.
   1119               "key": "A String", # The key identifying the display data.
   1120                   # This is intended to be used as a label for the display data
   1121                   # when viewed in a dax monitoring system.
   1122               "int64Value": "A String", # Contains value if the data is of int64 type.
   1123               "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1124             },
   1125           ],
   1126           "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   1127             { # Description of the composing transforms, names/ids, and input/outputs of a
   1128                 # stage of execution.  Some composing transforms and sources may have been
   1129                 # generated by the Dataflow service during execution planning.
   1130               "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   1131                 { # Description of an interstitial value between transforms in an execution
   1132                     # stage.
   1133                   "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1134                   "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1135                       # source is most closely associated.
   1136                   "name": "A String", # Dataflow service generated name for this source.
   1137                 },
   1138               ],
   1139               "kind": "A String", # Type of transform this stage is executing.
   1140               "name": "A String", # Dataflow service generated name for this stage.
   1141               "outputSource": [ # Output sources for this stage.
   1142                 { # Description of an input or output of an execution stage.
   1143                   "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1144                   "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1145                       # source is most closely associated.
   1146                   "name": "A String", # Dataflow service generated name for this source.
   1147                   "sizeBytes": "A String", # Size of the source, if measurable.
   1148                 },
   1149               ],
   1150               "inputSource": [ # Input sources for this stage.
   1151                 { # Description of an input or output of an execution stage.
   1152                   "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1153                   "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1154                       # source is most closely associated.
   1155                   "name": "A String", # Dataflow service generated name for this source.
   1156                   "sizeBytes": "A String", # Size of the source, if measurable.
   1157                 },
   1158               ],
   1159               "componentTransform": [ # Transforms that comprise this execution stage.
   1160                 { # Description of a transform executed as part of an execution stage.
   1161                   "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1162                   "originalTransform": "A String", # User name for the original user transform with which this transform is
   1163                       # most closely associated.
   1164                   "name": "A String", # Dataflow service generated name for this transform.
   1165                 },
   1166               ],
   1167               "id": "A String", # Dataflow service generated id for this stage.
   1168             },
   1169           ],
   1170         },
   1171         "steps": [ # The top-level steps that constitute the entire job.
   1172           { # Defines a particular step within a Cloud Dataflow job.
   1173               #
   1174               # A job consists of multiple steps, each of which performs some
   1175               # specific operation as part of the overall job.  Data is typically
   1176               # passed from one step to another as part of the job.
   1177               #
   1178               # Here's an example of a sequence of steps which together implement a
   1179               # Map-Reduce job:
   1180               #
   1181               #   * Read a collection of data from some source, parsing the
   1182               #     collection's elements.
   1183               #
   1184               #   * Validate the elements.
   1185               #
   1186               #   * Apply a user-defined function to map each element to some value
   1187               #     and extract an element-specific key value.
   1188               #
   1189               #   * Group elements with the same key into a single element with
   1190               #     that key, transforming a multiply-keyed collection into a
   1191               #     uniquely-keyed collection.
   1192               #
   1193               #   * Write the elements out to some data sink.
   1194               #
   1195               # Note that the Cloud Dataflow service may be used to run many different
   1196               # types of jobs, not just Map-Reduce.
   1197             "kind": "A String", # The kind of step in the Cloud Dataflow job.
   1198             "properties": { # Named properties associated with the step. Each kind of
   1199                 # predefined step has its own required set of properties.
   1200                 # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   1201               "a_key": "", # Properties of the object.
   1202             },
   1203             "name": "A String", # The name that identifies the step. This must be unique for each
   1204                 # step with respect to all other steps in the Cloud Dataflow job.
   1205           },
   1206         ],
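                # For illustration (the kind, name, and property values here are
                # hypothetical, not a documented step encoding), the "map" phase of
                # the Map-Reduce example above might surface as a step such as
                #   { "kind": "ParallelDo",
                #     "name": "s3",
                #     "properties": { "user_name": "MapToKeyedValues" } }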
   1207         "currentStateTime": "A String", # The timestamp associated with the current state.
   1208         "tempFiles": [ # A set of files the system should be aware of that are used
   1209             # for temporary storage. These temporary files will be
   1210             # removed on job completion.
   1211             # No duplicates are allowed.
   1212             # No file patterns are supported.
   1213             #
   1214             # The supported files are:
   1215             #
   1216             # Google Cloud Storage:
   1217             #
   1218             #    storage.googleapis.com/{bucket}/{object}
   1219             #    bucket.storage.googleapis.com/{object}
   1220           "A String",
   1221         ],
   1222         "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   1223             # callers cannot mutate it.
   1224           { # A message describing the state of a particular execution stage.
   1225             "executionStageName": "A String", # The name of the execution stage.
   1226             "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   1227             "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   1228           },
   1229         ],
   1230         "type": "A String", # The type of Cloud Dataflow job.
   1231         "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   1232             # Cloud Dataflow service.
   1233         "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   1234             # of the job it replaced.
   1235             #
   1236             # When sending a `CreateJobRequest`, you can update a job by specifying it
   1237             # here. The job named here is stopped, and its intermediate state is
   1238             # transferred to this job.
   1239         "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
   1240             # isn't contained in the submitted job.
   1241           "stages": { # A mapping from each stage to the information about that stage.
   1242             "a_key": { # Contains information about how a particular
   1243                 # google.dataflow.v1beta3.Step will be executed.
   1244               "stepName": [ # The steps associated with the execution stage.
   1245                   # Note that stages may have several steps, and that a given step
   1246                   # might be run by more than one stage.
   1247                 "A String",
   1248               ],
   1249             },
   1250           },
   1251         },
   1252       },
   1253   }
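
            The following is a minimal, non-authoritative sketch of issuing a
            launch request that produces a response of this form. The project ID,
            region, bucket, template path, and parameter names are placeholders,
            not values taken from this reference.

              from googleapiclient.discovery import build

              # Build a Dataflow API client; credentials are resolved through the
              # google-api-python-client defaults.
              service = build('dataflow', 'v1b3')

              # The request body carries the job name, template parameters, and an
              # optional runtime environment.
              body = {
                  'jobName': 'example-wordcount',
                  'parameters': {'inputFile': 'gs://example-bucket/input.txt'},
                  'environment': {'zone': 'us-central1-f'},
              }

              response = service.projects().locations().templates().launch(
                  projectId='example-project',
                  location='us-central1',
                  gcsPath='gs://example-bucket/templates/wordcount',
                  body=body,
              ).execute()

              # The response matches the schema above; "job" may be absent, for
              # example when validateOnly is set.
              job = response.get('job', {})
              print(job.get('createTime'), job.get('type'))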
   1254 </pre>
   1255 </div>
   1256 </body></html>