      1 <html><body>
      2 <style>
      3 
      4 body, h1, h2, h3, div, span, p, pre, a {
      5   margin: 0;
      6   padding: 0;
      7   border: 0;
      8   font-weight: inherit;
      9   font-style: inherit;
     10   font-size: 100%;
     11   font-family: inherit;
     12   vertical-align: baseline;
     13 }
     14 
     15 body {
     16   font-size: 13px;
     17   padding: 1em;
     18 }
     19 
     20 h1 {
     21   font-size: 26px;
     22   margin-bottom: 1em;
     23 }
     24 
     25 h2 {
     26   font-size: 24px;
     27   margin-bottom: 1em;
     28 }
     29 
     30 h3 {
     31   font-size: 20px;
     32   margin-bottom: 1em;
     33   margin-top: 1em;
     34 }
     35 
     36 pre, code {
     37   line-height: 1.5;
     38   font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
     39 }
     40 
     41 pre {
     42   margin-top: 0.5em;
     43 }
     44 
     45 h1, h2, h3, p {
     46   font-family: Arial, sans-serif;
     47 }
     48 
     49 h1, h2, h3 {
     50   border-bottom: solid #CCC 1px;
     51 }
     52 
     53 .toc_element {
     54   margin-top: 0.5em;
     55 }
     56 
     57 .firstline {
     58   margin-left: 2em;
     59 }
     60 
     61 .method  {
     62   margin-top: 1em;
     63   border: solid 1px #CCC;
     64   padding: 1em;
     65   background: #EEE;
     66 }
     67 
     68 .details {
     69   font-weight: bold;
     70   font-size: 14px;
     71 }
     72 
     73 </style>
     74 
     75 <h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>
     76 <h2>Instance Methods</h2>
     77 <p class="toc_element">
     78   <code><a href="dataflow_v1b3.projects.locations.jobs.debug.html">debug()</a></code>
     79 </p>
     80 <p class="firstline">Returns the debug Resource.</p>
     81 
     82 <p class="toc_element">
     83   <code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>
     84 </p>
     85 <p class="firstline">Returns the messages Resource.</p>
     86 
     87 <p class="toc_element">
     88   <code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>
     89 </p>
     90 <p class="firstline">Returns the workItems Resource.</p>
     91 
     92 <p class="toc_element">
     93   <code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
     94 <p class="firstline">Creates a Cloud Dataflow job.</p>
     95 <p class="toc_element">
     96   <code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>
     97 <p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
     98 <p class="toc_element">
     99   <code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>
    100 <p class="firstline">Request the job status.</p>
    101 <p class="toc_element">
    102   <code><a href="#list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</a></code></p>
    103 <p class="firstline">List the jobs of a project.</p>
    104 <p class="toc_element">
    105   <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
    106 <p class="firstline">Retrieves the next page of results.</p>
    107 <p class="toc_element">
    108   <code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
    109 <p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
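<p>The example below is a minimal usage sketch, not part of the generated reference: it assumes the google-api-python-client library with application default credentials, and uses placeholder project, location, and job IDs. It shows how this <code>projects.locations.jobs</code> collection is typically reached and how <code>list_next</code> drives pagination.</p>
<pre>
# Minimal sketch; assumes google-api-python-client is installed and
# application default credentials are configured. Project, location,
# and job IDs below are placeholders.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
jobs = service.projects().locations().jobs()

# List jobs in one location, following pagination with list_next().
request = jobs.list(projectId='my-project', location='us-central1')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    request = jobs.list_next(previous_request=request,
                             previous_response=response)

# Fetch the state of a single job.
job = jobs.get(projectId='my-project', location='us-central1',
               jobId='2017-01-01_00_00_00-123456789012345678').execute()
print(job.get('currentState'))
</pre>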
    110 <h3>Method Details</h3>
    111 <div class="method">
    112     <code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>
    113   <pre>Creates a Cloud Dataflow job.
    114 
    115 Args:
    116   projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
    117   location: string, The location that contains this job. (required)
    118   body: object, The request body. (required)
    119     The object takes the form of:
    120 
    121 { # Defines a job to be run by the Cloud Dataflow service.
    122     "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
    123         # If this field is set, the service will ensure its uniqueness.
    124         # The request to create a job will fail if the service has knowledge of a
    125         # previously submitted job with the same client's ID and job name.
    126         # The caller may use this field to ensure idempotence of job
    127         # creation across retried attempts to create a job.
    128         # By default, the field is empty and, in that case, the service ignores it.
    129     "requestedState": "A String", # The job's requested state.
    130         # 
    131         # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
    132         # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
    133         # also be used to directly set a job's requested state to
    134         # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
    135         # job if it has not already reached a terminal state.
    136     "name": "A String", # The user-specified Cloud Dataflow job name.
    137         # 
    138         # Only one Job with a given name may exist in a project at any
    139         # given time. If a caller attempts to create a Job with the same
    140         # name as an already-existing Job, the attempt returns the
    141         # existing Job.
    142         # 
    143         # The name must match the regular expression
    144         # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    145     "location": "A String", # The location that contains this job.
    146     "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
    147         # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    148     "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    149     "currentState": "A String", # The current state of the job.
    150         # 
    151         # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
    152         # specified.
    153         # 
    154         # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
    155         # terminal state. After a job has reached a terminal state, no
    156         # further state updates may be made.
    157         # 
    158         # This field may be mutated by the Cloud Dataflow service;
    159         # callers cannot mutate it.
    160     "labels": { # User-defined labels for this job.
    161         # 
    162         # The labels map can contain no more than 64 entries.  Entries of the labels
    163         # map are UTF8 strings that comply with the following restrictions:
    164         # 
    165         # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
    166         # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
    167         # * Both keys and values are additionally constrained to be <= 128 bytes in
    168         # size.
    169       "a_key": "A String",
    170     },
    171     "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
    172         # corresponding name prefixes of the new job.
    173       "a_key": "A String",
    174     },
    175     "id": "A String", # The unique ID of this job.
    176         # 
    177         # This field is set by the Cloud Dataflow service when the Job is
    178         # created, and is immutable for the life of the job.
    179     "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    180       "version": { # A structure describing which components and their versions of the service
    181           # are required in order to run the job.
    182         "a_key": "", # Properties of the object.
    183       },
    184       "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    185           # storage.  The system will append the suffix "/temp-{JOBNAME}" to
    186           # this resource prefix, where {JOBNAME} is the value of the
    187           # job_name field.  The resulting bucket and object prefix is used
    188           # as the prefix of the resources used to store temporary data
    189           # needed during the job execution.  NOTE: This will override the
    190           # value in taskrunner_settings.
    191           # The supported resource type is:
    192           #
    193           # Google Cloud Storage:
    194           #
    195           #   storage.googleapis.com/{bucket}/{object}
    196           #   bucket.storage.googleapis.com/{object}
    197       "internalExperiments": { # Experimental settings.
    198         "a_key": "", # Properties of the object. Contains field @type with type URL.
    199       },
    200       "dataset": "A String", # The dataset for the current project where various workflow
    201           # related tables are stored.
    202           #
    203           # The supported resource type is:
    204           #
    205           # Google BigQuery:
    206           #   bigquery.googleapis.com/{dataset}
    207       "experiments": [ # The list of experiments to enable.
    208         "A String",
    209       ],
    210       "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    211       "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
    212           # options are passed through the service and are used to recreate the
    213           # SDK pipeline options on the worker in a language agnostic and platform
    214           # independent way.
    215         "a_key": "", # Properties of the object.
    216       },
    217       "userAgent": { # A description of the process that generated the request.
    218         "a_key": "", # Properties of the object.
    219       },
    220       "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
    221           # unspecified, the service will attempt to choose a reasonable
    222           # default.  This should be in the form of the API service name,
    223           # e.g. "compute.googleapis.com".
    224       "workerPools": [ # The worker pools. At least one "harness" worker pool must be
    225           # specified in order for the job to have workers.
    226         { # Describes one particular pool of Cloud Dataflow workers to be
    227             # instantiated by the Cloud Dataflow service in order to perform the
    228             # computations required by a job.  Note that a workflow job may use
    229             # multiple pools, in order to match the various computational
    230             # requirements of the various stages of the job.
    231           "diskSourceImage": "A String", # Fully qualified source image for disks.
    232           "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
    233               # using the standard Dataflow task runner.  Users should ignore
    234               # this field.
    235             "workflowFileName": "A String", # The file to store the workflow in.
    236             "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
    237                 # will not be uploaded.
    238                 #
    239                 # The supported resource type is:
    240                 #
    241                 # Google Cloud Storage:
    242                 #   storage.googleapis.com/{bucket}/{object}
    243                 #   bucket.storage.googleapis.com/{object}
    244             "commandlinesFileName": "A String", # The file to store preprocessing commands in.
    245             "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
    246               "reportingEnabled": True or False, # Whether to send work progress updates to the service.
    247               "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
    248                   # "shuffle/v1beta1".
    249               "workerId": "A String", # The ID of the worker running this pipeline.
    250               "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
    251                   #
    252                   # When workers access Google Cloud APIs, they logically do so via
    253                   # relative URLs.  If this field is specified, it supplies the base
    254                   # URL to use for resolving these relative URLs.  The normative
    255                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    256                   # Locators".
    257                   #
    258                   # If not specified, the default value is "http://www.googleapis.com/"
    259               "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
    260                   # "dataflow/v1b3/projects".
    261               "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    262                   # storage.
    263                   #
    264                   # The supported resource type is:
    265                   #
    266                   # Google Cloud Storage:
    267                   #
    268                   #   storage.googleapis.com/{bucket}/{object}
    269                   #   bucket.storage.googleapis.com/{object}
    270             },
    271             "vmId": "A String", # The ID string of the VM.
    272             "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
    273             "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
    274             "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
    275                 # access the Cloud Dataflow API.
    276               "A String",
    277             ],
    278             "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
    279                 # taskrunner; e.g. "root".
    280             "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
    281                 #
    282                 # When workers access Google Cloud APIs, they logically do so via
    283                 # relative URLs.  If this field is specified, it supplies the base
    284                 # URL to use for resolving these relative URLs.  The normative
    285                 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    286                 # Locators".
    287                 #
    288                 # If not specified, the default value is "http://www.googleapis.com/"
    289             "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
    290                 # taskrunner; e.g. "wheel".
    291             "languageHint": "A String", # The suggested backend language.
    292             "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
    293                 # console.
    294             "streamingWorkerMainClass": "A String", # The streaming worker main class name.
    295             "logDir": "A String", # The directory on the VM to store logs.
    296             "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
    297             "harnessCommand": "A String", # The command to launch the worker harness.
    298             "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
    299                 # temporary storage.
    300                 #
    301                 # The supported resource type is:
    302                 #
    303                 # Google Cloud Storage:
    304                 #   storage.googleapis.com/{bucket}/{object}
    305                 #   bucket.storage.googleapis.com/{object}
    306             "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
    307           },
    308           "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
    309               # are supported.
    310           "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
    311               # service will attempt to choose a reasonable default.
    312           "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
    313               # the service will use the network "default".
    314           "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
    315               # will attempt to choose a reasonable default.
    316           "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
    317               # attempt to choose a reasonable default.
    318           "dataDisks": [ # Data disks that are used by a VM in this workflow.
    319             { # Describes the data disk used by a workflow job.
    320               "mountPoint": "A String", # Directory in a VM where disk is mounted.
    321               "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
    322                   # attempt to choose a reasonable default.
    323               "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
    324                   # must be a disk type appropriate to the project and zone in which
    325                   # the workers will run.  If unknown or unspecified, the service
    326                   # will attempt to choose a reasonable default.
    327                   #
    328                   # For example, the standard persistent disk type is a resource name
    329                   # typically ending in "pd-standard".  If SSD persistent disks are
    330                   # available, the resource name typically ends with "pd-ssd".  The
    331                   # actual valid values are defined by the Google Compute Engine API,
    332                   # not by the Cloud Dataflow API; consult the Google Compute Engine
    333                   # documentation for more information about determining the set of
    334                   # available disk types for a particular project and zone.
    335                   #
    336                   # Google Compute Engine Disk types are local to a particular
    337                   # project in a particular zone, and so the resource name will
    338                   # typically look something like this:
    339                   #
    340                   # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
    341             },
    342           ],
    343           "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
    344               # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
    345               # `TEARDOWN_NEVER`.
    346               # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
    347               # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
    348               # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
    349               # down.
    350               #
    351               # If the workers are not torn down by the service, they will
    352               # continue to run and use Google Compute Engine VM resources in the
    353               # user's project until they are explicitly terminated by the user.
    354               # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
    355               # policy except for small, manually supervised test jobs.
    356               #
    357               # If unknown or unspecified, the service will attempt to choose a reasonable
    358               # default.
    359           "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
    360               # Compute Engine API.
    361           "ipConfiguration": "A String", # Configuration for VM IPs.
    362           "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
    363               # service will choose a number of threads (according to the number of cores
    364               # on the selected machine type for batch, or 1 by convention for streaming).
    365           "poolArgs": { # Extra arguments for this worker pool.
    366             "a_key": "", # Properties of the object. Contains field @type with type URL.
    367           },
    368           "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
    369               # execute the job.  If zero or unspecified, the service will
    370               # attempt to choose a reasonable default.
    371           "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
    372               # harness, residing in Google Container Registry.
    373           "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
    374               # the form "regions/REGION/subnetworks/SUBNETWORK".
    375           "packages": [ # Packages to be installed on workers.
    376             { # The packages that must be installed in order for a worker to run the
    377                 # steps of the Cloud Dataflow job that will be assigned to its worker
    378                 # pool.
    379                 #
    380                 # This is the mechanism by which the Cloud Dataflow SDK causes code to
    381                 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
    382                 # might use this to install jars containing the user's code and all of the
    383                 # various dependencies (libraries, data files, etc.) required in order
    384                 # for that code to run.
    385               "location": "A String", # The resource to read the package from. The supported resource type is:
    386                   #
    387                   # Google Cloud Storage:
    388                   #
    389                   #   storage.googleapis.com/{bucket}
    390                   #   bucket.storage.googleapis.com/
    391               "name": "A String", # The name of the package.
    392             },
    393           ],
    394           "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
    395             "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
    396             "algorithm": "A String", # The algorithm to use for autoscaling.
    397           },
    398           "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
    399               # select a default set of packages which are useful to worker
    400               # harnesses written in a particular language.
    401           "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
    402               # attempt to choose a reasonable default.
    403           "metadata": { # Metadata to set on the Google Compute Engine VMs.
    404             "a_key": "A String",
    405           },
    406         },
    407       ],
    408     },
    409     "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
    410         # A description of the user pipeline and stages through which it is executed.
    411         # Created by Cloud Dataflow service.  Only retrieved with
    412         # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    413         # form.  This data is provided by the Dataflow service for ease of visualizing
    414         # the pipeline and interpreting Dataflow-provided metrics.
    415       "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
    416         { # Description of the type, names/ids, and input/outputs for a transform.
    417           "kind": "A String", # Type of transform.
    418           "name": "A String", # User provided name for this transform instance.
    419           "inputCollectionName": [ # User names for all collection inputs to this transform.
    420             "A String",
    421           ],
    422           "displayData": [ # Transform-specific display data.
    423             { # Data provided with a pipeline or transform to provide descriptive info.
    424               "shortStrValue": "A String", # A possible additional shorter value to display.
    425                   # For example a java_class_name_value of com.mypackage.MyDoFn
    426                   # will be stored with MyDoFn as the short_str_value and
    427                   # com.mypackage.MyDoFn as the java_class_name value.
    428                   # short_str_value can be displayed and java_class_name_value
    429                   # will be displayed as a tooltip.
    430               "durationValue": "A String", # Contains value if the data is of duration type.
    431               "url": "A String", # An optional full URL.
    432               "floatValue": 3.14, # Contains value if the data is of float type.
    433               "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    434                   # language namespace (i.e. python module) which defines the display data.
    435                   # This allows a dax monitoring system to specially handle the data
    436                   # and perform custom rendering.
    437               "javaClassValue": "A String", # Contains value if the data is of java class type.
    438               "label": "A String", # An optional label to display in a dax UI for the element.
    439               "boolValue": True or False, # Contains value if the data is of a boolean type.
    440               "strValue": "A String", # Contains value if the data is of string type.
    441               "key": "A String", # The key identifying the display data.
    442                   # This is intended to be used as a label for the display data
    443                   # when viewed in a dax monitoring system.
    444               "int64Value": "A String", # Contains value if the data is of int64 type.
    445               "timestampValue": "A String", # Contains value if the data is of timestamp type.
    446             },
    447           ],
    448           "outputCollectionName": [ # User names for all collection outputs to this transform.
    449             "A String",
    450           ],
    451           "id": "A String", # SDK generated id of this transform instance.
    452         },
    453       ],
    454       "displayData": [ # Pipeline level display data.
    455         { # Data provided with a pipeline or transform to provide descriptive info.
    456           "shortStrValue": "A String", # A possible additional shorter value to display.
    457               # For example a java_class_name_value of com.mypackage.MyDoFn
    458               # will be stored with MyDoFn as the short_str_value and
    459               # com.mypackage.MyDoFn as the java_class_name value.
    460               # short_str_value can be displayed and java_class_name_value
    461               # will be displayed as a tooltip.
    462           "durationValue": "A String", # Contains value if the data is of duration type.
    463           "url": "A String", # An optional full URL.
    464           "floatValue": 3.14, # Contains value if the data is of float type.
    465           "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    466               # language namespace (i.e. python module) which defines the display data.
    467               # This allows a dax monitoring system to specially handle the data
    468               # and perform custom rendering.
    469           "javaClassValue": "A String", # Contains value if the data is of java class type.
    470           "label": "A String", # An optional label to display in a dax UI for the element.
    471           "boolValue": True or False, # Contains value if the data is of a boolean type.
    472           "strValue": "A String", # Contains value if the data is of string type.
    473           "key": "A String", # The key identifying the display data.
    474               # This is intended to be used as a label for the display data
    475               # when viewed in a dax monitoring system.
    476           "int64Value": "A String", # Contains value if the data is of int64 type.
    477           "timestampValue": "A String", # Contains value if the data is of timestamp type.
    478         },
    479       ],
    480       "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
    481         { # Description of the composing transforms, names/ids, and input/outputs of a
    482             # stage of execution.  Some composing transforms and sources may have been
    483             # generated by the Dataflow service during execution planning.
    484           "componentSource": [ # Collections produced and consumed by component transforms of this stage.
    485             { # Description of an interstitial value between transforms in an execution
    486                 # stage.
    487               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    488               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    489                   # source is most closely associated.
    490               "name": "A String", # Dataflow service generated name for this source.
    491             },
    492           ],
    493           "kind": "A String", # Type of transform this stage is executing.
    494           "name": "A String", # Dataflow service generated name for this stage.
    495           "outputSource": [ # Output sources for this stage.
    496             { # Description of an input or output of an execution stage.
    497               "userName": "A String", # Human-readable name for this source; may be user or system generated.
    498               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    499                   # source is most closely associated.
    500               "name": "A String", # Dataflow service generated name for this source.
    501               "sizeBytes": "A String", # Size of the source, if measurable.
    502             },
    503           ],
    504           "inputSource": [ # Input sources for this stage.
    505             { # Description of an input or output of an execution stage.
    506               "userName": "A String", # Human-readable name for this source; may be user or system generated.
    507               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    508                   # source is most closely associated.
    509               "name": "A String", # Dataflow service generated name for this source.
    510               "sizeBytes": "A String", # Size of the source, if measurable.
    511             },
    512           ],
    513           "componentTransform": [ # Transforms that comprise this execution stage.
    514             { # Description of a transform executed as part of an execution stage.
    515               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    516               "originalTransform": "A String", # User name for the original user transform with which this transform is
    517                   # most closely associated.
    518               "name": "A String", # Dataflow service generated name for this source.
    519             },
    520           ],
    521           "id": "A String", # Dataflow service generated id for this stage.
    522         },
    523       ],
    524     },
    525     "steps": [ # The top-level steps that constitute the entire job.
    526       { # Defines a particular step within a Cloud Dataflow job.
    527           #
    528           # A job consists of multiple steps, each of which performs some
    529           # specific operation as part of the overall job.  Data is typically
    530           # passed from one step to another as part of the job.
    531           #
    532           # Here's an example of a sequence of steps which together implement a
    533           # Map-Reduce job:
    534           #
    535           #   * Read a collection of data from some source, parsing the
    536           #     collection's elements.
    537           #
    538           #   * Validate the elements.
    539           #
    540           #   * Apply a user-defined function to map each element to some value
    541           #     and extract an element-specific key value.
    542           #
    543           #   * Group elements with the same key into a single element with
    544           #     that key, transforming a multiply-keyed collection into a
    545           #     uniquely-keyed collection.
    546           #
    547           #   * Write the elements out to some data sink.
    548           #
    549           # Note that the Cloud Dataflow service may be used to run many different
    550           # types of jobs, not just Map-Reduce.
    551         "kind": "A String", # The kind of step in the Cloud Dataflow job.
    552         "properties": { # Named properties associated with the step. Each kind of
    553             # predefined step has its own required set of properties.
    554             # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
    555           "a_key": "", # Properties of the object.
    556         },
    557         "name": "A String", # The name that identifies the step. This must be unique for each
    558             # step with respect to all other steps in the Cloud Dataflow job.
    559       },
    560     ],
    561     "currentStateTime": "A String", # The timestamp associated with the current state.
    562     "tempFiles": [ # A set of files the system should be aware of that are used
    563         # for temporary storage. These temporary files will be
    564         # removed on job completion.
    565         # No duplicates are allowed.
    566         # No file patterns are supported.
    567         # 
    568         # The supported files are:
    569         # 
    570         # Google Cloud Storage:
    571         # 
    572         #    storage.googleapis.com/{bucket}/{object}
    573         #    bucket.storage.googleapis.com/{object}
    574       "A String",
    575     ],
    576     "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
    577         # callers cannot mutate it.
    578       { # A message describing the state of a particular execution stage.
    579         "executionStageName": "A String", # The name of the execution stage.
    580         "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
    581         "currentStateTime": "A String", # The time at which the stage transitioned to this state.
    582       },
    583     ],
    584     "type": "A String", # The type of Cloud Dataflow job.
    585     "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
    586         # Cloud Dataflow service.
    587     "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
    588         # of the job it replaced.
    589         # 
    590         # When sending a `CreateJobRequest`, you can update a job by specifying it
    591         # here. The job named here is stopped, and its intermediate state is
    592         # transferred to this job.
    593     "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
    594         # isn't contained in the submitted job.
    595       "stages": { # A mapping from each stage to the information about that stage.
    596         "a_key": { # Contains information about how a particular
    597             # google.dataflow.v1beta3.Step will be executed.
    598           "stepName": [ # The steps associated with the execution stage.
    599               # Note that stages may have several steps, and that a given step
    600               # might be run by more than one stage.
    601             "A String",
    602           ],
    603         },
    604       },
    605     },
    606   }
    607 
    608   x__xgafv: string, V1 error format.
    609     Allowed values
    610       1 - v1 error format
    611       2 - v2 error format
    612   replaceJobId: string, Deprecated. This field is now in the Job message.
    613   view: string, The level of information requested in response.
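
  Example (an illustrative sketch only, not generated documentation): this
  assumes a `service` handle built with
  googleapiclient.discovery.build('dataflow', 'v1b3'); the project ID,
  location, bucket, and job fields are placeholders, and a hand-written body
  like this is rarely sufficient on its own, since most jobs are submitted by
  an SDK pipeline runner or from a template.

    body = {
        "name": "example-job",
        "type": "JOB_TYPE_BATCH",
        "environment": {
            "tempStoragePrefix": "storage.googleapis.com/my-bucket/temp",
        },
    }
    response = service.projects().locations().jobs().create(
        projectId="my-project",
        location="us-central1",
        body=body).execute()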
    614 
    615 Returns:
    616   An object of the form:
    617 
    618     { # Defines a job to be run by the Cloud Dataflow service.
    619       "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
    620           # If this field is set, the service will ensure its uniqueness.
    621           # The request to create a job will fail if the service has knowledge of a
    622           # previously submitted job with the same client's ID and job name.
    623           # The caller may use this field to ensure idempotence of job
    624           # creation across retried attempts to create a job.
    625           # By default, the field is empty and, in that case, the service ignores it.
    626       "requestedState": "A String", # The job's requested state.
    627           #
    628           # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
    629           # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
    630           # also be used to directly set a job's requested state to
    631           # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
    632           # job if it has not already reached a terminal state.
    633       "name": "A String", # The user-specified Cloud Dataflow job name.
    634           #
    635           # Only one Job with a given name may exist in a project at any
    636           # given time. If a caller attempts to create a Job with the same
    637           # name as an already-existing Job, the attempt returns the
    638           # existing Job.
    639           #
    640           # The name must match the regular expression
    641           # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    642       "location": "A String", # The location that contains this job.
    643       "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
    644           # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    645       "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    646       "currentState": "A String", # The current state of the job.
    647           #
    648           # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
    649           # specified.
    650           #
    651           # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
    652           # terminal state. After a job has reached a terminal state, no
    653           # further state updates may be made.
    654           #
    655           # This field may be mutated by the Cloud Dataflow service;
    656           # callers cannot mutate it.
    657       "labels": { # User-defined labels for this job.
    658           #
    659           # The labels map can contain no more than 64 entries.  Entries of the labels
    660           # map are UTF8 strings that comply with the following restrictions:
    661           #
    662           # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
    663           # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
    664           # * Both keys and values are additionally constrained to be <= 128 bytes in
    665           # size.
    666         "a_key": "A String",
    667       },
    668       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
    669           # corresponding name prefixes of the new job.
    670         "a_key": "A String",
    671       },
    672       "id": "A String", # The unique ID of this job.
    673           #
    674           # This field is set by the Cloud Dataflow service when the Job is
    675           # created, and is immutable for the life of the job.
    676       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    677         "version": { # A structure describing which components and their versions of the service
    678             # are required in order to run the job.
    679           "a_key": "", # Properties of the object.
    680         },
    681         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    682             # storage.  The system will append the suffix "/temp-{JOBNAME}" to
    683             # this resource prefix, where {JOBNAME} is the value of the
    684             # job_name field.  The resulting bucket and object prefix is used
    685             # as the prefix of the resources used to store temporary data
    686             # needed during the job execution.  NOTE: This will override the
    687             # value in taskrunner_settings.
    688             # The supported resource type is:
    689             #
    690             # Google Cloud Storage:
    691             #
    692             #   storage.googleapis.com/{bucket}/{object}
    693             #   bucket.storage.googleapis.com/{object}
    694         "internalExperiments": { # Experimental settings.
    695           "a_key": "", # Properties of the object. Contains field @type with type URL.
    696         },
    697         "dataset": "A String", # The dataset for the current project where various workflow
    698             # related tables are stored.
    699             #
    700             # The supported resource type is:
    701             #
    702             # Google BigQuery:
    703             #   bigquery.googleapis.com/{dataset}
    704         "experiments": [ # The list of experiments to enable.
    705           "A String",
    706         ],
    707         "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    708         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
    709             # options are passed through the service and are used to recreate the
    710             # SDK pipeline options on the worker in a language agnostic and platform
    711             # independent way.
    712           "a_key": "", # Properties of the object.
    713         },
    714         "userAgent": { # A description of the process that generated the request.
    715           "a_key": "", # Properties of the object.
    716         },
    717         "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
    718             # unspecified, the service will attempt to choose a reasonable
    719             # default.  This should be in the form of the API service name,
    720             # e.g. "compute.googleapis.com".
    721         "workerPools": [ # The worker pools. At least one "harness" worker pool must be
    722             # specified in order for the job to have workers.
    723           { # Describes one particular pool of Cloud Dataflow workers to be
    724               # instantiated by the Cloud Dataflow service in order to perform the
    725               # computations required by a job.  Note that a workflow job may use
    726               # multiple pools, in order to match the various computational
    727               # requirements of the various stages of the job.
    728             "diskSourceImage": "A String", # Fully qualified source image for disks.
    729             "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
    730                 # using the standard Dataflow task runner.  Users should ignore
    731                 # this field.
    732               "workflowFileName": "A String", # The file to store the workflow in.
    733               "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
    734                   # will not be uploaded.
    735                   #
    736                   # The supported resource type is:
    737                   #
    738                   # Google Cloud Storage:
    739                   #   storage.googleapis.com/{bucket}/{object}
    740                   #   bucket.storage.googleapis.com/{object}
    741               "commandlinesFileName": "A String", # The file to store preprocessing commands in.
    742               "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
    743                 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
    744                 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
    745                     # "shuffle/v1beta1".
    746                 "workerId": "A String", # The ID of the worker running this pipeline.
    747                 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
    748                     #
    749                     # When workers access Google Cloud APIs, they logically do so via
    750                     # relative URLs.  If this field is specified, it supplies the base
    751                     # URL to use for resolving these relative URLs.  The normative
    752                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    753                     # Locators".
    754                     #
    755                     # If not specified, the default value is "http://www.googleapis.com/"
    756                 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
    757                     # "dataflow/v1b3/projects".
    758                 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    759                     # storage.
    760                     #
    761                     # The supported resource type is:
    762                     #
    763                     # Google Cloud Storage:
    764                     #
    765                     #   storage.googleapis.com/{bucket}/{object}
    766                     #   bucket.storage.googleapis.com/{object}
    767               },
    768               "vmId": "A String", # The ID string of the VM.
    769               "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
    770               "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
    771               "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
    772                   # access the Cloud Dataflow API.
    773                 "A String",
    774               ],
    775               "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
    776                   # taskrunner; e.g. "root".
    777               "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
    778                   #
    779                   # When workers access Google Cloud APIs, they logically do so via
    780                   # relative URLs.  If this field is specified, it supplies the base
    781                   # URL to use for resolving these relative URLs.  The normative
    782                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    783                   # Locators".
    784                   #
    785                   # If not specified, the default value is "http://www.googleapis.com/"
    786               "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
    787                   # taskrunner; e.g. "wheel".
    788               "languageHint": "A String", # The suggested backend language.
    789               "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
    790                   # console.
    791               "streamingWorkerMainClass": "A String", # The streaming worker main class name.
    792               "logDir": "A String", # The directory on the VM to store logs.
    793               "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
    794               "harnessCommand": "A String", # The command to launch the worker harness.
    795               "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
    796                   # temporary storage.
    797                   #
    798                   # The supported resource type is:
    799                   #
    800                   # Google Cloud Storage:
    801                   #   storage.googleapis.com/{bucket}/{object}
    802                   #   bucket.storage.googleapis.com/{object}
    803               "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
    804             },
    805             "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
    806                 # are supported.
    807             "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
    808                 # service will attempt to choose a reasonable default.
    809             "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
    810                 # the service will use the network "default".
    811             "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
    812                 # will attempt to choose a reasonable default.
    813             "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
    814                 # attempt to choose a reasonable default.
    815             "dataDisks": [ # Data disks that are used by a VM in this workflow.
    816               { # Describes the data disk used by a workflow job.
    817                 "mountPoint": "A String", # Directory in a VM where disk is mounted.
    818                 "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
    819                     # attempt to choose a reasonable default.
    820                 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
    821                     # must be a disk type appropriate to the project and zone in which
    822                     # the workers will run.  If unknown or unspecified, the service
    823                     # will attempt to choose a reasonable default.
    824                     #
    825                     # For example, the standard persistent disk type is a resource name
    826                     # typically ending in "pd-standard".  If SSD persistent disks are
    827                     # available, the resource name typically ends with "pd-ssd".  The
    828                     # actual valid values are defined by the Google Compute Engine API,
    829                     # not by the Cloud Dataflow API; consult the Google Compute Engine
    830                     # documentation for more information about determining the set of
    831                     # available disk types for a particular project and zone.
    832                     #
    833                     # Google Compute Engine Disk types are local to a particular
    834                     # project in a particular zone, and so the resource name will
    835                     # typically look something like this:
    836                     #
    837                     # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
    838               },
    839             ],
    840             "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
    841                 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
    842                 # `TEARDOWN_NEVER`.
    843                 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
    844                 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
    845                 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
    846                 # down.
    847                 #
    848                 # If the workers are not torn down by the service, they will
    849                 # continue to run and use Google Compute Engine VM resources in the
    850                 # user's project until they are explicitly terminated by the user.
    851                 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
    852                 # policy except for small, manually supervised test jobs.
    853                 #
    854                 # If unknown or unspecified, the service will attempt to choose a reasonable
    855                 # default.
    856             "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
    857                 # Compute Engine API.
    858             "ipConfiguration": "A String", # Configuration for VM IPs.
    859             "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
    860                 # service will choose a number of threads (according to the number of cores
    861                 # on the selected machine type for batch, or 1 by convention for streaming).
    862             "poolArgs": { # Extra arguments for this worker pool.
    863               "a_key": "", # Properties of the object. Contains field @type with type URL.
    864             },
    865             "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
    866                 # execute the job.  If zero or unspecified, the service will
    867                 # attempt to choose a reasonable default.
    868             "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
    869                 # harness, residing in Google Container Registry.
    870             "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
    871                 # the form "regions/REGION/subnetworks/SUBNETWORK".
    872             "packages": [ # Packages to be installed on workers.
    873               { # The packages that must be installed in order for a worker to run the
    874                   # steps of the Cloud Dataflow job that will be assigned to its worker
    875                   # pool.
    876                   #
    877                   # This is the mechanism by which the Cloud Dataflow SDK causes code to
    878                   # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
    879                   # might use this to install jars containing the user's code and all of the
    880                   # various dependencies (libraries, data files, etc.) required in order
    881                   # for that code to run.
    882                 "location": "A String", # The resource to read the package from. The supported resource type is:
    883                     #
    884                     # Google Cloud Storage:
    885                     #
    886                     #   storage.googleapis.com/{bucket}
    887                     #   bucket.storage.googleapis.com/
    888                 "name": "A String", # The name of the package.
    889               },
    890             ],
    891             "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
    892               "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
    893               "algorithm": "A String", # The algorithm to use for autoscaling.
    894             },
    895             "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
    896                 # select a default set of packages which are useful to worker
    897                 # harnesses written in a particular language.
    898             "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
    899                 # attempt to choose a reasonable default.
    900             "metadata": { # Metadata to set on the Google Compute Engine VMs.
    901               "a_key": "A String",
    902             },
    903           },
    904         ],
    905       },
    906       "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
    907           # A description of the user pipeline and stages through which it is executed.
    908           # Created by Cloud Dataflow service.  Only retrieved with
    909           # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    910           # form.  This data is provided by the Dataflow service for ease of visualizing
    911           # the pipeline and interpreting Dataflow provided metrics.
    912         "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
    913           { # Description of the type, names/ids, and input/outputs for a transform.
    914             "kind": "A String", # Type of transform.
    915             "name": "A String", # User provided name for this transform instance.
    916             "inputCollectionName": [ # User names for all collection inputs to this transform.
    917               "A String",
    918             ],
    919             "displayData": [ # Transform-specific display data.
    920               { # Data provided with a pipeline or transform to provide descriptive info.
    921                 "shortStrValue": "A String", # A possible additional shorter value to display.
    922                     # For example a java_class_name_value of com.mypackage.MyDoFn
    923                     # will be stored with MyDoFn as the short_str_value and
    924                     # com.mypackage.MyDoFn as the java_class_name value.
    925                     # short_str_value can be displayed and java_class_name_value
    926                     # will be displayed as a tooltip.
    927                 "durationValue": "A String", # Contains value if the data is of duration type.
    928                 "url": "A String", # An optional full URL.
    929                 "floatValue": 3.14, # Contains value if the data is of float type.
    930                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    931                     # language namespace (e.g. a Python module) which defines the display data.
    932                     # This allows a dax monitoring system to specially handle the data
    933                     # and perform custom rendering.
    934                 "javaClassValue": "A String", # Contains value if the data is of java class type.
    935                 "label": "A String", # An optional label to display in a dax UI for the element.
    936                 "boolValue": True or False, # Contains value if the data is of a boolean type.
    937                 "strValue": "A String", # Contains value if the data is of string type.
    938                 "key": "A String", # The key identifying the display data.
    939                     # This is intended to be used as a label for the display data
    940                     # when viewed in a dax monitoring system.
    941                 "int64Value": "A String", # Contains value if the data is of int64 type.
    942                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
    943               },
    944             ],
    945             "outputCollectionName": [ # User names for all collection outputs to this transform.
    946               "A String",
    947             ],
    948             "id": "A String", # SDK generated id of this transform instance.
    949           },
    950         ],
    951         "displayData": [ # Pipeline level display data.
    952           { # Data provided with a pipeline or transform to provide descriptive info.
    953             "shortStrValue": "A String", # A possible additional shorter value to display.
    954                 # For example a java_class_name_value of com.mypackage.MyDoFn
    955                 # will be stored with MyDoFn as the short_str_value and
    956                 # com.mypackage.MyDoFn as the java_class_name value.
    957                 # short_str_value can be displayed and java_class_name_value
    958                 # will be displayed as a tooltip.
    959             "durationValue": "A String", # Contains value if the data is of duration type.
    960             "url": "A String", # An optional full URL.
    961             "floatValue": 3.14, # Contains value if the data is of float type.
    962             "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    963                 # language namespace (e.g. a Python module) which defines the display data.
    964                 # This allows a dax monitoring system to specially handle the data
    965                 # and perform custom rendering.
    966             "javaClassValue": "A String", # Contains value if the data is of java class type.
    967             "label": "A String", # An optional label to display in a dax UI for the element.
    968             "boolValue": True or False, # Contains value if the data is of a boolean type.
    969             "strValue": "A String", # Contains value if the data is of string type.
    970             "key": "A String", # The key identifying the display data.
    971                 # This is intended to be used as a label for the display data
    972                 # when viewed in a dax monitoring system.
    973             "int64Value": "A String", # Contains value if the data is of int64 type.
    974             "timestampValue": "A String", # Contains value if the data is of timestamp type.
    975           },
    976         ],
    977         "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
    978           { # Description of the composing transforms, names/ids, and input/outputs of a
    979               # stage of execution.  Some composing transforms and sources may have been
    980               # generated by the Dataflow service during execution planning.
    981             "componentSource": [ # Collections produced and consumed by component transforms of this stage.
    982               { # Description of an interstitial value between transforms in an execution
    983                   # stage.
    984                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    985                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    986                     # source is most closely associated.
    987                 "name": "A String", # Dataflow service generated name for this source.
    988               },
    989             ],
    990             "kind": "A String", # Type of transform this stage is executing.
    991             "name": "A String", # Dataflow service generated name for this stage.
    992             "outputSource": [ # Output sources for this stage.
    993               { # Description of an input or output of an execution stage.
    994                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
    995                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    996                     # source is most closely associated.
    997                 "name": "A String", # Dataflow service generated name for this source.
    998                 "sizeBytes": "A String", # Size of the source, if measurable.
    999               },
   1000             ],
   1001             "inputSource": [ # Input sources for this stage.
   1002               { # Description of an input or output of an execution stage.
   1003                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1004                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1005                     # source is most closely associated.
   1006                 "name": "A String", # Dataflow service generated name for this source.
   1007                 "sizeBytes": "A String", # Size of the source, if measurable.
   1008               },
   1009             ],
   1010             "componentTransform": [ # Transforms that comprise this execution stage.
   1011               { # Description of a transform executed as part of an execution stage.
   1012                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1013                 "originalTransform": "A String", # User name for the original user transform with which this transform is
   1014                     # most closely associated.
   1015                 "name": "A String", # Dataflow service generated name for this transform.
   1016               },
   1017             ],
   1018             "id": "A String", # Dataflow service generated id for this stage.
   1019           },
   1020         ],
   1021       },
   1022       "steps": [ # The top-level steps that constitute the entire job.
   1023         { # Defines a particular step within a Cloud Dataflow job.
   1024             #
   1025             # A job consists of multiple steps, each of which performs some
   1026             # specific operation as part of the overall job.  Data is typically
   1027             # passed from one step to another as part of the job.
   1028             #
   1029             # Here's an example of a sequence of steps which together implement a
   1030             # Map-Reduce job:
   1031             #
   1032             #   * Read a collection of data from some source, parsing the
   1033             #     collection's elements.
   1034             #
   1035             #   * Validate the elements.
   1036             #
   1037             #   * Apply a user-defined function to map each element to some value
   1038             #     and extract an element-specific key value.
   1039             #
   1040             #   * Group elements with the same key into a single element with
   1041             #     that key, transforming a multiply-keyed collection into a
   1042             #     uniquely-keyed collection.
   1043             #
   1044             #   * Write the elements out to some data sink.
   1045             #
   1046             # Note that the Cloud Dataflow service may be used to run many different
   1047             # types of jobs, not just Map-Reduce.
   1048           "kind": "A String", # The kind of step in the Cloud Dataflow job.
   1049           "properties": { # Named properties associated with the step. Each kind of
   1050               # predefined step has its own required set of properties.
   1051               # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   1052             "a_key": "", # Properties of the object.
   1053           },
   1054           "name": "A String", # The name that identifies the step. This must be unique for each
   1055               # step with respect to all other steps in the Cloud Dataflow job.
   1056         },
   1057       ],
   1058       "currentStateTime": "A String", # The timestamp associated with the current state.
   1059       "tempFiles": [ # A set of files the system should be aware of that are used
   1060           # for temporary storage. These temporary files will be
   1061           # removed on job completion.
   1062           # No duplicates are allowed.
   1063           # No file patterns are supported.
   1064           #
   1065           # The supported files are:
   1066           #
   1067           # Google Cloud Storage:
   1068           #
   1069           #    storage.googleapis.com/{bucket}/{object}
   1070           #    bucket.storage.googleapis.com/{object}
   1071         "A String",
   1072       ],
   1073       "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   1074           # callers cannot mutate it.
   1075         { # A message describing the state of a particular execution stage.
   1076           "executionStageName": "A String", # The name of the execution stage.
   1077           "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   1078           "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   1079         },
   1080       ],
   1081       "type": "A String", # The type of Cloud Dataflow job.
   1082       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   1083           # Cloud Dataflow service.
   1084       "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   1085           # of the job it replaced.
   1086           #
   1087           # When sending a `CreateJobRequest`, you can update a job by specifying it
   1088           # here. The job named here is stopped, and its intermediate state is
   1089           # transferred to this job.
   1090       "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
   1091           # isn't contained in the submitted job.
   1092         "stages": { # A mapping from each stage to the information about that stage.
   1093           "a_key": { # Contains information about how a particular
   1094               # google.dataflow.v1beta3.Step will be executed.
   1095             "stepName": [ # The steps associated with the execution stage.
   1096                 # Note that stages may have several steps, and that a given step
   1097                 # might be run by more than one stage.
   1098               "A String",
   1099             ],
   1100           },
   1101         },
   1102       },
   1103     }</pre>
   1104 </div>
   1105 
   1106 <div class="method">
   1107     <code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>
   1108   <pre>Gets the state of the specified Cloud Dataflow job.
   1109 
   1110 Args:
   1111   projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
   1112   location: string, The location that contains this job. (required)
   1113   jobId: string, The job ID. (required)
   1114   x__xgafv: string, V1 error format.
   1115     Allowed values
   1116       1 - v1 error format
   1117       2 - v2 error format
   1118   view: string, The level of information requested in response.
   1119 
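A minimal usage sketch (added for illustration, not part of the generated reference);
it assumes the google-api-python-client library with Application Default Credentials,
and the project, location, and job IDs shown are placeholders.

  from googleapiclient.discovery import build

  # Build a Dataflow API client; credentials are resolved from the environment.
  dataflow = build('dataflow', 'v1b3')

  # Fetch the current state of a job in a given regional endpoint.
  job = dataflow.projects().locations().jobs().get(
      projectId='my-project',        # placeholder project ID
      location='us-central1',        # placeholder location
      jobId='2017-01-01_00_00_00-1234567890123456789',  # placeholder job ID
      view='JOB_VIEW_SUMMARY').execute()

  print(job.get('currentState'))
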
   1120 Returns:
   1121   An object of the form:
   1122 
   1123     { # Defines a job to be run by the Cloud Dataflow service.
   1124       "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   1125           # If this field is set, the service will ensure its uniqueness.
   1126           # The request to create a job will fail if the service has knowledge of a
   1127           # previously submitted job with the same client's ID and job name.
   1128           # The caller may use this field to ensure idempotence of job
   1129           # creation across retried attempts to create a job.
   1130           # By default, the field is empty and, in that case, the service ignores it.
   1131       "requestedState": "A String", # The job's requested state.
   1132           #
   1133           # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   1134           # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   1135           # also be used to directly set a job's requested state to
   1136           # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   1137           # job if it has not already reached a terminal state.
   1138       "name": "A String", # The user-specified Cloud Dataflow job name.
   1139           #
   1140           # Only one Job with a given name may exist in a project at any
   1141           # given time. If a caller attempts to create a Job with the same
   1142           # name as an already-existing Job, the attempt returns the
   1143           # existing Job.
   1144           #
   1145           # The name must match the regular expression
   1146           # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   1147       "location": "A String", # The location that contains this job.
   1148       "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   1149           # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   1150       "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   1151       "currentState": "A String", # The current state of the job.
   1152           #
   1153           # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   1154           # specified.
   1155           #
   1156           # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   1157           # terminal state. After a job has reached a terminal state, no
   1158           # further state updates may be made.
   1159           #
   1160           # This field may be mutated by the Cloud Dataflow service;
   1161           # callers cannot mutate it.
   1162       "labels": { # User-defined labels for this job.
   1163           #
   1164           # The labels map can contain no more than 64 entries.  Entries of the labels
   1165           # map are UTF8 strings that comply with the following restrictions:
   1166           #
   1167           # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   1168           # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   1169           # * Both keys and values are additionally constrained to be <= 128 bytes in
   1170           # size.
   1171         "a_key": "A String",
   1172       },
   1173       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   1174           # corresponding name prefixes of the new job.
   1175         "a_key": "A String",
   1176       },
   1177       "id": "A String", # The unique ID of this job.
   1178           #
   1179           # This field is set by the Cloud Dataflow service when the Job is
   1180           # created, and is immutable for the life of the job.
   1181       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   1182         "version": { # A structure describing which components and their versions of the service
   1183             # are required in order to run the job.
   1184           "a_key": "", # Properties of the object.
   1185         },
   1186         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   1187             # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   1188             # this resource prefix, where {JOBNAME} is the value of the
   1189             # job_name field.  The resulting bucket and object prefix is used
   1190             # as the prefix of the resources used to store temporary data
   1191             # needed during the job execution.  NOTE: This will override the
   1192             # value in taskrunner_settings.
   1193             # The supported resource type is:
   1194             #
   1195             # Google Cloud Storage:
   1196             #
   1197             #   storage.googleapis.com/{bucket}/{object}
   1198             #   bucket.storage.googleapis.com/{object}
   1199         "internalExperiments": { # Experimental settings.
   1200           "a_key": "", # Properties of the object. Contains field @type with type URL.
   1201         },
   1202         "dataset": "A String", # The dataset for the current project where various workflow
   1203             # related tables are stored.
   1204             #
   1205             # The supported resource type is:
   1206             #
   1207             # Google BigQuery:
   1208             #   bigquery.googleapis.com/{dataset}
   1209         "experiments": [ # The list of experiments to enable.
   1210           "A String",
   1211         ],
   1212         "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   1213         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   1214             # options are passed through the service and are used to recreate the
   1215             # SDK pipeline options on the worker in a language agnostic and platform
   1216             # independent way.
   1217           "a_key": "", # Properties of the object.
   1218         },
   1219         "userAgent": { # A description of the process that generated the request.
   1220           "a_key": "", # Properties of the object.
   1221         },
   1222         "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   1223             # unspecified, the service will attempt to choose a reasonable
   1224             # default.  This should be in the form of the API service name,
   1225             # e.g. "compute.googleapis.com".
   1226         "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   1227             # specified in order for the job to have workers.
   1228           { # Describes one particular pool of Cloud Dataflow workers to be
   1229               # instantiated by the Cloud Dataflow service in order to perform the
   1230               # computations required by a job.  Note that a workflow job may use
   1231               # multiple pools, in order to match the various computational
   1232               # requirements of the various stages of the job.
   1233             "diskSourceImage": "A String", # Fully qualified source image for disks.
   1234             "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   1235                 # using the standard Dataflow task runner.  Users should ignore
   1236                 # this field.
   1237               "workflowFileName": "A String", # The file to store the workflow in.
   1238               "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   1239                   # will not be uploaded.
   1240                   #
   1241                   # The supported resource type is:
   1242                   #
   1243                   # Google Cloud Storage:
   1244                   #   storage.googleapis.com/{bucket}/{object}
   1245                   #   bucket.storage.googleapis.com/{object}
   1246               "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   1247               "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   1248                 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   1249                 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   1250                     # "shuffle/v1beta1".
   1251                 "workerId": "A String", # The ID of the worker running this pipeline.
   1252                 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   1253                     #
   1254                     # When workers access Google Cloud APIs, they logically do so via
   1255                     # relative URLs.  If this field is specified, it supplies the base
   1256                     # URL to use for resolving these relative URLs.  The normative
   1257                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1258                     # Locators".
   1259                     #
   1260                     # If not specified, the default value is "http://www.googleapis.com/"
   1261                 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   1262                     # "dataflow/v1b3/projects".
   1263                 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   1264                     # storage.
   1265                     #
   1266                     # The supported resource type is:
   1267                     #
   1268                     # Google Cloud Storage:
   1269                     #
   1270                     #   storage.googleapis.com/{bucket}/{object}
   1271                     #   bucket.storage.googleapis.com/{object}
   1272               },
   1273               "vmId": "A String", # The ID string of the VM.
   1274               "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   1275               "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   1276               "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   1277                   # access the Cloud Dataflow API.
   1278                 "A String",
   1279               ],
   1280               "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   1281                   # taskrunner; e.g. "root".
   1282               "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   1283                   #
   1284                   # When workers access Google Cloud APIs, they logically do so via
   1285                   # relative URLs.  If this field is specified, it supplies the base
   1286                   # URL to use for resolving these relative URLs.  The normative
   1287                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1288                   # Locators".
   1289                   #
   1290                   # If not specified, the default value is "http://www.googleapis.com/"
   1291               "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   1292                   # taskrunner; e.g. "wheel".
   1293               "languageHint": "A String", # The suggested backend language.
   1294               "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   1295                   # console.
   1296               "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   1297               "logDir": "A String", # The directory on the VM to store logs.
   1298               "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
   1299               "harnessCommand": "A String", # The command to launch the worker harness.
   1300               "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   1301                   # temporary storage.
   1302                   #
   1303                   # The supported resource type is:
   1304                   #
   1305                   # Google Cloud Storage:
   1306                   #   storage.googleapis.com/{bucket}/{object}
   1307                   #   bucket.storage.googleapis.com/{object}
   1308               "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   1309             },
   1310             "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   1311                 # are supported.
   1312             "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   1313                 # service will attempt to choose a reasonable default.
   1314             "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   1315                 # the service will use the network "default".
   1316             "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   1317                 # will attempt to choose a reasonable default.
   1318             "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   1319                 # attempt to choose a reasonable default.
   1320             "dataDisks": [ # Data disks that are used by a VM in this workflow.
   1321               { # Describes the data disk used by a workflow job.
   1322                 "mountPoint": "A String", # Directory in a VM where disk is mounted.
   1323                 "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   1324                     # attempt to choose a reasonable default.
   1325                 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   1326                     # must be a disk type appropriate to the project and zone in which
   1327                     # the workers will run.  If unknown or unspecified, the service
   1328                     # will attempt to choose a reasonable default.
   1329                     #
   1330                     # For example, the standard persistent disk type is a resource name
   1331                     # typically ending in "pd-standard".  If SSD persistent disks are
   1332                     # available, the resource name typically ends with "pd-ssd".  The
   1333                     # actual valid values are defined by the Google Compute Engine API,
   1334                     # not by the Cloud Dataflow API; consult the Google Compute Engine
   1335                     # documentation for more information about determining the set of
   1336                     # available disk types for a particular project and zone.
   1337                     #
   1338                     # Google Compute Engine Disk types are local to a particular
   1339                     # project in a particular zone, and so the resource name will
   1340                     # typically look something like this:
   1341                     #
   1342                     # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   1343               },
   1344             ],
   1345             "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
   1346                 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   1347                 # `TEARDOWN_NEVER`.
   1348                 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   1349                 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   1350                 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   1351                 # down.
   1352                 #
   1353                 # If the workers are not torn down by the service, they will
   1354                 # continue to run and use Google Compute Engine VM resources in the
   1355                 # user's project until they are explicitly terminated by the user.
   1356                 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   1357                 # policy except for small, manually supervised test jobs.
   1358                 #
   1359                 # If unknown or unspecified, the service will attempt to choose a reasonable
   1360                 # default.
   1361             "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   1362                 # Compute Engine API.
   1363             "ipConfiguration": "A String", # Configuration for VM IPs.
   1364             "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   1365                 # service will choose a number of threads (according to the number of cores
   1366                 # on the selected machine type for batch, or 1 by convention for streaming).
   1367             "poolArgs": { # Extra arguments for this worker pool.
   1368               "a_key": "", # Properties of the object. Contains field @type with type URL.
   1369             },
   1370             "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   1371                 # execute the job.  If zero or unspecified, the service will
   1372                 # attempt to choose a reasonable default.
   1373             "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   1374                 # harness, residing in Google Container Registry.
   1375             "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   1376                 # the form "regions/REGION/subnetworks/SUBNETWORK".
   1377             "packages": [ # Packages to be installed on workers.
   1378               { # The packages that must be installed in order for a worker to run the
   1379                   # steps of the Cloud Dataflow job that will be assigned to its worker
   1380                   # pool.
   1381                   #
   1382                   # This is the mechanism by which the Cloud Dataflow SDK causes code to
   1383                   # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   1384                   # might use this to install jars containing the user's code and all of the
   1385                   # various dependencies (libraries, data files, etc.) required in order
   1386                   # for that code to run.
   1387                 "location": "A String", # The resource to read the package from. The supported resource type is:
   1388                     #
   1389                     # Google Cloud Storage:
   1390                     #
   1391                     #   storage.googleapis.com/{bucket}
   1392                     #   bucket.storage.googleapis.com/
   1393                 "name": "A String", # The name of the package.
   1394               },
   1395             ],
   1396             "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   1397               "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   1398               "algorithm": "A String", # The algorithm to use for autoscaling.
   1399             },
   1400             "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   1401                 # select a default set of packages which are useful to worker
   1402                 # harnesses written in a particular language.
   1403             "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   1404                 # attempt to choose a reasonable default.
   1405             "metadata": { # Metadata to set on the Google Compute Engine VMs.
   1406               "a_key": "A String",
   1407             },
   1408           },
   1409         ],
   1410       },
   1411       "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
   1412           # A description of the user pipeline and stages through which it is executed.
   1413           # Created by Cloud Dataflow service.  Only retrieved with
   1414           # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   1415           # form.  This data is provided by the Dataflow service for ease of visualizing
   1416           # the pipeline and interpreting Dataflow provided metrics.
   1417         "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   1418           { # Description of the type, names/ids, and input/outputs for a transform.
   1419             "kind": "A String", # Type of transform.
   1420             "name": "A String", # User provided name for this transform instance.
   1421             "inputCollectionName": [ # User names for all collection inputs to this transform.
   1422               "A String",
   1423             ],
   1424             "displayData": [ # Transform-specific display data.
   1425               { # Data provided with a pipeline or transform to provide descriptive info.
   1426                 "shortStrValue": "A String", # A possible additional shorter value to display.
   1427                     # For example a java_class_name_value of com.mypackage.MyDoFn
   1428                     # will be stored with MyDoFn as the short_str_value and
   1429                     # com.mypackage.MyDoFn as the java_class_name value.
   1430                     # short_str_value can be displayed and java_class_name_value
   1431                     # will be displayed as a tooltip.
   1432                 "durationValue": "A String", # Contains value if the data is of duration type.
   1433                 "url": "A String", # An optional full URL.
   1434                 "floatValue": 3.14, # Contains value if the data is of float type.
   1435                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   1436                     # language namespace (e.g. a Python module) which defines the display data.
   1437                     # This allows a dax monitoring system to specially handle the data
   1438                     # and perform custom rendering.
   1439                 "javaClassValue": "A String", # Contains value if the data is of java class type.
   1440                 "label": "A String", # An optional label to display in a dax UI for the element.
   1441                 "boolValue": True or False, # Contains value if the data is of a boolean type.
   1442                 "strValue": "A String", # Contains value if the data is of string type.
   1443                 "key": "A String", # The key identifying the display data.
   1444                     # This is intended to be used as a label for the display data
   1445                     # when viewed in a dax monitoring system.
   1446                 "int64Value": "A String", # Contains value if the data is of int64 type.
   1447                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1448               },
   1449             ],
   1450             "outputCollectionName": [ # User names for all collection outputs to this transform.
   1451               "A String",
   1452             ],
   1453             "id": "A String", # SDK generated id of this transform instance.
   1454           },
   1455         ],
   1456         "displayData": [ # Pipeline level display data.
   1457           { # Data provided with a pipeline or transform to provide descriptive info.
   1458             "shortStrValue": "A String", # A possible additional shorter value to display.
   1459                 # For example a java_class_name_value of com.mypackage.MyDoFn
   1460                 # will be stored with MyDoFn as the short_str_value and
   1461                 # com.mypackage.MyDoFn as the java_class_name value.
   1462                 # short_str_value can be displayed and java_class_name_value
   1463                 # will be displayed as a tooltip.
   1464             "durationValue": "A String", # Contains value if the data is of duration type.
   1465             "url": "A String", # An optional full URL.
   1466             "floatValue": 3.14, # Contains value if the data is of float type.
   1467             "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   1468                 # language namespace (e.g. a Python module) which defines the display data.
   1469                 # This allows a dax monitoring system to specially handle the data
   1470                 # and perform custom rendering.
   1471             "javaClassValue": "A String", # Contains value if the data is of java class type.
   1472             "label": "A String", # An optional label to display in a dax UI for the element.
   1473             "boolValue": True or False, # Contains value if the data is of a boolean type.
   1474             "strValue": "A String", # Contains value if the data is of string type.
   1475             "key": "A String", # The key identifying the display data.
   1476                 # This is intended to be used as a label for the display data
   1477                 # when viewed in a dax monitoring system.
   1478             "int64Value": "A String", # Contains value if the data is of int64 type.
   1479             "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1480           },
   1481         ],
   1482         "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   1483           { # Description of the composing transforms, names/ids, and input/outputs of a
   1484               # stage of execution.  Some composing transforms and sources may have been
   1485               # generated by the Dataflow service during execution planning.
   1486             "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   1487               { # Description of an interstitial value between transforms in an execution
   1488                   # stage.
   1489                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1490                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1491                     # source is most closely associated.
   1492                 "name": "A String", # Dataflow service generated name for this source.
   1493               },
   1494             ],
   1495             "kind": "A String", # Type of transform this stage is executing.
   1496             "name": "A String", # Dataflow service generated name for this stage.
   1497             "outputSource": [ # Output sources for this stage.
   1498               { # Description of an input or output of an execution stage.
   1499                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1500                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1501                     # source is most closely associated.
   1502                 "name": "A String", # Dataflow service generated name for this source.
   1503                 "sizeBytes": "A String", # Size of the source, if measurable.
   1504               },
   1505             ],
   1506             "inputSource": [ # Input sources for this stage.
   1507               { # Description of an input or output of an execution stage.
   1508                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1509                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1510                     # source is most closely associated.
   1511                 "name": "A String", # Dataflow service generated name for this source.
   1512                 "sizeBytes": "A String", # Size of the source, if measurable.
   1513               },
   1514             ],
   1515             "componentTransform": [ # Transforms that comprise this execution stage.
   1516               { # Description of a transform executed as part of an execution stage.
   1517                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1518                 "originalTransform": "A String", # User name for the original user transform with which this transform is
   1519                     # most closely associated.
   1520                 "name": "A String", # Dataflow service generated name for this transform.
   1521               },
   1522             ],
   1523             "id": "A String", # Dataflow service generated id for this stage.
   1524           },
   1525         ],
   1526       },
   1527       "steps": [ # The top-level steps that constitute the entire job.
   1528         { # Defines a particular step within a Cloud Dataflow job.
   1529             #
   1530             # A job consists of multiple steps, each of which performs some
   1531             # specific operation as part of the overall job.  Data is typically
   1532             # passed from one step to another as part of the job.
   1533             #
   1534             # Here's an example of a sequence of steps which together implement a
   1535             # Map-Reduce job:
   1536             #
   1537             #   * Read a collection of data from some source, parsing the
   1538             #     collection's elements.
   1539             #
   1540             #   * Validate the elements.
   1541             #
   1542             #   * Apply a user-defined function to map each element to some value
   1543             #     and extract an element-specific key value.
   1544             #
   1545             #   * Group elements with the same key into a single element with
   1546             #     that key, transforming a multiply-keyed collection into a
   1547             #     uniquely-keyed collection.
   1548             #
   1549             #   * Write the elements out to some data sink.
   1550             #
   1551             # Note that the Cloud Dataflow service may be used to run many different
   1552             # types of jobs, not just Map-Reduce.
   1553           "kind": "A String", # The kind of step in the Cloud Dataflow job.
   1554           "properties": { # Named properties associated with the step. Each kind of
   1555               # predefined step has its own required set of properties.
   1556               # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   1557             "a_key": "", # Properties of the object.
   1558           },
   1559           "name": "A String", # The name that identifies the step. This must be unique for each
   1560               # step with respect to all other steps in the Cloud Dataflow job.
   1561         },
   1562       ],
   1563       "currentStateTime": "A String", # The timestamp associated with the current state.
   1564       "tempFiles": [ # A set of files the system should be aware of that are used
   1565           # for temporary storage. These temporary files will be
   1566           # removed on job completion.
   1567           # No duplicates are allowed.
   1568           # No file patterns are supported.
   1569           #
   1570           # The supported files are:
   1571           #
   1572           # Google Cloud Storage:
   1573           #
   1574           #    storage.googleapis.com/{bucket}/{object}
   1575           #    bucket.storage.googleapis.com/{object}
   1576         "A String",
   1577       ],
   1578       "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   1579           # callers cannot mutate it.
   1580         { # A message describing the state of a particular execution stage.
   1581           "executionStageName": "A String", # The name of the execution stage.
   1582           "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   1583           "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   1584         },
   1585       ],
   1586       "type": "A String", # The type of Cloud Dataflow job.
   1587       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   1588           # Cloud Dataflow service.
   1589       "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   1590           # of the job it replaced.
   1591           #
   1592           # When sending a `CreateJobRequest`, you can update a job by specifying it
   1593           # here. The job named here is stopped, and its intermediate state is
   1594           # transferred to this job.
   1595       "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
   1596           # isn't contained in the submitted job.
   1597         "stages": { # A mapping from each stage to the information about that stage.
   1598           "a_key": { # Contains information about how a particular
   1599               # google.dataflow.v1beta3.Step will be executed.
   1600             "stepName": [ # The steps associated with the execution stage.
   1601                 # Note that stages may have several steps, and that a given step
   1602                 # might be run by more than one stage.
   1603               "A String",
   1604             ],
   1605           },
   1606         },
   1607       },
   1608     }</pre>
   1609 </div>
   1610 
   1611 <div class="method">
   1612     <code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>
   1613   <pre>Request the job status.
   1614 
   1615 Args:
   1616   projectId: string, A project id. (required)
   1617   location: string, The location which contains the job specified by job_id. (required)
   1618   jobId: string, The job to get messages for. (required)
   1619   startTime: string, Return only metric data that has changed since this time.
   1620 Default is to return all information about all metrics for the job.
   1621   x__xgafv: string, V1 error format.
   1622     Allowed values
   1623       1 - v1 error format
   1624       2 - v2 error format
   1625 
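A hedged example (added for illustration, not taken from the reference); it assumes
the google-api-python-client library with Application Default Credentials, and all
IDs below are placeholders.

  from googleapiclient.discovery import build

  dataflow = build('dataflow', 'v1b3')

  # Request the most recent metric values for a job; omitting startTime returns
  # all metrics for the job.
  metrics = dataflow.projects().locations().jobs().getMetrics(
      projectId='my-project',        # placeholder project ID
      location='us-central1',        # placeholder location
      jobId='2017-01-01_00_00_00-1234567890123456789').execute()  # placeholder job ID

  # Print metrics that are not defined by the Dataflow service itself
  # (origin is "dataflow" for service- or SDK-defined metrics).
  for metric in metrics.get('metrics', []):
      name = metric.get('name', {})
      if name.get('origin') != 'dataflow':
          print(name.get('name'), metric.get('scalar'))
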
   1626 Returns:
   1627   An object of the form:
   1628 
   1629     { # JobMetrics contains a collection of metrics describing the detailed progress
   1630       # of a Dataflow job. Metrics correspond to user-defined and system-defined
   1631       # metrics in the job.
   1632       #
   1633       # This resource captures only the most recent values of each metric;
   1634       # time-series data can be queried for them (under the same metric names)
   1635       # from Cloud Monitoring.
   1636     "metrics": [ # All metrics for this job.
   1637       { # Describes the state of a metric.
   1638         "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
   1639             # This holds the count of the aggregated values and is used in combination
   1640             # with mean_sum to obtain the actual mean aggregate value.
   1641             # The only possible value type is Long.
   1642         "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
   1643             # reporting work progress; it will be filled in responses from the
   1644             # metrics API.
   1645         "set": "", # Worker-computed aggregate value for the "Set" aggregation kind.  The only
   1646             # possible value type is a list of Values whose type can be Long, Double,
   1647             # or String, according to the metric's type.  All Values in the list must
   1648             # be of the same type.
   1649         "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
   1650             # metric.
   1651           "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
   1652               # will be "dataflow" for metrics defined by the Dataflow service or SDK.
   1653           "name": "A String", # Worker-defined metric name.
   1654           "context": { # Zero or more labeled fields which identify the part of the job this
   1655               # metric is associated with, such as the name of a step or collection.
   1656               #
   1657               # For example, built-in counters associated with steps will have
   1658               # context['step'] = <step-name>. Counters associated with PCollections
   1659               # in the SDK will have context['pcollection'] = <pcollection-name>.
   1660             "a_key": "A String",
   1661           },
   1662         },
   1663         "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
   1664             # value accumulated since the worker started working on this WorkItem.
   1665             # By default this is false, indicating that this metric is reported
   1666             # as a delta that is not associated with any WorkItem.
   1667         "kind": "A String", # Metric aggregation kind.  The possible metric aggregation kinds are
   1668             # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
   1669             # The specified aggregation kind is case-insensitive.
   1670             #
   1671             # If omitted, this is not an aggregated value but instead
   1672             # a single metric sample value.
   1673         "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
   1674             # "And", and "Or".  The possible value types are Long, Double, and Boolean.
   1675         "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
   1676             # This holds the sum of the aggregated values and is used in combination
   1677             # with mean_count to obtain the actual mean aggregate value.
   1678             # The only possible value types are Long and Double.
   1679         "distribution": "", # A struct value describing properties of a distribution of numeric values.
   1680         "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
   1681             # service.
   1682       },
   1683     ],
   1684     "metricTime": "A String", # Timestamp as of which metric values are current.
   1685   }</pre>
   1686 </div>
   1687 
   1688 <div class="method">
   1689     <code class="details" id="list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</code>
   1690   <pre>List the jobs of a project.
   1691 
   1692 Args:
   1693   projectId: string, The project which owns the jobs. (required)
   1694   location: string, The location that contains this job. (required)
   1695   pageSize: integer, If there are many jobs, limit response to at most this many.
   1696 The actual number of jobs returned will be the lesser of pageSize
   1697 and an unspecified server-defined limit.
   1698   x__xgafv: string, V1 error format.
   1699     Allowed values
   1700       1 - v1 error format
   1701       2 - v2 error format
   1702   pageToken: string, Set this to the 'next_page_token' field of a previous response
   1703 to request additional results in a long list.
   1704   filter: string, The kind of filter to use.
   1705   view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
   1706 
   1707 Returns:
   1708   An object of the form:
   1709 
   1710     { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
   1711       # response, depending on the page size in the ListJobsRequest.
   1712     "nextPageToken": "A String", # Set if there may be more results than fit in this response.
   1713     "failedLocation": [ # Zero or more messages describing locations that failed to respond.
   1714       { # Indicates which location failed to respond to a request for data.
   1715         "name": "A String", # The name of the failed location.
   1716       },
   1717     ],
   1718     "jobs": [ # A subset of the requested job information.
   1719       { # Defines a job to be run by the Cloud Dataflow service.
   1720           "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   1721               # If this field is set, the service will ensure its uniqueness.
   1722               # The request to create a job will fail if the service has knowledge of a
   1723               # previously submitted job with the same client's ID and job name.
   1724               # The caller may use this field to ensure idempotence of job
   1725               # creation across retried attempts to create a job.
   1726               # By default, the field is empty and, in that case, the service ignores it.
   1727           "requestedState": "A String", # The job's requested state.
   1728               #
   1729               # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   1730               # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   1731               # also be used to directly set a job's requested state to
   1732               # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   1733               # job if it has not already reached a terminal state.
   1734           "name": "A String", # The user-specified Cloud Dataflow job name.
   1735               #
   1736               # Only one Job with a given name may exist in a project at any
   1737               # given time. If a caller attempts to create a Job with the same
   1738               # name as an already-existing Job, the attempt returns the
   1739               # existing Job.
   1740               #
   1741               # The name must match the regular expression
   1742               # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   1743           "location": "A String", # The location that contains this job.
   1744           "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   1745               # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   1746           "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   1747           "currentState": "A String", # The current state of the job.
   1748               #
   1749               # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   1750               # specified.
   1751               #
   1752               # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   1753               # terminal state. After a job has reached a terminal state, no
   1754               # further state updates may be made.
   1755               #
   1756               # This field may be mutated by the Cloud Dataflow service;
   1757               # callers cannot mutate it.
   1758           "labels": { # User-defined labels for this job.
   1759               #
   1760               # The labels map can contain no more than 64 entries.  Entries of the labels
   1761               # map are UTF8 strings that comply with the following restrictions:
   1762               #
   1763               # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   1764               # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   1765               # * Both keys and values are additionally constrained to be <= 128 bytes in
   1766               # size.
   1767             "a_key": "A String",
   1768           },
   1769           "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   1770               # corresponding name prefixes of the new job.
   1771             "a_key": "A String",
   1772           },
   1773           "id": "A String", # The unique ID of this job.
   1774               #
   1775               # This field is set by the Cloud Dataflow service when the Job is
   1776               # created, and is immutable for the life of the job.
   1777           "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   1778             "version": { # A structure describing which components and their versions of the service
   1779                 # are required in order to run the job.
   1780               "a_key": "", # Properties of the object.
   1781             },
   1782             "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   1783                 # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   1784                 # this resource prefix, where {JOBNAME} is the value of the
   1785                 # job_name field.  The resulting bucket and object prefix is used
   1786                 # as the prefix of the resources used to store temporary data
   1787                 # needed during the job execution.  NOTE: This will override the
   1788                 # value in taskrunner_settings.
   1789                 # The supported resource type is:
   1790                 #
   1791                 # Google Cloud Storage:
   1792                 #
   1793                 #   storage.googleapis.com/{bucket}/{object}
   1794                 #   bucket.storage.googleapis.com/{object}
   1795             "internalExperiments": { # Experimental settings.
   1796               "a_key": "", # Properties of the object. Contains field @type with type URL.
   1797             },
   1798             "dataset": "A String", # The dataset for the current project where various workflow
   1799                 # related tables are stored.
   1800                 #
   1801                 # The supported resource type is:
   1802                 #
   1803                 # Google BigQuery:
   1804                 #   bigquery.googleapis.com/{dataset}
   1805             "experiments": [ # The list of experiments to enable.
   1806               "A String",
   1807             ],
   1808             "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   1809             "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   1810                 # options are passed through the service and are used to recreate the
   1811                 # SDK pipeline options on the worker in a language agnostic and platform
   1812                 # independent way.
   1813               "a_key": "", # Properties of the object.
   1814             },
   1815             "userAgent": { # A description of the process that generated the request.
   1816               "a_key": "", # Properties of the object.
   1817             },
   1818             "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   1819                 # unspecified, the service will attempt to choose a reasonable
   1820                 # default.  This should be in the form of the API service name,
   1821                 # e.g. "compute.googleapis.com".
   1822             "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   1823                 # specified in order for the job to have workers.
   1824               { # Describes one particular pool of Cloud Dataflow workers to be
   1825                   # instantiated by the Cloud Dataflow service in order to perform the
   1826                   # computations required by a job.  Note that a workflow job may use
   1827                   # multiple pools, in order to match the various computational
   1828                   # requirements of the various stages of the job.
   1829                 "diskSourceImage": "A String", # Fully qualified source image for disks.
   1830                 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   1831                     # using the standard Dataflow task runner.  Users should ignore
   1832                     # this field.
   1833                   "workflowFileName": "A String", # The file to store the workflow in.
   1834                   "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   1835                       # will not be uploaded.
   1836                       #
   1837                       # The supported resource type is:
   1838                       #
   1839                       # Google Cloud Storage:
   1840                       #   storage.googleapis.com/{bucket}/{object}
   1841                       #   bucket.storage.googleapis.com/{object}
   1842                   "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   1843                   "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   1844                     "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   1845                     "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   1846                         # "shuffle/v1beta1".
   1847                     "workerId": "A String", # The ID of the worker running this pipeline.
   1848                     "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   1849                         #
   1850                         # When workers access Google Cloud APIs, they logically do so via
   1851                         # relative URLs.  If this field is specified, it supplies the base
   1852                         # URL to use for resolving these relative URLs.  The normative
   1853                         # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1854                         # Locators".
   1855                         #
   1856                         # If not specified, the default value is "http://www.googleapis.com/"
   1857                     "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   1858                         # "dataflow/v1b3/projects".
   1859                     "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   1860                         # storage.
   1861                         #
   1862                         # The supported resource type is:
   1863                         #
   1864                         # Google Cloud Storage:
   1865                         #
   1866                         #   storage.googleapis.com/{bucket}/{object}
   1867                         #   bucket.storage.googleapis.com/{object}
   1868                   },
   1869                   "vmId": "A String", # The ID string of the VM.
   1870                   "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   1871                   "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   1872                   "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   1873                       # access the Cloud Dataflow API.
   1874                     "A String",
   1875                   ],
   1876                   "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   1877                       # taskrunner; e.g. "root".
   1878                   "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   1879                       #
   1880                       # When workers access Google Cloud APIs, they logically do so via
   1881                       # relative URLs.  If this field is specified, it supplies the base
   1882                       # URL to use for resolving these relative URLs.  The normative
   1883                       # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1884                       # Locators".
   1885                       #
   1886                       # If not specified, the default value is "http://www.googleapis.com/"
   1887                   "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   1888                       # taskrunner; e.g. "wheel".
   1889                   "languageHint": "A String", # The suggested backend language.
   1890                   "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   1891                       # console.
   1892                   "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   1893                   "logDir": "A String", # The directory on the VM to store logs.
   1894                   "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
   1895                   "harnessCommand": "A String", # The command to launch the worker harness.
   1896                   "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   1897                       # temporary storage.
   1898                       #
   1899                       # The supported resource type is:
   1900                       #
   1901                       # Google Cloud Storage:
   1902                       #   storage.googleapis.com/{bucket}/{object}
   1903                       #   bucket.storage.googleapis.com/{object}
   1904                   "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   1905                 },
   1906                 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   1907                     # are supported.
   1908                 "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   1909                     # service will attempt to choose a reasonable default.
   1910                 "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   1911                     # the service will use the network "default".
   1912                 "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   1913                     # will attempt to choose a reasonable default.
   1914                 "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   1915                     # attempt to choose a reasonable default.
   1916                 "dataDisks": [ # Data disks that are used by a VM in this workflow.
   1917                   { # Describes the data disk used by a workflow job.
   1918                     "mountPoint": "A String", # Directory in a VM where disk is mounted.
   1919                     "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   1920                         # attempt to choose a reasonable default.
   1921                     "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   1922                         # must be a disk type appropriate to the project and zone in which
   1923                         # the workers will run.  If unknown or unspecified, the service
   1924                         # will attempt to choose a reasonable default.
   1925                         #
   1926                         # For example, the standard persistent disk type is a resource name
   1927                         # typically ending in "pd-standard".  If SSD persistent disks are
   1928                         # available, the resource name typically ends with "pd-ssd".  The
   1929                         # actual valid values are defined by the Google Compute Engine API,
   1930                         # not by the Cloud Dataflow API; consult the Google Compute Engine
   1931                         # documentation for more information about determining the set of
   1932                         # available disk types for a particular project and zone.
   1933                         #
   1934                         # Google Compute Engine Disk types are local to a particular
   1935                         # project in a particular zone, and so the resource name will
   1936                         # typically look something like this:
   1937                         #
   1938                         # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   1939                   },
   1940                 ],
   1941                 "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
   1942                     # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   1943                     # `TEARDOWN_NEVER`.
   1944                     # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   1945                     # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   1946                     # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   1947                     # down.
   1948                     #
   1949                     # If the workers are not torn down by the service, they will
   1950                     # continue to run and use Google Compute Engine VM resources in the
   1951                     # user's project until they are explicitly terminated by the user.
   1952                     # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   1953                     # policy except for small, manually supervised test jobs.
   1954                     #
   1955                     # If unknown or unspecified, the service will attempt to choose a reasonable
   1956                     # default.
   1957                 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   1958                     # Compute Engine API.
   1959                 "ipConfiguration": "A String", # Configuration for VM IPs.
   1960                 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   1961                     # service will choose a number of threads (according to the number of cores
   1962                     # on the selected machine type for batch, or 1 by convention for streaming).
   1963                 "poolArgs": { # Extra arguments for this worker pool.
   1964                   "a_key": "", # Properties of the object. Contains field @type with type URL.
   1965                 },
   1966                 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   1967                     # execute the job.  If zero or unspecified, the service will
   1968                     # attempt to choose a reasonable default.
   1969                 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   1970                     # harness, residing in Google Container Registry.
   1971                 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   1972                     # the form "regions/REGION/subnetworks/SUBNETWORK".
   1973                 "packages": [ # Packages to be installed on workers.
   1974                   { # The packages that must be installed in order for a worker to run the
   1975                       # steps of the Cloud Dataflow job that will be assigned to its worker
   1976                       # pool.
   1977                       #
   1978                       # This is the mechanism by which the Cloud Dataflow SDK causes code to
   1979                       # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   1980                       # might use this to install jars containing the user's code and all of the
   1981                       # various dependencies (libraries, data files, etc.) required in order
   1982                       # for that code to run.
   1983                     "location": "A String", # The resource to read the package from. The supported resource type is:
   1984                         #
   1985                         # Google Cloud Storage:
   1986                         #
   1987                         #   storage.googleapis.com/{bucket}
   1988                         #   bucket.storage.googleapis.com/
   1989                     "name": "A String", # The name of the package.
   1990                   },
   1991                 ],
   1992                 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   1993                   "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   1994                   "algorithm": "A String", # The algorithm to use for autoscaling.
   1995                 },
   1996                 "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   1997                     # select a default set of packages which are useful to worker
   1998                     # harnesses written in a particular language.
   1999                 "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   2000                     # attempt to choose a reasonable default.
   2001                 "metadata": { # Metadata to set on the Google Compute Engine VMs.
   2002                   "a_key": "A String",
   2003                 },
   2004               },
   2005             ],
   2006           },
   2007           "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
   2008               # A description of the user pipeline and stages through which it is executed.
   2009               # Created by Cloud Dataflow service.  Only retrieved with
   2010               # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   2011               # form.  This data is provided by the Dataflow service for ease of visualizing
   2012             # the pipeline and interpreting Dataflow provided metrics.
   2013             "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   2014               { # Description of the type, names/ids, and input/outputs for a transform.
   2015                 "kind": "A String", # Type of transform.
   2016                 "name": "A String", # User provided name for this transform instance.
   2017                 "inputCollectionName": [ # User names for all collection inputs to this transform.
   2018                   "A String",
   2019                 ],
   2020                 "displayData": [ # Transform-specific display data.
   2021                   { # Data provided with a pipeline or transform to provide descriptive info.
   2022                     "shortStrValue": "A String", # A possible additional shorter value to display.
   2023                         # For example a java_class_name_value of com.mypackage.MyDoFn
   2024                         # will be stored with MyDoFn as the short_str_value and
   2025                         # com.mypackage.MyDoFn as the java_class_name value.
   2026                         # short_str_value can be displayed and java_class_name_value
   2027                         # will be displayed as a tooltip.
   2028                     "durationValue": "A String", # Contains value if the data is of duration type.
   2029                     "url": "A String", # An optional full URL.
   2030                     "floatValue": 3.14, # Contains value if the data is of float type.
   2031                     "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2032                         # language namespace (i.e. python module) which defines the display data.
   2033                         # This allows a dax monitoring system to specially handle the data
   2034                         # and perform custom rendering.
   2035                     "javaClassValue": "A String", # Contains value if the data is of java class type.
   2036                     "label": "A String", # An optional label to display in a dax UI for the element.
   2037                     "boolValue": True or False, # Contains value if the data is of a boolean type.
   2038                     "strValue": "A String", # Contains value if the data is of string type.
   2039                     "key": "A String", # The key identifying the display data.
   2040                         # This is intended to be used as a label for the display data
   2041                         # when viewed in a dax monitoring system.
   2042                     "int64Value": "A String", # Contains value if the data is of int64 type.
   2043                     "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2044                   },
   2045                 ],
   2046                 "outputCollectionName": [ # User names for all collection outputs to this transform.
   2047                   "A String",
   2048                 ],
   2049                 "id": "A String", # SDK generated id of this transform instance.
   2050               },
   2051             ],
   2052             "displayData": [ # Pipeline level display data.
   2053               { # Data provided with a pipeline or transform to provide descriptive info.
   2054                 "shortStrValue": "A String", # A possible additional shorter value to display.
   2055                     # For example a java_class_name_value of com.mypackage.MyDoFn
   2056                     # will be stored with MyDoFn as the short_str_value and
   2057                     # com.mypackage.MyDoFn as the java_class_name value.
   2058                     # short_str_value can be displayed and java_class_name_value
   2059                     # will be displayed as a tooltip.
   2060                 "durationValue": "A String", # Contains value if the data is of duration type.
   2061                 "url": "A String", # An optional full URL.
   2062                 "floatValue": 3.14, # Contains value if the data is of float type.
   2063                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2064                     # language namespace (i.e. python module) which defines the display data.
   2065                     # This allows a dax monitoring system to specially handle the data
   2066                     # and perform custom rendering.
   2067                 "javaClassValue": "A String", # Contains value if the data is of java class type.
   2068                 "label": "A String", # An optional label to display in a dax UI for the element.
   2069                 "boolValue": True or False, # Contains value if the data is of a boolean type.
   2070                 "strValue": "A String", # Contains value if the data is of string type.
   2071                 "key": "A String", # The key identifying the display data.
   2072                     # This is intended to be used as a label for the display data
   2073                     # when viewed in a dax monitoring system.
   2074                 "int64Value": "A String", # Contains value if the data is of int64 type.
   2075                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2076               },
   2077             ],
   2078             "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   2079               { # Description of the composing transforms, names/ids, and input/outputs of a
   2080                   # stage of execution.  Some composing transforms and sources may have been
   2081                   # generated by the Dataflow service during execution planning.
   2082                 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   2083                   { # Description of an interstitial value between transforms in an execution
   2084                       # stage.
   2085                     "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2086                     "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2087                         # source is most closely associated.
   2088                     "name": "A String", # Dataflow service generated name for this source.
   2089                   },
   2090                 ],
   2091                 "kind": "A String", # Type of transform this stage is executing.
   2092                 "name": "A String", # Dataflow service generated name for this stage.
   2093                 "outputSource": [ # Output sources for this stage.
   2094                   { # Description of an input or output of an execution stage.
   2095                     "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2096                     "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2097                         # source is most closely associated.
   2098                     "name": "A String", # Dataflow service generated name for this source.
   2099                     "sizeBytes": "A String", # Size of the source, if measurable.
   2100                   },
   2101                 ],
   2102                 "inputSource": [ # Input sources for this stage.
   2103                   { # Description of an input or output of an execution stage.
   2104                     "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2105                     "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2106                         # source is most closely associated.
   2107                     "name": "A String", # Dataflow service generated name for this source.
   2108                     "sizeBytes": "A String", # Size of the source, if measurable.
   2109                   },
   2110                 ],
   2111                 "componentTransform": [ # Transforms that comprise this execution stage.
   2112                   { # Description of a transform executed as part of an execution stage.
   2113                     "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2114                     "originalTransform": "A String", # User name for the original user transform with which this transform is
   2115                         # most closely associated.
   2116                     "name": "A String", # Dataflow service generated name for this source.
   2117                   },
   2118                 ],
   2119                 "id": "A String", # Dataflow service generated id for this stage.
   2120               },
   2121             ],
   2122           },
   2123           "steps": [ # The top-level steps that constitute the entire job.
   2124             { # Defines a particular step within a Cloud Dataflow job.
   2125                 #
   2126                 # A job consists of multiple steps, each of which performs some
   2127                 # specific operation as part of the overall job.  Data is typically
   2128                 # passed from one step to another as part of the job.
   2129                 #
   2130                 # Here's an example of a sequence of steps which together implement a
   2131                 # Map-Reduce job:
   2132                 #
   2133                 #   * Read a collection of data from some source, parsing the
   2134                 #     collection's elements.
   2135                 #
   2136                 #   * Validate the elements.
   2137                 #
   2138                 #   * Apply a user-defined function to map each element to some value
   2139                 #     and extract an element-specific key value.
   2140                 #
   2141                 #   * Group elements with the same key into a single element with
   2142                 #     that key, transforming a multiply-keyed collection into a
   2143                 #     uniquely-keyed collection.
   2144                 #
   2145                 #   * Write the elements out to some data sink.
   2146                 #
   2147                 # Note that the Cloud Dataflow service may be used to run many different
   2148                 # types of jobs, not just Map-Reduce.
   2149               "kind": "A String", # The kind of step in the Cloud Dataflow job.
   2150               "properties": { # Named properties associated with the step. Each kind of
   2151                   # predefined step has its own required set of properties.
   2152                   # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   2153                 "a_key": "", # Properties of the object.
   2154               },
   2155               "name": "A String", # The name that identifies the step. This must be unique for each
   2156                   # step with respect to all other steps in the Cloud Dataflow job.
   2157             },
   2158           ],
   2159           "currentStateTime": "A String", # The timestamp associated with the current state.
   2160           "tempFiles": [ # A set of files the system should be aware of that are used
   2161               # for temporary storage. These temporary files will be
   2162               # removed on job completion.
   2163               # No duplicates are allowed.
   2164               # No file patterns are supported.
   2165               #
   2166               # The supported files are:
   2167               #
   2168               # Google Cloud Storage:
   2169               #
   2170               #    storage.googleapis.com/{bucket}/{object}
   2171               #    bucket.storage.googleapis.com/{object}
   2172             "A String",
   2173           ],
   2174           "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   2175               # callers cannot mutate it.
   2176             { # A message describing the state of a particular execution stage.
   2177               "executionStageName": "A String", # The name of the execution stage.
   2178               "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   2179               "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   2180             },
   2181           ],
   2182           "type": "A String", # The type of Cloud Dataflow job.
   2183           "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   2184               # Cloud Dataflow service.
   2185           "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   2186               # of the job it replaced.
   2187               #
   2188               # When sending a `CreateJobRequest`, you can update a job by specifying it
   2189               # here. The job named here is stopped, and its intermediate state is
   2190               # transferred to this job.
   2191           "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
   2192               # isn't contained in the submitted job.
   2193             "stages": { # A mapping from each stage to the information about that stage.
   2194               "a_key": { # Contains information about how a particular
   2195                   # google.dataflow.v1beta3.Step will be executed.
   2196                 "stepName": [ # The steps associated with the execution stage.
   2197                     # Note that stages may have several steps, and that a given step
   2198                     # might be run by more than one stage.
   2199                   "A String",
   2200                 ],
   2201               },
   2202             },
   2203           },
   2204         },
   2205     ],
   2206   }</pre>
   2207 </div>
   2208 
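<p>A minimal sketch of calling the list method above with the google-api-python-client library, reusing the client construction shown earlier; the project ID and location are hypothetical placeholders.</p>
<pre>
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')  # assumes Application Default Credentials

# Request a summary view of at most 25 jobs per page.
response = dataflow.projects().locations().jobs().list(
    projectId='my-project',   # placeholder
    location='us-central1',   # placeholder
    pageSize=25,
    view='JOB_VIEW_SUMMARY',
).execute()

for job in response.get('jobs', []):
    print(job.get('id'), job.get('name'), job.get('currentState'))

# Locations that failed to respond are reported separately.
for failed in response.get('failedLocation', []):
    print('failed location:', failed.get('name'))
</pre>
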
   2209 <div class="method">
   2210     <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
   2211   <pre>Retrieves the next page of results.
   2212 
   2213 Args:
   2214   previous_request: The request for the previous page. (required)
   2215   previous_response: The response from the request for the previous page. (required)
   2216 
   2217 Returns:
   2218   A request object that you can call 'execute()' on to request the next
   2219   page. Returns None if there are no more items in the collection.
   2220     </pre>
   2221 </div>
   2222 
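<p>A sketch of paging through all jobs by combining list with list_next, following the standard google-api-python-client pagination pattern; it continues from the client built in the sketches above and uses placeholder identifiers.</p>
<pre>
jobs_resource = dataflow.projects().locations().jobs()

request = jobs_resource.list(projectId='my-project', location='us-central1')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job.get('id'), job.get('currentState'))
    # list_next returns None once the response carries no next page token.
    request = jobs_resource.list_next(previous_request=request,
                                      previous_response=response)
</pre>
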
   2223 <div class="method">
   2224     <code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>
   2225   <pre>Updates the state of an existing Cloud Dataflow job.
   2226 
   2227 Args:
   2228   projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
   2229   location: string, The location that contains this job. (required)
   2230   jobId: string, The job ID. (required)
   2231   body: object, The request body. (required)
   2232     The object takes the form of:
   2233 
   2234 { # Defines a job to be run by the Cloud Dataflow service.
   2235     "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   2236         # If this field is set, the service will ensure its uniqueness.
   2237         # The request to create a job will fail if the service has knowledge of a
   2238         # previously submitted job with the same client's ID and job name.
   2239         # The caller may use this field to ensure idempotence of job
   2240         # creation across retried attempts to create a job.
   2241         # By default, the field is empty and, in that case, the service ignores it.
   2242     "requestedState": "A String", # The job's requested state.
   2243         # 
   2244         # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   2245         # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   2246         # also be used to directly set a job's requested state to
   2247         # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   2248         # job if it has not already reached a terminal state.
   2249     "name": "A String", # The user-specified Cloud Dataflow job name.
   2250         # 
   2251         # Only one Job with a given name may exist in a project at any
   2252         # given time. If a caller attempts to create a Job with the same
   2253         # name as an already-existing Job, the attempt returns the
   2254         # existing Job.
   2255         # 
   2256         # The name must match the regular expression
   2257         # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   2258     "location": "A String", # The location that contains this job.
   2259     "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   2260         # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   2261     "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   2262     "currentState": "A String", # The current state of the job.
   2263         # 
   2264         # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   2265         # specified.
   2266         # 
   2267         # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   2268         # terminal state. After a job has reached a terminal state, no
   2269         # further state updates may be made.
   2270         # 
   2271         # This field may be mutated by the Cloud Dataflow service;
   2272         # callers cannot mutate it.
   2273     "labels": { # User-defined labels for this job.
   2274         # 
   2275         # The labels map can contain no more than 64 entries.  Entries of the labels
   2276         # map are UTF8 strings that comply with the following restrictions:
   2277         # 
   2278         # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   2279         # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   2280         # * Both keys and values are additionally constrained to be <= 128 bytes in
   2281         # size.
   2282       "a_key": "A String",
   2283     },
   2284     "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   2285         # corresponding name prefixes of the new job.
   2286       "a_key": "A String",
   2287     },
   2288     "id": "A String", # The unique ID of this job.
   2289         # 
   2290         # This field is set by the Cloud Dataflow service when the Job is
   2291         # created, and is immutable for the life of the job.
   2292     "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   2293       "version": { # A structure describing which components and their versions of the service
   2294           # are required in order to run the job.
   2295         "a_key": "", # Properties of the object.
   2296       },
   2297       "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2298           # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   2299           # this resource prefix, where {JOBNAME} is the value of the
   2300           # job_name field.  The resulting bucket and object prefix is used
   2301           # as the prefix of the resources used to store temporary data
   2302           # needed during the job execution.  NOTE: This will override the
   2303           # value in taskrunner_settings.
   2304           # The supported resource type is:
   2305           #
   2306           # Google Cloud Storage:
   2307           #
   2308           #   storage.googleapis.com/{bucket}/{object}
   2309           #   bucket.storage.googleapis.com/{object}
   2310       "internalExperiments": { # Experimental settings.
   2311         "a_key": "", # Properties of the object. Contains field @type with type URL.
   2312       },
   2313       "dataset": "A String", # The dataset for the current project where various workflow
   2314           # related tables are stored.
   2315           #
   2316           # The supported resource type is:
   2317           #
   2318           # Google BigQuery:
   2319           #   bigquery.googleapis.com/{dataset}
   2320       "experiments": [ # The list of experiments to enable.
   2321         "A String",
   2322       ],
   2323       "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   2324       "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   2325           # options are passed through the service and are used to recreate the
   2326           # SDK pipeline options on the worker in a language agnostic and platform
   2327           # independent way.
   2328         "a_key": "", # Properties of the object.
   2329       },
   2330       "userAgent": { # A description of the process that generated the request.
   2331         "a_key": "", # Properties of the object.
   2332       },
   2333       "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   2334           # unspecified, the service will attempt to choose a reasonable
   2335           # default.  This should be in the form of the API service name,
   2336           # e.g. "compute.googleapis.com".
   2337       "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   2338           # specified in order for the job to have workers.
   2339         { # Describes one particular pool of Cloud Dataflow workers to be
   2340             # instantiated by the Cloud Dataflow service in order to perform the
   2341             # computations required by a job.  Note that a workflow job may use
   2342             # multiple pools, in order to match the various computational
   2343             # requirements of the various stages of the job.
   2344           "diskSourceImage": "A String", # Fully qualified source image for disks.
   2345           "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   2346               # using the standard Dataflow task runner.  Users should ignore
   2347               # this field.
   2348             "workflowFileName": "A String", # The file to store the workflow in.
   2349             "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   2350                 # will not be uploaded.
   2351                 #
   2352                 # The supported resource type is:
   2353                 #
   2354                 # Google Cloud Storage:
   2355                 #   storage.googleapis.com/{bucket}/{object}
   2356                 #   bucket.storage.googleapis.com/{object}
   2357             "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   2358             "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   2359               "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   2360               "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   2361                   # "shuffle/v1beta1".
   2362               "workerId": "A String", # The ID of the worker running this pipeline.
   2363               "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   2364                   #
   2365                   # When workers access Google Cloud APIs, they logically do so via
   2366                   # relative URLs.  If this field is specified, it supplies the base
   2367                   # URL to use for resolving these relative URLs.  The normative
   2368                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2369                   # Locators".
   2370                   #
   2371                   # If not specified, the default value is "http://www.googleapis.com/"
   2372               "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   2373                   # "dataflow/v1b3/projects".
   2374               "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2375                   # storage.
   2376                   #
   2377                   # The supported resource type is:
   2378                   #
   2379                   # Google Cloud Storage:
   2380                   #
   2381                   #   storage.googleapis.com/{bucket}/{object}
   2382                   #   bucket.storage.googleapis.com/{object}
   2383             },
   2384             "vmId": "A String", # The ID string of the VM.
   2385             "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   2386             "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   2387             "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   2388                 # access the Cloud Dataflow API.
   2389               "A String",
   2390             ],
   2391             "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   2392                 # taskrunner; e.g. "root".
   2393             "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   2394                 #
   2395                 # When workers access Google Cloud APIs, they logically do so via
   2396                 # relative URLs.  If this field is specified, it supplies the base
   2397                 # URL to use for resolving these relative URLs.  The normative
   2398                 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2399                 # Locators".
   2400                 #
   2401                 # If not specified, the default value is "http://www.googleapis.com/"
   2402             "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   2403                 # taskrunner; e.g. "wheel".
   2404             "languageHint": "A String", # The suggested backend language.
   2405             "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   2406                 # console.
   2407             "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   2408             "logDir": "A String", # The directory on the VM to store logs.
   2409             "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
   2410             "harnessCommand": "A String", # The command to launch the worker harness.
   2411             "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   2412                 # temporary storage.
   2413                 #
   2414                 # The supported resource type is:
   2415                 #
   2416                 # Google Cloud Storage:
   2417                 #   storage.googleapis.com/{bucket}/{object}
   2418                 #   bucket.storage.googleapis.com/{object}
   2419             "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   2420           },
   2421           "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   2422               # are supported.
   2423           "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   2424               # service will attempt to choose a reasonable default.
   2425           "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   2426               # the service will use the network "default".
   2427           "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   2428               # will attempt to choose a reasonable default.
   2429           "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   2430               # attempt to choose a reasonable default.
   2431           "dataDisks": [ # Data disks that are used by a VM in this workflow.
   2432             { # Describes the data disk used by a workflow job.
   2433               "mountPoint": "A String", # Directory in a VM where disk is mounted.
   2434               "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   2435                   # attempt to choose a reasonable default.
   2436               "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   2437                   # must be a disk type appropriate to the project and zone in which
   2438                   # the workers will run.  If unknown or unspecified, the service
   2439                   # will attempt to choose a reasonable default.
   2440                   #
   2441                   # For example, the standard persistent disk type is a resource name
   2442                   # typically ending in "pd-standard".  If SSD persistent disks are
   2443                   # available, the resource name typically ends with "pd-ssd".  The
   2444                   # actual valid values are defined by the Google Compute Engine API,
   2445                   # not by the Cloud Dataflow API; consult the Google Compute Engine
   2446                   # documentation for more information about determining the set of
   2447                   # available disk types for a particular project and zone.
   2448                   #
   2449                   # Google Compute Engine Disk types are local to a particular
   2450                   # project in a particular zone, and so the resource name will
   2451                   # typically look something like this:
   2452                   #
   2453                   # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   2454             },
   2455           ],
   2456           "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
   2457               # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   2458               # `TEARDOWN_NEVER`.
   2459               # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   2460               # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   2461               # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   2462               # down.
   2463               #
   2464               # If the workers are not torn down by the service, they will
   2465               # continue to run and use Google Compute Engine VM resources in the
   2466               # user's project until they are explicitly terminated by the user.
   2467               # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   2468               # policy except for small, manually supervised test jobs.
   2469               #
   2470               # If unknown or unspecified, the service will attempt to choose a reasonable
   2471               # default.
   2472           "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   2473               # Compute Engine API.
   2474           "ipConfiguration": "A String", # Configuration for VM IPs.
   2475           "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   2476               # service will choose a number of threads (according to the number of cores
   2477               # on the selected machine type for batch, or 1 by convention for streaming).
   2478           "poolArgs": { # Extra arguments for this worker pool.
   2479             "a_key": "", # Properties of the object. Contains field @type with type URL.
   2480           },
   2481           "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   2482               # execute the job.  If zero or unspecified, the service will
   2483               # attempt to choose a reasonable default.
   2484           "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   2485               # harness, residing in Google Container Registry.
   2486           "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   2487               # the form "regions/REGION/subnetworks/SUBNETWORK".
   2488           "packages": [ # Packages to be installed on workers.
   2489             { # The packages that must be installed in order for a worker to run the
   2490                 # steps of the Cloud Dataflow job that will be assigned to its worker
   2491                 # pool.
   2492                 #
   2493                 # This is the mechanism by which the Cloud Dataflow SDK causes code to
   2494                 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   2495                 # might use this to install jars containing the user's code and all of the
   2496                 # various dependencies (libraries, data files, etc.) required in order
   2497                 # for that code to run.
   2498               "location": "A String", # The resource to read the package from. The supported resource type is:
   2499                   #
   2500                   # Google Cloud Storage:
   2501                   #
   2502                   #   storage.googleapis.com/{bucket}
   2503                   #   bucket.storage.googleapis.com/
   2504               "name": "A String", # The name of the package.
   2505             },
   2506           ],
   2507           "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   2508             "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   2509             "algorithm": "A String", # The algorithm to use for autoscaling.
   2510           },
   2511           "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   2512               # select a default set of packages which are useful to worker
   2513               # harnesses written in a particular language.
   2514           "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   2515               # attempt to choose a reasonable default.
   2516           "metadata": { # Metadata to set on the Google Compute Engine VMs.
   2517             "a_key": "A String",
   2518           },
   2519         },
   2520       ],
   2521     },
   2522     "pipelineDescription": { # Preliminary field: the format of this data may change at any time.
   2523         # A descriptive representation of the submitted pipeline as well as its
   2524         # executed form, covering the user pipeline and the stages through which
   2525         # it is executed.  Created by the Cloud Dataflow service and provided for
   2526         # ease of visualizing the pipeline and interpreting Dataflow-provided
   2527         # metrics.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   2528       "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   2529         { # Description of the type, names/ids, and input/outputs for a transform.
   2530           "kind": "A String", # Type of transform.
   2531           "name": "A String", # User provided name for this transform instance.
   2532           "inputCollectionName": [ # User names for all collection inputs to this transform.
   2533             "A String",
   2534           ],
   2535           "displayData": [ # Transform-specific display data.
   2536             { # Data provided with a pipeline or transform to provide descriptive info.
   2537               "shortStrValue": "A String", # A possible additional shorter value to display.
   2538                   # For example a java_class_name_value of com.mypackage.MyDoFn
   2539                   # will be stored with MyDoFn as the short_str_value and
   2540                   # com.mypackage.MyDoFn as the java_class_name value.
   2541                   # short_str_value can be displayed and java_class_name_value
   2542                   # will be displayed as a tooltip.
   2543               "durationValue": "A String", # Contains value if the data is of duration type.
   2544               "url": "A String", # An optional full URL.
   2545               "floatValue": 3.14, # Contains value if the data is of float type.
   2546               "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2547                   # language namespace (e.g. a Python module) which defines the display data.
   2548                   # This allows a dax monitoring system to specially handle the data
   2549                   # and perform custom rendering.
   2550               "javaClassValue": "A String", # Contains value if the data is of java class type.
   2551               "label": "A String", # An optional label to display in a dax UI for the element.
   2552               "boolValue": True or False, # Contains value if the data is of a boolean type.
   2553               "strValue": "A String", # Contains value if the data is of string type.
   2554               "key": "A String", # The key identifying the display data.
   2555                   # This is intended to be used as a label for the display data
   2556                   # when viewed in a dax monitoring system.
   2557               "int64Value": "A String", # Contains value if the data is of int64 type.
   2558               "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2559             },
   2560           ],
   2561           "outputCollectionName": [ # User names for all collection outputs to this transform.
   2562             "A String",
   2563           ],
   2564           "id": "A String", # SDK generated id of this transform instance.
   2565         },
   2566       ],
   2567       "displayData": [ # Pipeline level display data.
   2568         { # Data provided with a pipeline or transform to provide descriptive info.
   2569           "shortStrValue": "A String", # A possible additional shorter value to display.
   2570               # For example a java_class_name_value of com.mypackage.MyDoFn
   2571               # will be stored with MyDoFn as the short_str_value and
   2572               # com.mypackage.MyDoFn as the java_class_name value.
   2573               # short_str_value can be displayed and java_class_name_value
   2574               # will be displayed as a tooltip.
   2575           "durationValue": "A String", # Contains value if the data is of duration type.
   2576           "url": "A String", # An optional full URL.
   2577           "floatValue": 3.14, # Contains value if the data is of float type.
   2578           "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2579               # language namespace (e.g. a Python module) which defines the display data.
   2580               # This allows a dax monitoring system to specially handle the data
   2581               # and perform custom rendering.
   2582           "javaClassValue": "A String", # Contains value if the data is of java class type.
   2583           "label": "A String", # An optional label to display in a dax UI for the element.
   2584           "boolValue": True or False, # Contains value if the data is of a boolean type.
   2585           "strValue": "A String", # Contains value if the data is of string type.
   2586           "key": "A String", # The key identifying the display data.
   2587               # This is intended to be used as a label for the display data
   2588               # when viewed in a dax monitoring system.
   2589           "int64Value": "A String", # Contains value if the data is of int64 type.
   2590           "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2591         },
   2592       ],
   2593       "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   2594         { # Description of the composing transforms, names/ids, and input/outputs of a
   2595             # stage of execution.  Some composing transforms and sources may have been
   2596             # generated by the Dataflow service during execution planning.
   2597           "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   2598             { # Description of an interstitial value between transforms in an execution
   2599                 # stage.
   2600               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2601               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2602                   # source is most closely associated.
   2603               "name": "A String", # Dataflow service generated name for this source.
   2604             },
   2605           ],
   2606           "kind": "A String", # Type of transform this stage is executing.
   2607           "name": "A String", # Dataflow service generated name for this stage.
   2608           "outputSource": [ # Output sources for this stage.
   2609             { # Description of an input or output of an execution stage.
   2610               "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2611               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2612                   # source is most closely associated.
   2613               "name": "A String", # Dataflow service generated name for this source.
   2614               "sizeBytes": "A String", # Size of the source, if measurable.
   2615             },
   2616           ],
   2617           "inputSource": [ # Input sources for this stage.
   2618             { # Description of an input or output of an execution stage.
   2619               "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2620               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2621                   # source is most closely associated.
   2622               "name": "A String", # Dataflow service generated name for this source.
   2623               "sizeBytes": "A String", # Size of the source, if measurable.
   2624             },
   2625           ],
   2626           "componentTransform": [ # Transforms that comprise this execution stage.
   2627             { # Description of a transform executed as part of an execution stage.
   2628               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2629               "originalTransform": "A String", # User name for the original user transform with which this transform is
   2630                   # most closely associated.
   2631               "name": "A String", # Dataflow service generated name for this transform.
   2632             },
   2633           ],
   2634           "id": "A String", # Dataflow service generated id for this stage.
   2635         },
   2636       ],
   2637     },
   2638     "steps": [ # The top-level steps that constitute the entire job.
   2639       { # Defines a particular step within a Cloud Dataflow job.
   2640           #
   2641           # A job consists of multiple steps, each of which performs some
   2642           # specific operation as part of the overall job.  Data is typically
   2643           # passed from one step to another as part of the job.
   2644           #
   2645           # Here's an example of a sequence of steps which together implement a
   2646           # Map-Reduce job:
   2647           #
   2648           #   * Read a collection of data from some source, parsing the
   2649           #     collection's elements.
   2650           #
   2651           #   * Validate the elements.
   2652           #
   2653           #   * Apply a user-defined function to map each element to some value
   2654           #     and extract an element-specific key value.
   2655           #
   2656           #   * Group elements with the same key into a single element with
   2657           #     that key, transforming a multiply-keyed collection into a
   2658           #     uniquely-keyed collection.
   2659           #
   2660           #   * Write the elements out to some data sink.
   2661           #
   2662           # Note that the Cloud Dataflow service may be used to run many different
   2663           # types of jobs, not just Map-Reduce.
   2664         "kind": "A String", # The kind of step in the Cloud Dataflow job.
   2665         "properties": { # Named properties associated with the step. Each kind of
   2666             # predefined step has its own required set of properties.
   2667             # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   2668           "a_key": "", # Properties of the object.
   2669         },
   2670         "name": "A String", # The name that identifies the step. This must be unique for each
   2671             # step with respect to all other steps in the Cloud Dataflow job.
   2672       },
   2673     ],
   2674     "currentStateTime": "A String", # The timestamp associated with the current state.
   2675     "tempFiles": [ # A set of files the system should be aware of that are used
   2676         # for temporary storage. These temporary files will be
   2677         # removed on job completion.
   2678         # No duplicates are allowed.
   2679         # No file patterns are supported.
   2680         # 
   2681         # The supported files are:
   2682         # 
   2683         # Google Cloud Storage:
   2684         # 
   2685         #    storage.googleapis.com/{bucket}/{object}
   2686         #    bucket.storage.googleapis.com/{object}
   2687       "A String",
   2688     ],
   2689     "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   2690         # callers cannot mutate it.
   2691       { # A message describing the state of a particular execution stage.
   2692         "executionStageName": "A String", # The name of the execution stage.
   2693         "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   2694         "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   2695       },
   2696     ],
   2697     "type": "A String", # The type of Cloud Dataflow job.
   2698     "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   2699         # Cloud Dataflow service.
   2700     "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   2701         # of the job it replaced.
   2702         # 
   2703         # When sending a `CreateJobRequest`, you can update a job by specifying it
   2704         # here. The job named here is stopped, and its intermediate state is
   2705         # transferred to this job.
   2706     "executionInfo": { # Deprecated.  Additional information about how a Cloud Dataflow job
   2707         # will be executed that isn't contained in the submitted job.
   2708       "stages": { # A mapping from each stage to the information about that stage.
   2709         "a_key": { # Contains information about how a particular
   2710             # google.dataflow.v1beta3.Step will be executed.
   2711           "stepName": [ # The steps associated with the execution stage.
   2712               # Note that stages may have several steps, and that a given step
   2713               # might be run by more than one stage.
   2714             "A String",
   2715           ],
   2716         },
   2717       },
   2718     },
   2719   }
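          # Illustrative sketch (not part of the generated schema above): a minimal
          # Job body could be assembled as a plain dict using the fields documented
          # above.  Every literal value below (job name, bucket path, worker
          # settings) is a hypothetical placeholder, not a required value.
          #
          #   body = {
          #     "name": "example-wordcount",
          #     "type": "JOB_TYPE_BATCH",
          #     "environment": {
          #       "tempStoragePrefix": "storage.googleapis.com/example-bucket/temp",
          #       "workerPools": [
          #         {
          #           "kind": "harness",
          #           "numWorkers": 3,
          #           "teardownPolicy": "TEARDOWN_ALWAYS",
          #         },
          #       ],
          #     },
          #   }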
   2720 
   2721   x__xgafv: string, V1 error format.
   2722     Allowed values
   2723       1 - v1 error format
   2724       2 - v2 error format
   2725 
   2726 Returns:
   2727   An object of the form:
   2728 
   2729     { # Defines a job to be run by the Cloud Dataflow service.
   2730       "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   2731           # If this field is set, the service will ensure its uniqueness.
   2732           # The request to create a job will fail if the service has knowledge of a
   2733           # previously submitted job with the same client's ID and job name.
   2734           # The caller may use this field to ensure idempotence of job
   2735           # creation across retried attempts to create a job.
   2736           # By default, the field is empty and, in that case, the service ignores it.
   2737       "requestedState": "A String", # The job's requested state.
   2738           #
   2739           # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   2740           # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   2741           # also be used to directly set a job's requested state to
   2742           # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   2743           # job if it has not already reached a terminal state.
   2744       "name": "A String", # The user-specified Cloud Dataflow job name.
   2745           #
   2746           # Only one Job with a given name may exist in a project at any
   2747           # given time. If a caller attempts to create a Job with the same
   2748           # name as an already-existing Job, the attempt returns the
   2749           # existing Job.
   2750           #
   2751           # The name must match the regular expression
   2752           # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   2753       "location": "A String", # The location that contains this job.
   2754       "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   2755           # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   2756       "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   2757       "currentState": "A String", # The current state of the job.
   2758           #
   2759           # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   2760           # specified.
   2761           #
   2762           # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   2763           # terminal state. After a job has reached a terminal state, no
   2764           # further state updates may be made.
   2765           #
   2766           # This field may be mutated by the Cloud Dataflow service;
   2767           # callers cannot mutate it.
   2768       "labels": { # User-defined labels for this job.
   2769           #
   2770           # The labels map can contain no more than 64 entries.  Entries of the labels
   2771           # map are UTF8 strings that comply with the following restrictions:
   2772           #
   2773           # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   2774           # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   2775           # * Both keys and values are additionally constrained to be <= 128 bytes in
   2776           # size.
   2777         "a_key": "A String",
   2778       },
   2779       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   2780           # corresponding name prefixes of the new job.
   2781         "a_key": "A String",
   2782       },
   2783       "id": "A String", # The unique ID of this job.
   2784           #
   2785           # This field is set by the Cloud Dataflow service when the Job is
   2786           # created, and is immutable for the life of the job.
   2787       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   2788         "version": { # A structure describing which components and their versions of the service
   2789             # are required in order to run the job.
   2790           "a_key": "", # Properties of the object.
   2791         },
   2792         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2793             # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   2794             # this resource prefix, where {JOBNAME} is the value of the
   2795             # job_name field.  The resulting bucket and object prefix is used
   2796             # as the prefix of the resources used to store temporary data
   2797             # needed during the job execution.  NOTE: This will override the
   2798             # value in taskrunner_settings.
   2799             # The supported resource type is:
   2800             #
   2801             # Google Cloud Storage:
   2802             #
   2803             #   storage.googleapis.com/{bucket}/{object}
   2804             #   bucket.storage.googleapis.com/{object}
   2805         "internalExperiments": { # Experimental settings.
   2806           "a_key": "", # Properties of the object. Contains field @type with type URL.
   2807         },
   2808         "dataset": "A String", # The dataset for the current project where various workflow
   2809             # related tables are stored.
   2810             #
   2811             # The supported resource type is:
   2812             #
   2813             # Google BigQuery:
   2814             #   bigquery.googleapis.com/{dataset}
   2815         "experiments": [ # The list of experiments to enable.
   2816           "A String",
   2817         ],
   2818         "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   2819         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   2820             # options are passed through the service and are used to recreate the
   2821             # SDK pipeline options on the worker in a language agnostic and platform
   2822             # independent way.
   2823           "a_key": "", # Properties of the object.
   2824         },
   2825         "userAgent": { # A description of the process that generated the request.
   2826           "a_key": "", # Properties of the object.
   2827         },
   2828         "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   2829             # unspecified, the service will attempt to choose a reasonable
   2830             # default.  This should be in the form of the API service name,
   2831             # e.g. "compute.googleapis.com".
   2832         "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   2833             # specified in order for the job to have workers.
   2834           { # Describes one particular pool of Cloud Dataflow workers to be
   2835               # instantiated by the Cloud Dataflow service in order to perform the
   2836               # computations required by a job.  Note that a workflow job may use
   2837               # multiple pools, in order to match the various computational
   2838               # requirements of the various stages of the job.
   2839             "diskSourceImage": "A String", # Fully qualified source image for disks.
   2840             "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   2841                 # using the standard Dataflow task runner.  Users should ignore
   2842                 # this field.
   2843               "workflowFileName": "A String", # The file to store the workflow in.
   2844               "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   2845                   # will not be uploaded.
   2846                   #
   2847                   # The supported resource type is:
   2848                   #
   2849                   # Google Cloud Storage:
   2850                   #   storage.googleapis.com/{bucket}/{object}
   2851                   #   bucket.storage.googleapis.com/{object}
   2852               "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   2853               "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   2854                 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   2855                 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   2856                     # "shuffle/v1beta1".
   2857                 "workerId": "A String", # The ID of the worker running this pipeline.
   2858                 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   2859                     #
   2860                     # When workers access Google Cloud APIs, they logically do so via
   2861                     # relative URLs.  If this field is specified, it supplies the base
   2862                     # URL to use for resolving these relative URLs.  The normative
   2863                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2864                     # Locators".
   2865                     #
   2866                     # If not specified, the default value is "http://www.googleapis.com/"
   2867                 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   2868                     # "dataflow/v1b3/projects".
   2869                 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2870                     # storage.
   2871                     #
   2872                     # The supported resource type is:
   2873                     #
   2874                     # Google Cloud Storage:
   2875                     #
   2876                     #   storage.googleapis.com/{bucket}/{object}
   2877                     #   bucket.storage.googleapis.com/{object}
   2878               },
   2879               "vmId": "A String", # The ID string of the VM.
   2880               "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   2881               "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   2882               "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   2883                   # access the Cloud Dataflow API.
   2884                 "A String",
   2885               ],
   2886               "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   2887                   # taskrunner; e.g. "root".
   2888               "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   2889                   #
   2890                   # When workers access Google Cloud APIs, they logically do so via
   2891                   # relative URLs.  If this field is specified, it supplies the base
   2892                   # URL to use for resolving these relative URLs.  The normative
   2893                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2894                   # Locators".
   2895                   #
   2896                   # If not specified, the default value is "http://www.googleapis.com/"
   2897               "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   2898                   # taskrunner; e.g. "wheel".
   2899               "languageHint": "A String", # The suggested backend language.
   2900               "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   2901                   # console.
   2902               "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   2903               "logDir": "A String", # The directory on the VM to store logs.
   2904               "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
   2905               "harnessCommand": "A String", # The command to launch the worker harness.
   2906               "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   2907                   # temporary storage.
   2908                   #
   2909                   # The supported resource type is:
   2910                   #
   2911                   # Google Cloud Storage:
   2912                   #   storage.googleapis.com/{bucket}/{object}
   2913                   #   bucket.storage.googleapis.com/{object}
   2914               "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   2915             },
   2916             "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   2917                 # are supported.
   2918             "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   2919                 # service will attempt to choose a reasonable default.
   2920             "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   2921                 # the service will use the network "default".
   2922             "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   2923                 # will attempt to choose a reasonable default.
   2924             "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   2925                 # attempt to choose a reasonable default.
   2926             "dataDisks": [ # Data disks that are used by a VM in this workflow.
   2927               { # Describes the data disk used by a workflow job.
   2928                 "mountPoint": "A String", # Directory in a VM where disk is mounted.
   2929                 "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   2930                     # attempt to choose a reasonable default.
   2931                 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   2932                     # must be a disk type appropriate to the project and zone in which
   2933                     # the workers will run.  If unknown or unspecified, the service
   2934                     # will attempt to choose a reasonable default.
   2935                     #
   2936                     # For example, the standard persistent disk type is a resource name
   2937                     # typically ending in "pd-standard".  If SSD persistent disks are
   2938                     # available, the resource name typically ends with "pd-ssd".  The
   2939                     # actual valid values are defined by the Google Compute Engine API,
   2940                     # not by the Cloud Dataflow API; consult the Google Compute Engine
   2941                     # documentation for more information about determining the set of
   2942                     # available disk types for a particular project and zone.
   2943                     #
   2944                     # Google Compute Engine Disk types are local to a particular
   2945                     # project in a particular zone, and so the resource name will
   2946                     # typically look something like this:
   2947                     #
   2948                     # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   2949               },
   2950             ],
   2951             "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
   2952                 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   2953                 # `TEARDOWN_NEVER`.
   2954                 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   2955                 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   2956                 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   2957                 # down.
   2958                 #
   2959                 # If the workers are not torn down by the service, they will
   2960                 # continue to run and use Google Compute Engine VM resources in the
   2961                 # user's project until they are explicitly terminated by the user.
   2962                 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   2963                 # policy except for small, manually supervised test jobs.
   2964                 #
   2965                 # If unknown or unspecified, the service will attempt to choose a reasonable
   2966                 # default.
   2967             "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   2968                 # Compute Engine API.
   2969             "ipConfiguration": "A String", # Configuration for VM IPs.
   2970             "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   2971                 # service will choose a number of threads (according to the number of cores
   2972                 # on the selected machine type for batch, or 1 by convention for streaming).
   2973             "poolArgs": { # Extra arguments for this worker pool.
   2974               "a_key": "", # Properties of the object. Contains field @type with type URL.
   2975             },
   2976             "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   2977                 # execute the job.  If zero or unspecified, the service will
   2978                 # attempt to choose a reasonable default.
   2979             "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   2980                 # harness, residing in Google Container Registry.
   2981             "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   2982                 # the form "regions/REGION/subnetworks/SUBNETWORK".
   2983             "packages": [ # Packages to be installed on workers.
   2984               { # The packages that must be installed in order for a worker to run the
   2985                   # steps of the Cloud Dataflow job that will be assigned to its worker
   2986                   # pool.
   2987                   #
   2988                   # This is the mechanism by which the Cloud Dataflow SDK causes code to
   2989                   # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   2990                   # might use this to install jars containing the user's code and all of the
   2991                   # various dependencies (libraries, data files, etc.) required in order
   2992                   # for that code to run.
   2993                 "location": "A String", # The resource to read the package from. The supported resource type is:
   2994                     #
   2995                     # Google Cloud Storage:
   2996                     #
   2997                     #   storage.googleapis.com/{bucket}
   2998                     #   bucket.storage.googleapis.com/
   2999                 "name": "A String", # The name of the package.
   3000               },
   3001             ],
   3002             "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   3003               "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   3004               "algorithm": "A String", # The algorithm to use for autoscaling.
   3005             },
   3006             "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   3007                 # select a default set of packages which are useful to worker
   3008                 # harnesses written in a particular language.
   3009             "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   3010                 # attempt to choose a reasonable default.
   3011             "metadata": { # Metadata to set on the Google Compute Engine VMs.
   3012               "a_key": "A String",
   3013             },
   3014           },
   3015         ],
   3016       },
   3017       "pipelineDescription": { # Preliminary field: the format of this data may change at any time.
   3018           # A descriptive representation of the submitted pipeline as well as its
   3019           # executed form, covering the user pipeline and the stages through which
   3020           # it is executed.  Created by the Cloud Dataflow service and provided for
   3021           # ease of visualizing the pipeline and interpreting Dataflow-provided
   3022           # metrics.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   3023         "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   3024           { # Description of the type, names/ids, and input/outputs for a transform.
   3025             "kind": "A String", # Type of transform.
   3026             "name": "A String", # User provided name for this transform instance.
   3027             "inputCollectionName": [ # User names for all collection inputs to this transform.
   3028               "A String",
   3029             ],
   3030             "displayData": [ # Transform-specific display data.
   3031               { # Data provided with a pipeline or transform to provide descriptive info.
   3032                 "shortStrValue": "A String", # A possible additional shorter value to display.
   3033                     # For example a java_class_name_value of com.mypackage.MyDoFn
   3034                     # will be stored with MyDoFn as the short_str_value and
   3035                     # com.mypackage.MyDoFn as the java_class_name value.
   3036                     # short_str_value can be displayed and java_class_name_value
   3037                     # will be displayed as a tooltip.
   3038                 "durationValue": "A String", # Contains value if the data is of duration type.
   3039                 "url": "A String", # An optional full URL.
   3040                 "floatValue": 3.14, # Contains value if the data is of float type.
   3041                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   3042                     # language namespace (e.g. a Python module) which defines the display data.
   3043                     # This allows a dax monitoring system to specially handle the data
   3044                     # and perform custom rendering.
   3045                 "javaClassValue": "A String", # Contains value if the data is of java class type.
   3046                 "label": "A String", # An optional label to display in a dax UI for the element.
   3047                 "boolValue": True or False, # Contains value if the data is of a boolean type.
   3048                 "strValue": "A String", # Contains value if the data is of string type.
   3049                 "key": "A String", # The key identifying the display data.
   3050                     # This is intended to be used as a label for the display data
   3051                     # when viewed in a dax monitoring system.
   3052                 "int64Value": "A String", # Contains value if the data is of int64 type.
   3053                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
   3054               },
   3055             ],
   3056             "outputCollectionName": [ # User names for all collection outputs to this transform.
   3057               "A String",
   3058             ],
   3059             "id": "A String", # SDK generated id of this transform instance.
   3060           },
   3061         ],
   3062         "displayData": [ # Pipeline level display data.
   3063           { # Data provided with a pipeline or transform to provide descriptive info.
   3064             "shortStrValue": "A String", # A possible additional shorter value to display.
   3065                 # For example a java_class_name_value of com.mypackage.MyDoFn
   3066                 # will be stored with MyDoFn as the short_str_value and
   3067                 # com.mypackage.MyDoFn as the java_class_name value.
   3068                 # short_str_value can be displayed and java_class_name_value
   3069                 # will be displayed as a tooltip.
   3070             "durationValue": "A String", # Contains value if the data is of duration type.
   3071             "url": "A String", # An optional full URL.
   3072             "floatValue": 3.14, # Contains value if the data is of float type.
   3073             "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   3074                 # language namespace (e.g. a Python module) which defines the display data.
   3075                 # This allows a dax monitoring system to specially handle the data
   3076                 # and perform custom rendering.
   3077             "javaClassValue": "A String", # Contains value if the data is of java class type.
   3078             "label": "A String", # An optional label to display in a dax UI for the element.
   3079             "boolValue": True or False, # Contains value if the data is of a boolean type.
   3080             "strValue": "A String", # Contains value if the data is of string type.
   3081             "key": "A String", # The key identifying the display data.
   3082                 # This is intended to be used as a label for the display data
   3083                 # when viewed in a dax monitoring system.
   3084             "int64Value": "A String", # Contains value if the data is of int64 type.
   3085             "timestampValue": "A String", # Contains value if the data is of timestamp type.
   3086           },
   3087         ],
   3088         "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   3089           { # Description of the composing transforms, names/ids, and input/outputs of a
   3090               # stage of execution.  Some composing transforms and sources may have been
   3091               # generated by the Dataflow service during execution planning.
   3092             "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   3093               { # Description of an interstitial value between transforms in an execution
   3094                   # stage.
   3095                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   3096                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   3097                     # source is most closely associated.
   3098                 "name": "A String", # Dataflow service generated name for this source.
   3099               },
   3100             ],
   3101             "kind": "A String", # Type of transform this stage is executing.
   3102             "name": "A String", # Dataflow service generated name for this stage.
   3103             "outputSource": [ # Output sources for this stage.
   3104               { # Description of an input or output of an execution stage.
   3105                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   3106                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   3107                     # source is most closely associated.
   3108                 "name": "A String", # Dataflow service generated name for this source.
   3109                 "sizeBytes": "A String", # Size of the source, if measurable.
   3110               },
   3111             ],
   3112             "inputSource": [ # Input sources for this stage.
   3113               { # Description of an input or output of an execution stage.
   3114                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   3115                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   3116                     # source is most closely associated.
   3117                 "name": "A String", # Dataflow service generated name for this source.
   3118                 "sizeBytes": "A String", # Size of the source, if measurable.
   3119               },
   3120             ],
   3121             "componentTransform": [ # Transforms that comprise this execution stage.
   3122               { # Description of a transform executed as part of an execution stage.
   3123                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   3124                 "originalTransform": "A String", # User name for the original user transform with which this transform is
   3125                     # most closely associated.
   3126                 "name": "A String", # Dataflow service generated name for this transform.
   3127               },
   3128             ],
   3129             "id": "A String", # Dataflow service generated id for this stage.
   3130           },
   3131         ],
   3132       },
   3133       "steps": [ # The top-level steps that constitute the entire job.
   3134         { # Defines a particular step within a Cloud Dataflow job.
   3135             #
   3136             # A job consists of multiple steps, each of which performs some
   3137             # specific operation as part of the overall job.  Data is typically
   3138             # passed from one step to another as part of the job.
   3139             #
   3140             # Here's an example of a sequence of steps which together implement a
   3141             # Map-Reduce job:
   3142             #
   3143             #   * Read a collection of data from some source, parsing the
   3144             #     collection's elements.
   3145             #
   3146             #   * Validate the elements.
   3147             #
   3148             #   * Apply a user-defined function to map each element to some value
   3149             #     and extract an element-specific key value.
   3150             #
   3151             #   * Group elements with the same key into a single element with
   3152             #     that key, transforming a multiply-keyed collection into a
   3153             #     uniquely-keyed collection.
   3154             #
   3155             #   * Write the elements out to some data sink.
   3156             #
   3157             # Note that the Cloud Dataflow service may be used to run many different
   3158             # types of jobs, not just Map-Reduce.
   3159           "kind": "A String", # The kind of step in the Cloud Dataflow job.
   3160           "properties": { # Named properties associated with the step. Each kind of
   3161               # predefined step has its own required set of properties.
   3162               # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   3163             "a_key": "", # Properties of the object.
   3164           },
   3165           "name": "A String", # The name that identifies the step. This must be unique for each
   3166               # step with respect to all other steps in the Cloud Dataflow job.
   3167         },
   3168       ],
   3169       "currentStateTime": "A String", # The timestamp associated with the current state.
   3170       "tempFiles": [ # A set of files the system should be aware of that are used
   3171           # for temporary storage. These temporary files will be
   3172           # removed on job completion.
   3173           # No duplicates are allowed.
   3174           # No file patterns are supported.
   3175           #
   3176           # The supported files are:
   3177           #
   3178           # Google Cloud Storage:
   3179           #
   3180           #    storage.googleapis.com/{bucket}/{object}
   3181           #    bucket.storage.googleapis.com/{object}
   3182         "A String",
   3183       ],
   3184       "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   3185           # callers cannot mutate it.
   3186         { # A message describing the state of a particular execution stage.
   3187           "executionStageName": "A String", # The name of the execution stage.
   3188           "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   3189           "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   3190         },
   3191       ],
   3192       "type": "A String", # The type of Cloud Dataflow job.
   3193       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   3194           # Cloud Dataflow service.
   3195       "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   3196           # of the job it replaced.
   3197           #
   3198           # When sending a `CreateJobRequest`, you can update a job by specifying it
   3199           # here. The job named here is stopped, and its intermediate state is
   3200           # transferred to this job.
   3201       "executionInfo": { # Deprecated.  Additional information about how a Cloud Dataflow job
   3202           # will be executed that isn't contained in the submitted job.
   3203         "stages": { # A mapping from each stage to the information about that stage.
   3204           "a_key": { # Contains information about how a particular
   3205               # google.dataflow.v1beta3.Step will be executed.
   3206             "stepName": [ # The steps associated with the execution stage.
   3207                 # Note that stages may have several steps, and that a given step
   3208                 # might be run by more than one stage.
   3209               "A String",
   3210             ],
   3211           },
   3212         },
   3213       },
   3214     }</pre>
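<p>The following sketch is illustrative only and is not part of the generated reference above. It assumes the <code>google-api-python-client</code> and <code>google-auth</code> packages and Application Default Credentials, and it uses <code>create</code> as one representative method on this resource that accepts a Job body and returns a Job of the form documented above; the project ID, region, and job values are hypothetical placeholders.</p>
<pre>
# Hedged sketch: build the client, submit a minimal Job body, and read a few
# fields from the returned Job dict documented above.
import google.auth
from googleapiclient.discovery import build

credentials, _ = google.auth.default()
dataflow = build('dataflow', 'v1b3', credentials=credentials)

body = {
    'name': 'example-wordcount',   # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
    'type': 'JOB_TYPE_BATCH',
    'environment': {
        # storage.googleapis.com/{bucket}/{object}, per the schema above
        'tempStoragePrefix': 'storage.googleapis.com/example-bucket/temp',
    },
}

job = dataflow.projects().locations().jobs().create(
    projectId='example-project', location='us-central1', body=body).execute()

# Inspect the returned Job object (see the schema above).
print('job id:', job.get('id'))
print('state :', job.get('currentState'), 'at', job.get('currentStateTime'))
for stage in job.get('stageStates', []):
    print(stage['executionStageName'], stage['executionStageState'])
</pre>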
   3215 </div>
   3216 
   3217 </body></html>