      1 <html><body>
      2 <style>
      3 
      4 body, h1, h2, h3, div, span, p, pre, a {
      5   margin: 0;
      6   padding: 0;
      7   border: 0;
      8   font-weight: inherit;
      9   font-style: inherit;
     10   font-size: 100%;
     11   font-family: inherit;
     12   vertical-align: baseline;
     13 }
     14 
     15 body {
     16   font-size: 13px;
     17   padding: 1em;
     18 }
     19 
     20 h1 {
     21   font-size: 26px;
     22   margin-bottom: 1em;
     23 }
     24 
     25 h2 {
     26   font-size: 24px;
     27   margin-bottom: 1em;
     28 }
     29 
     30 h3 {
     31   font-size: 20px;
     32   margin-bottom: 1em;
     33   margin-top: 1em;
     34 }
     35 
     36 pre, code {
     37   line-height: 1.5;
     38   font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
     39 }
     40 
     41 pre {
     42   margin-top: 0.5em;
     43 }
     44 
     45 h1, h2, h3, p {
      46   font-family: Arial, sans-serif;
     47 }
     48 
     49 h1, h2, h3 {
     50   border-bottom: solid #CCC 1px;
     51 }
     52 
     53 .toc_element {
     54   margin-top: 0.5em;
     55 }
     56 
     57 .firstline {
      58   margin-left: 2em;
     59 }
     60 
     61 .method  {
     62   margin-top: 1em;
     63   border: solid 1px #CCC;
     64   padding: 1em;
     65   background: #EEE;
     66 }
     67 
     68 .details {
     69   font-weight: bold;
     70   font-size: 14px;
     71 }
     72 
     73 </style>
     74 
     75 <h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a></h1>
     76 <h2>Instance Methods</h2>
     77 <p class="toc_element">
     78   <code><a href="dataflow_v1b3.projects.jobs.debug.html">debug()</a></code>
     79 </p>
     80 <p class="firstline">Returns the debug Resource.</p>
     81 
     82 <p class="toc_element">
     83   <code><a href="dataflow_v1b3.projects.jobs.messages.html">messages()</a></code>
     84 </p>
     85 <p class="firstline">Returns the messages Resource.</p>
     86 
     87 <p class="toc_element">
     88   <code><a href="dataflow_v1b3.projects.jobs.workItems.html">workItems()</a></code>
     89 </p>
     90 <p class="firstline">Returns the workItems Resource.</p>
     91 
     92 <p class="toc_element">
     93   <code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
     94 <p class="firstline">Creates a Cloud Dataflow job.</p>
     95 <p class="toc_element">
     96   <code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>
     97 <p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
     98 <p class="toc_element">
     99   <code><a href="#getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</a></code></p>
    100 <p class="firstline">Request the job status.</p>
    101 <p class="toc_element">
    102   <code><a href="#list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</a></code></p>
    103 <p class="firstline">List the jobs of a project.</p>
    104 <p class="toc_element">
    105   <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
    106 <p class="firstline">Retrieves the next page of results.</p>
    107 <p class="toc_element">
    108   <code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>
    109 <p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
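<p>The sketch below is illustrative rather than part of the generated reference: one minimal way to reach the methods listed above with the google-api-python-client, assuming Application Default Credentials are configured and using a placeholder project ID.</p>
<pre>
# Hypothetical usage sketch (not generated from the API surface documented on this page).
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')         # credentials resolved via Application Default Credentials
jobs = dataflow.projects().jobs()            # the collection documented on this page

# Page through the project's jobs using list() and list_next().
request = jobs.list(projectId='my-project')  # 'my-project' is a placeholder
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    request = jobs.list_next(previous_request=request, previous_response=response)
</pre>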
    110 <h3>Method Details</h3>
    111 <div class="method">
    112     <code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
    113   <pre>Creates a Cloud Dataflow job.
    114 
    115 Args:
    116   projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
    117   body: object, The request body. (required)
    118     The object takes the form of:
    119 
    120 { # Defines a job to be run by the Cloud Dataflow service.
    121     "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
    122         # If this field is set, the service will ensure its uniqueness.
    123         # The request to create a job will fail if the service has knowledge of a
    124         # previously submitted job with the same client's ID and job name.
    125         # The caller may use this field to ensure idempotence of job
    126         # creation across retried attempts to create a job.
    127         # By default, the field is empty and, in that case, the service ignores it.
    128     "requestedState": "A String", # The job's requested state.
    129         # 
    130         # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
    131         # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
    132         # also be used to directly set a job's requested state to
    133         # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
    134         # job if it has not already reached a terminal state.
    135     "name": "A String", # The user-specified Cloud Dataflow job name.
    136         # 
    137         # Only one Job with a given name may exist in a project at any
    138         # given time. If a caller attempts to create a Job with the same
    139         # name as an already-existing Job, the attempt returns the
    140         # existing Job.
    141         # 
    142         # The name must match the regular expression
    143         # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    144     "location": "A String", # The location that contains this job.
    145     "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
    146         # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    147     "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    148     "currentState": "A String", # The current state of the job.
    149         # 
    150         # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
    151         # specified.
    152         # 
    153         # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
    154         # terminal state. After a job has reached a terminal state, no
    155         # further state updates may be made.
    156         # 
    157         # This field may be mutated by the Cloud Dataflow service;
    158         # callers cannot mutate it.
    159     "labels": { # User-defined labels for this job.
    160         # 
    161         # The labels map can contain no more than 64 entries.  Entries of the labels
    162         # map are UTF8 strings that comply with the following restrictions:
    163         # 
    164         # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
    165         # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
    166         # * Both keys and values are additionally constrained to be <= 128 bytes in
    167         # size.
    168       "a_key": "A String",
    169     },
    170     "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
    171         # corresponding name prefixes of the new job.
    172       "a_key": "A String",
    173     },
    174     "id": "A String", # The unique ID of this job.
    175         # 
    176         # This field is set by the Cloud Dataflow service when the Job is
    177         # created, and is immutable for the life of the job.
    178     "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    179       "version": { # A structure describing which components and their versions of the service
    180           # are required in order to run the job.
    181         "a_key": "", # Properties of the object.
    182       },
    183       "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
     184           # storage.  The system will append the suffix "/temp-{JOBNAME}" to
    185           # this resource prefix, where {JOBNAME} is the value of the
    186           # job_name field.  The resulting bucket and object prefix is used
    187           # as the prefix of the resources used to store temporary data
    188           # needed during the job execution.  NOTE: This will override the
    189           # value in taskrunner_settings.
    190           # The supported resource type is:
    191           #
    192           # Google Cloud Storage:
    193           #
    194           #   storage.googleapis.com/{bucket}/{object}
    195           #   bucket.storage.googleapis.com/{object}
    196       "internalExperiments": { # Experimental settings.
    197         "a_key": "", # Properties of the object. Contains field @type with type URL.
    198       },
    199       "dataset": "A String", # The dataset for the current project where various workflow
    200           # related tables are stored.
    201           #
    202           # The supported resource type is:
    203           #
    204           # Google BigQuery:
    205           #   bigquery.googleapis.com/{dataset}
    206       "experiments": [ # The list of experiments to enable.
    207         "A String",
    208       ],
    209       "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    210       "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
    211           # options are passed through the service and are used to recreate the
    212           # SDK pipeline options on the worker in a language agnostic and platform
    213           # independent way.
    214         "a_key": "", # Properties of the object.
    215       },
    216       "userAgent": { # A description of the process that generated the request.
    217         "a_key": "", # Properties of the object.
    218       },
    219       "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
    220           # unspecified, the service will attempt to choose a reasonable
    221           # default.  This should be in the form of the API service name,
    222           # e.g. "compute.googleapis.com".
    223       "workerPools": [ # The worker pools. At least one "harness" worker pool must be
    224           # specified in order for the job to have workers.
    225         { # Describes one particular pool of Cloud Dataflow workers to be
    226             # instantiated by the Cloud Dataflow service in order to perform the
    227             # computations required by a job.  Note that a workflow job may use
    228             # multiple pools, in order to match the various computational
    229             # requirements of the various stages of the job.
    230           "diskSourceImage": "A String", # Fully qualified source image for disks.
    231           "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
    232               # using the standard Dataflow task runner.  Users should ignore
    233               # this field.
    234             "workflowFileName": "A String", # The file to store the workflow in.
    235             "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
    236                 # will not be uploaded.
    237                 #
    238                 # The supported resource type is:
    239                 #
    240                 # Google Cloud Storage:
    241                 #   storage.googleapis.com/{bucket}/{object}
    242                 #   bucket.storage.googleapis.com/{object}
    243             "commandlinesFileName": "A String", # The file to store preprocessing commands in.
    244             "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
    245               "reportingEnabled": True or False, # Whether to send work progress updates to the service.
    246               "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
    247                   # "shuffle/v1beta1".
    248               "workerId": "A String", # The ID of the worker running this pipeline.
    249               "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
    250                   #
    251                   # When workers access Google Cloud APIs, they logically do so via
    252                   # relative URLs.  If this field is specified, it supplies the base
    253                   # URL to use for resolving these relative URLs.  The normative
    254                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    255                   # Locators".
    256                   #
    257                   # If not specified, the default value is "http://www.googleapis.com/"
    258               "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
    259                   # "dataflow/v1b3/projects".
    260               "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    261                   # storage.
    262                   #
    263                   # The supported resource type is:
    264                   #
    265                   # Google Cloud Storage:
    266                   #
    267                   #   storage.googleapis.com/{bucket}/{object}
    268                   #   bucket.storage.googleapis.com/{object}
    269             },
    270             "vmId": "A String", # The ID string of the VM.
    271             "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
    272             "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
    273             "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
    274                 # access the Cloud Dataflow API.
    275               "A String",
    276             ],
    277             "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
    278                 # taskrunner; e.g. "root".
    279             "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
    280                 #
    281                 # When workers access Google Cloud APIs, they logically do so via
    282                 # relative URLs.  If this field is specified, it supplies the base
    283                 # URL to use for resolving these relative URLs.  The normative
    284                 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    285                 # Locators".
    286                 #
    287                 # If not specified, the default value is "http://www.googleapis.com/"
    288             "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
    289                 # taskrunner; e.g. "wheel".
    290             "languageHint": "A String", # The suggested backend language.
    291             "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
    292                 # console.
    293             "streamingWorkerMainClass": "A String", # The streaming worker main class name.
    294             "logDir": "A String", # The directory on the VM to store logs.
     295             "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
    296             "harnessCommand": "A String", # The command to launch the worker harness.
    297             "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
    298                 # temporary storage.
    299                 #
    300                 # The supported resource type is:
    301                 #
    302                 # Google Cloud Storage:
    303                 #   storage.googleapis.com/{bucket}/{object}
    304                 #   bucket.storage.googleapis.com/{object}
    305             "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
    306           },
    307           "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
    308               # are supported.
    309           "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
    310               # service will attempt to choose a reasonable default.
    311           "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
    312               # the service will use the network "default".
    313           "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
    314               # will attempt to choose a reasonable default.
    315           "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
    316               # attempt to choose a reasonable default.
    317           "dataDisks": [ # Data disks that are used by a VM in this workflow.
    318             { # Describes the data disk used by a workflow job.
    319               "mountPoint": "A String", # Directory in a VM where disk is mounted.
    320               "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
    321                   # attempt to choose a reasonable default.
    322               "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
    323                   # must be a disk type appropriate to the project and zone in which
    324                   # the workers will run.  If unknown or unspecified, the service
    325                   # will attempt to choose a reasonable default.
    326                   #
    327                   # For example, the standard persistent disk type is a resource name
    328                   # typically ending in "pd-standard".  If SSD persistent disks are
    329                   # available, the resource name typically ends with "pd-ssd".  The
     330                   # actual valid values are defined by the Google Compute Engine API,
    331                   # not by the Cloud Dataflow API; consult the Google Compute Engine
    332                   # documentation for more information about determining the set of
    333                   # available disk types for a particular project and zone.
    334                   #
    335                   # Google Compute Engine Disk types are local to a particular
    336                   # project in a particular zone, and so the resource name will
    337                   # typically look something like this:
    338                   #
    339                   # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
    340             },
    341           ],
     342           "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
    343               # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
    344               # `TEARDOWN_NEVER`.
    345               # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
    346               # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
    347               # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
    348               # down.
    349               #
    350               # If the workers are not torn down by the service, they will
    351               # continue to run and use Google Compute Engine VM resources in the
    352               # user's project until they are explicitly terminated by the user.
    353               # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
    354               # policy except for small, manually supervised test jobs.
    355               #
    356               # If unknown or unspecified, the service will attempt to choose a reasonable
    357               # default.
    358           "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
    359               # Compute Engine API.
    360           "ipConfiguration": "A String", # Configuration for VM IPs.
    361           "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
    362               # service will choose a number of threads (according to the number of cores
    363               # on the selected machine type for batch, or 1 by convention for streaming).
    364           "poolArgs": { # Extra arguments for this worker pool.
    365             "a_key": "", # Properties of the object. Contains field @type with type URL.
    366           },
    367           "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
    368               # execute the job.  If zero or unspecified, the service will
    369               # attempt to choose a reasonable default.
    370           "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
    371               # harness, residing in Google Container Registry.
    372           "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
    373               # the form "regions/REGION/subnetworks/SUBNETWORK".
    374           "packages": [ # Packages to be installed on workers.
    375             { # The packages that must be installed in order for a worker to run the
    376                 # steps of the Cloud Dataflow job that will be assigned to its worker
    377                 # pool.
    378                 #
    379                 # This is the mechanism by which the Cloud Dataflow SDK causes code to
    380                 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
    381                 # might use this to install jars containing the user's code and all of the
    382                 # various dependencies (libraries, data files, etc.) required in order
    383                 # for that code to run.
    384               "location": "A String", # The resource to read the package from. The supported resource type is:
    385                   #
    386                   # Google Cloud Storage:
    387                   #
    388                   #   storage.googleapis.com/{bucket}
    389                   #   bucket.storage.googleapis.com/
    390               "name": "A String", # The name of the package.
    391             },
    392           ],
    393           "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
    394             "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
    395             "algorithm": "A String", # The algorithm to use for autoscaling.
    396           },
    397           "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
    398               # select a default set of packages which are useful to worker
    399               # harnesses written in a particular language.
    400           "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
    401               # attempt to choose a reasonable default.
    402           "metadata": { # Metadata to set on the Google Compute Engine VMs.
    403             "a_key": "A String",
    404           },
    405         },
    406       ],
    407     },
     408     "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed
     409         # form.  This data is provided by the Dataflow service for ease of visualizing
     410         # the pipeline and interpreting Dataflow provided metrics.
     411         # Preliminary field: The format of this data may change at any time.  A
     412         # description of the user pipeline and stages through which it is executed.
     413         # Created by Cloud Dataflow service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    414       "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
    415         { # Description of the type, names/ids, and input/outputs for a transform.
    416           "kind": "A String", # Type of transform.
    417           "name": "A String", # User provided name for this transform instance.
    418           "inputCollectionName": [ # User names for all collection inputs to this transform.
    419             "A String",
    420           ],
    421           "displayData": [ # Transform-specific display data.
    422             { # Data provided with a pipeline or transform to provide descriptive info.
    423               "shortStrValue": "A String", # A possible additional shorter value to display.
    424                   # For example a java_class_name_value of com.mypackage.MyDoFn
    425                   # will be stored with MyDoFn as the short_str_value and
    426                   # com.mypackage.MyDoFn as the java_class_name value.
    427                   # short_str_value can be displayed and java_class_name_value
    428                   # will be displayed as a tooltip.
    429               "durationValue": "A String", # Contains value if the data is of duration type.
    430               "url": "A String", # An optional full URL.
    431               "floatValue": 3.14, # Contains value if the data is of float type.
    432               "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    433                   # language namespace (i.e. python module) which defines the display data.
    434                   # This allows a dax monitoring system to specially handle the data
    435                   # and perform custom rendering.
    436               "javaClassValue": "A String", # Contains value if the data is of java class type.
    437               "label": "A String", # An optional label to display in a dax UI for the element.
    438               "boolValue": True or False, # Contains value if the data is of a boolean type.
    439               "strValue": "A String", # Contains value if the data is of string type.
    440               "key": "A String", # The key identifying the display data.
    441                   # This is intended to be used as a label for the display data
    442                   # when viewed in a dax monitoring system.
    443               "int64Value": "A String", # Contains value if the data is of int64 type.
    444               "timestampValue": "A String", # Contains value if the data is of timestamp type.
    445             },
    446           ],
     447           "outputCollectionName": [ # User names for all collection outputs to this transform.
    448             "A String",
    449           ],
    450           "id": "A String", # SDK generated id of this transform instance.
    451         },
    452       ],
    453       "displayData": [ # Pipeline level display data.
    454         { # Data provided with a pipeline or transform to provide descriptive info.
    455           "shortStrValue": "A String", # A possible additional shorter value to display.
    456               # For example a java_class_name_value of com.mypackage.MyDoFn
    457               # will be stored with MyDoFn as the short_str_value and
    458               # com.mypackage.MyDoFn as the java_class_name value.
    459               # short_str_value can be displayed and java_class_name_value
    460               # will be displayed as a tooltip.
    461           "durationValue": "A String", # Contains value if the data is of duration type.
    462           "url": "A String", # An optional full URL.
    463           "floatValue": 3.14, # Contains value if the data is of float type.
    464           "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    465               # language namespace (i.e. python module) which defines the display data.
    466               # This allows a dax monitoring system to specially handle the data
    467               # and perform custom rendering.
    468           "javaClassValue": "A String", # Contains value if the data is of java class type.
    469           "label": "A String", # An optional label to display in a dax UI for the element.
    470           "boolValue": True or False, # Contains value if the data is of a boolean type.
    471           "strValue": "A String", # Contains value if the data is of string type.
    472           "key": "A String", # The key identifying the display data.
    473               # This is intended to be used as a label for the display data
    474               # when viewed in a dax monitoring system.
    475           "int64Value": "A String", # Contains value if the data is of int64 type.
    476           "timestampValue": "A String", # Contains value if the data is of timestamp type.
    477         },
    478       ],
    479       "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
    480         { # Description of the composing transforms, names/ids, and input/outputs of a
    481             # stage of execution.  Some composing transforms and sources may have been
    482             # generated by the Dataflow service during execution planning.
    483           "componentSource": [ # Collections produced and consumed by component transforms of this stage.
    484             { # Description of an interstitial value between transforms in an execution
    485                 # stage.
    486               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    487               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    488                   # source is most closely associated.
    489               "name": "A String", # Dataflow service generated name for this source.
    490             },
    491           ],
     492           "kind": "A String", # Type of transform this stage is executing.
    493           "name": "A String", # Dataflow service generated name for this stage.
    494           "outputSource": [ # Output sources for this stage.
    495             { # Description of an input or output of an execution stage.
    496               "userName": "A String", # Human-readable name for this source; may be user or system generated.
    497               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    498                   # source is most closely associated.
    499               "name": "A String", # Dataflow service generated name for this source.
    500               "sizeBytes": "A String", # Size of the source, if measurable.
    501             },
    502           ],
    503           "inputSource": [ # Input sources for this stage.
    504             { # Description of an input or output of an execution stage.
    505               "userName": "A String", # Human-readable name for this source; may be user or system generated.
    506               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    507                   # source is most closely associated.
    508               "name": "A String", # Dataflow service generated name for this source.
    509               "sizeBytes": "A String", # Size of the source, if measurable.
    510             },
    511           ],
    512           "componentTransform": [ # Transforms that comprise this execution stage.
    513             { # Description of a transform executed as part of an execution stage.
    514               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    515               "originalTransform": "A String", # User name for the original user transform with which this transform is
    516                   # most closely associated.
    517               "name": "A String", # Dataflow service generated name for this source.
    518             },
    519           ],
    520           "id": "A String", # Dataflow service generated id for this stage.
    521         },
    522       ],
    523     },
    524     "steps": [ # The top-level steps that constitute the entire job.
    525       { # Defines a particular step within a Cloud Dataflow job.
    526           #
    527           # A job consists of multiple steps, each of which performs some
    528           # specific operation as part of the overall job.  Data is typically
    529           # passed from one step to another as part of the job.
    530           #
    531           # Here's an example of a sequence of steps which together implement a
    532           # Map-Reduce job:
    533           #
    534           #   * Read a collection of data from some source, parsing the
    535           #     collection's elements.
    536           #
    537           #   * Validate the elements.
    538           #
    539           #   * Apply a user-defined function to map each element to some value
    540           #     and extract an element-specific key value.
    541           #
    542           #   * Group elements with the same key into a single element with
    543           #     that key, transforming a multiply-keyed collection into a
    544           #     uniquely-keyed collection.
    545           #
    546           #   * Write the elements out to some data sink.
    547           #
    548           # Note that the Cloud Dataflow service may be used to run many different
    549           # types of jobs, not just Map-Reduce.
    550         "kind": "A String", # The kind of step in the Cloud Dataflow job.
    551         "properties": { # Named properties associated with the step. Each kind of
    552             # predefined step has its own required set of properties.
    553             # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
    554           "a_key": "", # Properties of the object.
    555         },
    556         "name": "A String", # The name that identifies the step. This must be unique for each
    557             # step with respect to all other steps in the Cloud Dataflow job.
    558       },
    559     ],
    560     "currentStateTime": "A String", # The timestamp associated with the current state.
    561     "tempFiles": [ # A set of files the system should be aware of that are used
    562         # for temporary storage. These temporary files will be
    563         # removed on job completion.
    564         # No duplicates are allowed.
    565         # No file patterns are supported.
    566         # 
    567         # The supported files are:
    568         # 
    569         # Google Cloud Storage:
    570         # 
    571         #    storage.googleapis.com/{bucket}/{object}
    572         #    bucket.storage.googleapis.com/{object}
    573       "A String",
    574     ],
    575     "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
    576         # callers cannot mutate it.
    577       { # A message describing the state of a particular execution stage.
    578         "executionStageName": "A String", # The name of the execution stage.
     579         "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
    580         "currentStateTime": "A String", # The time at which the stage transitioned to this state.
    581       },
    582     ],
    583     "type": "A String", # The type of Cloud Dataflow job.
    584     "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
    585         # Cloud Dataflow service.
    586     "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
    587         # of the job it replaced.
    588         # 
    589         # When sending a `CreateJobRequest`, you can update a job by specifying it
    590         # here. The job named here is stopped, and its intermediate state is
    591         # transferred to this job.
    592     "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
    593         # isn't contained in the submitted job.
    594       "stages": { # A mapping from each stage to the information about that stage.
    595         "a_key": { # Contains information about how a particular
    596             # google.dataflow.v1beta3.Step will be executed.
    597           "stepName": [ # The steps associated with the execution stage.
    598               # Note that stages may have several steps, and that a given step
    599               # might be run by more than one stage.
    600             "A String",
    601           ],
    602         },
    603       },
    604     },
    605   }
    606 
    607   location: string, The location that contains this job.
    608   x__xgafv: string, V1 error format.
    609     Allowed values
    610       1 - v1 error format
    611       2 - v2 error format
    612   replaceJobId: string, Deprecated. This field is now in the Job message.
    613   view: string, The level of information requested in response.
    614 
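Example (illustrative sketch, not part of the generated reference): a minimal
call to this method with the google-api-python-client.  It assumes Application
Default Credentials; 'my-project' and the abbreviated job body are placeholders,
and a real request needs a fully specified Job (steps, environment, and so on)
as described above.  Most pipelines are submitted through the Dataflow SDK
runners rather than by calling create directly.

    from googleapiclient.discovery import build

    dataflow = build('dataflow', 'v1b3')
    job_body = {
        'name': 'example-job',  # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
        # ... 'environment', 'steps', and other fields from the schema above ...
    }
    response = dataflow.projects().jobs().create(
        projectId='my-project', body=job_body).execute()
    print(response['id'], response.get('currentState'))
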
    615 Returns:
    616   An object of the form:
    617 
    618     { # Defines a job to be run by the Cloud Dataflow service.
    619       "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
    620           # If this field is set, the service will ensure its uniqueness.
    621           # The request to create a job will fail if the service has knowledge of a
    622           # previously submitted job with the same client's ID and job name.
    623           # The caller may use this field to ensure idempotence of job
    624           # creation across retried attempts to create a job.
    625           # By default, the field is empty and, in that case, the service ignores it.
    626       "requestedState": "A String", # The job's requested state.
    627           #
    628           # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
    629           # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
    630           # also be used to directly set a job's requested state to
    631           # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
    632           # job if it has not already reached a terminal state.
    633       "name": "A String", # The user-specified Cloud Dataflow job name.
    634           #
    635           # Only one Job with a given name may exist in a project at any
    636           # given time. If a caller attempts to create a Job with the same
    637           # name as an already-existing Job, the attempt returns the
    638           # existing Job.
    639           #
    640           # The name must match the regular expression
    641           # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    642       "location": "A String", # The location that contains this job.
    643       "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
    644           # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    645       "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    646       "currentState": "A String", # The current state of the job.
    647           #
    648           # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
    649           # specified.
    650           #
    651           # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
    652           # terminal state. After a job has reached a terminal state, no
    653           # further state updates may be made.
    654           #
    655           # This field may be mutated by the Cloud Dataflow service;
    656           # callers cannot mutate it.
    657       "labels": { # User-defined labels for this job.
    658           #
    659           # The labels map can contain no more than 64 entries.  Entries of the labels
    660           # map are UTF8 strings that comply with the following restrictions:
    661           #
    662           # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
    663           # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
    664           # * Both keys and values are additionally constrained to be <= 128 bytes in
    665           # size.
    666         "a_key": "A String",
    667       },
    668       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
    669           # corresponding name prefixes of the new job.
    670         "a_key": "A String",
    671       },
    672       "id": "A String", # The unique ID of this job.
    673           #
    674           # This field is set by the Cloud Dataflow service when the Job is
    675           # created, and is immutable for the life of the job.
    676       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    677         "version": { # A structure describing which components and their versions of the service
    678             # are required in order to run the job.
    679           "a_key": "", # Properties of the object.
    680         },
    681         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
     682             # storage.  The system will append the suffix "/temp-{JOBNAME}" to
    683             # this resource prefix, where {JOBNAME} is the value of the
    684             # job_name field.  The resulting bucket and object prefix is used
    685             # as the prefix of the resources used to store temporary data
    686             # needed during the job execution.  NOTE: This will override the
    687             # value in taskrunner_settings.
    688             # The supported resource type is:
    689             #
    690             # Google Cloud Storage:
    691             #
    692             #   storage.googleapis.com/{bucket}/{object}
    693             #   bucket.storage.googleapis.com/{object}
    694         "internalExperiments": { # Experimental settings.
    695           "a_key": "", # Properties of the object. Contains field @type with type URL.
    696         },
    697         "dataset": "A String", # The dataset for the current project where various workflow
    698             # related tables are stored.
    699             #
    700             # The supported resource type is:
    701             #
    702             # Google BigQuery:
    703             #   bigquery.googleapis.com/{dataset}
    704         "experiments": [ # The list of experiments to enable.
    705           "A String",
    706         ],
    707         "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    708         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
    709             # options are passed through the service and are used to recreate the
    710             # SDK pipeline options on the worker in a language agnostic and platform
    711             # independent way.
    712           "a_key": "", # Properties of the object.
    713         },
    714         "userAgent": { # A description of the process that generated the request.
    715           "a_key": "", # Properties of the object.
    716         },
    717         "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
    718             # unspecified, the service will attempt to choose a reasonable
    719             # default.  This should be in the form of the API service name,
    720             # e.g. "compute.googleapis.com".
    721         "workerPools": [ # The worker pools. At least one "harness" worker pool must be
    722             # specified in order for the job to have workers.
    723           { # Describes one particular pool of Cloud Dataflow workers to be
    724               # instantiated by the Cloud Dataflow service in order to perform the
    725               # computations required by a job.  Note that a workflow job may use
    726               # multiple pools, in order to match the various computational
    727               # requirements of the various stages of the job.
    728             "diskSourceImage": "A String", # Fully qualified source image for disks.
    729             "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
    730                 # using the standard Dataflow task runner.  Users should ignore
    731                 # this field.
    732               "workflowFileName": "A String", # The file to store the workflow in.
    733               "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
    734                   # will not be uploaded.
    735                   #
    736                   # The supported resource type is:
    737                   #
    738                   # Google Cloud Storage:
    739                   #   storage.googleapis.com/{bucket}/{object}
    740                   #   bucket.storage.googleapis.com/{object}
    741               "commandlinesFileName": "A String", # The file to store preprocessing commands in.
    742               "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
    743                 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
    744                 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
    745                     # "shuffle/v1beta1".
    746                 "workerId": "A String", # The ID of the worker running this pipeline.
    747                 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
    748                     #
    749                     # When workers access Google Cloud APIs, they logically do so via
    750                     # relative URLs.  If this field is specified, it supplies the base
    751                     # URL to use for resolving these relative URLs.  The normative
    752                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    753                     # Locators".
    754                     #
    755                     # If not specified, the default value is "http://www.googleapis.com/"
    756                 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
    757                     # "dataflow/v1b3/projects".
    758                 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    759                     # storage.
    760                     #
    761                     # The supported resource type is:
    762                     #
    763                     # Google Cloud Storage:
    764                     #
    765                     #   storage.googleapis.com/{bucket}/{object}
    766                     #   bucket.storage.googleapis.com/{object}
    767               },
    768               "vmId": "A String", # The ID string of the VM.
    769               "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
    770               "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
    771               "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
    772                   # access the Cloud Dataflow API.
    773                 "A String",
    774               ],
    775               "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
    776                   # taskrunner; e.g. "root".
    777               "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
    778                   #
    779                   # When workers access Google Cloud APIs, they logically do so via
    780                   # relative URLs.  If this field is specified, it supplies the base
    781                   # URL to use for resolving these relative URLs.  The normative
    782                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
    783                   # Locators".
    784                   #
    785                   # If not specified, the default value is "http://www.googleapis.com/"
    786               "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
    787                   # taskrunner; e.g. "wheel".
    788               "languageHint": "A String", # The suggested backend language.
    789               "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
    790                   # console.
    791               "streamingWorkerMainClass": "A String", # The streaming worker main class name.
    792               "logDir": "A String", # The directory on the VM to store logs.
     793               "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
    794               "harnessCommand": "A String", # The command to launch the worker harness.
    795               "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
    796                   # temporary storage.
    797                   #
    798                   # The supported resource type is:
    799                   #
    800                   # Google Cloud Storage:
    801                   #   storage.googleapis.com/{bucket}/{object}
    802                   #   bucket.storage.googleapis.com/{object}
    803               "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
    804             },
    805             "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
    806                 # are supported.
    807             "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
    808                 # service will attempt to choose a reasonable default.
    809             "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
    810                 # the service will use the network "default".
    811             "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
    812                 # will attempt to choose a reasonable default.
    813             "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
    814                 # attempt to choose a reasonable default.
    815             "dataDisks": [ # Data disks that are used by a VM in this workflow.
    816               { # Describes the data disk used by a workflow job.
    817                 "mountPoint": "A String", # Directory in a VM where disk is mounted.
    818                 "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
    819                     # attempt to choose a reasonable default.
    820                 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
    821                     # must be a disk type appropriate to the project and zone in which
    822                     # the workers will run.  If unknown or unspecified, the service
    823                     # will attempt to choose a reasonable default.
    824                     #
    825                     # For example, the standard persistent disk type is a resource name
    826                     # typically ending in "pd-standard".  If SSD persistent disks are
    827                     # available, the resource name typically ends with "pd-ssd".  The
     828                     # actual valid values are defined by the Google Compute Engine API,
    829                     # not by the Cloud Dataflow API; consult the Google Compute Engine
    830                     # documentation for more information about determining the set of
    831                     # available disk types for a particular project and zone.
    832                     #
    833                     # Google Compute Engine Disk types are local to a particular
    834                     # project in a particular zone, and so the resource name will
    835                     # typically look something like this:
    836                     #
    837                     # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
    838               },
    839             ],
     840             "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
    841                 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
    842                 # `TEARDOWN_NEVER`.
    843                 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
    844                 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
    845                 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
    846                 # down.
    847                 #
    848                 # If the workers are not torn down by the service, they will
    849                 # continue to run and use Google Compute Engine VM resources in the
    850                 # user's project until they are explicitly terminated by the user.
    851                 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
    852                 # policy except for small, manually supervised test jobs.
    853                 #
    854                 # If unknown or unspecified, the service will attempt to choose a reasonable
    855                 # default.
    856             "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
    857                 # Compute Engine API.
    858             "ipConfiguration": "A String", # Configuration for VM IPs.
    859             "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
    860                 # service will choose a number of threads (according to the number of cores
    861                 # on the selected machine type for batch, or 1 by convention for streaming).
    862             "poolArgs": { # Extra arguments for this worker pool.
    863               "a_key": "", # Properties of the object. Contains field @type with type URL.
    864             },
    865             "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
    866                 # execute the job.  If zero or unspecified, the service will
    867                 # attempt to choose a reasonable default.
    868             "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
    869                 # harness, residing in Google Container Registry.
    870             "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
    871                 # the form "regions/REGION/subnetworks/SUBNETWORK".
    872             "packages": [ # Packages to be installed on workers.
    873               { # The packages that must be installed in order for a worker to run the
    874                   # steps of the Cloud Dataflow job that will be assigned to its worker
    875                   # pool.
    876                   #
    877                   # This is the mechanism by which the Cloud Dataflow SDK causes code to
    878                   # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
    879                   # might use this to install jars containing the user's code and all of the
    880                   # various dependencies (libraries, data files, etc.) required in order
    881                   # for that code to run.
    882                 "location": "A String", # The resource to read the package from. The supported resource type is:
    883                     #
    884                     # Google Cloud Storage:
    885                     #
    886                     #   storage.googleapis.com/{bucket}
    887                     #   bucket.storage.googleapis.com/
    888                 "name": "A String", # The name of the package.
    889               },
    890             ],
    891             "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
    892               "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
    893               "algorithm": "A String", # The algorithm to use for autoscaling.
    894             },
    895             "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
    896                 # select a default set of packages which are useful to worker
    897                 # harnesses written in a particular language.
    898             "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
    899                 # attempt to choose a reasonable default.
    900             "metadata": { # Metadata to set on the Google Compute Engine VMs.
    901               "a_key": "A String",
    902             },
    903           },
    904         ],
    905       },
    906       "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
    907           # A description of the user pipeline and stages through which it is executed.
    908           # Created by Cloud Dataflow service.  Only retrieved with
    909           # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    910           # form.  This data is provided by the Dataflow service for ease of visualizing
    911           # the pipeline and interpreting Dataflow-provided metrics.
    912         "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
    913           { # Description of the type, names/ids, and input/outputs for a transform.
    914             "kind": "A String", # Type of transform.
    915             "name": "A String", # User provided name for this transform instance.
    916             "inputCollectionName": [ # User names for all collection inputs to this transform.
    917               "A String",
    918             ],
    919             "displayData": [ # Transform-specific display data.
    920               { # Data provided with a pipeline or transform to provide descriptive info.
    921                 "shortStrValue": "A String", # A possible additional shorter value to display.
    922                     # For example a java_class_name_value of com.mypackage.MyDoFn
    923                     # will be stored with MyDoFn as the short_str_value and
    924                     # com.mypackage.MyDoFn as the java_class_name value.
    925                     # short_str_value can be displayed and java_class_name_value
    926                     # will be displayed as a tooltip.
    927                 "durationValue": "A String", # Contains value if the data is of duration type.
    928                 "url": "A String", # An optional full URL.
    929                 "floatValue": 3.14, # Contains value if the data is of float type.
    930                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    931                     # language namespace (i.e. python module) which defines the display data.
    932                     # This allows a dax monitoring system to specially handle the data
    933                     # and perform custom rendering.
    934                 "javaClassValue": "A String", # Contains value if the data is of java class type.
    935                 "label": "A String", # An optional label to display in a dax UI for the element.
    936                 "boolValue": True or False, # Contains value if the data is of a boolean type.
    937                 "strValue": "A String", # Contains value if the data is of string type.
    938                 "key": "A String", # The key identifying the display data.
    939                     # This is intended to be used as a label for the display data
    940                     # when viewed in a dax monitoring system.
    941                 "int64Value": "A String", # Contains value if the data is of int64 type.
    942                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
    943               },
    944             ],
    945             "outputCollectionName": [ # User names for all collection outputs to this transform.
    946               "A String",
    947             ],
    948             "id": "A String", # SDK generated id of this transform instance.
    949           },
    950         ],
    951         "displayData": [ # Pipeline level display data.
    952           { # Data provided with a pipeline or transform to provide descriptive info.
    953             "shortStrValue": "A String", # A possible additional shorter value to display.
    954                 # For example a java_class_name_value of com.mypackage.MyDoFn
    955                 # will be stored with MyDoFn as the short_str_value and
    956                 # com.mypackage.MyDoFn as the java_class_name value.
    957                 # short_str_value can be displayed and java_class_name_value
    958                 # will be displayed as a tooltip.
    959             "durationValue": "A String", # Contains value if the data is of duration type.
    960             "url": "A String", # An optional full URL.
    961             "floatValue": 3.14, # Contains value if the data is of float type.
    962             "namespace": "A String", # The namespace for the key. This is usually a class name or programming
    963                 # language namespace (i.e. python module) which defines the display data.
    964                 # This allows a dax monitoring system to specially handle the data
    965                 # and perform custom rendering.
    966             "javaClassValue": "A String", # Contains value if the data is of java class type.
    967             "label": "A String", # An optional label to display in a dax UI for the element.
    968             "boolValue": True or False, # Contains value if the data is of a boolean type.
    969             "strValue": "A String", # Contains value if the data is of string type.
    970             "key": "A String", # The key identifying the display data.
    971                 # This is intended to be used as a label for the display data
    972                 # when viewed in a dax monitoring system.
    973             "int64Value": "A String", # Contains value if the data is of int64 type.
    974             "timestampValue": "A String", # Contains value if the data is of timestamp type.
    975           },
    976         ],
    977         "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
    978           { # Description of the composing transforms, names/ids, and input/outputs of a
    979               # stage of execution.  Some composing transforms and sources may have been
    980               # generated by the Dataflow service during execution planning.
    981             "componentSource": [ # Collections produced and consumed by component transforms of this stage.
    982               { # Description of an interstitial value between transforms in an execution
    983                   # stage.
    984                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
    985                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    986                     # source is most closely associated.
    987                 "name": "A String", # Dataflow service generated name for this source.
    988               },
    989             ],
    990             "kind": "A String", # Type of transform this stage is executing.
    991             "name": "A String", # Dataflow service generated name for this stage.
    992             "outputSource": [ # Output sources for this stage.
    993               { # Description of an input or output of an execution stage.
    994                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
    995                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
    996                     # source is most closely associated.
    997                 "name": "A String", # Dataflow service generated name for this source.
    998                 "sizeBytes": "A String", # Size of the source, if measurable.
    999               },
   1000             ],
   1001             "inputSource": [ # Input sources for this stage.
   1002               { # Description of an input or output of an execution stage.
   1003                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1004                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1005                     # source is most closely associated.
   1006                 "name": "A String", # Dataflow service generated name for this source.
   1007                 "sizeBytes": "A String", # Size of the source, if measurable.
   1008               },
   1009             ],
   1010             "componentTransform": [ # Transforms that comprise this execution stage.
   1011               { # Description of a transform executed as part of an execution stage.
   1012                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1013                 "originalTransform": "A String", # User name for the original user transform with which this transform is
   1014                     # most closely associated.
    1015                 "name": "A String", # Dataflow service generated name for this transform.
   1016               },
   1017             ],
   1018             "id": "A String", # Dataflow service generated id for this stage.
   1019           },
   1020         ],
   1021       },
   1022       "steps": [ # The top-level steps that constitute the entire job.
   1023         { # Defines a particular step within a Cloud Dataflow job.
   1024             #
   1025             # A job consists of multiple steps, each of which performs some
   1026             # specific operation as part of the overall job.  Data is typically
   1027             # passed from one step to another as part of the job.
   1028             #
   1029             # Here's an example of a sequence of steps which together implement a
   1030             # Map-Reduce job:
   1031             #
   1032             #   * Read a collection of data from some source, parsing the
   1033             #     collection's elements.
   1034             #
   1035             #   * Validate the elements.
   1036             #
   1037             #   * Apply a user-defined function to map each element to some value
   1038             #     and extract an element-specific key value.
   1039             #
   1040             #   * Group elements with the same key into a single element with
   1041             #     that key, transforming a multiply-keyed collection into a
   1042             #     uniquely-keyed collection.
   1043             #
   1044             #   * Write the elements out to some data sink.
   1045             #
   1046             # Note that the Cloud Dataflow service may be used to run many different
   1047             # types of jobs, not just Map-Reduce.
   1048           "kind": "A String", # The kind of step in the Cloud Dataflow job.
   1049           "properties": { # Named properties associated with the step. Each kind of
   1050               # predefined step has its own required set of properties.
   1051               # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   1052             "a_key": "", # Properties of the object.
   1053           },
   1054           "name": "A String", # The name that identifies the step. This must be unique for each
   1055               # step with respect to all other steps in the Cloud Dataflow job.
   1056         },
   1057       ],
   1058       "currentStateTime": "A String", # The timestamp associated with the current state.
   1059       "tempFiles": [ # A set of files the system should be aware of that are used
   1060           # for temporary storage. These temporary files will be
   1061           # removed on job completion.
   1062           # No duplicates are allowed.
   1063           # No file patterns are supported.
   1064           #
   1065           # The supported files are:
   1066           #
   1067           # Google Cloud Storage:
   1068           #
   1069           #    storage.googleapis.com/{bucket}/{object}
   1070           #    bucket.storage.googleapis.com/{object}
   1071         "A String",
   1072       ],
   1073       "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   1074           # callers cannot mutate it.
   1075         { # A message describing the state of a particular execution stage.
   1076           "executionStageName": "A String", # The name of the execution stage.
    1077           "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   1078           "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   1079         },
   1080       ],
   1081       "type": "A String", # The type of Cloud Dataflow job.
   1082       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   1083           # Cloud Dataflow service.
   1084       "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   1085           # of the job it replaced.
   1086           #
   1087           # When sending a `CreateJobRequest`, you can update a job by specifying it
   1088           # here. The job named here is stopped, and its intermediate state is
   1089           # transferred to this job.
   1090       "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
   1091           # isn't contained in the submitted job.
   1092         "stages": { # A mapping from each stage to the information about that stage.
   1093           "a_key": { # Contains information about how a particular
   1094               # google.dataflow.v1beta3.Step will be executed.
   1095             "stepName": [ # The steps associated with the execution stage.
   1096                 # Note that stages may have several steps, and that a given step
   1097                 # might be run by more than one stage.
   1098               "A String",
   1099             ],
   1100           },
   1101         },
   1102       },
    1103     }</pre>
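<p>For illustration only: a minimal sketch of calling the create() method documented above with the generated Python client. It assumes a recent google-api-python-client that picks up Application Default Credentials via build(); the project ID, job name, bucket, and the JOB_TYPE_BATCH value are placeholders or assumptions, not values taken from this page, and realistic Job bodies are normally produced by a Dataflow SDK rather than written by hand.</p>
  <pre>
from googleapiclient.discovery import build

# Build the Dataflow client; recent google-api-python-client versions pick up
# Application Default Credentials automatically.
dataflow = build('dataflow', 'v1b3')

# Hypothetical, hand-written Job body; real bodies (steps, environment, etc.)
# are normally emitted by a Dataflow SDK.
job_body = {
    'name': 'example-job',     # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
    'type': 'JOB_TYPE_BATCH',  # assumed enum value; not listed on this page
    'environment': {
        'tempStoragePrefix': 'storage.googleapis.com/example-bucket/temp',
    },
}

created = dataflow.projects().jobs().create(
    projectId='example-project',  # placeholder project ID
    body=job_body,
    view='JOB_VIEW_SUMMARY',
).execute()
print(created.get('id'), created.get('currentState'))
</pre>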
    1104 </div>
    1105 
    1106 <div class="method">
    1107     <code class="details" id="get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</code>
    1108   <pre>Gets the state of the specified Cloud Dataflow job.
   1109 
   1110 Args:
   1111   projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
   1112   jobId: string, The job ID. (required)
   1113   location: string, The location that contains this job.
   1114   x__xgafv: string, V1 error format.
   1115     Allowed values
   1116       1 - v1 error format
   1117       2 - v2 error format
   1118   view: string, The level of information requested in response.
   1119 
   1120 Returns:
   1121   An object of the form:
   1122 
   1123     { # Defines a job to be run by the Cloud Dataflow service.
   1124       "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   1125           # If this field is set, the service will ensure its uniqueness.
   1126           # The request to create a job will fail if the service has knowledge of a
   1127           # previously submitted job with the same client's ID and job name.
   1128           # The caller may use this field to ensure idempotence of job
   1129           # creation across retried attempts to create a job.
   1130           # By default, the field is empty and, in that case, the service ignores it.
   1131       "requestedState": "A String", # The job's requested state.
   1132           #
   1133           # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   1134           # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   1135           # also be used to directly set a job's requested state to
   1136           # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   1137           # job if it has not already reached a terminal state.
   1138       "name": "A String", # The user-specified Cloud Dataflow job name.
   1139           #
   1140           # Only one Job with a given name may exist in a project at any
   1141           # given time. If a caller attempts to create a Job with the same
   1142           # name as an already-existing Job, the attempt returns the
   1143           # existing Job.
   1144           #
   1145           # The name must match the regular expression
   1146           # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   1147       "location": "A String", # The location that contains this job.
   1148       "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   1149           # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   1150       "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   1151       "currentState": "A String", # The current state of the job.
   1152           #
   1153           # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   1154           # specified.
   1155           #
   1156           # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   1157           # terminal state. After a job has reached a terminal state, no
   1158           # further state updates may be made.
   1159           #
   1160           # This field may be mutated by the Cloud Dataflow service;
   1161           # callers cannot mutate it.
   1162       "labels": { # User-defined labels for this job.
   1163           #
   1164           # The labels map can contain no more than 64 entries.  Entries of the labels
   1165           # map are UTF8 strings that comply with the following restrictions:
   1166           #
   1167           # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   1168           # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   1169           # * Both keys and values are additionally constrained to be <= 128 bytes in
   1170           # size.
   1171         "a_key": "A String",
   1172       },
   1173       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   1174           # corresponding name prefixes of the new job.
   1175         "a_key": "A String",
   1176       },
   1177       "id": "A String", # The unique ID of this job.
   1178           #
   1179           # This field is set by the Cloud Dataflow service when the Job is
   1180           # created, and is immutable for the life of the job.
   1181       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   1182         "version": { # A structure describing which components and their versions of the service
   1183             # are required in order to run the job.
   1184           "a_key": "", # Properties of the object.
   1185         },
   1186         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
    1187             # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   1188             # this resource prefix, where {JOBNAME} is the value of the
   1189             # job_name field.  The resulting bucket and object prefix is used
   1190             # as the prefix of the resources used to store temporary data
   1191             # needed during the job execution.  NOTE: This will override the
   1192             # value in taskrunner_settings.
   1193             # The supported resource type is:
   1194             #
   1195             # Google Cloud Storage:
   1196             #
   1197             #   storage.googleapis.com/{bucket}/{object}
   1198             #   bucket.storage.googleapis.com/{object}
   1199         "internalExperiments": { # Experimental settings.
   1200           "a_key": "", # Properties of the object. Contains field @type with type URL.
   1201         },
   1202         "dataset": "A String", # The dataset for the current project where various workflow
   1203             # related tables are stored.
   1204             #
   1205             # The supported resource type is:
   1206             #
   1207             # Google BigQuery:
   1208             #   bigquery.googleapis.com/{dataset}
   1209         "experiments": [ # The list of experiments to enable.
   1210           "A String",
   1211         ],
   1212         "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   1213         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   1214             # options are passed through the service and are used to recreate the
   1215             # SDK pipeline options on the worker in a language agnostic and platform
   1216             # independent way.
   1217           "a_key": "", # Properties of the object.
   1218         },
   1219         "userAgent": { # A description of the process that generated the request.
   1220           "a_key": "", # Properties of the object.
   1221         },
   1222         "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   1223             # unspecified, the service will attempt to choose a reasonable
   1224             # default.  This should be in the form of the API service name,
   1225             # e.g. "compute.googleapis.com".
   1226         "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   1227             # specified in order for the job to have workers.
   1228           { # Describes one particular pool of Cloud Dataflow workers to be
   1229               # instantiated by the Cloud Dataflow service in order to perform the
   1230               # computations required by a job.  Note that a workflow job may use
   1231               # multiple pools, in order to match the various computational
   1232               # requirements of the various stages of the job.
   1233             "diskSourceImage": "A String", # Fully qualified source image for disks.
   1234             "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   1235                 # using the standard Dataflow task runner.  Users should ignore
   1236                 # this field.
   1237               "workflowFileName": "A String", # The file to store the workflow in.
   1238               "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   1239                   # will not be uploaded.
   1240                   #
   1241                   # The supported resource type is:
   1242                   #
   1243                   # Google Cloud Storage:
   1244                   #   storage.googleapis.com/{bucket}/{object}
   1245                   #   bucket.storage.googleapis.com/{object}
   1246               "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   1247               "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   1248                 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   1249                 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   1250                     # "shuffle/v1beta1".
   1251                 "workerId": "A String", # The ID of the worker running this pipeline.
   1252                 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   1253                     #
   1254                     # When workers access Google Cloud APIs, they logically do so via
   1255                     # relative URLs.  If this field is specified, it supplies the base
   1256                     # URL to use for resolving these relative URLs.  The normative
   1257                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1258                     # Locators".
   1259                     #
   1260                     # If not specified, the default value is "http://www.googleapis.com/"
   1261                 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   1262                     # "dataflow/v1b3/projects".
   1263                 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   1264                     # storage.
   1265                     #
   1266                     # The supported resource type is:
   1267                     #
   1268                     # Google Cloud Storage:
   1269                     #
   1270                     #   storage.googleapis.com/{bucket}/{object}
   1271                     #   bucket.storage.googleapis.com/{object}
   1272               },
   1273               "vmId": "A String", # The ID string of the VM.
   1274               "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   1275               "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   1276               "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   1277                   # access the Cloud Dataflow API.
   1278                 "A String",
   1279               ],
   1280               "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   1281                   # taskrunner; e.g. "root".
   1282               "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   1283                   #
   1284                   # When workers access Google Cloud APIs, they logically do so via
   1285                   # relative URLs.  If this field is specified, it supplies the base
   1286                   # URL to use for resolving these relative URLs.  The normative
   1287                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1288                   # Locators".
   1289                   #
   1290                   # If not specified, the default value is "http://www.googleapis.com/"
   1291               "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   1292                   # taskrunner; e.g. "wheel".
   1293               "languageHint": "A String", # The suggested backend language.
   1294               "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   1295                   # console.
   1296               "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   1297               "logDir": "A String", # The directory on the VM to store logs.
   1298               "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
   1299               "harnessCommand": "A String", # The command to launch the worker harness.
   1300               "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   1301                   # temporary storage.
   1302                   #
   1303                   # The supported resource type is:
   1304                   #
   1305                   # Google Cloud Storage:
   1306                   #   storage.googleapis.com/{bucket}/{object}
   1307                   #   bucket.storage.googleapis.com/{object}
   1308               "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   1309             },
   1310             "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   1311                 # are supported.
   1312             "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   1313                 # service will attempt to choose a reasonable default.
   1314             "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   1315                 # the service will use the network "default".
   1316             "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   1317                 # will attempt to choose a reasonable default.
   1318             "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   1319                 # attempt to choose a reasonable default.
   1320             "dataDisks": [ # Data disks that are used by a VM in this workflow.
   1321               { # Describes the data disk used by a workflow job.
   1322                 "mountPoint": "A String", # Directory in a VM where disk is mounted.
   1323                 "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   1324                     # attempt to choose a reasonable default.
   1325                 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   1326                     # must be a disk type appropriate to the project and zone in which
   1327                     # the workers will run.  If unknown or unspecified, the service
   1328                     # will attempt to choose a reasonable default.
   1329                     #
   1330                     # For example, the standard persistent disk type is a resource name
   1331                     # typically ending in "pd-standard".  If SSD persistent disks are
   1332                     # available, the resource name typically ends with "pd-ssd".  The
    1333                     # actual valid values are defined by the Google Compute Engine API,
   1334                     # not by the Cloud Dataflow API; consult the Google Compute Engine
   1335                     # documentation for more information about determining the set of
   1336                     # available disk types for a particular project and zone.
   1337                     #
   1338                     # Google Compute Engine Disk types are local to a particular
   1339                     # project in a particular zone, and so the resource name will
   1340                     # typically look something like this:
   1341                     #
   1342                     # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   1343               },
   1344             ],
    1345             "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
   1346                 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   1347                 # `TEARDOWN_NEVER`.
   1348                 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   1349                 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   1350                 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   1351                 # down.
   1352                 #
   1353                 # If the workers are not torn down by the service, they will
   1354                 # continue to run and use Google Compute Engine VM resources in the
   1355                 # user's project until they are explicitly terminated by the user.
   1356                 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   1357                 # policy except for small, manually supervised test jobs.
   1358                 #
   1359                 # If unknown or unspecified, the service will attempt to choose a reasonable
   1360                 # default.
   1361             "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   1362                 # Compute Engine API.
   1363             "ipConfiguration": "A String", # Configuration for VM IPs.
   1364             "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   1365                 # service will choose a number of threads (according to the number of cores
   1366                 # on the selected machine type for batch, or 1 by convention for streaming).
   1367             "poolArgs": { # Extra arguments for this worker pool.
   1368               "a_key": "", # Properties of the object. Contains field @type with type URL.
   1369             },
   1370             "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   1371                 # execute the job.  If zero or unspecified, the service will
   1372                 # attempt to choose a reasonable default.
   1373             "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   1374                 # harness, residing in Google Container Registry.
   1375             "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   1376                 # the form "regions/REGION/subnetworks/SUBNETWORK".
   1377             "packages": [ # Packages to be installed on workers.
   1378               { # The packages that must be installed in order for a worker to run the
   1379                   # steps of the Cloud Dataflow job that will be assigned to its worker
   1380                   # pool.
   1381                   #
   1382                   # This is the mechanism by which the Cloud Dataflow SDK causes code to
   1383                   # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   1384                   # might use this to install jars containing the user's code and all of the
   1385                   # various dependencies (libraries, data files, etc.) required in order
   1386                   # for that code to run.
   1387                 "location": "A String", # The resource to read the package from. The supported resource type is:
   1388                     #
   1389                     # Google Cloud Storage:
   1390                     #
   1391                     #   storage.googleapis.com/{bucket}
   1392                     #   bucket.storage.googleapis.com/
   1393                 "name": "A String", # The name of the package.
   1394               },
   1395             ],
   1396             "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   1397               "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   1398               "algorithm": "A String", # The algorithm to use for autoscaling.
   1399             },
   1400             "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   1401                 # select a default set of packages which are useful to worker
   1402                 # harnesses written in a particular language.
   1403             "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   1404                 # attempt to choose a reasonable default.
   1405             "metadata": { # Metadata to set on the Google Compute Engine VMs.
   1406               "a_key": "A String",
   1407             },
   1408           },
   1409         ],
   1410       },
   1411       "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
   1412           # A description of the user pipeline and stages through which it is executed.
   1413           # Created by Cloud Dataflow service.  Only retrieved with
   1414           # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   1415           # form.  This data is provided by the Dataflow service for ease of visualizing
    1416           # the pipeline and interpreting Dataflow-provided metrics.
   1417         "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   1418           { # Description of the type, names/ids, and input/outputs for a transform.
   1419             "kind": "A String", # Type of transform.
   1420             "name": "A String", # User provided name for this transform instance.
   1421             "inputCollectionName": [ # User names for all collection inputs to this transform.
   1422               "A String",
   1423             ],
   1424             "displayData": [ # Transform-specific display data.
   1425               { # Data provided with a pipeline or transform to provide descriptive info.
   1426                 "shortStrValue": "A String", # A possible additional shorter value to display.
   1427                     # For example a java_class_name_value of com.mypackage.MyDoFn
   1428                     # will be stored with MyDoFn as the short_str_value and
   1429                     # com.mypackage.MyDoFn as the java_class_name value.
   1430                     # short_str_value can be displayed and java_class_name_value
   1431                     # will be displayed as a tooltip.
   1432                 "durationValue": "A String", # Contains value if the data is of duration type.
   1433                 "url": "A String", # An optional full URL.
   1434                 "floatValue": 3.14, # Contains value if the data is of float type.
   1435                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   1436                     # language namespace (i.e. python module) which defines the display data.
   1437                     # This allows a dax monitoring system to specially handle the data
   1438                     # and perform custom rendering.
   1439                 "javaClassValue": "A String", # Contains value if the data is of java class type.
   1440                 "label": "A String", # An optional label to display in a dax UI for the element.
   1441                 "boolValue": True or False, # Contains value if the data is of a boolean type.
   1442                 "strValue": "A String", # Contains value if the data is of string type.
   1443                 "key": "A String", # The key identifying the display data.
   1444                     # This is intended to be used as a label for the display data
   1445                     # when viewed in a dax monitoring system.
   1446                 "int64Value": "A String", # Contains value if the data is of int64 type.
   1447                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1448               },
   1449             ],
    1450             "outputCollectionName": [ # User names for all collection outputs to this transform.
   1451               "A String",
   1452             ],
   1453             "id": "A String", # SDK generated id of this transform instance.
   1454           },
   1455         ],
   1456         "displayData": [ # Pipeline level display data.
   1457           { # Data provided with a pipeline or transform to provide descriptive info.
   1458             "shortStrValue": "A String", # A possible additional shorter value to display.
   1459                 # For example a java_class_name_value of com.mypackage.MyDoFn
   1460                 # will be stored with MyDoFn as the short_str_value and
   1461                 # com.mypackage.MyDoFn as the java_class_name value.
   1462                 # short_str_value can be displayed and java_class_name_value
   1463                 # will be displayed as a tooltip.
   1464             "durationValue": "A String", # Contains value if the data is of duration type.
   1465             "url": "A String", # An optional full URL.
   1466             "floatValue": 3.14, # Contains value if the data is of float type.
   1467             "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   1468                 # language namespace (i.e. python module) which defines the display data.
   1469                 # This allows a dax monitoring system to specially handle the data
   1470                 # and perform custom rendering.
   1471             "javaClassValue": "A String", # Contains value if the data is of java class type.
   1472             "label": "A String", # An optional label to display in a dax UI for the element.
   1473             "boolValue": True or False, # Contains value if the data is of a boolean type.
   1474             "strValue": "A String", # Contains value if the data is of string type.
   1475             "key": "A String", # The key identifying the display data.
   1476                 # This is intended to be used as a label for the display data
   1477                 # when viewed in a dax monitoring system.
   1478             "int64Value": "A String", # Contains value if the data is of int64 type.
   1479             "timestampValue": "A String", # Contains value if the data is of timestamp type.
   1480           },
   1481         ],
   1482         "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   1483           { # Description of the composing transforms, names/ids, and input/outputs of a
   1484               # stage of execution.  Some composing transforms and sources may have been
   1485               # generated by the Dataflow service during execution planning.
   1486             "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   1487               { # Description of an interstitial value between transforms in an execution
   1488                   # stage.
   1489                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1490                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1491                     # source is most closely associated.
   1492                 "name": "A String", # Dataflow service generated name for this source.
   1493               },
   1494             ],
    1495             "kind": "A String", # Type of transform this stage is executing.
   1496             "name": "A String", # Dataflow service generated name for this stage.
   1497             "outputSource": [ # Output sources for this stage.
   1498               { # Description of an input or output of an execution stage.
   1499                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1500                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1501                     # source is most closely associated.
   1502                 "name": "A String", # Dataflow service generated name for this source.
   1503                 "sizeBytes": "A String", # Size of the source, if measurable.
   1504               },
   1505             ],
   1506             "inputSource": [ # Input sources for this stage.
   1507               { # Description of an input or output of an execution stage.
   1508                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   1509                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   1510                     # source is most closely associated.
   1511                 "name": "A String", # Dataflow service generated name for this source.
   1512                 "sizeBytes": "A String", # Size of the source, if measurable.
   1513               },
   1514             ],
   1515             "componentTransform": [ # Transforms that comprise this execution stage.
   1516               { # Description of a transform executed as part of an execution stage.
   1517                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   1518                 "originalTransform": "A String", # User name for the original user transform with which this transform is
   1519                     # most closely associated.
    1520                 "name": "A String", # Dataflow service generated name for this transform.
   1521               },
   1522             ],
   1523             "id": "A String", # Dataflow service generated id for this stage.
   1524           },
   1525         ],
   1526       },
   1527       "steps": [ # The top-level steps that constitute the entire job.
   1528         { # Defines a particular step within a Cloud Dataflow job.
   1529             #
   1530             # A job consists of multiple steps, each of which performs some
   1531             # specific operation as part of the overall job.  Data is typically
   1532             # passed from one step to another as part of the job.
   1533             #
   1534             # Here's an example of a sequence of steps which together implement a
   1535             # Map-Reduce job:
   1536             #
   1537             #   * Read a collection of data from some source, parsing the
   1538             #     collection's elements.
   1539             #
   1540             #   * Validate the elements.
   1541             #
   1542             #   * Apply a user-defined function to map each element to some value
   1543             #     and extract an element-specific key value.
   1544             #
   1545             #   * Group elements with the same key into a single element with
   1546             #     that key, transforming a multiply-keyed collection into a
   1547             #     uniquely-keyed collection.
   1548             #
   1549             #   * Write the elements out to some data sink.
   1550             #
   1551             # Note that the Cloud Dataflow service may be used to run many different
   1552             # types of jobs, not just Map-Reduce.
   1553           "kind": "A String", # The kind of step in the Cloud Dataflow job.
   1554           "properties": { # Named properties associated with the step. Each kind of
   1555               # predefined step has its own required set of properties.
   1556               # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   1557             "a_key": "", # Properties of the object.
   1558           },
   1559           "name": "A String", # The name that identifies the step. This must be unique for each
   1560               # step with respect to all other steps in the Cloud Dataflow job.
   1561         },
   1562       ],
   1563       "currentStateTime": "A String", # The timestamp associated with the current state.
   1564       "tempFiles": [ # A set of files the system should be aware of that are used
   1565           # for temporary storage. These temporary files will be
   1566           # removed on job completion.
   1567           # No duplicates are allowed.
   1568           # No file patterns are supported.
   1569           #
   1570           # The supported files are:
   1571           #
   1572           # Google Cloud Storage:
   1573           #
   1574           #    storage.googleapis.com/{bucket}/{object}
   1575           #    bucket.storage.googleapis.com/{object}
   1576         "A String",
   1577       ],
   1578       "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   1579           # callers cannot mutate it.
   1580         { # A message describing the state of a particular execution stage.
   1581           "executionStageName": "A String", # The name of the execution stage.
    1582           "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   1583           "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   1584         },
   1585       ],
   1586       "type": "A String", # The type of Cloud Dataflow job.
   1587       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   1588           # Cloud Dataflow service.
   1589       "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   1590           # of the job it replaced.
   1591           #
   1592           # When sending a `CreateJobRequest`, you can update a job by specifying it
   1593           # here. The job named here is stopped, and its intermediate state is
   1594           # transferred to this job.
   1595       "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
   1596           # isn't contained in the submitted job.
   1597         "stages": { # A mapping from each stage to the information about that stage.
   1598           "a_key": { # Contains information about how a particular
   1599               # google.dataflow.v1beta3.Step will be executed.
   1600             "stepName": [ # The steps associated with the execution stage.
   1601                 # Note that stages may have several steps, and that a given step
   1602                 # might be run by more than one stage.
   1603               "A String",
   1604             ],
   1605           },
   1606         },
   1607       },
   1608     }</pre>
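<p>A short, hedged example of calling get() with the generated Python client. The client construction mirrors the sketch under create(); the project and job IDs are placeholders, not values from this page.</p>
  <pre>
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')  # assumes Application Default Credentials

job = dataflow.projects().jobs().get(
    projectId='example-project',             # placeholder project ID
    jobId='2017-01-01_00_00_00-1234567890',  # placeholder job ID
    view='JOB_VIEW_ALL',                     # also returns steps and pipelineDescription
).execute()

print(job.get('name'), job.get('currentState'))
for stage in job.get('stageStates', []):
    print(stage['executionStageName'], stage['executionStageState'])
</pre>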
   1609 </div>
   1610 
   1611 <div class="method">
   1612     <code class="details" id="getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</code>
   1613   <pre>Request the job status.
   1614 
   1615 Args:
   1616   projectId: string, A project id. (required)
   1617   jobId: string, The job to get messages for. (required)
   1618   startTime: string, Return only metric data that has changed since this time.
   1619 Default is to return all information about all metrics for the job.
   1620   location: string, The location which contains the job specified by job_id.
   1621   x__xgafv: string, V1 error format.
   1622     Allowed values
   1623       1 - v1 error format
   1624       2 - v2 error format
   1625 
   1626 Returns:
   1627   An object of the form:
   1628 
    1629     { # JobMetrics contains a collection of metrics describing the detailed progress
   1630       # of a Dataflow job. Metrics correspond to user-defined and system-defined
   1631       # metrics in the job.
   1632       #
   1633       # This resource captures only the most recent values of each metric;
   1634       # time-series data can be queried for them (under the same metric names)
   1635       # from Cloud Monitoring.
   1636     "metrics": [ # All metrics for this job.
   1637       { # Describes the state of a metric.
   1638         "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
   1639             # This holds the count of the aggregated values and is used in combination
    1640             # with mean_sum to obtain the actual mean aggregate value.
   1641             # The only possible value type is Long.
   1642         "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
   1643             # reporting work progress; it will be filled in responses from the
   1644             # metrics API.
   1645         "set": "", # Worker-computed aggregate value for the "Set" aggregation kind.  The only
   1646             # possible value type is a list of Values whose type can be Long, Double,
   1647             # or String, according to the metric's type.  All Values in the list must
   1648             # be of the same type.
   1649         "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
   1650             # metric.
    1651           "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
   1652               # will be "dataflow" for metrics defined by the Dataflow service or SDK.
   1653           "name": "A String", # Worker-defined metric name.
   1654           "context": { # Zero or more labeled fields which identify the part of the job this
   1655               # metric is associated with, such as the name of a step or collection.
   1656               #
   1657               # For example, built-in counters associated with steps will have
   1658               # context['step'] = <step-name>. Counters associated with PCollections
   1659               # in the SDK will have context['pcollection'] = <pcollection-name>.
   1660             "a_key": "A String",
   1661           },
   1662         },
   1663         "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
   1664             # value accumulated since the worker started working on this WorkItem.
   1665             # By default this is false, indicating that this metric is reported
   1666             # as a delta that is not associated with any WorkItem.
   1667         "kind": "A String", # Metric aggregation kind.  The possible metric aggregation kinds are
   1668             # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
   1669             # The specified aggregation kind is case-insensitive.
   1670             #
   1671             # If omitted, this is not an aggregated value but instead
   1672             # a single metric sample value.
   1673         "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
   1674             # "And", and "Or".  The possible value types are Long, Double, and Boolean.
   1675         "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
   1676             # This holds the sum of the aggregated values and is used in combination
    1677             # with mean_count to obtain the actual mean aggregate value.
   1678             # The only possible value types are Long and Double.
   1679         "distribution": "", # A struct value describing properties of a distribution of numeric values.
   1680         "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
   1681             # service.
   1682       },
   1683     ],
   1684     "metricTime": "A String", # Timestamp as of which metric values are current.
   1685   }</pre>
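<p>A minimal sketch of polling getMetrics() with the generated Python client. The project and job IDs are placeholders, and startTime is shown with an assumed RFC 3339 timestamp format.</p>
  <pre>
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')  # assumes Application Default Credentials

metrics = dataflow.projects().jobs().getMetrics(
    projectId='example-project',             # placeholder project ID
    jobId='2017-01-01_00_00_00-1234567890',  # placeholder job ID
    startTime='2017-01-01T00:00:00Z',        # only metrics updated since this time
).execute()

for metric in metrics.get('metrics', []):
    name = metric.get('name', {})
    print(name.get('origin'), name.get('name'), metric.get('scalar'))
</pre>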
   1686 </div>
   1687 
   1688 <div class="method">
   1689     <code class="details" id="list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</code>
   1690   <pre>List the jobs of a project.
   1691 
   1692 Args:
   1693   projectId: string, The project which owns the jobs. (required)
   1694   pageSize: integer, If there are many jobs, limit response to at most this many.
   1695 The actual number of jobs returned will be the lesser of this value
   1696 and an unspecified server-defined limit.
   1697   x__xgafv: string, V1 error format.
   1698     Allowed values
   1699       1 - v1 error format
   1700       2 - v2 error format
   1701   pageToken: string, Set this to the 'next_page_token' field of a previous response
   1702 to request additional results in a long list.
   1703   location: string, The location that contains this job.
   1704   filter: string, The kind of filter to use.
   1705   view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
   1706 
   1707 Returns:
   1708   An object of the form:
   1709 
   1710     { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
   1711       # response, depending on the page size in the ListJobsRequest.
   1712     "nextPageToken": "A String", # Set if there may be more results than fit in this response.
   1713     "failedLocation": [ # Zero or more messages describing locations that failed to respond.
   1714       { # Indicates which location failed to respond to a request for data.
   1715         "name": "A String", # The name of the failed location.
   1716       },
   1717     ],
   1718     "jobs": [ # A subset of the requested job information.
   1719       { # Defines a job to be run by the Cloud Dataflow service.
   1720           "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   1721               # If this field is set, the service will ensure its uniqueness.
   1722               # The request to create a job will fail if the service has knowledge of a
   1723               # previously submitted job with the same client's ID and job name.
   1724               # The caller may use this field to ensure idempotence of job
   1725               # creation across retried attempts to create a job.
   1726               # By default, the field is empty and, in that case, the service ignores it.
   1727           "requestedState": "A String", # The job's requested state.
   1728               #
   1729               # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   1730               # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   1731               # also be used to directly set a job's requested state to
   1732               # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   1733               # job if it has not already reached a terminal state.
   1734           "name": "A String", # The user-specified Cloud Dataflow job name.
   1735               #
   1736               # Only one Job with a given name may exist in a project at any
   1737               # given time. If a caller attempts to create a Job with the same
   1738               # name as an already-existing Job, the attempt returns the
   1739               # existing Job.
   1740               #
   1741               # The name must match the regular expression
   1742               # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   1743           "location": "A String", # The location that contains this job.
   1744           "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   1745               # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   1746           "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   1747           "currentState": "A String", # The current state of the job.
   1748               #
   1749               # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   1750               # specified.
   1751               #
   1752               # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   1753               # terminal state. After a job has reached a terminal state, no
   1754               # further state updates may be made.
   1755               #
   1756               # This field may be mutated by the Cloud Dataflow service;
   1757               # callers cannot mutate it.
   1758           "labels": { # User-defined labels for this job.
   1759               #
   1760               # The labels map can contain no more than 64 entries.  Entries of the labels
   1761               # map are UTF8 strings that comply with the following restrictions:
   1762               #
   1763               # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   1764               # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   1765               # * Both keys and values are additionally constrained to be <= 128 bytes in
   1766               # size.
   1767             "a_key": "A String",
   1768           },
   1769           "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   1770               # corresponding name prefixes of the new job.
   1771             "a_key": "A String",
   1772           },
   1773           "id": "A String", # The unique ID of this job.
   1774               #
   1775               # This field is set by the Cloud Dataflow service when the Job is
   1776               # created, and is immutable for the life of the job.
   1777           "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   1778             "version": { # A structure describing which components and their versions of the service
   1779                 # are required in order to run the job.
   1780               "a_key": "", # Properties of the object.
   1781             },
   1782             "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   1783                 # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   1784                 # this resource prefix, where {JOBNAME} is the value of the
   1785                 # job_name field.  The resulting bucket and object prefix is used
   1786                 # as the prefix of the resources used to store temporary data
   1787                 # needed during the job execution.  NOTE: This will override the
   1788                 # value in taskrunner_settings.
   1789                 # The supported resource type is:
   1790                 #
   1791                 # Google Cloud Storage:
   1792                 #
   1793                 #   storage.googleapis.com/{bucket}/{object}
   1794                 #   bucket.storage.googleapis.com/{object}
   1795             "internalExperiments": { # Experimental settings.
   1796               "a_key": "", # Properties of the object. Contains field @type with type URL.
   1797             },
   1798             "dataset": "A String", # The dataset for the current project where various workflow
   1799                 # related tables are stored.
   1800                 #
   1801                 # The supported resource type is:
   1802                 #
   1803                 # Google BigQuery:
   1804                 #   bigquery.googleapis.com/{dataset}
   1805             "experiments": [ # The list of experiments to enable.
   1806               "A String",
   1807             ],
   1808             "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   1809             "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   1810                 # options are passed through the service and are used to recreate the
   1811                 # SDK pipeline options on the worker in a language agnostic and platform
   1812                 # independent way.
   1813               "a_key": "", # Properties of the object.
   1814             },
   1815             "userAgent": { # A description of the process that generated the request.
   1816               "a_key": "", # Properties of the object.
   1817             },
   1818             "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   1819                 # unspecified, the service will attempt to choose a reasonable
   1820                 # default.  This should be in the form of the API service name,
   1821                 # e.g. "compute.googleapis.com".
   1822             "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   1823                 # specified in order for the job to have workers.
   1824               { # Describes one particular pool of Cloud Dataflow workers to be
   1825                   # instantiated by the Cloud Dataflow service in order to perform the
   1826                   # computations required by a job.  Note that a workflow job may use
   1827                   # multiple pools, in order to match the various computational
   1828                   # requirements of the various stages of the job.
   1829                 "diskSourceImage": "A String", # Fully qualified source image for disks.
   1830                 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   1831                     # using the standard Dataflow task runner.  Users should ignore
   1832                     # this field.
   1833                   "workflowFileName": "A String", # The file to store the workflow in.
   1834                   "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   1835                       # will not be uploaded.
   1836                       #
   1837                       # The supported resource type is:
   1838                       #
   1839                       # Google Cloud Storage:
   1840                       #   storage.googleapis.com/{bucket}/{object}
   1841                       #   bucket.storage.googleapis.com/{object}
   1842                   "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   1843                   "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   1844                     "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   1845                     "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   1846                         # "shuffle/v1beta1".
   1847                     "workerId": "A String", # The ID of the worker running this pipeline.
   1848                     "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   1849                         #
   1850                         # When workers access Google Cloud APIs, they logically do so via
   1851                         # relative URLs.  If this field is specified, it supplies the base
   1852                         # URL to use for resolving these relative URLs.  The normative
   1853                         # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1854                         # Locators".
   1855                         #
   1856                         # If not specified, the default value is "http://www.googleapis.com/"
   1857                     "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   1858                         # "dataflow/v1b3/projects".
   1859                     "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   1860                         # storage.
   1861                         #
   1862                         # The supported resource type is:
   1863                         #
   1864                         # Google Cloud Storage:
   1865                         #
   1866                         #   storage.googleapis.com/{bucket}/{object}
   1867                         #   bucket.storage.googleapis.com/{object}
   1868                   },
   1869                   "vmId": "A String", # The ID string of the VM.
   1870                   "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   1871                   "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   1872                   "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   1873                       # access the Cloud Dataflow API.
   1874                     "A String",
   1875                   ],
   1876                   "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   1877                       # taskrunner; e.g. "root".
   1878                   "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   1879                       #
   1880                       # When workers access Google Cloud APIs, they logically do so via
   1881                       # relative URLs.  If this field is specified, it supplies the base
   1882                       # URL to use for resolving these relative URLs.  The normative
   1883                       # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   1884                       # Locators".
   1885                       #
   1886                       # If not specified, the default value is "http://www.googleapis.com/"
   1887                   "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   1888                       # taskrunner; e.g. "wheel".
   1889                   "languageHint": "A String", # The suggested backend language.
   1890                   "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   1891                       # console.
   1892                   "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   1893                   "logDir": "A String", # The directory on the VM to store logs.
   1894                   "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
   1895                   "harnessCommand": "A String", # The command to launch the worker harness.
   1896                   "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   1897                       # temporary storage.
   1898                       #
   1899                       # The supported resource type is:
   1900                       #
   1901                       # Google Cloud Storage:
   1902                       #   storage.googleapis.com/{bucket}/{object}
   1903                       #   bucket.storage.googleapis.com/{object}
   1904                   "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   1905                 },
   1906                 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   1907                     # are supported.
   1908                 "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   1909                     # service will attempt to choose a reasonable default.
   1910                 "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   1911                     # the service will use the network "default".
   1912                 "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   1913                     # will attempt to choose a reasonable default.
   1914                 "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   1915                     # attempt to choose a reasonable default.
   1916                 "dataDisks": [ # Data disks that are used by a VM in this workflow.
   1917                   { # Describes the data disk used by a workflow job.
   1918                     "mountPoint": "A String", # Directory in a VM where disk is mounted.
   1919                     "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   1920                         # attempt to choose a reasonable default.
   1921                     "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   1922                         # must be a disk type appropriate to the project and zone in which
   1923                         # the workers will run.  If unknown or unspecified, the service
   1924                         # will attempt to choose a reasonable default.
   1925                         #
   1926                         # For example, the standard persistent disk type is a resource name
   1927                         # typically ending in "pd-standard".  If SSD persistent disks are
   1928                         # available, the resource name typically ends with "pd-ssd".  The
   1929                         # actual valid values are defined by the Google Compute Engine API,
   1930                         # not by the Cloud Dataflow API; consult the Google Compute Engine
   1931                         # documentation for more information about determining the set of
   1932                         # available disk types for a particular project and zone.
   1933                         #
   1934                         # Google Compute Engine Disk types are local to a particular
   1935                         # project in a particular zone, and so the resource name will
   1936                         # typically look something like this:
   1937                         #
   1938                         # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   1939                   },
   1940                 ],
   1941                 "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
   1942                     # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   1943                     # `TEARDOWN_NEVER`.
   1944                     # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   1945                     # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   1946                     # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   1947                     # down.
   1948                     #
   1949                     # If the workers are not torn down by the service, they will
   1950                     # continue to run and use Google Compute Engine VM resources in the
   1951                     # user's project until they are explicitly terminated by the user.
   1952                     # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   1953                     # policy except for small, manually supervised test jobs.
   1954                     #
   1955                     # If unknown or unspecified, the service will attempt to choose a reasonable
   1956                     # default.
   1957                 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   1958                     # Compute Engine API.
   1959                 "ipConfiguration": "A String", # Configuration for VM IPs.
   1960                 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   1961                     # service will choose a number of threads (according to the number of cores
   1962                     # on the selected machine type for batch, or 1 by convention for streaming).
   1963                 "poolArgs": { # Extra arguments for this worker pool.
   1964                   "a_key": "", # Properties of the object. Contains field @type with type URL.
   1965                 },
   1966                 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   1967                     # execute the job.  If zero or unspecified, the service will
   1968                     # attempt to choose a reasonable default.
   1969                 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   1970                     # harness, residing in Google Container Registry.
   1971                 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   1972                     # the form "regions/REGION/subnetworks/SUBNETWORK".
   1973                 "packages": [ # Packages to be installed on workers.
   1974                   { # The packages that must be installed in order for a worker to run the
   1975                       # steps of the Cloud Dataflow job that will be assigned to its worker
   1976                       # pool.
   1977                       #
   1978                       # This is the mechanism by which the Cloud Dataflow SDK causes code to
   1979                       # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   1980                       # might use this to install jars containing the user's code and all of the
   1981                       # various dependencies (libraries, data files, etc.) required in order
   1982                       # for that code to run.
   1983                     "location": "A String", # The resource to read the package from. The supported resource type is:
   1984                         #
   1985                         # Google Cloud Storage:
   1986                         #
   1987                         #   storage.googleapis.com/{bucket}
   1988                         #   bucket.storage.googleapis.com/
   1989                     "name": "A String", # The name of the package.
   1990                   },
   1991                 ],
   1992                 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   1993                   "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   1994                   "algorithm": "A String", # The algorithm to use for autoscaling.
   1995                 },
   1996                 "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   1997                     # select a default set of packages which are useful to worker
   1998                     # harnesses written in a particular language.
   1999                 "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   2000                     # attempt to choose a reasonable default.
   2001                 "metadata": { # Metadata to set on the Google Compute Engine VMs.
   2002                   "a_key": "A String",
   2003                 },
   2004               },
   2005             ],
   2006           },
   2007           "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
   2008               # form.  This data is provided by the Dataflow service for ease of visualizing
   2009               # the pipeline and interpreting Dataflow provided metrics.
   2010               # A description of the user pipeline and stages through which it is executed.
   2011               # Created by Cloud Dataflow service.  Only retrieved with
   2012               # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   2013             "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   2014               { # Description of the type, names/ids, and input/outputs for a transform.
   2015                 "kind": "A String", # Type of transform.
   2016                 "name": "A String", # User provided name for this transform instance.
   2017                 "inputCollectionName": [ # User names for all collection inputs to this transform.
   2018                   "A String",
   2019                 ],
   2020                 "displayData": [ # Transform-specific display data.
   2021                   { # Data provided with a pipeline or transform to provide descriptive info.
   2022                     "shortStrValue": "A String", # A possible additional shorter value to display.
   2023                         # For example a java_class_name_value of com.mypackage.MyDoFn
   2024                         # will be stored with MyDoFn as the short_str_value and
   2025                         # com.mypackage.MyDoFn as the java_class_name value.
   2026                         # short_str_value can be displayed and java_class_name_value
   2027                         # will be displayed as a tooltip.
   2028                     "durationValue": "A String", # Contains value if the data is of duration type.
   2029                     "url": "A String", # An optional full URL.
   2030                     "floatValue": 3.14, # Contains value if the data is of float type.
   2031                     "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2032                         # language namespace (i.e. python module) which defines the display data.
   2033                         # This allows a dax monitoring system to specially handle the data
   2034                         # and perform custom rendering.
   2035                     "javaClassValue": "A String", # Contains value if the data is of java class type.
   2036                     "label": "A String", # An optional label to display in a dax UI for the element.
   2037                     "boolValue": True or False, # Contains value if the data is of a boolean type.
   2038                     "strValue": "A String", # Contains value if the data is of string type.
   2039                     "key": "A String", # The key identifying the display data.
   2040                         # This is intended to be used as a label for the display data
   2041                         # when viewed in a dax monitoring system.
   2042                     "int64Value": "A String", # Contains value if the data is of int64 type.
   2043                     "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2044                   },
   2045                 ],
   2046                 "outputCollectionName": [ # User names for all collection outputs to this transform.
   2047                   "A String",
   2048                 ],
   2049                 "id": "A String", # SDK generated id of this transform instance.
   2050               },
   2051             ],
   2052             "displayData": [ # Pipeline level display data.
   2053               { # Data provided with a pipeline or transform to provide descriptive info.
   2054                 "shortStrValue": "A String", # A possible additional shorter value to display.
   2055                     # For example a java_class_name_value of com.mypackage.MyDoFn
   2056                     # will be stored with MyDoFn as the short_str_value and
   2057                     # com.mypackage.MyDoFn as the java_class_name value.
   2058                     # short_str_value can be displayed and java_class_name_value
   2059                     # will be displayed as a tooltip.
   2060                 "durationValue": "A String", # Contains value if the data is of duration type.
   2061                 "url": "A String", # An optional full URL.
   2062                 "floatValue": 3.14, # Contains value if the data is of float type.
   2063                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2064                     # language namespace (i.e. python module) which defines the display data.
   2065                     # This allows a dax monitoring system to specially handle the data
   2066                     # and perform custom rendering.
   2067                 "javaClassValue": "A String", # Contains value if the data is of java class type.
   2068                 "label": "A String", # An optional label to display in a dax UI for the element.
   2069                 "boolValue": True or False, # Contains value if the data is of a boolean type.
   2070                 "strValue": "A String", # Contains value if the data is of string type.
   2071                 "key": "A String", # The key identifying the display data.
   2072                     # This is intended to be used as a label for the display data
   2073                     # when viewed in a dax monitoring system.
   2074                 "int64Value": "A String", # Contains value if the data is of int64 type.
   2075                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2076               },
   2077             ],
   2078             "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   2079               { # Description of the composing transforms, names/ids, and input/outputs of a
   2080                   # stage of execution.  Some composing transforms and sources may have been
   2081                   # generated by the Dataflow service during execution planning.
   2082                 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   2083                   { # Description of an interstitial value between transforms in an execution
   2084                       # stage.
   2085                     "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2086                     "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2087                         # source is most closely associated.
   2088                     "name": "A String", # Dataflow service generated name for this source.
   2089                   },
   2090                 ],
   2091                 "kind": "A String", # Type of transform this stage is executing.
   2092                 "name": "A String", # Dataflow service generated name for this stage.
   2093                 "outputSource": [ # Output sources for this stage.
   2094                   { # Description of an input or output of an execution stage.
   2095                     "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2096                     "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2097                         # source is most closely associated.
   2098                     "name": "A String", # Dataflow service generated name for this source.
   2099                     "sizeBytes": "A String", # Size of the source, if measurable.
   2100                   },
   2101                 ],
   2102                 "inputSource": [ # Input sources for this stage.
   2103                   { # Description of an input or output of an execution stage.
   2104                     "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2105                     "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2106                         # source is most closely associated.
   2107                     "name": "A String", # Dataflow service generated name for this source.
   2108                     "sizeBytes": "A String", # Size of the source, if measurable.
   2109                   },
   2110                 ],
   2111                 "componentTransform": [ # Transforms that comprise this execution stage.
   2112                   { # Description of a transform executed as part of an execution stage.
   2113                     "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2114                     "originalTransform": "A String", # User name for the original user transform with which this transform is
   2115                         # most closely associated.
   2116                     "name": "A String", # Dataflow service generated name for this source.
   2117                   },
   2118                 ],
   2119                 "id": "A String", # Dataflow service generated id for this stage.
   2120               },
   2121             ],
   2122           },
   2123           "steps": [ # The top-level steps that constitute the entire job.
   2124             { # Defines a particular step within a Cloud Dataflow job.
   2125                 #
   2126                 # A job consists of multiple steps, each of which performs some
   2127                 # specific operation as part of the overall job.  Data is typically
   2128                 # passed from one step to another as part of the job.
   2129                 #
   2130                 # Here's an example of a sequence of steps which together implement a
   2131                 # Map-Reduce job:
   2132                 #
   2133                 #   * Read a collection of data from some source, parsing the
   2134                 #     collection's elements.
   2135                 #
   2136                 #   * Validate the elements.
   2137                 #
   2138                 #   * Apply a user-defined function to map each element to some value
   2139                 #     and extract an element-specific key value.
   2140                 #
   2141                 #   * Group elements with the same key into a single element with
   2142                 #     that key, transforming a multiply-keyed collection into a
   2143                 #     uniquely-keyed collection.
   2144                 #
   2145                 #   * Write the elements out to some data sink.
   2146                 #
   2147                 # Note that the Cloud Dataflow service may be used to run many different
   2148                 # types of jobs, not just Map-Reduce.
   2149               "kind": "A String", # The kind of step in the Cloud Dataflow job.
   2150               "properties": { # Named properties associated with the step. Each kind of
   2151                   # predefined step has its own required set of properties.
   2152                   # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   2153                 "a_key": "", # Properties of the object.
   2154               },
   2155               "name": "A String", # The name that identifies the step. This must be unique for each
   2156                   # step with respect to all other steps in the Cloud Dataflow job.
   2157             },
   2158           ],
   2159           "currentStateTime": "A String", # The timestamp associated with the current state.
   2160           "tempFiles": [ # A set of files the system should be aware of that are used
   2161               # for temporary storage. These temporary files will be
   2162               # removed on job completion.
   2163               # No duplicates are allowed.
   2164               # No file patterns are supported.
   2165               #
   2166               # The supported files are:
   2167               #
   2168               # Google Cloud Storage:
   2169               #
   2170               #    storage.googleapis.com/{bucket}/{object}
   2171               #    bucket.storage.googleapis.com/{object}
   2172             "A String",
   2173           ],
   2174           "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   2175               # callers cannot mutate it.
   2176             { # A message describing the state of a particular execution stage.
   2177               "executionStageName": "A String", # The name of the execution stage.
   2178               "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   2179               "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   2180             },
   2181           ],
   2182           "type": "A String", # The type of Cloud Dataflow job.
   2183           "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   2184               # Cloud Dataflow service.
   2185           "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   2186               # of the job it replaced.
   2187               #
   2188               # When sending a `CreateJobRequest`, you can update a job by specifying it
   2189               # here. The job named here is stopped, and its intermediate state is
   2190               # transferred to this job.
   2191           "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
   2192               # isn't contained in the submitted job.
   2193             "stages": { # A mapping from each stage to the information about that stage.
   2194               "a_key": { # Contains information about how a particular
   2195                   # google.dataflow.v1beta3.Step will be executed.
   2196                 "stepName": [ # The steps associated with the execution stage.
   2197                     # Note that stages may have several steps, and that a given step
   2198                     # might be run by more than one stage.
   2199                   "A String",
   2200                 ],
   2201               },
   2202             },
   2203           },
   2204         },
   2205     ],
   2206   }</pre>
   2207 </div>
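<p>The schema above is verbose, but typical client code only touches a few fields. Below is a minimal sketch of paging through jobs with <code>list()</code> (documented above) and <code>list_next()</code> (documented below); it assumes an authorized <code>service</code> object built with <code>googleapiclient.discovery.build('dataflow', 'v1b3', credentials=credentials)</code>, and the project ID and page size are placeholders.</p>
<pre>
# Hypothetical usage sketch: page through all jobs in a project.
from googleapiclient import discovery

# 'credentials' is assumed to be an existing google-auth credentials object.
service = discovery.build('dataflow', 'v1b3', credentials=credentials)

request = service.projects().jobs().list(projectId='my-project', pageSize=50)
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        # Each entry is a Job resource; print a few commonly used fields.
        print(job.get('id'), job.get('name'), job.get('currentState'))
    # list_next() returns None once the collection is exhausted.
    request = service.projects().jobs().list_next(
        previous_request=request, previous_response=response)
</pre>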
   2208 
   2209 <div class="method">
   2210     <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
   2211   <pre>Retrieves the next page of results.
   2212 
   2213 Args:
   2214   previous_request: The request for the previous page. (required)
   2215   previous_response: The response from the request for the previous page. (required)
   2216 
   2217 Returns:
   2218   A request object that you can call 'execute()' on to request the next
   2219   page. Returns None if there are no more items in the collection.
   2220     </pre>
   2221 </div>
   2222 
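<p>A hedged sketch of a common use of the <code>update()</code> method documented below: requesting cancellation of a running job by setting <code>requestedState</code> to <code>JOB_STATE_CANCELLED</code>, as described in the field comments. It reuses the <code>service</code> object from the previous sketch; the project ID and job ID are placeholders, and the minimal one-field body is an assumption.</p>
<pre>
# Hypothetical usage sketch: ask the service to cancel a job.
body = {'requestedState': 'JOB_STATE_CANCELLED'}  # only the field being changed
result = service.projects().jobs().update(
    projectId='my-project',   # placeholder project ID
    jobId='your-job-id',      # placeholder job ID
    body=body).execute()
print(result.get('currentState'))  # may still show the old state until the cancel takes effect
</pre>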
   2223 <div class="method">
   2224     <code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>
   2225   <pre>Updates the state of an existing Cloud Dataflow job.
   2226 
   2227 Args:
   2228   projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
   2229   jobId: string, The job ID. (required)
   2230   body: object, The request body. (required)
   2231     The object takes the form of:
   2232 
   2233 { # Defines a job to be run by the Cloud Dataflow service.
   2234     "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   2235         # If this field is set, the service will ensure its uniqueness.
   2236         # The request to create a job will fail if the service has knowledge of a
   2237         # previously submitted job with the same client's ID and job name.
   2238         # The caller may use this field to ensure idempotence of job
   2239         # creation across retried attempts to create a job.
   2240         # By default, the field is empty and, in that case, the service ignores it.
   2241     "requestedState": "A String", # The job's requested state.
   2242         # 
   2243         # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   2244         # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   2245         # also be used to directly set a job's requested state to
   2246         # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   2247         # job if it has not already reached a terminal state.
   2248     "name": "A String", # The user-specified Cloud Dataflow job name.
   2249         # 
   2250         # Only one Job with a given name may exist in a project at any
   2251         # given time. If a caller attempts to create a Job with the same
   2252         # name as an already-existing Job, the attempt returns the
   2253         # existing Job.
   2254         # 
   2255         # The name must match the regular expression
   2256         # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   2257     "location": "A String", # The location that contains this job.
   2258     "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   2259         # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   2260     "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   2261     "currentState": "A String", # The current state of the job.
   2262         # 
   2263         # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   2264         # specified.
   2265         # 
   2266         # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   2267         # terminal state. After a job has reached a terminal state, no
   2268         # further state updates may be made.
   2269         # 
   2270         # This field may be mutated by the Cloud Dataflow service;
   2271         # callers cannot mutate it.
   2272     "labels": { # User-defined labels for this job.
   2273         # 
   2274         # The labels map can contain no more than 64 entries.  Entries of the labels
   2275         # map are UTF8 strings that comply with the following restrictions:
   2276         # 
   2277         # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   2278         # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   2279         # * Both keys and values are additionally constrained to be <= 128 bytes in
   2280         # size.
   2281       "a_key": "A String",
   2282     },
   2283     "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   2284         # corresponding name prefixes of the new job.
   2285       "a_key": "A String",
   2286     },
   2287     "id": "A String", # The unique ID of this job.
   2288         # 
   2289         # This field is set by the Cloud Dataflow service when the Job is
   2290         # created, and is immutable for the life of the job.
   2291     "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   2292       "version": { # A structure describing which components and their versions of the service
   2293           # are required in order to run the job.
   2294         "a_key": "", # Properties of the object.
   2295       },
   2296       "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2297           # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   2298           # this resource prefix, where {JOBNAME} is the value of the
   2299           # job_name field.  The resulting bucket and object prefix is used
   2300           # as the prefix of the resources used to store temporary data
   2301           # needed during the job execution.  NOTE: This will override the
   2302           # value in taskrunner_settings.
   2303           # The supported resource type is:
   2304           #
   2305           # Google Cloud Storage:
   2306           #
   2307           #   storage.googleapis.com/{bucket}/{object}
   2308           #   bucket.storage.googleapis.com/{object}
   2309       "internalExperiments": { # Experimental settings.
   2310         "a_key": "", # Properties of the object. Contains field @type with type URL.
   2311       },
   2312       "dataset": "A String", # The dataset for the current project where various workflow
   2313           # related tables are stored.
   2314           #
   2315           # The supported resource type is:
   2316           #
   2317           # Google BigQuery:
   2318           #   bigquery.googleapis.com/{dataset}
   2319       "experiments": [ # The list of experiments to enable.
   2320         "A String",
   2321       ],
   2322       "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   2323       "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   2324           # options are passed through the service and are used to recreate the
   2325           # SDK pipeline options on the worker in a language agnostic and platform
   2326           # independent way.
   2327         "a_key": "", # Properties of the object.
   2328       },
   2329       "userAgent": { # A description of the process that generated the request.
   2330         "a_key": "", # Properties of the object.
   2331       },
   2332       "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   2333           # unspecified, the service will attempt to choose a reasonable
   2334           # default.  This should be in the form of the API service name,
   2335           # e.g. "compute.googleapis.com".
   2336       "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   2337           # specified in order for the job to have workers.
   2338         { # Describes one particular pool of Cloud Dataflow workers to be
   2339             # instantiated by the Cloud Dataflow service in order to perform the
   2340             # computations required by a job.  Note that a workflow job may use
   2341             # multiple pools, in order to match the various computational
   2342             # requirements of the various stages of the job.
   2343           "diskSourceImage": "A String", # Fully qualified source image for disks.
   2344           "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   2345               # using the standard Dataflow task runner.  Users should ignore
   2346               # this field.
   2347             "workflowFileName": "A String", # The file to store the workflow in.
   2348             "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   2349                 # will not be uploaded.
   2350                 #
   2351                 # The supported resource type is:
   2352                 #
   2353                 # Google Cloud Storage:
   2354                 #   storage.googleapis.com/{bucket}/{object}
   2355                 #   bucket.storage.googleapis.com/{object}
   2356             "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   2357             "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   2358               "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   2359               "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   2360                   # "shuffle/v1beta1".
   2361               "workerId": "A String", # The ID of the worker running this pipeline.
   2362               "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   2363                   #
   2364                   # When workers access Google Cloud APIs, they logically do so via
   2365                   # relative URLs.  If this field is specified, it supplies the base
   2366                   # URL to use for resolving these relative URLs.  The normative
   2367                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2368                   # Locators".
   2369                   #
   2370                   # If not specified, the default value is "http://www.googleapis.com/"
   2371               "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   2372                   # "dataflow/v1b3/projects".
   2373               "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2374                   # storage.
   2375                   #
   2376                   # The supported resource type is:
   2377                   #
   2378                   # Google Cloud Storage:
   2379                   #
   2380                   #   storage.googleapis.com/{bucket}/{object}
   2381                   #   bucket.storage.googleapis.com/{object}
   2382             },
   2383             "vmId": "A String", # The ID string of the VM.
   2384             "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   2385             "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   2386             "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   2387                 # access the Cloud Dataflow API.
   2388               "A String",
   2389             ],
   2390             "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   2391                 # taskrunner; e.g. "root".
   2392             "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   2393                 #
   2394                 # When workers access Google Cloud APIs, they logically do so via
   2395                 # relative URLs.  If this field is specified, it supplies the base
   2396                 # URL to use for resolving these relative URLs.  The normative
   2397                 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2398                 # Locators".
   2399                 #
   2400                 # If not specified, the default value is "http://www.googleapis.com/"
   2401             "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   2402                 # taskrunner; e.g. "wheel".
   2403             "languageHint": "A String", # The suggested backend language.
   2404             "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   2405                 # console.
   2406             "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   2407             "logDir": "A String", # The directory on the VM to store logs.
   2408             "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
   2409             "harnessCommand": "A String", # The command to launch the worker harness.
   2410             "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   2411                 # temporary storage.
   2412                 #
   2413                 # The supported resource type is:
   2414                 #
   2415                 # Google Cloud Storage:
   2416                 #   storage.googleapis.com/{bucket}/{object}
   2417                 #   bucket.storage.googleapis.com/{object}
   2418             "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   2419           },
   2420           "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   2421               # are supported.
   2422           "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   2423               # service will attempt to choose a reasonable default.
   2424           "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   2425               # the service will use the network "default".
   2426           "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   2427               # will attempt to choose a reasonable default.
   2428           "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   2429               # attempt to choose a reasonable default.
   2430           "dataDisks": [ # Data disks that are used by a VM in this workflow.
   2431             { # Describes the data disk used by a workflow job.
   2432               "mountPoint": "A String", # Directory in a VM where disk is mounted.
   2433               "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   2434                   # attempt to choose a reasonable default.
   2435               "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   2436                   # must be a disk type appropriate to the project and zone in which
   2437                   # the workers will run.  If unknown or unspecified, the service
   2438                   # will attempt to choose a reasonable default.
   2439                   #
   2440                   # For example, the standard persistent disk type is a resource name
   2441                   # typically ending in "pd-standard".  If SSD persistent disks are
   2442                   # available, the resource name typically ends with "pd-ssd".  The
   2443                   # actual valid values are defined by the Google Compute Engine API,
   2444                   # not by the Cloud Dataflow API; consult the Google Compute Engine
   2445                   # documentation for more information about determining the set of
   2446                   # available disk types for a particular project and zone.
   2447                   #
   2448                   # Google Compute Engine Disk types are local to a particular
   2449                   # project in a particular zone, and so the resource name will
   2450                   # typically look something like this:
   2451                   #
   2452                   # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   2453             },
   2454           ],
   2455           "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
   2456               # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   2457               # `TEARDOWN_NEVER`.
   2458               # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   2459               # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   2460               # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   2461               # down.
   2462               #
   2463               # If the workers are not torn down by the service, they will
   2464               # continue to run and use Google Compute Engine VM resources in the
   2465               # user's project until they are explicitly terminated by the user.
   2466               # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   2467               # policy except for small, manually supervised test jobs.
   2468               #
   2469               # If unknown or unspecified, the service will attempt to choose a reasonable
   2470               # default.
   2471           "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   2472               # Compute Engine API.
   2473           "ipConfiguration": "A String", # Configuration for VM IPs.
   2474           "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   2475               # service will choose a number of threads (according to the number of cores
   2476               # on the selected machine type for batch, or 1 by convention for streaming).
   2477           "poolArgs": { # Extra arguments for this worker pool.
   2478             "a_key": "", # Properties of the object. Contains field @type with type URL.
   2479           },
   2480           "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   2481               # execute the job.  If zero or unspecified, the service will
   2482               # attempt to choose a reasonable default.
   2483           "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   2484               # harness, residing in Google Container Registry.
   2485           "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   2486               # the form "regions/REGION/subnetworks/SUBNETWORK".
   2487           "packages": [ # Packages to be installed on workers.
   2488             { # The packages that must be installed in order for a worker to run the
   2489                 # steps of the Cloud Dataflow job that will be assigned to its worker
   2490                 # pool.
   2491                 #
   2492                 # This is the mechanism by which the Cloud Dataflow SDK causes code to
   2493                 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   2494                 # might use this to install jars containing the user's code and all of the
   2495                 # various dependencies (libraries, data files, etc.) required in order
   2496                 # for that code to run.
   2497               "location": "A String", # The resource to read the package from. The supported resource type is:
   2498                   #
   2499                   # Google Cloud Storage:
   2500                   #
   2501                   #   storage.googleapis.com/{bucket}
   2502                   #   bucket.storage.googleapis.com/
   2503               "name": "A String", # The name of the package.
   2504             },
   2505           ],
   2506           "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   2507             "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   2508             "algorithm": "A String", # The algorithm to use for autoscaling.
   2509           },
   2510           "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   2511               # select a default set of packages which are useful to worker
   2512               # harnesses written in a particular language.
   2513           "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   2514               # attempt to choose a reasonable default.
   2515           "metadata": { # Metadata to set on the Google Compute Engine VMs.
   2516             "a_key": "A String",
   2517           },
   2518         },
   2519       ],
   2520     },
   2521     "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as its executed form. # Preliminary field: The format of this data may change at any time.
   2522         # A description of the user pipeline and the stages through which it is executed.
   2523         # Created by the Cloud Dataflow service.  Only retrieved with
   2524         # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   2525         # This data is provided by the Dataflow service for ease of visualizing
   2526         # the pipeline and interpreting Dataflow-provided metrics.
   2527       "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   2528         { # Description of the type, names/ids, and input/outputs for a transform.
   2529           "kind": "A String", # Type of transform.
   2530           "name": "A String", # User provided name for this transform instance.
   2531           "inputCollectionName": [ # User names for all collection inputs to this transform.
   2532             "A String",
   2533           ],
   2534           "displayData": [ # Transform-specific display data.
   2535             { # Data provided with a pipeline or transform to provide descriptive info.
   2536               "shortStrValue": "A String", # A possible additional shorter value to display.
   2537                   # For example a java_class_name_value of com.mypackage.MyDoFn
   2538                   # will be stored with MyDoFn as the short_str_value and
   2539                   # com.mypackage.MyDoFn as the java_class_name value.
   2540                   # short_str_value can be displayed and java_class_name_value
   2541                   # will be displayed as a tooltip.
   2542               "durationValue": "A String", # Contains value if the data is of duration type.
   2543               "url": "A String", # An optional full URL.
   2544               "floatValue": 3.14, # Contains value if the data is of float type.
   2545               "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2546                   # language namespace (e.g. a Python module) that defines the display data.
   2547                   # This allows a dax monitoring system to specially handle the data
   2548                   # and perform custom rendering.
   2549               "javaClassValue": "A String", # Contains value if the data is of java class type.
   2550               "label": "A String", # An optional label to display in a dax UI for the element.
   2551               "boolValue": True or False, # Contains value if the data is of a boolean type.
   2552               "strValue": "A String", # Contains value if the data is of string type.
   2553               "key": "A String", # The key identifying the display data.
   2554                   # This is intended to be used as a label for the display data
   2555                   # when viewed in a dax monitoring system.
   2556               "int64Value": "A String", # Contains value if the data is of int64 type.
   2557               "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2558             },
   2559           ],
   2560           "outputCollectionName": [ # User names for all collection outputs to this transform.
   2561             "A String",
   2562           ],
   2563           "id": "A String", # SDK generated id of this transform instance.
   2564         },
   2565       ],
   2566       "displayData": [ # Pipeline level display data.
   2567         { # Data provided with a pipeline or transform to provide descriptive info.
   2568           "shortStrValue": "A String", # A possible additional shorter value to display.
   2569               # For example a java_class_name_value of com.mypackage.MyDoFn
   2570               # will be stored with MyDoFn as the short_str_value and
   2571               # com.mypackage.MyDoFn as the java_class_name value.
   2572               # short_str_value can be displayed and java_class_name_value
   2573               # will be displayed as a tooltip.
   2574           "durationValue": "A String", # Contains value if the data is of duration type.
   2575           "url": "A String", # An optional full URL.
   2576           "floatValue": 3.14, # Contains value if the data is of float type.
   2577           "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   2578               # language namespace (e.g. a Python module) that defines the display data.
   2579               # This allows a dax monitoring system to specially handle the data
   2580               # and perform custom rendering.
   2581           "javaClassValue": "A String", # Contains value if the data is of java class type.
   2582           "label": "A String", # An optional label to display in a dax UI for the element.
   2583           "boolValue": True or False, # Contains value if the data is of a boolean type.
   2584           "strValue": "A String", # Contains value if the data is of string type.
   2585           "key": "A String", # The key identifying the display data.
   2586               # This is intended to be used as a label for the display data
   2587               # when viewed in a dax monitoring system.
   2588           "int64Value": "A String", # Contains value if the data is of int64 type.
   2589           "timestampValue": "A String", # Contains value if the data is of timestamp type.
   2590         },
   2591       ],
   2592       "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   2593         { # Description of the composing transforms, names/ids, and input/outputs of a
   2594             # stage of execution.  Some composing transforms and sources may have been
   2595             # generated by the Dataflow service during execution planning.
   2596           "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   2597             { # Description of an interstitial value between transforms in an execution
   2598                 # stage.
   2599               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2600               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2601                   # source is most closely associated.
   2602               "name": "A String", # Dataflow service generated name for this source.
   2603             },
   2604           ],
   2605           "kind": "A String", # Type of transform this stage is executing.
   2606           "name": "A String", # Dataflow service generated name for this stage.
   2607           "outputSource": [ # Output sources for this stage.
   2608             { # Description of an input or output of an execution stage.
   2609               "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2610               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2611                   # source is most closely associated.
   2612               "name": "A String", # Dataflow service generated name for this source.
   2613               "sizeBytes": "A String", # Size of the source, if measurable.
   2614             },
   2615           ],
   2616           "inputSource": [ # Input sources for this stage.
   2617             { # Description of an input or output of an execution stage.
   2618               "userName": "A String", # Human-readable name for this source; may be user or system generated.
   2619               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   2620                   # source is most closely associated.
   2621               "name": "A String", # Dataflow service generated name for this source.
   2622               "sizeBytes": "A String", # Size of the source, if measurable.
   2623             },
   2624           ],
   2625           "componentTransform": [ # Transforms that comprise this execution stage.
   2626             { # Description of a transform executed as part of an execution stage.
   2627               "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   2628               "originalTransform": "A String", # User name for the original user transform with which this transform is
   2629                   # most closely associated.
   2630               "name": "A String", # Dataflow service generated name for this transform.
   2631             },
   2632           ],
   2633           "id": "A String", # Dataflow service generated id for this stage.
   2634         },
   2635       ],
   2636     },
   2637     "steps": [ # The top-level steps that constitute the entire job.
   2638       { # Defines a particular step within a Cloud Dataflow job.
   2639           #
   2640           # A job consists of multiple steps, each of which performs some
   2641           # specific operation as part of the overall job.  Data is typically
   2642           # passed from one step to another as part of the job.
   2643           #
   2644           # Here's an example of a sequence of steps which together implement a
   2645           # Map-Reduce job:
   2646           #
   2647           #   * Read a collection of data from some source, parsing the
   2648           #     collection's elements.
   2649           #
   2650           #   * Validate the elements.
   2651           #
   2652           #   * Apply a user-defined function to map each element to some value
   2653           #     and extract an element-specific key value.
   2654           #
   2655           #   * Group elements with the same key into a single element with
   2656           #     that key, transforming a multiply-keyed collection into a
   2657           #     uniquely-keyed collection.
   2658           #
   2659           #   * Write the elements out to some data sink.
   2660           #
   2661           # Note that the Cloud Dataflow service may be used to run many different
   2662           # types of jobs, not just Map-Reduce.
   2663         "kind": "A String", # The kind of step in the Cloud Dataflow job.
   2664         "properties": { # Named properties associated with the step. Each kind of
   2665             # predefined step has its own required set of properties.
   2666             # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   2667           "a_key": "", # Properties of the object.
   2668         },
   2669         "name": "A String", # The name that identifies the step. This must be unique for each
   2670             # step with respect to all other steps in the Cloud Dataflow job.
   2671       },
   2672     ],
   2673     "currentStateTime": "A String", # The timestamp associated with the current state.
   2674     "tempFiles": [ # A set of files the system should be aware of that are used
   2675         # for temporary storage. These temporary files will be
   2676         # removed on job completion.
   2677         # No duplicates are allowed.
   2678         # No file patterns are supported.
   2679         # 
   2680         # The supported files are:
   2681         # 
   2682         # Google Cloud Storage:
   2683         # 
   2684         #    storage.googleapis.com/{bucket}/{object}
   2685         #    bucket.storage.googleapis.com/{object}
   2686       "A String",
   2687     ],
   2688     "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   2689         # callers cannot mutate it.
   2690       { # A message describing the state of a particular execution stage.
   2691         "executionStageName": "A String", # The name of the execution stage.
   2692         "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   2693         "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   2694       },
   2695     ],
   2696     "type": "A String", # The type of Cloud Dataflow job.
   2697     "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   2698         # Cloud Dataflow service.
   2699     "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   2700         # of the job it replaced.
   2701         # 
   2702         # When sending a `CreateJobRequest`, you can update a job by specifying it
   2703         # here. The job named here is stopped, and its intermediate state is
   2704         # transferred to this job.
   2705     "executionInfo": { # Additional information about how a Cloud Dataflow job will be # Deprecated.
   2706         # executed that isn't contained in the submitted job.
   2707       "stages": { # A mapping from each stage to the information about that stage.
   2708         "a_key": { # Contains information about how a particular
   2709             # google.dataflow.v1beta3.Step will be executed.
   2710           "stepName": [ # The steps associated with the execution stage.
   2711               # Note that stages may have several steps, and that a given step
   2712               # might be run by more than one stage.
   2713             "A String",
   2714           ],
   2715         },
   2716       },
   2717     },
   2718   }
   2719 
   2720   location: string, The location that contains this job.
   2721   x__xgafv: string, V1 error format.
   2722     Allowed values
   2723       1 - v1 error format
   2724       2 - v2 error format
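
  Example (a minimal sketch, not part of the generated reference): it assumes
  Application Default Credentials are available and uses the placeholder
  variables `project`, `job_id`, and `job_body`, where `job_body` is a Job
  object shaped like the request body documented above.

    from googleapiclient.discovery import build

    # Build the Dataflow v1b3 client.
    dataflow = build('dataflow', 'v1b3')

    # Update the job; the optional `location` keyword may also be passed if
    # the job lives in a specific regional endpoint.
    updated_job = dataflow.projects().jobs().update(
        projectId=project,
        jobId=job_id,
        body=job_body).execute()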
   2725 
   2726 Returns:
   2727   An object of the form:
   2728 
   2729     { # Defines a job to be run by the Cloud Dataflow service.
   2730       "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
   2731           # If this field is set, the service will ensure its uniqueness.
   2732           # The request to create a job will fail if the service has knowledge of a
   2733           # previously submitted job with the same client's ID and job name.
   2734           # The caller may use this field to ensure idempotence of job
   2735           # creation across retried attempts to create a job.
   2736           # By default, the field is empty and, in that case, the service ignores it.
   2737       "requestedState": "A String", # The job's requested state.
   2738           #
   2739           # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
   2740           # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
   2741           # also be used to directly set a job's requested state to
   2742           # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
   2743           # job if it has not already reached a terminal state.
   2744       "name": "A String", # The user-specified Cloud Dataflow job name.
   2745           #
   2746           # Only one Job with a given name may exist in a project at any
   2747           # given time. If a caller attempts to create a Job with the same
   2748           # name as an already-existing Job, the attempt returns the
   2749           # existing Job.
   2750           #
   2751           # The name must match the regular expression
   2752           # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
   2753       "location": "A String", # The location that contains this job.
   2754       "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
   2755           # `JOB_STATE_UPDATED`), this field contains the ID of that job.
   2756       "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
   2757       "currentState": "A String", # The current state of the job.
   2758           #
   2759           # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
   2760           # specified.
   2761           #
   2762           # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
   2763           # terminal state. After a job has reached a terminal state, no
   2764           # further state updates may be made.
   2765           #
   2766           # This field may be mutated by the Cloud Dataflow service;
   2767           # callers cannot mutate it.
   2768       "labels": { # User-defined labels for this job.
   2769           #
   2770           # The labels map can contain no more than 64 entries.  Entries of the labels
   2771           # map are UTF8 strings that comply with the following restrictions:
   2772           #
   2773           # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
   2774           # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
   2775           # * Both keys and values are additionally constrained to be <= 128 bytes in
   2776           # size.
   2777         "a_key": "A String",
   2778       },
   2779       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
   2780           # corresponding name prefixes of the new job.
   2781         "a_key": "A String",
   2782       },
   2783       "id": "A String", # The unique ID of this job.
   2784           #
   2785           # This field is set by the Cloud Dataflow service when the Job is
   2786           # created, and is immutable for the life of the job.
   2787       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
   2788         "version": { # A structure describing which components and their versions of the service
   2789             # are required in order to run the job.
   2790           "a_key": "", # Properties of the object.
   2791         },
   2792         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2793             # storage.  The system will append the suffix "/temp-{JOBNAME}" to
   2794             # this resource prefix, where {JOBNAME} is the value of the
   2795             # job_name field.  The resulting bucket and object prefix is used
   2796             # as the prefix of the resources used to store temporary data
   2797             # needed during the job execution.  NOTE: This will override the
   2798             # value in taskrunner_settings.
   2799             # The supported resource type is:
   2800             #
   2801             # Google Cloud Storage:
   2802             #
   2803             #   storage.googleapis.com/{bucket}/{object}
   2804             #   bucket.storage.googleapis.com/{object}
   2805         "internalExperiments": { # Experimental settings.
   2806           "a_key": "", # Properties of the object. Contains field @type with type URL.
   2807         },
   2808         "dataset": "A String", # The dataset for the current project where various workflow
   2809             # related tables are stored.
   2810             #
   2811             # The supported resource type is:
   2812             #
   2813             # Google BigQuery:
   2814             #   bigquery.googleapis.com/{dataset}
   2815         "experiments": [ # The list of experiments to enable.
   2816           "A String",
   2817         ],
   2818         "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
   2819         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
   2820             # options are passed through the service and are used to recreate the
   2821             # SDK pipeline options on the worker in a language agnostic and platform
   2822             # independent way.
   2823           "a_key": "", # Properties of the object.
   2824         },
   2825         "userAgent": { # A description of the process that generated the request.
   2826           "a_key": "", # Properties of the object.
   2827         },
   2828         "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
   2829             # unspecified, the service will attempt to choose a reasonable
   2830             # default.  This should be in the form of the API service name,
   2831             # e.g. "compute.googleapis.com".
   2832         "workerPools": [ # The worker pools. At least one "harness" worker pool must be
   2833             # specified in order for the job to have workers.
   2834           { # Describes one particular pool of Cloud Dataflow workers to be
   2835               # instantiated by the Cloud Dataflow service in order to perform the
   2836               # computations required by a job.  Note that a workflow job may use
   2837               # multiple pools, in order to match the various computational
   2838               # requirements of the various stages of the job.
   2839             "diskSourceImage": "A String", # Fully qualified source image for disks.
   2840             "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
   2841                 # using the standard Dataflow task runner.  Users should ignore
   2842                 # this field.
   2843               "workflowFileName": "A String", # The file to store the workflow in.
   2844               "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
   2845                   # will not be uploaded.
   2846                   #
   2847                   # The supported resource type is:
   2848                   #
   2849                   # Google Cloud Storage:
   2850                   #   storage.googleapis.com/{bucket}/{object}
   2851                   #   bucket.storage.googleapis.com/{object}
   2852               "commandlinesFileName": "A String", # The file to store preprocessing commands in.
   2853               "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
   2854                 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
   2855                 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
   2856                     # "shuffle/v1beta1".
   2857                 "workerId": "A String", # The ID of the worker running this pipeline.
   2858                 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
   2859                     #
   2860                     # When workers access Google Cloud APIs, they logically do so via
   2861                     # relative URLs.  If this field is specified, it supplies the base
   2862                     # URL to use for resolving these relative URLs.  The normative
   2863                     # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2864                     # Locators".
   2865                     #
   2866                     # If not specified, the default value is "http://www.googleapis.com/"
   2867                 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
   2868                     # "dataflow/v1b3/projects".
   2869                 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
   2870                     # storage.
   2871                     #
   2872                     # The supported resource type is:
   2873                     #
   2874                     # Google Cloud Storage:
   2875                     #
   2876                     #   storage.googleapis.com/{bucket}/{object}
   2877                     #   bucket.storage.googleapis.com/{object}
   2878               },
   2879               "vmId": "A String", # The ID string of the VM.
   2880               "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
   2881               "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
   2882               "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
   2883                   # access the Cloud Dataflow API.
   2884                 "A String",
   2885               ],
   2886               "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
   2887                   # taskrunner; e.g. "root".
   2888               "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
   2889                   #
   2890                   # When workers access Google Cloud APIs, they logically do so via
   2891                   # relative URLs.  If this field is specified, it supplies the base
   2892                   # URL to use for resolving these relative URLs.  The normative
   2893                   # algorithm used is defined by RFC 1808, "Relative Uniform Resource
   2894                   # Locators".
   2895                   #
   2896                   # If not specified, the default value is "http://www.googleapis.com/"
   2897               "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
   2898                   # taskrunner; e.g. "wheel".
   2899               "languageHint": "A String", # The suggested backend language.
   2900               "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
   2901                   # console.
   2902               "streamingWorkerMainClass": "A String", # The streaming worker main class name.
   2903               "logDir": "A String", # The directory on the VM to store logs.
   2904               "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
   2905               "harnessCommand": "A String", # The command to launch the worker harness.
   2906               "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
   2907                   # temporary storage.
   2908                   #
   2909                   # The supported resource type is:
   2910                   #
   2911                   # Google Cloud Storage:
   2912                   #   storage.googleapis.com/{bucket}/{object}
   2913                   #   bucket.storage.googleapis.com/{object}
   2914               "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
   2915             },
   2916             "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
   2917                 # are supported.
   2918             "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
   2919                 # service will attempt to choose a reasonable default.
   2920             "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
   2921                 # the service will use the network "default".
   2922             "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
   2923                 # will attempt to choose a reasonable default.
   2924             "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
   2925                 # attempt to choose a reasonable default.
   2926             "dataDisks": [ # Data disks that are used by a VM in this workflow.
   2927               { # Describes the data disk used by a workflow job.
   2928                 "mountPoint": "A String", # Directory in a VM where disk is mounted.
   2929                 "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
   2930                     # attempt to choose a reasonable default.
   2931                 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
   2932                     # must be a disk type appropriate to the project and zone in which
   2933                     # the workers will run.  If unknown or unspecified, the service
   2934                     # will attempt to choose a reasonable default.
   2935                     #
   2936                     # For example, the standard persistent disk type is a resource name
   2937                     # typically ending in "pd-standard".  If SSD persistent disks are
   2938                     # available, the resource name typically ends with "pd-ssd".  The
   2939                     # actual valid values are defined by the Google Compute Engine API,
   2940                     # not by the Cloud Dataflow API; consult the Google Compute Engine
   2941                     # documentation for more information about determining the set of
   2942                     # available disk types for a particular project and zone.
   2943                     #
   2944                     # Google Compute Engine Disk types are local to a particular
   2945                     # project in a particular zone, and so the resource name will
   2946                     # typically look something like this:
   2947                     #
   2948                     # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
   2949               },
   2950             ],
   2951             "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
   2952                 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
   2953                 # `TEARDOWN_NEVER`.
   2954                 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
   2955                 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
   2956                 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
   2957                 # down.
   2958                 #
   2959                 # If the workers are not torn down by the service, they will
   2960                 # continue to run and use Google Compute Engine VM resources in the
   2961                 # user's project until they are explicitly terminated by the user.
   2962                 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
   2963                 # policy except for small, manually supervised test jobs.
   2964                 #
   2965                 # If unknown or unspecified, the service will attempt to choose a reasonable
   2966                 # default.
   2967             "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
   2968                 # Compute Engine API.
   2969             "ipConfiguration": "A String", # Configuration for VM IPs.
   2970             "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
   2971                 # service will choose a number of threads (according to the number of cores
   2972                 # on the selected machine type for batch, or 1 by convention for streaming).
   2973             "poolArgs": { # Extra arguments for this worker pool.
   2974               "a_key": "", # Properties of the object. Contains field @type with type URL.
   2975             },
   2976             "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
   2977                 # execute the job.  If zero or unspecified, the service will
   2978                 # attempt to choose a reasonable default.
   2979             "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
   2980                 # harness, residing in Google Container Registry.
   2981             "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
   2982                 # the form "regions/REGION/subnetworks/SUBNETWORK".
   2983             "packages": [ # Packages to be installed on workers.
   2984               { # The packages that must be installed in order for a worker to run the
   2985                   # steps of the Cloud Dataflow job that will be assigned to its worker
   2986                   # pool.
   2987                   #
   2988                   # This is the mechanism by which the Cloud Dataflow SDK causes code to
   2989                   # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
   2990                   # might use this to install jars containing the user's code and all of the
   2991                   # various dependencies (libraries, data files, etc.) required in order
   2992                   # for that code to run.
   2993                 "location": "A String", # The resource to read the package from. The supported resource type is:
   2994                     #
   2995                     # Google Cloud Storage:
   2996                     #
   2997                     #   storage.googleapis.com/{bucket}
   2998                     #   bucket.storage.googleapis.com/
   2999                 "name": "A String", # The name of the package.
   3000               },
   3001             ],
   3002             "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
   3003               "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
   3004               "algorithm": "A String", # The algorithm to use for autoscaling.
   3005             },
   3006             "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
   3007                 # select a default set of packages which are useful to worker
   3008                 # harnesses written in a particular language.
   3009             "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
   3010                 # attempt to choose a reasonable default.
   3011             "metadata": { # Metadata to set on the Google Compute Engine VMs.
   3012               "a_key": "A String",
   3013             },
   3014           },
   3015         ],
   3016       },
   3017       "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as its executed form. # Preliminary field: The format of this data may change at any time.
   3018           # A description of the user pipeline and the stages through which it is executed.
   3019           # Created by the Cloud Dataflow service.  Only retrieved with
   3020           # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
   3021           # This data is provided by the Dataflow service for ease of visualizing
   3022           # the pipeline and interpreting Dataflow-provided metrics.
   3023         "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
   3024           { # Description of the type, names/ids, and input/outputs for a transform.
   3025             "kind": "A String", # Type of transform.
   3026             "name": "A String", # User provided name for this transform instance.
   3027             "inputCollectionName": [ # User names for all collection inputs to this transform.
   3028               "A String",
   3029             ],
   3030             "displayData": [ # Transform-specific display data.
   3031               { # Data provided with a pipeline or transform to provide descriptive info.
   3032                 "shortStrValue": "A String", # A possible additional shorter value to display.
   3033                     # For example a java_class_name_value of com.mypackage.MyDoFn
   3034                     # will be stored with MyDoFn as the short_str_value and
   3035                     # com.mypackage.MyDoFn as the java_class_name value.
   3036                     # short_str_value can be displayed and java_class_name_value
   3037                     # will be displayed as a tooltip.
   3038                 "durationValue": "A String", # Contains value if the data is of duration type.
   3039                 "url": "A String", # An optional full URL.
   3040                 "floatValue": 3.14, # Contains value if the data is of float type.
   3041                 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   3042                     # language namespace (e.g. a Python module) that defines the display data.
   3043                     # This allows a dax monitoring system to specially handle the data
   3044                     # and perform custom rendering.
   3045                 "javaClassValue": "A String", # Contains value if the data is of java class type.
   3046                 "label": "A String", # An optional label to display in a dax UI for the element.
   3047                 "boolValue": True or False, # Contains value if the data is of a boolean type.
   3048                 "strValue": "A String", # Contains value if the data is of string type.
   3049                 "key": "A String", # The key identifying the display data.
   3050                     # This is intended to be used as a label for the display data
   3051                     # when viewed in a dax monitoring system.
   3052                 "int64Value": "A String", # Contains value if the data is of int64 type.
   3053                 "timestampValue": "A String", # Contains value if the data is of timestamp type.
   3054               },
   3055             ],
   3056             "outputCollectionName": [ # User names for all collection outputs to this transform.
   3057               "A String",
   3058             ],
   3059             "id": "A String", # SDK generated id of this transform instance.
   3060           },
   3061         ],
   3062         "displayData": [ # Pipeline level display data.
   3063           { # Data provided with a pipeline or transform to provide descriptive info.
   3064             "shortStrValue": "A String", # A possible additional shorter value to display.
   3065                 # For example a java_class_name_value of com.mypackage.MyDoFn
   3066                 # will be stored with MyDoFn as the short_str_value and
   3067                 # com.mypackage.MyDoFn as the java_class_name value.
   3068                 # short_str_value can be displayed and java_class_name_value
   3069                 # will be displayed as a tooltip.
   3070             "durationValue": "A String", # Contains value if the data is of duration type.
   3071             "url": "A String", # An optional full URL.
   3072             "floatValue": 3.14, # Contains value if the data is of float type.
   3073             "namespace": "A String", # The namespace for the key. This is usually a class name or programming
   3074                 # language namespace (e.g. a Python module) that defines the display data.
   3075                 # This allows a dax monitoring system to specially handle the data
   3076                 # and perform custom rendering.
   3077             "javaClassValue": "A String", # Contains value if the data is of java class type.
   3078             "label": "A String", # An optional label to display in a dax UI for the element.
   3079             "boolValue": True or False, # Contains value if the data is of a boolean type.
   3080             "strValue": "A String", # Contains value if the data is of string type.
   3081             "key": "A String", # The key identifying the display data.
   3082                 # This is intended to be used as a label for the display data
   3083                 # when viewed in a dax monitoring system.
   3084             "int64Value": "A String", # Contains value if the data is of int64 type.
   3085             "timestampValue": "A String", # Contains value if the data is of timestamp type.
   3086           },
   3087         ],
   3088         "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
   3089           { # Description of the composing transforms, names/ids, and input/outputs of a
   3090               # stage of execution.  Some composing transforms and sources may have been
   3091               # generated by the Dataflow service during execution planning.
   3092             "componentSource": [ # Collections produced and consumed by component transforms of this stage.
   3093               { # Description of an interstitial value between transforms in an execution
   3094                   # stage.
   3095                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   3096                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   3097                     # source is most closely associated.
   3098                 "name": "A String", # Dataflow service generated name for this source.
   3099               },
   3100             ],
   3101             "kind": "A String", # Type of transform this stage is executing.
   3102             "name": "A String", # Dataflow service generated name for this stage.
   3103             "outputSource": [ # Output sources for this stage.
   3104               { # Description of an input or output of an execution stage.
   3105                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   3106                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   3107                     # source is most closely associated.
   3108                 "name": "A String", # Dataflow service generated name for this source.
   3109                 "sizeBytes": "A String", # Size of the source, if measurable.
   3110               },
   3111             ],
   3112             "inputSource": [ # Input sources for this stage.
   3113               { # Description of an input or output of an execution stage.
   3114                 "userName": "A String", # Human-readable name for this source; may be user or system generated.
   3115                 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
   3116                     # source is most closely associated.
   3117                 "name": "A String", # Dataflow service generated name for this source.
   3118                 "sizeBytes": "A String", # Size of the source, if measurable.
   3119               },
   3120             ],
   3121             "componentTransform": [ # Transforms that comprise this execution stage.
   3122               { # Description of a transform executed as part of an execution stage.
   3123                 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
   3124                 "originalTransform": "A String", # User name for the original user transform with which this transform is
   3125                     # most closely associated.
   3126                 "name": "A String", # Dataflow service generated name for this transform.
   3127               },
   3128             ],
   3129             "id": "A String", # Dataflow service generated id for this stage.
   3130           },
   3131         ],
   3132       },
   3133       "steps": [ # The top-level steps that constitute the entire job.
   3134         { # Defines a particular step within a Cloud Dataflow job.
   3135             #
   3136             # A job consists of multiple steps, each of which performs some
   3137             # specific operation as part of the overall job.  Data is typically
   3138             # passed from one step to another as part of the job.
   3139             #
   3140             # Here's an example of a sequence of steps which together implement a
   3141             # Map-Reduce job:
   3142             #
   3143             #   * Read a collection of data from some source, parsing the
   3144             #     collection's elements.
   3145             #
   3146             #   * Validate the elements.
   3147             #
   3148             #   * Apply a user-defined function to map each element to some value
   3149             #     and extract an element-specific key value.
   3150             #
   3151             #   * Group elements with the same key into a single element with
   3152             #     that key, transforming a multiply-keyed collection into a
   3153             #     uniquely-keyed collection.
   3154             #
   3155             #   * Write the elements out to some data sink.
   3156             #
   3157             # Note that the Cloud Dataflow service may be used to run many different
   3158             # types of jobs, not just Map-Reduce.
   3159           "kind": "A String", # The kind of step in the Cloud Dataflow job.
   3160           "properties": { # Named properties associated with the step. Each kind of
   3161               # predefined step has its own required set of properties.
   3162               # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
   3163             "a_key": "", # Properties of the object.
   3164           },
   3165           "name": "A String", # The name that identifies the step. This must be unique for each
   3166               # step with respect to all other steps in the Cloud Dataflow job.
   3167         },
   3168       ],
   3169       "currentStateTime": "A String", # The timestamp associated with the current state.
   3170       "tempFiles": [ # A set of files the system should be aware of that are used
   3171           # for temporary storage. These temporary files will be
   3172           # removed on job completion.
   3173           # No duplicates are allowed.
   3174           # No file patterns are supported.
   3175           #
   3176           # The supported files are:
   3177           #
   3178           # Google Cloud Storage:
   3179           #
   3180           #    storage.googleapis.com/{bucket}/{object}
   3181           #    bucket.storage.googleapis.com/{object}
   3182         "A String",
   3183       ],
   3184       "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
   3185           # callers cannot mutate it.
   3186         { # A message describing the state of a particular execution stage.
   3187           "executionStageName": "A String", # The name of the execution stage.
   3188           "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
   3189           "currentStateTime": "A String", # The time at which the stage transitioned to this state.
   3190         },
   3191       ],
   3192       "type": "A String", # The type of Cloud Dataflow job.
   3193       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
   3194           # Cloud Dataflow service.
   3195       "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
   3196           # of the job it replaced.
   3197           #
   3198           # When sending a `CreateJobRequest`, you can update a job by specifying it
   3199           # here. The job named here is stopped, and its intermediate state is
   3200           # transferred to this job.
   3201       "executionInfo": { # Additional information about how a Cloud Dataflow job will be # Deprecated.
   3202           # executed that isn't contained in the submitted job.
   3203         "stages": { # A mapping from each stage to the information about that stage.
   3204           "a_key": { # Contains information about how a particular
   3205               # google.dataflow.v1beta3.Step will be executed.
   3206             "stepName": [ # The steps associated with the execution stage.
   3207                 # Note that stages may have several steps, and that a given step
   3208                 # might be run by more than one stage.
   3209               "A String",
   3210             ],
   3211           },
   3212         },
   3213       },
   3214     }</pre>
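<p>A short illustrative sketch (not part of the generated reference) of reading the returned
Job object; <code>job</code> stands for the dictionary returned by
<code>update(...).execute()</code> in the example above.</p>
<pre>
  # Fields documented above: currentState, currentStateTime, and stageStates.
  print(job.get('currentState'), job.get('currentStateTime'))
  for stage in job.get('stageStates', []):
      print(stage['executionStageName'], stage['executionStageState'])
</pre>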
   3215 </div>
   3216 
   3217 </body></html>