1 <html><body>
2 <style>
3
4 body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13 }
14
15 body {
16 font-size: 13px;
17 padding: 1em;
18 }
19
20 h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23 }
24
25 h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28 }
29
30 h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34 }
35
36 pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39 }
40
41 pre {
42 margin-top: 0.5em;
43 }
44
45 h1, h2, h3, p {
46 font-family: Arial, sans-serif;
47 }
48
49 h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51 }
52
53 .toc_element {
54 margin-top: 0.5em;
55 }
56
57 .firstline {
58 margin-left: 2em;
59 }
60
61 .method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66 }
67
68 .details {
69 font-weight: bold;
70 font-size: 14px;
71 }
72
73 </style>
74
75 <h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>
76 <h2>Instance Methods</h2>
77 <p class="toc_element">
78 <code><a href="dataflow_v1b3.projects.locations.jobs.debug.html">debug()</a></code>
79 </p>
80 <p class="firstline">Returns the debug Resource.</p>
81
82 <p class="toc_element">
83 <code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>
84 </p>
85 <p class="firstline">Returns the messages Resource.</p>
86
87 <p class="toc_element">
88 <code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>
89 </p>
90 <p class="firstline">Returns the workItems Resource.</p>
91
92 <p class="toc_element">
93 <code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
94 <p class="firstline">Creates a Cloud Dataflow job.</p>
95 <p class="toc_element">
96 <code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>
97 <p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
98 <p class="toc_element">
99 <code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>
100 <p class="firstline">Request the job status.</p>
101 <p class="toc_element">
102 <code><a href="#list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</a></code></p>
103 <p class="firstline">List the jobs of a project.</p>
104 <p class="toc_element">
105 <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
106 <p class="firstline">Retrieves the next page of results.</p>
107 <p class="toc_element">
108 <code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
109 <p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
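<p>A minimal usage sketch for reaching this collection from the generated Python client. It assumes the google-api-python-client library is installed, that default credentials are available in the environment, and it uses placeholder project and location values.</p>
<pre>
from googleapiclient.discovery import build

# Build the Dataflow client from the discovery service; credentials are
# resolved from the environment in this sketch.
dataflow = build('dataflow', 'v1b3')

# The collection documented on this page.
jobs = dataflow.projects().locations().jobs()

# List jobs for a placeholder project and location, following pagination
# with list_next() until no further pages remain.
request = jobs.list(projectId='my-project-id', location='us-central1')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):  # ListJobsResponse carries its results under 'jobs'
        print(job['id'], job.get('currentState'))
    request = jobs.list_next(previous_request=request, previous_response=response)
</pre>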
110 <h3>Method Details</h3>
111 <div class="method">
112 <code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>
113 <pre>Creates a Cloud Dataflow job.
114
115 Args:
116 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
117 location: string, The location that contains this job. (required)
118 body: object, The request body. (required)
119 The object takes the form of:
120
121 { # Defines a job to be run by the Cloud Dataflow service.
122 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
123 # If this field is set, the service will ensure its uniqueness.
124 # The request to create a job will fail if the service has knowledge of a
125 # previously submitted job with the same client's ID and job name.
126 # The caller may use this field to ensure idempotence of job
127 # creation across retried attempts to create a job.
128 # By default, the field is empty and, in that case, the service ignores it.
129 "requestedState": "A String", # The job's requested state.
130 #
131 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
132 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
133 # also be used to directly set a job's requested state to
134 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
135 # job if it has not already reached a terminal state.
136 "name": "A String", # The user-specified Cloud Dataflow job name.
137 #
138 # Only one Job with a given name may exist in a project at any
139 # given time. If a caller attempts to create a Job with the same
140 # name as an already-existing Job, the attempt returns the
141 # existing Job.
142 #
143 # The name must match the regular expression
144 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
145 "location": "A String", # The location that contains this job.
146 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
147 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
148 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
149 "currentState": "A String", # The current state of the job.
150 #
151 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
152 # specified.
153 #
154 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
155 # terminal state. After a job has reached a terminal state, no
156 # further state updates may be made.
157 #
158 # This field may be mutated by the Cloud Dataflow service;
159 # callers cannot mutate it.
160 "labels": { # User-defined labels for this job.
161 #
162 # The labels map can contain no more than 64 entries. Entries of the labels
163 # map are UTF8 strings that comply with the following restrictions:
164 #
165 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
166 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
167 # * Both keys and values are additionally constrained to be <= 128 bytes in
168 # size.
169 "a_key": "A String",
170 },
171 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
172 # corresponding name prefixes of the new job.
173 "a_key": "A String",
174 },
175 "id": "A String", # The unique ID of this job.
176 #
177 # This field is set by the Cloud Dataflow service when the Job is
178 # created, and is immutable for the life of the job.
179 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
180 "version": { # A structure describing which components and their versions of the service
181 # are required in order to run the job.
182 "a_key": "", # Properties of the object.
183 },
184 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
185 # storage. The system will append the suffix "/temp-{JOBNAME}" to
186 # this resource prefix, where {JOBNAME} is the value of the
187 # job_name field. The resulting bucket and object prefix is used
188 # as the prefix of the resources used to store temporary data
189 # needed during the job execution. NOTE: This will override the
190 # value in taskrunner_settings.
191 # The supported resource type is:
192 #
193 # Google Cloud Storage:
194 #
195 # storage.googleapis.com/{bucket}/{object}
196 # bucket.storage.googleapis.com/{object}
197 "internalExperiments": { # Experimental settings.
198 "a_key": "", # Properties of the object. Contains field @type with type URL.
199 },
200 "dataset": "A String", # The dataset for the current project where various workflow
201 # related tables are stored.
202 #
203 # The supported resource type is:
204 #
205 # Google BigQuery:
206 # bigquery.googleapis.com/{dataset}
207 "experiments": [ # The list of experiments to enable.
208 "A String",
209 ],
210 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
211 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
212 # options are passed through the service and are used to recreate the
213 # SDK pipeline options on the worker in a language agnostic and platform
214 # independent way.
215 "a_key": "", # Properties of the object.
216 },
217 "userAgent": { # A description of the process that generated the request.
218 "a_key": "", # Properties of the object.
219 },
220 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
221 # unspecified, the service will attempt to choose a reasonable
222 # default. This should be in the form of the API service name,
223 # e.g. "compute.googleapis.com".
224 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
225 # specified in order for the job to have workers.
226 { # Describes one particular pool of Cloud Dataflow workers to be
227 # instantiated by the Cloud Dataflow service in order to perform the
228 # computations required by a job. Note that a workflow job may use
229 # multiple pools, in order to match the various computational
230 # requirements of the various stages of the job.
231 "diskSourceImage": "A String", # Fully qualified source image for disks.
232 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
233 # using the standard Dataflow task runner. Users should ignore
234 # this field.
235 "workflowFileName": "A String", # The file to store the workflow in.
236 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
237 # will not be uploaded.
238 #
239 # The supported resource type is:
240 #
241 # Google Cloud Storage:
242 # storage.googleapis.com/{bucket}/{object}
243 # bucket.storage.googleapis.com/{object}
244 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
245 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
246 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
247 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
248 # "shuffle/v1beta1".
249 "workerId": "A String", # The ID of the worker running this pipeline.
250 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
251 #
252 # When workers access Google Cloud APIs, they logically do so via
253 # relative URLs. If this field is specified, it supplies the base
254 # URL to use for resolving these relative URLs. The normative
255 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
256 # Locators".
257 #
258 # If not specified, the default value is "http://www.googleapis.com/"
259 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
260 # "dataflow/v1b3/projects".
261 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
262 # storage.
263 #
264 # The supported resource type is:
265 #
266 # Google Cloud Storage:
267 #
268 # storage.googleapis.com/{bucket}/{object}
269 # bucket.storage.googleapis.com/{object}
270 },
271 "vmId": "A String", # The ID string of the VM.
272 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
273 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
274 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
275 # access the Cloud Dataflow API.
276 "A String",
277 ],
278 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
279 # taskrunner; e.g. "root".
280 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
281 #
282 # When workers access Google Cloud APIs, they logically do so via
283 # relative URLs. If this field is specified, it supplies the base
284 # URL to use for resolving these relative URLs. The normative
285 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
286 # Locators".
287 #
288 # If not specified, the default value is "http://www.googleapis.com/"
289 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
290 # taskrunner; e.g. "wheel".
291 "languageHint": "A String", # The suggested backend language.
292 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
293 # console.
294 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
295 "logDir": "A String", # The directory on the VM to store logs.
296 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
297 "harnessCommand": "A String", # The command to launch the worker harness.
298 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
299 # temporary storage.
300 #
301 # The supported resource type is:
302 #
303 # Google Cloud Storage:
304 # storage.googleapis.com/{bucket}/{object}
305 # bucket.storage.googleapis.com/{object}
306 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
307 },
308 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
309 # are supported.
310 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
311 # service will attempt to choose a reasonable default.
312 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
313 # the service will use the network "default".
314 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
315 # will attempt to choose a reasonable default.
316 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
317 # attempt to choose a reasonable default.
318 "dataDisks": [ # Data disks that are used by a VM in this workflow.
319 { # Describes the data disk used by a workflow job.
320 "mountPoint": "A String", # Directory in a VM where disk is mounted.
321 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
322 # attempt to choose a reasonable default.
323 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
324 # must be a disk type appropriate to the project and zone in which
325 # the workers will run. If unknown or unspecified, the service
326 # will attempt to choose a reasonable default.
327 #
328 # For example, the standard persistent disk type is a resource name
329 # typically ending in "pd-standard". If SSD persistent disks are
330 # available, the resource name typically ends with "pd-ssd". The
331 # actual valid values are defined by the Google Compute Engine API,
332 # not by the Cloud Dataflow API; consult the Google Compute Engine
333 # documentation for more information about determining the set of
334 # available disk types for a particular project and zone.
335 #
336 # Google Compute Engine Disk types are local to a particular
337 # project in a particular zone, and so the resource name will
338 # typically look something like this:
339 #
340 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
341 },
342 ],
343 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
344 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
345 # `TEARDOWN_NEVER`.
346 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
347 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
348 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
349 # down.
350 #
351 # If the workers are not torn down by the service, they will
352 # continue to run and use Google Compute Engine VM resources in the
353 # user's project until they are explicitly terminated by the user.
354 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
355 # policy except for small, manually supervised test jobs.
356 #
357 # If unknown or unspecified, the service will attempt to choose a reasonable
358 # default.
359 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
360 # Compute Engine API.
361 "ipConfiguration": "A String", # Configuration for VM IPs.
362 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
363 # service will choose a number of threads (according to the number of cores
364 # on the selected machine type for batch, or 1 by convention for streaming).
365 "poolArgs": { # Extra arguments for this worker pool.
366 "a_key": "", # Properties of the object. Contains field @type with type URL.
367 },
368 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
369 # execute the job. If zero or unspecified, the service will
370 # attempt to choose a reasonable default.
371 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
372 # harness, residing in Google Container Registry.
373 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
374 # the form "regions/REGION/subnetworks/SUBNETWORK".
375 "packages": [ # Packages to be installed on workers.
376 { # The packages that must be installed in order for a worker to run the
377 # steps of the Cloud Dataflow job that will be assigned to its worker
378 # pool.
379 #
380 # This is the mechanism by which the Cloud Dataflow SDK causes code to
381 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
382 # might use this to install jars containing the user's code and all of the
383 # various dependencies (libraries, data files, etc.) required in order
384 # for that code to run.
385 "location": "A String", # The resource to read the package from. The supported resource type is:
386 #
387 # Google Cloud Storage:
388 #
389 # storage.googleapis.com/{bucket}
390 # bucket.storage.googleapis.com/
391 "name": "A String", # The name of the package.
392 },
393 ],
394 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
395 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
396 "algorithm": "A String", # The algorithm to use for autoscaling.
397 },
398 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
399 # select a default set of packages which are useful to worker
400 # harnesses written in a particular language.
401 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
402 # attempt to choose a reasonable default.
403 "metadata": { # Metadata to set on the Google Compute Engine VMs.
404 "a_key": "A String",
405 },
406 },
407 ],
408 },
409 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
410 # A description of the user pipeline and stages through which it is executed.
411 # Created by Cloud Dataflow service. Only retrieved with
412 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
413 # form. This data is provided by the Dataflow service for ease of visualizing
414 # the pipeline and interpretting Dataflow provided metrics.
415 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
416 { # Description of the type, names/ids, and input/outputs for a transform.
417 "kind": "A String", # Type of transform.
418 "name": "A String", # User provided name for this transform instance.
419 "inputCollectionName": [ # User names for all collection inputs to this transform.
420 "A String",
421 ],
422 "displayData": [ # Transform-specific display data.
423 { # Data provided with a pipeline or transform to provide descriptive info.
424 "shortStrValue": "A String", # A possible additional shorter value to display.
425 # For example a java_class_name_value of com.mypackage.MyDoFn
426 # will be stored with MyDoFn as the short_str_value and
427 # com.mypackage.MyDoFn as the java_class_name value.
428 # short_str_value can be displayed and java_class_name_value
429 # will be displayed as a tooltip.
430 "durationValue": "A String", # Contains value if the data is of duration type.
431 "url": "A String", # An optional full URL.
432 "floatValue": 3.14, # Contains value if the data is of float type.
433 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
434 # language namespace (i.e. python module) which defines the display data.
435 # This allows a dax monitoring system to specially handle the data
436 # and perform custom rendering.
437 "javaClassValue": "A String", # Contains value if the data is of java class type.
438 "label": "A String", # An optional label to display in a dax UI for the element.
439 "boolValue": True or False, # Contains value if the data is of a boolean type.
440 "strValue": "A String", # Contains value if the data is of string type.
441 "key": "A String", # The key identifying the display data.
442 # This is intended to be used as a label for the display data
443 # when viewed in a dax monitoring system.
444 "int64Value": "A String", # Contains value if the data is of int64 type.
445 "timestampValue": "A String", # Contains value if the data is of timestamp type.
446 },
447 ],
448 "outputCollectionName": [ # User names for all collection outputs to this transform.
449 "A String",
450 ],
451 "id": "A String", # SDK generated id of this transform instance.
452 },
453 ],
454 "displayData": [ # Pipeline level display data.
455 { # Data provided with a pipeline or transform to provide descriptive info.
456 "shortStrValue": "A String", # A possible additional shorter value to display.
457 # For example a java_class_name_value of com.mypackage.MyDoFn
458 # will be stored with MyDoFn as the short_str_value and
459 # com.mypackage.MyDoFn as the java_class_name value.
460 # short_str_value can be displayed and java_class_name_value
461 # will be displayed as a tooltip.
462 "durationValue": "A String", # Contains value if the data is of duration type.
463 "url": "A String", # An optional full URL.
464 "floatValue": 3.14, # Contains value if the data is of float type.
465 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
466 # language namespace (i.e. python module) which defines the display data.
467 # This allows a dax monitoring system to specially handle the data
468 # and perform custom rendering.
469 "javaClassValue": "A String", # Contains value if the data is of java class type.
470 "label": "A String", # An optional label to display in a dax UI for the element.
471 "boolValue": True or False, # Contains value if the data is of a boolean type.
472 "strValue": "A String", # Contains value if the data is of string type.
473 "key": "A String", # The key identifying the display data.
474 # This is intended to be used as a label for the display data
475 # when viewed in a dax monitoring system.
476 "int64Value": "A String", # Contains value if the data is of int64 type.
477 "timestampValue": "A String", # Contains value if the data is of timestamp type.
478 },
479 ],
480 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
481 { # Description of the composing transforms, names/ids, and input/outputs of a
482 # stage of execution. Some composing transforms and sources may have been
483 # generated by the Dataflow service during execution planning.
484 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
485 { # Description of an interstitial value between transforms in an execution
486 # stage.
487 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
488 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
489 # source is most closely associated.
490 "name": "A String", # Dataflow service generated name for this source.
491 },
492 ],
493 "kind": "A String", # Type of tranform this stage is executing.
494 "name": "A String", # Dataflow service generated name for this stage.
495 "outputSource": [ # Output sources for this stage.
496 { # Description of an input or output of an execution stage.
497 "userName": "A String", # Human-readable name for this source; may be user or system generated.
498 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
499 # source is most closely associated.
500 "name": "A String", # Dataflow service generated name for this source.
501 "sizeBytes": "A String", # Size of the source, if measurable.
502 },
503 ],
504 "inputSource": [ # Input sources for this stage.
505 { # Description of an input or output of an execution stage.
506 "userName": "A String", # Human-readable name for this source; may be user or system generated.
507 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
508 # source is most closely associated.
509 "name": "A String", # Dataflow service generated name for this source.
510 "sizeBytes": "A String", # Size of the source, if measurable.
511 },
512 ],
513 "componentTransform": [ # Transforms that comprise this execution stage.
514 { # Description of a transform executed as part of an execution stage.
515 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
516 "originalTransform": "A String", # User name for the original user transform with which this transform is
517 # most closely associated.
518 "name": "A String", # Dataflow service generated name for this source.
519 },
520 ],
521 "id": "A String", # Dataflow service generated id for this stage.
522 },
523 ],
524 },
525 "steps": [ # The top-level steps that constitute the entire job.
526 { # Defines a particular step within a Cloud Dataflow job.
527 #
528 # A job consists of multiple steps, each of which performs some
529 # specific operation as part of the overall job. Data is typically
530 # passed from one step to another as part of the job.
531 #
532 # Here's an example of a sequence of steps which together implement a
533 # Map-Reduce job:
534 #
535 # * Read a collection of data from some source, parsing the
536 # collection's elements.
537 #
538 # * Validate the elements.
539 #
540 # * Apply a user-defined function to map each element to some value
541 # and extract an element-specific key value.
542 #
543 # * Group elements with the same key into a single element with
544 # that key, transforming a multiply-keyed collection into a
545 # uniquely-keyed collection.
546 #
547 # * Write the elements out to some data sink.
548 #
549 # Note that the Cloud Dataflow service may be used to run many different
550 # types of jobs, not just Map-Reduce.
551 "kind": "A String", # The kind of step in the Cloud Dataflow job.
552 "properties": { # Named properties associated with the step. Each kind of
553 # predefined step has its own required set of properties.
554 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
555 "a_key": "", # Properties of the object.
556 },
557 "name": "A String", # The name that identifies the step. This must be unique for each
558 # step with respect to all other steps in the Cloud Dataflow job.
559 },
560 ],
561 "currentStateTime": "A String", # The timestamp associated with the current state.
562 "tempFiles": [ # A set of files the system should be aware of that are used
563 # for temporary storage. These temporary files will be
564 # removed on job completion.
565 # No duplicates are allowed.
566 # No file patterns are supported.
567 #
568 # The supported files are:
569 #
570 # Google Cloud Storage:
571 #
572 # storage.googleapis.com/{bucket}/{object}
573 # bucket.storage.googleapis.com/{object}
574 "A String",
575 ],
576 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
577 # callers cannot mutate it.
578 { # A message describing the state of a particular execution stage.
579 "executionStageName": "A String", # The name of the execution stage.
580 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
581 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
582 },
583 ],
584 "type": "A String", # The type of Cloud Dataflow job.
585 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
586 # Cloud Dataflow service.
587 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
588 # of the job it replaced.
589 #
590 # When sending a `CreateJobRequest`, you can update a job by specifying it
591 # here. The job named here is stopped, and its intermediate state is
592 # transferred to this job.
593 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
594 # isn't contained in the submitted job.
595 "stages": { # A mapping from each stage to the information about that stage.
596 "a_key": { # Contains information about how a particular
597 # google.dataflow.v1beta3.Step will be executed.
598 "stepName": [ # The steps associated with the execution stage.
599 # Note that stages may have several steps, and that a given step
600 # might be run by more than one stage.
601 "A String",
602 ],
603 },
604 },
605 },
606 }
607
608 x__xgafv: string, V1 error format.
609 Allowed values
610 1 - v1 error format
611 2 - v2 error format
612 replaceJobId: string, Deprecated. This field is now in the Job message.
613 view: string, The level of information requested in response.
614
615 Returns:
616 An object of the form:
617
618 { # Defines a job to be run by the Cloud Dataflow service.
619 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
620 # If this field is set, the service will ensure its uniqueness.
621 # The request to create a job will fail if the service has knowledge of a
622 # previously submitted job with the same client's ID and job name.
623 # The caller may use this field to ensure idempotence of job
624 # creation across retried attempts to create a job.
625 # By default, the field is empty and, in that case, the service ignores it.
626 "requestedState": "A String", # The job's requested state.
627 #
628 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
629 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
630 # also be used to directly set a job's requested state to
631 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
632 # job if it has not already reached a terminal state.
633 "name": "A String", # The user-specified Cloud Dataflow job name.
634 #
635 # Only one Job with a given name may exist in a project at any
636 # given time. If a caller attempts to create a Job with the same
637 # name as an already-existing Job, the attempt returns the
638 # existing Job.
639 #
640 # The name must match the regular expression
641 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
642 "location": "A String", # The location that contains this job.
643 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
644 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
645 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
646 "currentState": "A String", # The current state of the job.
647 #
648 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
649 # specified.
650 #
651 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
652 # terminal state. After a job has reached a terminal state, no
653 # further state updates may be made.
654 #
655 # This field may be mutated by the Cloud Dataflow service;
656 # callers cannot mutate it.
657 "labels": { # User-defined labels for this job.
658 #
659 # The labels map can contain no more than 64 entries. Entries of the labels
660 # map are UTF8 strings that comply with the following restrictions:
661 #
662 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
663 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
664 # * Both keys and values are additionally constrained to be <= 128 bytes in
665 # size.
666 "a_key": "A String",
667 },
668 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
669 # corresponding name prefixes of the new job.
670 "a_key": "A String",
671 },
672 "id": "A String", # The unique ID of this job.
673 #
674 # This field is set by the Cloud Dataflow service when the Job is
675 # created, and is immutable for the life of the job.
676 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
677 "version": { # A structure describing which components and their versions of the service
678 # are required in order to run the job.
679 "a_key": "", # Properties of the object.
680 },
681 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
682 # storage. The system will append the suffix "/temp-{JOBNAME}" to
683 # this resource prefix, where {JOBNAME} is the value of the
684 # job_name field. The resulting bucket and object prefix is used
685 # as the prefix of the resources used to store temporary data
686 # needed during the job execution. NOTE: This will override the
687 # value in taskrunner_settings.
688 # The supported resource type is:
689 #
690 # Google Cloud Storage:
691 #
692 # storage.googleapis.com/{bucket}/{object}
693 # bucket.storage.googleapis.com/{object}
694 "internalExperiments": { # Experimental settings.
695 "a_key": "", # Properties of the object. Contains field @type with type URL.
696 },
697 "dataset": "A String", # The dataset for the current project where various workflow
698 # related tables are stored.
699 #
700 # The supported resource type is:
701 #
702 # Google BigQuery:
703 # bigquery.googleapis.com/{dataset}
704 "experiments": [ # The list of experiments to enable.
705 "A String",
706 ],
707 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
708 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
709 # options are passed through the service and are used to recreate the
710 # SDK pipeline options on the worker in a language agnostic and platform
711 # independent way.
712 "a_key": "", # Properties of the object.
713 },
714 "userAgent": { # A description of the process that generated the request.
715 "a_key": "", # Properties of the object.
716 },
717 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
718 # unspecified, the service will attempt to choose a reasonable
719 # default. This should be in the form of the API service name,
720 # e.g. "compute.googleapis.com".
721 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
722 # specified in order for the job to have workers.
723 { # Describes one particular pool of Cloud Dataflow workers to be
724 # instantiated by the Cloud Dataflow service in order to perform the
725 # computations required by a job. Note that a workflow job may use
726 # multiple pools, in order to match the various computational
727 # requirements of the various stages of the job.
728 "diskSourceImage": "A String", # Fully qualified source image for disks.
729 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
730 # using the standard Dataflow task runner. Users should ignore
731 # this field.
732 "workflowFileName": "A String", # The file to store the workflow in.
733 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
734 # will not be uploaded.
735 #
736 # The supported resource type is:
737 #
738 # Google Cloud Storage:
739 # storage.googleapis.com/{bucket}/{object}
740 # bucket.storage.googleapis.com/{object}
741 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
742 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
743 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
744 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
745 # "shuffle/v1beta1".
746 "workerId": "A String", # The ID of the worker running this pipeline.
747 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
748 #
749 # When workers access Google Cloud APIs, they logically do so via
750 # relative URLs. If this field is specified, it supplies the base
751 # URL to use for resolving these relative URLs. The normative
752 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
753 # Locators".
754 #
755 # If not specified, the default value is "http://www.googleapis.com/"
756 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
757 # "dataflow/v1b3/projects".
758 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
759 # storage.
760 #
761 # The supported resource type is:
762 #
763 # Google Cloud Storage:
764 #
765 # storage.googleapis.com/{bucket}/{object}
766 # bucket.storage.googleapis.com/{object}
767 },
768 "vmId": "A String", # The ID string of the VM.
769 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
770 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
771 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
772 # access the Cloud Dataflow API.
773 "A String",
774 ],
775 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
776 # taskrunner; e.g. "root".
777 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
778 #
779 # When workers access Google Cloud APIs, they logically do so via
780 # relative URLs. If this field is specified, it supplies the base
781 # URL to use for resolving these relative URLs. The normative
782 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
783 # Locators".
784 #
785 # If not specified, the default value is "http://www.googleapis.com/"
786 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
787 # taskrunner; e.g. "wheel".
788 "languageHint": "A String", # The suggested backend language.
789 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
790 # console.
791 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
792 "logDir": "A String", # The directory on the VM to store logs.
793 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
794 "harnessCommand": "A String", # The command to launch the worker harness.
795 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
796 # temporary storage.
797 #
798 # The supported resource type is:
799 #
800 # Google Cloud Storage:
801 # storage.googleapis.com/{bucket}/{object}
802 # bucket.storage.googleapis.com/{object}
803 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
804 },
805 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
806 # are supported.
807 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
808 # service will attempt to choose a reasonable default.
809 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
810 # the service will use the network "default".
811 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
812 # will attempt to choose a reasonable default.
813 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
814 # attempt to choose a reasonable default.
815 "dataDisks": [ # Data disks that are used by a VM in this workflow.
816 { # Describes the data disk used by a workflow job.
817 "mountPoint": "A String", # Directory in a VM where disk is mounted.
818 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
819 # attempt to choose a reasonable default.
820 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
821 # must be a disk type appropriate to the project and zone in which
822 # the workers will run. If unknown or unspecified, the service
823 # will attempt to choose a reasonable default.
824 #
825 # For example, the standard persistent disk type is a resource name
826 # typically ending in "pd-standard". If SSD persistent disks are
827 # available, the resource name typically ends with "pd-ssd". The
828 # actual valid values are defined by the Google Compute Engine API,
829 # not by the Cloud Dataflow API; consult the Google Compute Engine
830 # documentation for more information about determining the set of
831 # available disk types for a particular project and zone.
832 #
833 # Google Compute Engine Disk types are local to a particular
834 # project in a particular zone, and so the resource name will
835 # typically look something like this:
836 #
837 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
838 },
839 ],
840 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
841 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
842 # `TEARDOWN_NEVER`.
843 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
844 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
845 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
846 # down.
847 #
848 # If the workers are not torn down by the service, they will
849 # continue to run and use Google Compute Engine VM resources in the
850 # user's project until they are explicitly terminated by the user.
851 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
852 # policy except for small, manually supervised test jobs.
853 #
854 # If unknown or unspecified, the service will attempt to choose a reasonable
855 # default.
856 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
857 # Compute Engine API.
858 "ipConfiguration": "A String", # Configuration for VM IPs.
859 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
860 # service will choose a number of threads (according to the number of cores
861 # on the selected machine type for batch, or 1 by convention for streaming).
862 "poolArgs": { # Extra arguments for this worker pool.
863 "a_key": "", # Properties of the object. Contains field @type with type URL.
864 },
865 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
866 # execute the job. If zero or unspecified, the service will
867 # attempt to choose a reasonable default.
868 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
869 # harness, residing in Google Container Registry.
870 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
871 # the form "regions/REGION/subnetworks/SUBNETWORK".
872 "packages": [ # Packages to be installed on workers.
873 { # The packages that must be installed in order for a worker to run the
874 # steps of the Cloud Dataflow job that will be assigned to its worker
875 # pool.
876 #
877 # This is the mechanism by which the Cloud Dataflow SDK causes code to
878 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
879 # might use this to install jars containing the user's code and all of the
880 # various dependencies (libraries, data files, etc.) required in order
881 # for that code to run.
882 "location": "A String", # The resource to read the package from. The supported resource type is:
883 #
884 # Google Cloud Storage:
885 #
886 # storage.googleapis.com/{bucket}
887 # bucket.storage.googleapis.com/
888 "name": "A String", # The name of the package.
889 },
890 ],
891 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
892 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
893 "algorithm": "A String", # The algorithm to use for autoscaling.
894 },
895 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
896 # select a default set of packages which are useful to worker
897 # harnesses written in a particular language.
898 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
899 # attempt to choose a reasonable default.
900 "metadata": { # Metadata to set on the Google Compute Engine VMs.
901 "a_key": "A String",
902 },
903 },
904 ],
905 },
906 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
907 # A description of the user pipeline and stages through which it is executed.
908 # Created by Cloud Dataflow service. Only retrieved with
909 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
910 # form. This data is provided by the Dataflow service for ease of visualizing
911 # the pipeline and interpretting Dataflow provided metrics.
912 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
913 { # Description of the type, names/ids, and input/outputs for a transform.
914 "kind": "A String", # Type of transform.
915 "name": "A String", # User provided name for this transform instance.
916 "inputCollectionName": [ # User names for all collection inputs to this transform.
917 "A String",
918 ],
919 "displayData": [ # Transform-specific display data.
920 { # Data provided with a pipeline or transform to provide descriptive info.
921 "shortStrValue": "A String", # A possible additional shorter value to display.
922 # For example a java_class_name_value of com.mypackage.MyDoFn
923 # will be stored with MyDoFn as the short_str_value and
924 # com.mypackage.MyDoFn as the java_class_name value.
925 # short_str_value can be displayed and java_class_name_value
926 # will be displayed as a tooltip.
927 "durationValue": "A String", # Contains value if the data is of duration type.
928 "url": "A String", # An optional full URL.
929 "floatValue": 3.14, # Contains value if the data is of float type.
930 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
931 # language namespace (i.e. python module) which defines the display data.
932 # This allows a dax monitoring system to specially handle the data
933 # and perform custom rendering.
934 "javaClassValue": "A String", # Contains value if the data is of java class type.
935 "label": "A String", # An optional label to display in a dax UI for the element.
936 "boolValue": True or False, # Contains value if the data is of a boolean type.
937 "strValue": "A String", # Contains value if the data is of string type.
938 "key": "A String", # The key identifying the display data.
939 # This is intended to be used as a label for the display data
940 # when viewed in a dax monitoring system.
941 "int64Value": "A String", # Contains value if the data is of int64 type.
942 "timestampValue": "A String", # Contains value if the data is of timestamp type.
943 },
944 ],
945 "outputCollectionName": [ # User names for all collection outputs to this transform.
946 "A String",
947 ],
948 "id": "A String", # SDK generated id of this transform instance.
949 },
950 ],
951 "displayData": [ # Pipeline level display data.
952 { # Data provided with a pipeline or transform to provide descriptive info.
953 "shortStrValue": "A String", # A possible additional shorter value to display.
954 # For example a java_class_name_value of com.mypackage.MyDoFn
955 # will be stored with MyDoFn as the short_str_value and
956 # com.mypackage.MyDoFn as the java_class_name value.
957 # short_str_value can be displayed and java_class_name_value
958 # will be displayed as a tooltip.
959 "durationValue": "A String", # Contains value if the data is of duration type.
960 "url": "A String", # An optional full URL.
961 "floatValue": 3.14, # Contains value if the data is of float type.
962 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
963 # language namespace (i.e. python module) which defines the display data.
964 # This allows a dax monitoring system to specially handle the data
965 # and perform custom rendering.
966 "javaClassValue": "A String", # Contains value if the data is of java class type.
967 "label": "A String", # An optional label to display in a dax UI for the element.
968 "boolValue": True or False, # Contains value if the data is of a boolean type.
969 "strValue": "A String", # Contains value if the data is of string type.
970 "key": "A String", # The key identifying the display data.
971 # This is intended to be used as a label for the display data
972 # when viewed in a dax monitoring system.
973 "int64Value": "A String", # Contains value if the data is of int64 type.
974 "timestampValue": "A String", # Contains value if the data is of timestamp type.
975 },
976 ],
977 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
978 { # Description of the composing transforms, names/ids, and input/outputs of a
979 # stage of execution. Some composing transforms and sources may have been
980 # generated by the Dataflow service during execution planning.
981 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
982 { # Description of an interstitial value between transforms in an execution
983 # stage.
984 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
985 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
986 # source is most closely associated.
987 "name": "A String", # Dataflow service generated name for this source.
988 },
989 ],
990 "kind": "A String", # Type of tranform this stage is executing.
991 "name": "A String", # Dataflow service generated name for this stage.
992 "outputSource": [ # Output sources for this stage.
993 { # Description of an input or output of an execution stage.
994 "userName": "A String", # Human-readable name for this source; may be user or system generated.
995 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
996 # source is most closely associated.
997 "name": "A String", # Dataflow service generated name for this source.
998 "sizeBytes": "A String", # Size of the source, if measurable.
999 },
1000 ],
1001 "inputSource": [ # Input sources for this stage.
1002 { # Description of an input or output of an execution stage.
1003 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1004 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1005 # source is most closely associated.
1006 "name": "A String", # Dataflow service generated name for this source.
1007 "sizeBytes": "A String", # Size of the source, if measurable.
1008 },
1009 ],
1010 "componentTransform": [ # Transforms that comprise this execution stage.
1011 { # Description of a transform executed as part of an execution stage.
1012 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1013 "originalTransform": "A String", # User name for the original user transform with which this transform is
1014 # most closely associated.
1015 "name": "A String", # Dataflow service generated name for this source.
1016 },
1017 ],
1018 "id": "A String", # Dataflow service generated id for this stage.
1019 },
1020 ],
1021 },
1022 "steps": [ # The top-level steps that constitute the entire job.
1023 { # Defines a particular step within a Cloud Dataflow job.
1024 #
1025 # A job consists of multiple steps, each of which performs some
1026 # specific operation as part of the overall job. Data is typically
1027 # passed from one step to another as part of the job.
1028 #
1029 # Here's an example of a sequence of steps which together implement a
1030 # Map-Reduce job:
1031 #
1032 # * Read a collection of data from some source, parsing the
1033 # collection's elements.
1034 #
1035 # * Validate the elements.
1036 #
1037 # * Apply a user-defined function to map each element to some value
1038 # and extract an element-specific key value.
1039 #
1040 # * Group elements with the same key into a single element with
1041 # that key, transforming a multiply-keyed collection into a
1042 # uniquely-keyed collection.
1043 #
1044 # * Write the elements out to some data sink.
1045 #
1046 # Note that the Cloud Dataflow service may be used to run many different
1047 # types of jobs, not just Map-Reduce.
1048 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1049 "properties": { # Named properties associated with the step. Each kind of
1050 # predefined step has its own required set of properties.
1051 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1052 "a_key": "", # Properties of the object.
1053 },
1054 "name": "A String", # The name that identifies the step. This must be unique for each
1055 # step with respect to all other steps in the Cloud Dataflow job.
1056 },
1057 ],
1058 "currentStateTime": "A String", # The timestamp associated with the current state.
1059 "tempFiles": [ # A set of files the system should be aware of that are used
1060 # for temporary storage. These temporary files will be
1061 # removed on job completion.
1062 # No duplicates are allowed.
1063 # No file patterns are supported.
1064 #
1065 # The supported files are:
1066 #
1067 # Google Cloud Storage:
1068 #
1069 # storage.googleapis.com/{bucket}/{object}
1070 # bucket.storage.googleapis.com/{object}
1071 "A String",
1072 ],
1073 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1074 # callers cannot mutate it.
1075 { # A message describing the state of a particular execution stage.
1076 "executionStageName": "A String", # The name of the execution stage.
1077 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
1078 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1079 },
1080 ],
1081 "type": "A String", # The type of Cloud Dataflow job.
1082 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1083 # Cloud Dataflow service.
1084 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1085 # of the job it replaced.
1086 #
1087 # When sending a `CreateJobRequest`, you can update a job by specifying it
1088 # here. The job named here is stopped, and its intermediate state is
1089 # transferred to this job.
1090 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1091 # isn't contained in the submitted job.
1092 "stages": { # A mapping from each stage to the information about that stage.
1093 "a_key": { # Contains information about how a particular
1094 # google.dataflow.v1beta3.Step will be executed.
1095 "stepName": [ # The steps associated with the execution stage.
1096 # Note that stages may have several steps, and that a given step
1097 # might be run by more than one stage.
1098 "A String",
1099 ],
1100 },
1101 },
1102 },
1103 }</pre>
1104 </div>
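<p>A minimal sketch of calling create() above from the generated Python client, with placeholder project, location, and job values. The body only illustrates the call shape using a few fields from the request schema; a real job also needs steps, an environment, and the other fields described above.</p>
<pre>
from googleapiclient.discovery import build

jobs = build('dataflow', 'v1b3').projects().locations().jobs()

# Hypothetical request body; field names come from the schema documented above.
body = {
    'name': 'example-wordcount-job',   # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
    'projectId': 'my-project-id',
    'location': 'us-central1',
    'labels': {'team': 'data-eng'},
}

created = jobs.create(projectId='my-project-id',
                      location='us-central1',
                      body=body).execute()

# The response is a Job resource as described in the Returns section above.
print(created['id'], created.get('currentState'))
</pre>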
1105 
1106 <div class="method">
1107 <code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>
1108 <pre>Gets the state of the specified Cloud Dataflow job.
1109
1110 Args:
1111 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1112 location: string, The location that contains this job. (required)
1113 jobId: string, The job ID. (required)
1114 x__xgafv: string, V1 error format.
1115 Allowed values
1116 1 - v1 error format
1117 2 - v2 error format
1118 view: string, The level of information requested in response.
1119
1120 Returns:
1121 An object of the form:
1122
1123 { # Defines a job to be run by the Cloud Dataflow service.
1124 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1125 # If this field is set, the service will ensure its uniqueness.
1126 # The request to create a job will fail if the service has knowledge of a
1127 # previously submitted job with the same client's ID and job name.
1128 # The caller may use this field to ensure idempotence of job
1129 # creation across retried attempts to create a job.
1130 # By default, the field is empty and, in that case, the service ignores it.
1131 "requestedState": "A String", # The job's requested state.
1132 #
1133 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1134 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1135 # also be used to directly set a job's requested state to
1136 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1137 # job if it has not already reached a terminal state.
1138 "name": "A String", # The user-specified Cloud Dataflow job name.
1139 #
1140 # Only one Job with a given name may exist in a project at any
1141 # given time. If a caller attempts to create a Job with the same
1142 # name as an already-existing Job, the attempt returns the
1143 # existing Job.
1144 #
1145 # The name must match the regular expression
1146 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1147 "location": "A String", # The location that contains this job.
1148 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1149 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1150 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1151 "currentState": "A String", # The current state of the job.
1152 #
1153 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1154 # specified.
1155 #
1156 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1157 # terminal state. After a job has reached a terminal state, no
1158 # further state updates may be made.
1159 #
1160 # This field may be mutated by the Cloud Dataflow service;
1161 # callers cannot mutate it.
1162 "labels": { # User-defined labels for this job.
1163 #
1164 # The labels map can contain no more than 64 entries. Entries of the labels
1165 # map are UTF8 strings that comply with the following restrictions:
1166 #
1167 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1168 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1169 # * Both keys and values are additionally constrained to be <= 128 bytes in
1170 # size.
1171 "a_key": "A String",
1172 },
1173 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1174 # corresponding name prefixes of the new job.
1175 "a_key": "A String",
1176 },
1177 "id": "A String", # The unique ID of this job.
1178 #
1179 # This field is set by the Cloud Dataflow service when the Job is
1180 # created, and is immutable for the life of the job.
1181 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1182 "version": { # A structure describing which components and their versions of the service
1183 # are required in order to run the job.
1184 "a_key": "", # Properties of the object.
1185 },
1186 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1187           # storage. The system will append the suffix "/temp-{JOBNAME}" to
1188 # this resource prefix, where {JOBNAME} is the value of the
1189 # job_name field. The resulting bucket and object prefix is used
1190 # as the prefix of the resources used to store temporary data
1191 # needed during the job execution. NOTE: This will override the
1192 # value in taskrunner_settings.
1193 # The supported resource type is:
1194 #
1195 # Google Cloud Storage:
1196 #
1197 # storage.googleapis.com/{bucket}/{object}
1198 # bucket.storage.googleapis.com/{object}
1199 "internalExperiments": { # Experimental settings.
1200 "a_key": "", # Properties of the object. Contains field @type with type URL.
1201 },
1202 "dataset": "A String", # The dataset for the current project where various workflow
1203 # related tables are stored.
1204 #
1205 # The supported resource type is:
1206 #
1207 # Google BigQuery:
1208 # bigquery.googleapis.com/{dataset}
1209 "experiments": [ # The list of experiments to enable.
1210 "A String",
1211 ],
1212 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1213 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1214 # options are passed through the service and are used to recreate the
1215 # SDK pipeline options on the worker in a language agnostic and platform
1216 # independent way.
1217 "a_key": "", # Properties of the object.
1218 },
1219 "userAgent": { # A description of the process that generated the request.
1220 "a_key": "", # Properties of the object.
1221 },
1222 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1223 # unspecified, the service will attempt to choose a reasonable
1224 # default. This should be in the form of the API service name,
1225 # e.g. "compute.googleapis.com".
1226 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1227 # specified in order for the job to have workers.
1228 { # Describes one particular pool of Cloud Dataflow workers to be
1229 # instantiated by the Cloud Dataflow service in order to perform the
1230 # computations required by a job. Note that a workflow job may use
1231 # multiple pools, in order to match the various computational
1232 # requirements of the various stages of the job.
1233 "diskSourceImage": "A String", # Fully qualified source image for disks.
1234 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1235 # using the standard Dataflow task runner. Users should ignore
1236 # this field.
1237 "workflowFileName": "A String", # The file to store the workflow in.
1238 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1239 # will not be uploaded.
1240 #
1241 # The supported resource type is:
1242 #
1243 # Google Cloud Storage:
1244 # storage.googleapis.com/{bucket}/{object}
1245 # bucket.storage.googleapis.com/{object}
1246 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1247 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1248 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1249 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1250 # "shuffle/v1beta1".
1251 "workerId": "A String", # The ID of the worker running this pipeline.
1252 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1253 #
1254 # When workers access Google Cloud APIs, they logically do so via
1255 # relative URLs. If this field is specified, it supplies the base
1256 # URL to use for resolving these relative URLs. The normative
1257 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1258 # Locators".
1259 #
1260 # If not specified, the default value is "http://www.googleapis.com/"
1261 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1262 # "dataflow/v1b3/projects".
1263 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1264 # storage.
1265 #
1266 # The supported resource type is:
1267 #
1268 # Google Cloud Storage:
1269 #
1270 # storage.googleapis.com/{bucket}/{object}
1271 # bucket.storage.googleapis.com/{object}
1272 },
1273 "vmId": "A String", # The ID string of the VM.
1274 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1275 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1276 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1277 # access the Cloud Dataflow API.
1278 "A String",
1279 ],
1280 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1281 # taskrunner; e.g. "root".
1282 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1283 #
1284 # When workers access Google Cloud APIs, they logically do so via
1285 # relative URLs. If this field is specified, it supplies the base
1286 # URL to use for resolving these relative URLs. The normative
1287 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1288 # Locators".
1289 #
1290 # If not specified, the default value is "http://www.googleapis.com/"
1291 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1292 # taskrunner; e.g. "wheel".
1293 "languageHint": "A String", # The suggested backend language.
1294 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1295 # console.
1296 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1297 "logDir": "A String", # The directory on the VM to store logs.
1298             "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
1299 "harnessCommand": "A String", # The command to launch the worker harness.
1300 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1301 # temporary storage.
1302 #
1303 # The supported resource type is:
1304 #
1305 # Google Cloud Storage:
1306 # storage.googleapis.com/{bucket}/{object}
1307 # bucket.storage.googleapis.com/{object}
1308 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1309 },
1310 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1311 # are supported.
1312 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1313 # service will attempt to choose a reasonable default.
1314 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1315 # the service will use the network "default".
1316 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1317 # will attempt to choose a reasonable default.
1318 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1319 # attempt to choose a reasonable default.
1320 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1321 { # Describes the data disk used by a workflow job.
1322 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1323 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1324 # attempt to choose a reasonable default.
1325 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1326 # must be a disk type appropriate to the project and zone in which
1327 # the workers will run. If unknown or unspecified, the service
1328 # will attempt to choose a reasonable default.
1329 #
1330 # For example, the standard persistent disk type is a resource name
1331 # typically ending in "pd-standard". If SSD persistent disks are
1332 # available, the resource name typically ends with "pd-ssd". The
1333               # actual valid values are defined by the Google Compute Engine API,
1334 # not by the Cloud Dataflow API; consult the Google Compute Engine
1335 # documentation for more information about determining the set of
1336 # available disk types for a particular project and zone.
1337 #
1338 # Google Compute Engine Disk types are local to a particular
1339 # project in a particular zone, and so the resource name will
1340 # typically look something like this:
1341 #
1342 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1343 },
1344 ],
1345           "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1346 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1347 # `TEARDOWN_NEVER`.
1348 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1349 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1350 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1351 # down.
1352 #
1353 # If the workers are not torn down by the service, they will
1354 # continue to run and use Google Compute Engine VM resources in the
1355 # user's project until they are explicitly terminated by the user.
1356 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1357 # policy except for small, manually supervised test jobs.
1358 #
1359 # If unknown or unspecified, the service will attempt to choose a reasonable
1360 # default.
1361 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1362 # Compute Engine API.
1363 "ipConfiguration": "A String", # Configuration for VM IPs.
1364 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1365 # service will choose a number of threads (according to the number of cores
1366 # on the selected machine type for batch, or 1 by convention for streaming).
1367 "poolArgs": { # Extra arguments for this worker pool.
1368 "a_key": "", # Properties of the object. Contains field @type with type URL.
1369 },
1370 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1371 # execute the job. If zero or unspecified, the service will
1372 # attempt to choose a reasonable default.
1373 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1374 # harness, residing in Google Container Registry.
1375 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1376 # the form "regions/REGION/subnetworks/SUBNETWORK".
1377 "packages": [ # Packages to be installed on workers.
1378 { # The packages that must be installed in order for a worker to run the
1379 # steps of the Cloud Dataflow job that will be assigned to its worker
1380 # pool.
1381 #
1382 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1383 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1384 # might use this to install jars containing the user's code and all of the
1385 # various dependencies (libraries, data files, etc.) required in order
1386 # for that code to run.
1387 "location": "A String", # The resource to read the package from. The supported resource type is:
1388 #
1389 # Google Cloud Storage:
1390 #
1391 # storage.googleapis.com/{bucket}
1392 # bucket.storage.googleapis.com/
1393 "name": "A String", # The name of the package.
1394 },
1395 ],
1396 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1397 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1398 "algorithm": "A String", # The algorithm to use for autoscaling.
1399 },
1400 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1401 # select a default set of packages which are useful to worker
1402 # harnesses written in a particular language.
1403 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1404 # attempt to choose a reasonable default.
1405 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1406 "a_key": "A String",
1407 },
1408 },
1409 ],
1410 },
1411 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1412 # A description of the user pipeline and stages through which it is executed.
1413 # Created by Cloud Dataflow service. Only retrieved with
1414 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1415 # form. This data is provided by the Dataflow service for ease of visualizing
1416       # the pipeline and interpreting Dataflow-provided metrics.
1417 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1418 { # Description of the type, names/ids, and input/outputs for a transform.
1419 "kind": "A String", # Type of transform.
1420 "name": "A String", # User provided name for this transform instance.
1421 "inputCollectionName": [ # User names for all collection inputs to this transform.
1422 "A String",
1423 ],
1424 "displayData": [ # Transform-specific display data.
1425 { # Data provided with a pipeline or transform to provide descriptive info.
1426 "shortStrValue": "A String", # A possible additional shorter value to display.
1427 # For example a java_class_name_value of com.mypackage.MyDoFn
1428 # will be stored with MyDoFn as the short_str_value and
1429 # com.mypackage.MyDoFn as the java_class_name value.
1430 # short_str_value can be displayed and java_class_name_value
1431 # will be displayed as a tooltip.
1432 "durationValue": "A String", # Contains value if the data is of duration type.
1433 "url": "A String", # An optional full URL.
1434 "floatValue": 3.14, # Contains value if the data is of float type.
1435 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1436 # language namespace (i.e. python module) which defines the display data.
1437 # This allows a dax monitoring system to specially handle the data
1438 # and perform custom rendering.
1439 "javaClassValue": "A String", # Contains value if the data is of java class type.
1440 "label": "A String", # An optional label to display in a dax UI for the element.
1441 "boolValue": True or False, # Contains value if the data is of a boolean type.
1442 "strValue": "A String", # Contains value if the data is of string type.
1443 "key": "A String", # The key identifying the display data.
1444 # This is intended to be used as a label for the display data
1445 # when viewed in a dax monitoring system.
1446 "int64Value": "A String", # Contains value if the data is of int64 type.
1447 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1448 },
1449 ],
1450 "outputCollectionName": [ # User names for all collection outputs to this transform.
1451 "A String",
1452 ],
1453 "id": "A String", # SDK generated id of this transform instance.
1454 },
1455 ],
1456 "displayData": [ # Pipeline level display data.
1457 { # Data provided with a pipeline or transform to provide descriptive info.
1458 "shortStrValue": "A String", # A possible additional shorter value to display.
1459 # For example a java_class_name_value of com.mypackage.MyDoFn
1460 # will be stored with MyDoFn as the short_str_value and
1461 # com.mypackage.MyDoFn as the java_class_name value.
1462 # short_str_value can be displayed and java_class_name_value
1463 # will be displayed as a tooltip.
1464 "durationValue": "A String", # Contains value if the data is of duration type.
1465 "url": "A String", # An optional full URL.
1466 "floatValue": 3.14, # Contains value if the data is of float type.
1467 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1468 # language namespace (i.e. python module) which defines the display data.
1469 # This allows a dax monitoring system to specially handle the data
1470 # and perform custom rendering.
1471 "javaClassValue": "A String", # Contains value if the data is of java class type.
1472 "label": "A String", # An optional label to display in a dax UI for the element.
1473 "boolValue": True or False, # Contains value if the data is of a boolean type.
1474 "strValue": "A String", # Contains value if the data is of string type.
1475 "key": "A String", # The key identifying the display data.
1476 # This is intended to be used as a label for the display data
1477 # when viewed in a dax monitoring system.
1478 "int64Value": "A String", # Contains value if the data is of int64 type.
1479 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1480 },
1481 ],
1482 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1483 { # Description of the composing transforms, names/ids, and input/outputs of a
1484 # stage of execution. Some composing transforms and sources may have been
1485 # generated by the Dataflow service during execution planning.
1486 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1487 { # Description of an interstitial value between transforms in an execution
1488 # stage.
1489 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1490 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1491 # source is most closely associated.
1492 "name": "A String", # Dataflow service generated name for this source.
1493 },
1494 ],
1495           "kind": "A String", # Type of transform this stage is executing.
1496 "name": "A String", # Dataflow service generated name for this stage.
1497 "outputSource": [ # Output sources for this stage.
1498 { # Description of an input or output of an execution stage.
1499 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1500 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1501 # source is most closely associated.
1502 "name": "A String", # Dataflow service generated name for this source.
1503 "sizeBytes": "A String", # Size of the source, if measurable.
1504 },
1505 ],
1506 "inputSource": [ # Input sources for this stage.
1507 { # Description of an input or output of an execution stage.
1508 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1509 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1510 # source is most closely associated.
1511 "name": "A String", # Dataflow service generated name for this source.
1512 "sizeBytes": "A String", # Size of the source, if measurable.
1513 },
1514 ],
1515 "componentTransform": [ # Transforms that comprise this execution stage.
1516 { # Description of a transform executed as part of an execution stage.
1517 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1518 "originalTransform": "A String", # User name for the original user transform with which this transform is
1519 # most closely associated.
1520 "name": "A String", # Dataflow service generated name for this source.
1521 },
1522 ],
1523 "id": "A String", # Dataflow service generated id for this stage.
1524 },
1525 ],
1526 },
1527 "steps": [ # The top-level steps that constitute the entire job.
1528 { # Defines a particular step within a Cloud Dataflow job.
1529 #
1530 # A job consists of multiple steps, each of which performs some
1531 # specific operation as part of the overall job. Data is typically
1532 # passed from one step to another as part of the job.
1533 #
1534 # Here's an example of a sequence of steps which together implement a
1535 # Map-Reduce job:
1536 #
1537 # * Read a collection of data from some source, parsing the
1538 # collection's elements.
1539 #
1540 # * Validate the elements.
1541 #
1542 # * Apply a user-defined function to map each element to some value
1543 # and extract an element-specific key value.
1544 #
1545 # * Group elements with the same key into a single element with
1546 # that key, transforming a multiply-keyed collection into a
1547 # uniquely-keyed collection.
1548 #
1549 # * Write the elements out to some data sink.
1550 #
1551 # Note that the Cloud Dataflow service may be used to run many different
1552 # types of jobs, not just Map-Reduce.
1553 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1554 "properties": { # Named properties associated with the step. Each kind of
1555 # predefined step has its own required set of properties.
1556 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1557 "a_key": "", # Properties of the object.
1558 },
1559 "name": "A String", # The name that identifies the step. This must be unique for each
1560 # step with respect to all other steps in the Cloud Dataflow job.
1561 },
1562 ],
1563 "currentStateTime": "A String", # The timestamp associated with the current state.
1564 "tempFiles": [ # A set of files the system should be aware of that are used
1565 # for temporary storage. These temporary files will be
1566 # removed on job completion.
1567 # No duplicates are allowed.
1568 # No file patterns are supported.
1569 #
1570 # The supported files are:
1571 #
1572 # Google Cloud Storage:
1573 #
1574 # storage.googleapis.com/{bucket}/{object}
1575 # bucket.storage.googleapis.com/{object}
1576 "A String",
1577 ],
1578 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1579 # callers cannot mutate it.
1580 { # A message describing the state of a particular execution stage.
1581 "executionStageName": "A String", # The name of the execution stage.
1582         "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1583 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1584 },
1585 ],
1586 "type": "A String", # The type of Cloud Dataflow job.
1587 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1588 # Cloud Dataflow service.
1589 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1590 # of the job it replaced.
1591 #
1592 # When sending a `CreateJobRequest`, you can update a job by specifying it
1593 # here. The job named here is stopped, and its intermediate state is
1594 # transferred to this job.
1595 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1596 # isn't contained in the submitted job.
1597 "stages": { # A mapping from each stage to the information about that stage.
1598 "a_key": { # Contains information about how a particular
1599 # google.dataflow.v1beta3.Step will be executed.
1600 "stepName": [ # The steps associated with the execution stage.
1601 # Note that stages may have several steps, and that a given step
1602 # might be run by more than one stage.
1603 "A String",
1604 ],
1605 },
1606 },
1607 },
1608 }</pre>
1609 </div>
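<p>
The following is a minimal usage sketch (not part of the generated reference) of the <code>get</code> call
documented above, using the Python client library. It assumes application-default credentials are available
and uses placeholder project, location, and job identifiers.
</p>
<pre>
# Sketch only: assumes google-api-python-client and google-auth are installed,
# and that application-default credentials can be found in the environment.
import google.auth
from googleapiclient.discovery import build

credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"])
service = build("dataflow", "v1b3", credentials=credentials)

# Placeholder identifiers -- replace with real values.
project_id = "my-project"                                   # hypothetical project ID
location = "us-central1"                                    # location that contains the job
job_id = "2017-01-01_00_00_00-1234567890123456789"          # hypothetical job ID

# Request the full job description, including steps (JOB_VIEW_ALL).
job = service.projects().locations().jobs().get(
    projectId=project_id,
    location=location,
    jobId=job_id,
    view="JOB_VIEW_ALL").execute()

print(job["name"], job["currentState"])
</pre>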
1610
1611 <div class="method">
1612 <code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>
1613 <pre>Request the job status.
1614
1615 Args:
1616 projectId: string, A project id. (required)
1617 location: string, The location which contains the job specified by job_id. (required)
1618 jobId: string, The job to get messages for. (required)
1619 startTime: string, Return only metric data that has changed since this time.
1620 Default is to return all information about all metrics for the job.
1621 x__xgafv: string, V1 error format.
1622 Allowed values
1623 1 - v1 error format
1624 2 - v2 error format
1625
1626 Returns:
1627 An object of the form:
1628
1629     { # JobMetrics contains a collection of metrics describing the detailed progress
1630 # of a Dataflow job. Metrics correspond to user-defined and system-defined
1631 # metrics in the job.
1632 #
1633 # This resource captures only the most recent values of each metric;
1634 # time-series data can be queried for them (under the same metric names)
1635 # from Cloud Monitoring.
1636 "metrics": [ # All metrics for this job.
1637 { # Describes the state of a metric.
1638 "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1639 # This holds the count of the aggregated values and is used in combination
1640 # with mean_sum above to obtain the actual mean aggregate value.
1641 # The only possible value type is Long.
1642 "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1643 # reporting work progress; it will be filled in responses from the
1644 # metrics API.
1645 "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
1646 # possible value type is a list of Values whose type can be Long, Double,
1647 # or String, according to the metric's type. All Values in the list must
1648 # be of the same type.
1649 "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
1650 # metric.
1651           "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
1652 # will be "dataflow" for metrics defined by the Dataflow service or SDK.
1653 "name": "A String", # Worker-defined metric name.
1654 "context": { # Zero or more labeled fields which identify the part of the job this
1655 # metric is associated with, such as the name of a step or collection.
1656 #
1657 # For example, built-in counters associated with steps will have
1658 # context['step'] = <step-name>. Counters associated with PCollections
1659 # in the SDK will have context['pcollection'] = <pcollection-name>.
1660 "a_key": "A String",
1661 },
1662 },
1663 "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1664 # value accumulated since the worker started working on this WorkItem.
1665 # By default this is false, indicating that this metric is reported
1666 # as a delta that is not associated with any WorkItem.
1667 "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
1668 # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
1669 # The specified aggregation kind is case-insensitive.
1670 #
1671 # If omitted, this is not an aggregated value but instead
1672 # a single metric sample value.
1673 "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1674 # "And", and "Or". The possible value types are Long, Double, and Boolean.
1675 "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1676 # This holds the sum of the aggregated values and is used in combination
1677 # with mean_count below to obtain the actual mean aggregate value.
1678 # The only possible value types are Long and Double.
1679 "distribution": "", # A struct value describing properties of a distribution of numeric values.
1680 "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1681 # service.
1682 },
1683 ],
1684 "metricTime": "A String", # Timestamp as of which metric values are current.
1685 }</pre>
1686 </div>
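<p>
A short sketch (not part of the generated reference) of how the <code>getMetrics</code> response above might
be consumed: fetch the metrics for a job and print scalar values. The <code>service</code> object and the
<code>project_id</code>, <code>location</code>, and <code>job_id</code> placeholders are assumed to be defined
as in the <code>get</code> example above.
</p>
<pre>
# Sketch only: `service`, `project_id`, `location`, and `job_id` are assumed
# to be defined as in the get() example above.
metrics = service.projects().locations().jobs().getMetrics(
    projectId=project_id,
    location=location,
    jobId=job_id).execute()

print("Metrics as of", metrics.get("metricTime"))
for metric in metrics.get("metrics", []):
    name = metric["name"]            # structured name: origin, name, context
    if "scalar" in metric:           # "Sum"/"Max"/"Min"/"And"/"Or" aggregations
        print(name.get("origin"), name.get("name"), "=", metric["scalar"])
</pre>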
1687
1688 <div class="method">
1689 <code class="details" id="list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</code>
1690 <pre>List the jobs of a project.
1691
1692 Args:
1693 projectId: string, The project which owns the jobs. (required)
1694 location: string, The location that contains this job. (required)
1695 pageSize: integer, If there are many jobs, limit response to at most this many.
1696 The actual number of jobs returned will be the lesser of max_responses
1697 and an unspecified server-defined limit.
1698 x__xgafv: string, V1 error format.
1699 Allowed values
1700 1 - v1 error format
1701 2 - v2 error format
1702 pageToken: string, Set this to the 'next_page_token' field of a previous response
1703 to request additional results in a long list.
1704 filter: string, The kind of filter to use.
1705 view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
1706
1707 Returns:
1708 An object of the form:
1709
1710 { # Response to a request to list Cloud Dataflow jobs. This may be a partial
1711 # response, depending on the page size in the ListJobsRequest.
1712 "nextPageToken": "A String", # Set if there may be more results than fit in this response.
1713 "failedLocation": [ # Zero or more messages describing locations that failed to respond.
1714 { # Indicates which location failed to respond to a request for data.
1715 "name": "A String", # The name of the failed location.
1716 },
1717 ],
1718 "jobs": [ # A subset of the requested job information.
1719 { # Defines a job to be run by the Cloud Dataflow service.
1720 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1721 # If this field is set, the service will ensure its uniqueness.
1722 # The request to create a job will fail if the service has knowledge of a
1723 # previously submitted job with the same client's ID and job name.
1724 # The caller may use this field to ensure idempotence of job
1725 # creation across retried attempts to create a job.
1726 # By default, the field is empty and, in that case, the service ignores it.
1727 "requestedState": "A String", # The job's requested state.
1728 #
1729 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1730 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1731 # also be used to directly set a job's requested state to
1732 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1733 # job if it has not already reached a terminal state.
1734 "name": "A String", # The user-specified Cloud Dataflow job name.
1735 #
1736 # Only one Job with a given name may exist in a project at any
1737 # given time. If a caller attempts to create a Job with the same
1738 # name as an already-existing Job, the attempt returns the
1739 # existing Job.
1740 #
1741 # The name must match the regular expression
1742 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1743 "location": "A String", # The location that contains this job.
1744 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1745 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1746 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1747 "currentState": "A String", # The current state of the job.
1748 #
1749 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1750 # specified.
1751 #
1752 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1753 # terminal state. After a job has reached a terminal state, no
1754 # further state updates may be made.
1755 #
1756 # This field may be mutated by the Cloud Dataflow service;
1757 # callers cannot mutate it.
1758 "labels": { # User-defined labels for this job.
1759 #
1760 # The labels map can contain no more than 64 entries. Entries of the labels
1761 # map are UTF8 strings that comply with the following restrictions:
1762 #
1763 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1764 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1765 # * Both keys and values are additionally constrained to be <= 128 bytes in
1766 # size.
1767 "a_key": "A String",
1768 },
1769 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1770 # corresponding name prefixes of the new job.
1771 "a_key": "A String",
1772 },
1773 "id": "A String", # The unique ID of this job.
1774 #
1775 # This field is set by the Cloud Dataflow service when the Job is
1776 # created, and is immutable for the life of the job.
1777 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1778 "version": { # A structure describing which components and their versions of the service
1779 # are required in order to run the job.
1780 "a_key": "", # Properties of the object.
1781 },
1782 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1783               # storage. The system will append the suffix "/temp-{JOBNAME}" to
1784 # this resource prefix, where {JOBNAME} is the value of the
1785 # job_name field. The resulting bucket and object prefix is used
1786 # as the prefix of the resources used to store temporary data
1787 # needed during the job execution. NOTE: This will override the
1788 # value in taskrunner_settings.
1789 # The supported resource type is:
1790 #
1791 # Google Cloud Storage:
1792 #
1793 # storage.googleapis.com/{bucket}/{object}
1794 # bucket.storage.googleapis.com/{object}
1795 "internalExperiments": { # Experimental settings.
1796 "a_key": "", # Properties of the object. Contains field @type with type URL.
1797 },
1798 "dataset": "A String", # The dataset for the current project where various workflow
1799 # related tables are stored.
1800 #
1801 # The supported resource type is:
1802 #
1803 # Google BigQuery:
1804 # bigquery.googleapis.com/{dataset}
1805 "experiments": [ # The list of experiments to enable.
1806 "A String",
1807 ],
1808 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1809 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1810 # options are passed through the service and are used to recreate the
1811 # SDK pipeline options on the worker in a language agnostic and platform
1812 # independent way.
1813 "a_key": "", # Properties of the object.
1814 },
1815 "userAgent": { # A description of the process that generated the request.
1816 "a_key": "", # Properties of the object.
1817 },
1818 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1819 # unspecified, the service will attempt to choose a reasonable
1820 # default. This should be in the form of the API service name,
1821 # e.g. "compute.googleapis.com".
1822 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1823 # specified in order for the job to have workers.
1824 { # Describes one particular pool of Cloud Dataflow workers to be
1825 # instantiated by the Cloud Dataflow service in order to perform the
1826 # computations required by a job. Note that a workflow job may use
1827 # multiple pools, in order to match the various computational
1828 # requirements of the various stages of the job.
1829 "diskSourceImage": "A String", # Fully qualified source image for disks.
1830 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1831 # using the standard Dataflow task runner. Users should ignore
1832 # this field.
1833 "workflowFileName": "A String", # The file to store the workflow in.
1834 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1835 # will not be uploaded.
1836 #
1837 # The supported resource type is:
1838 #
1839 # Google Cloud Storage:
1840 # storage.googleapis.com/{bucket}/{object}
1841 # bucket.storage.googleapis.com/{object}
1842 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1843 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1844 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1845 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1846 # "shuffle/v1beta1".
1847 "workerId": "A String", # The ID of the worker running this pipeline.
1848 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1849 #
1850 # When workers access Google Cloud APIs, they logically do so via
1851 # relative URLs. If this field is specified, it supplies the base
1852 # URL to use for resolving these relative URLs. The normative
1853 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1854 # Locators".
1855 #
1856 # If not specified, the default value is "http://www.googleapis.com/"
1857 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1858 # "dataflow/v1b3/projects".
1859 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1860 # storage.
1861 #
1862 # The supported resource type is:
1863 #
1864 # Google Cloud Storage:
1865 #
1866 # storage.googleapis.com/{bucket}/{object}
1867 # bucket.storage.googleapis.com/{object}
1868 },
1869 "vmId": "A String", # The ID string of the VM.
1870 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1871 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1872 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1873 # access the Cloud Dataflow API.
1874 "A String",
1875 ],
1876 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1877 # taskrunner; e.g. "root".
1878 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1879 #
1880 # When workers access Google Cloud APIs, they logically do so via
1881 # relative URLs. If this field is specified, it supplies the base
1882 # URL to use for resolving these relative URLs. The normative
1883 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1884 # Locators".
1885 #
1886 # If not specified, the default value is "http://www.googleapis.com/"
1887 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1888 # taskrunner; e.g. "wheel".
1889 "languageHint": "A String", # The suggested backend language.
1890 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1891 # console.
1892 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1893 "logDir": "A String", # The directory on the VM to store logs.
1894                 "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
1895 "harnessCommand": "A String", # The command to launch the worker harness.
1896 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1897 # temporary storage.
1898 #
1899 # The supported resource type is:
1900 #
1901 # Google Cloud Storage:
1902 # storage.googleapis.com/{bucket}/{object}
1903 # bucket.storage.googleapis.com/{object}
1904 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1905 },
1906 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1907 # are supported.
1908 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1909 # service will attempt to choose a reasonable default.
1910 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1911 # the service will use the network "default".
1912 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1913 # will attempt to choose a reasonable default.
1914 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1915 # attempt to choose a reasonable default.
1916 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1917 { # Describes the data disk used by a workflow job.
1918 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1919 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1920 # attempt to choose a reasonable default.
1921 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1922 # must be a disk type appropriate to the project and zone in which
1923 # the workers will run. If unknown or unspecified, the service
1924 # will attempt to choose a reasonable default.
1925 #
1926 # For example, the standard persistent disk type is a resource name
1927 # typically ending in "pd-standard". If SSD persistent disks are
1928 # available, the resource name typically ends with "pd-ssd". The
1929                   # actual valid values are defined by the Google Compute Engine API,
1930 # not by the Cloud Dataflow API; consult the Google Compute Engine
1931 # documentation for more information about determining the set of
1932 # available disk types for a particular project and zone.
1933 #
1934 # Google Compute Engine Disk types are local to a particular
1935 # project in a particular zone, and so the resource name will
1936 # typically look something like this:
1937 #
1938 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1939 },
1940 ],
1941               "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1942 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1943 # `TEARDOWN_NEVER`.
1944 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1945 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1946 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1947 # down.
1948 #
1949 # If the workers are not torn down by the service, they will
1950 # continue to run and use Google Compute Engine VM resources in the
1951 # user's project until they are explicitly terminated by the user.
1952 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1953 # policy except for small, manually supervised test jobs.
1954 #
1955 # If unknown or unspecified, the service will attempt to choose a reasonable
1956 # default.
1957 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1958 # Compute Engine API.
1959 "ipConfiguration": "A String", # Configuration for VM IPs.
1960 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1961 # service will choose a number of threads (according to the number of cores
1962 # on the selected machine type for batch, or 1 by convention for streaming).
1963 "poolArgs": { # Extra arguments for this worker pool.
1964 "a_key": "", # Properties of the object. Contains field @type with type URL.
1965 },
1966 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1967 # execute the job. If zero or unspecified, the service will
1968 # attempt to choose a reasonable default.
1969 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1970 # harness, residing in Google Container Registry.
1971 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1972 # the form "regions/REGION/subnetworks/SUBNETWORK".
1973 "packages": [ # Packages to be installed on workers.
1974 { # The packages that must be installed in order for a worker to run the
1975 # steps of the Cloud Dataflow job that will be assigned to its worker
1976 # pool.
1977 #
1978 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1979 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1980 # might use this to install jars containing the user's code and all of the
1981 # various dependencies (libraries, data files, etc.) required in order
1982 # for that code to run.
1983 "location": "A String", # The resource to read the package from. The supported resource type is:
1984 #
1985 # Google Cloud Storage:
1986 #
1987 # storage.googleapis.com/{bucket}
1988 # bucket.storage.googleapis.com/
1989 "name": "A String", # The name of the package.
1990 },
1991 ],
1992 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1993 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1994 "algorithm": "A String", # The algorithm to use for autoscaling.
1995 },
1996 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1997 # select a default set of packages which are useful to worker
1998 # harnesses written in a particular language.
1999 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
2000 # attempt to choose a reasonable default.
2001 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2002 "a_key": "A String",
2003 },
2004 },
2005 ],
2006 },
2007 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2008 # A description of the user pipeline and stages through which it is executed.
2009 # Created by Cloud Dataflow service. Only retrieved with
2010 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2011 # form. This data is provided by the Dataflow service for ease of visualizing
2012           # the pipeline and interpreting Dataflow-provided metrics.
2013 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2014 { # Description of the type, names/ids, and input/outputs for a transform.
2015 "kind": "A String", # Type of transform.
2016 "name": "A String", # User provided name for this transform instance.
2017 "inputCollectionName": [ # User names for all collection inputs to this transform.
2018 "A String",
2019 ],
2020 "displayData": [ # Transform-specific display data.
2021 { # Data provided with a pipeline or transform to provide descriptive info.
2022 "shortStrValue": "A String", # A possible additional shorter value to display.
2023 # For example a java_class_name_value of com.mypackage.MyDoFn
2024 # will be stored with MyDoFn as the short_str_value and
2025 # com.mypackage.MyDoFn as the java_class_name value.
2026 # short_str_value can be displayed and java_class_name_value
2027 # will be displayed as a tooltip.
2028 "durationValue": "A String", # Contains value if the data is of duration type.
2029 "url": "A String", # An optional full URL.
2030 "floatValue": 3.14, # Contains value if the data is of float type.
2031 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2032 # language namespace (i.e. python module) which defines the display data.
2033 # This allows a dax monitoring system to specially handle the data
2034 # and perform custom rendering.
2035 "javaClassValue": "A String", # Contains value if the data is of java class type.
2036 "label": "A String", # An optional label to display in a dax UI for the element.
2037 "boolValue": True or False, # Contains value if the data is of a boolean type.
2038 "strValue": "A String", # Contains value if the data is of string type.
2039 "key": "A String", # The key identifying the display data.
2040 # This is intended to be used as a label for the display data
2041 # when viewed in a dax monitoring system.
2042 "int64Value": "A String", # Contains value if the data is of int64 type.
2043 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2044 },
2045 ],
2046 "outputCollectionName": [ # User names for all collection outputs to this transform.
2047 "A String",
2048 ],
2049 "id": "A String", # SDK generated id of this transform instance.
2050 },
2051 ],
2052 "displayData": [ # Pipeline level display data.
2053 { # Data provided with a pipeline or transform to provide descriptive info.
2054 "shortStrValue": "A String", # A possible additional shorter value to display.
2055 # For example a java_class_name_value of com.mypackage.MyDoFn
2056 # will be stored with MyDoFn as the short_str_value and
2057 # com.mypackage.MyDoFn as the java_class_name value.
2058 # short_str_value can be displayed and java_class_name_value
2059 # will be displayed as a tooltip.
2060 "durationValue": "A String", # Contains value if the data is of duration type.
2061 "url": "A String", # An optional full URL.
2062 "floatValue": 3.14, # Contains value if the data is of float type.
2063 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2064 # language namespace (i.e. python module) which defines the display data.
2065 # This allows a dax monitoring system to specially handle the data
2066 # and perform custom rendering.
2067 "javaClassValue": "A String", # Contains value if the data is of java class type.
2068 "label": "A String", # An optional label to display in a dax UI for the element.
2069 "boolValue": True or False, # Contains value if the data is of a boolean type.
2070 "strValue": "A String", # Contains value if the data is of string type.
2071 "key": "A String", # The key identifying the display data.
2072 # This is intended to be used as a label for the display data
2073 # when viewed in a dax monitoring system.
2074 "int64Value": "A String", # Contains value if the data is of int64 type.
2075 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2076 },
2077 ],
2078 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2079 { # Description of the composing transforms, names/ids, and input/outputs of a
2080 # stage of execution. Some composing transforms and sources may have been
2081 # generated by the Dataflow service during execution planning.
2082 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2083 { # Description of an interstitial value between transforms in an execution
2084 # stage.
2085 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2086 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2087 # source is most closely associated.
2088 "name": "A String", # Dataflow service generated name for this source.
2089 },
2090 ],
2091               "kind": "A String", # Type of transform this stage is executing.
2092 "name": "A String", # Dataflow service generated name for this stage.
2093 "outputSource": [ # Output sources for this stage.
2094 { # Description of an input or output of an execution stage.
2095 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2096 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2097 # source is most closely associated.
2098 "name": "A String", # Dataflow service generated name for this source.
2099 "sizeBytes": "A String", # Size of the source, if measurable.
2100 },
2101 ],
2102 "inputSource": [ # Input sources for this stage.
2103 { # Description of an input or output of an execution stage.
2104 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2105 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2106 # source is most closely associated.
2107 "name": "A String", # Dataflow service generated name for this source.
2108 "sizeBytes": "A String", # Size of the source, if measurable.
2109 },
2110 ],
2111 "componentTransform": [ # Transforms that comprise this execution stage.
2112 { # Description of a transform executed as part of an execution stage.
2113 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2114 "originalTransform": "A String", # User name for the original user transform with which this transform is
2115 # most closely associated.
2116 "name": "A String", # Dataflow service generated name for this source.
2117 },
2118 ],
2119 "id": "A String", # Dataflow service generated id for this stage.
2120 },
2121 ],
2122 },
2123 "steps": [ # The top-level steps that constitute the entire job.
2124 { # Defines a particular step within a Cloud Dataflow job.
2125 #
2126 # A job consists of multiple steps, each of which performs some
2127 # specific operation as part of the overall job. Data is typically
2128 # passed from one step to another as part of the job.
2129 #
2130 # Here's an example of a sequence of steps which together implement a
2131 # Map-Reduce job:
2132 #
2133 # * Read a collection of data from some source, parsing the
2134 # collection's elements.
2135 #
2136 # * Validate the elements.
2137 #
2138 # * Apply a user-defined function to map each element to some value
2139 # and extract an element-specific key value.
2140 #
2141 # * Group elements with the same key into a single element with
2142 # that key, transforming a multiply-keyed collection into a
2143 # uniquely-keyed collection.
2144 #
2145 # * Write the elements out to some data sink.
2146 #
2147 # Note that the Cloud Dataflow service may be used to run many different
2148 # types of jobs, not just Map-Reduce.
2149 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2150 "properties": { # Named properties associated with the step. Each kind of
2151 # predefined step has its own required set of properties.
2152 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
2153 "a_key": "", # Properties of the object.
2154 },
2155 "name": "A String", # The name that identifies the step. This must be unique for each
2156 # step with respect to all other steps in the Cloud Dataflow job.
2157 },
2158 ],
2159 "currentStateTime": "A String", # The timestamp associated with the current state.
2160 "tempFiles": [ # A set of files the system should be aware of that are used
2161 # for temporary storage. These temporary files will be
2162 # removed on job completion.
2163 # No duplicates are allowed.
2164 # No file patterns are supported.
2165 #
2166 # The supported files are:
2167 #
2168 # Google Cloud Storage:
2169 #
2170 # storage.googleapis.com/{bucket}/{object}
2171 # bucket.storage.googleapis.com/{object}
2172 "A String",
2173 ],
2174 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2175 # callers cannot mutate it.
2176 { # A message describing the state of a particular execution stage.
2177 "executionStageName": "A String", # The name of the execution stage.
2178 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
2179 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2180 },
2181 ],
2182 "type": "A String", # The type of Cloud Dataflow job.
2183 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2184 # Cloud Dataflow service.
2185 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2186 # of the job it replaced.
2187 #
2188 # When sending a `CreateJobRequest`, you can update a job by specifying it
2189 # here. The job named here is stopped, and its intermediate state is
2190 # transferred to this job.
2191 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2192 # isn't contained in the submitted job.
2193 "stages": { # A mapping from each stage to the information about that stage.
2194 "a_key": { # Contains information about how a particular
2195 # google.dataflow.v1beta3.Step will be executed.
2196 "stepName": [ # The steps associated with the execution stage.
2197 # Note that stages may have several steps, and that a given step
2198 # might be run by more than one stage.
2199 "A String",
2200 ],
2201 },
2202 },
2203 },
2204 },
2205 ],
2206 }</pre>
2207 </div>
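<p>For reference, here is a minimal, hypothetical sketch of calling this list method with the google-api-python-client library. It assumes Application Default Credentials are available; the project ID, regional location, and the <code>filter</code>/<code>view</code> values shown are placeholder assumptions, not values taken from this page.</p>
<pre>
import google.auth
from googleapiclient import discovery

# Build an authenticated Dataflow API client.
credentials, _ = google.auth.default()
dataflow = discovery.build('dataflow', 'v1b3', credentials=credentials)

# List jobs in one regional endpoint; 'my-project' and 'us-central1' are
# placeholders.
response = dataflow.projects().locations().jobs().list(
    projectId='my-project',
    location='us-central1',
    filter='ACTIVE',
    view='JOB_VIEW_SUMMARY',
    pageSize=25).execute()

for job in response.get('jobs', []):
    print(job.get('id'), job.get('name'), job.get('currentState'))
</pre>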
2208
2209 <div class="method">
2210 <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
2211 <pre>Retrieves the next page of results.
2212
2213 Args:
2214 previous_request: The request for the previous page. (required)
2215 previous_response: The response from the request for the previous page. (required)
2216
2217 Returns:
2218 A request object that you can call 'execute()' on to request the next
2219 page. Returns None if there are no more items in the collection.
2220 </pre>
2221 </div>
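<p>Below is a short sketch of the usual pagination pattern built around <code>list()</code> and <code>list_next()</code>, assuming the <code>dataflow</code> client and the placeholder project/location from the example above.</p>
<pre>
# Page through every job: list() builds the first request, list_next() builds
# each subsequent request and returns None once the collection is exhausted.
jobs_api = dataflow.projects().locations().jobs()
request = jobs_api.list(projectId='my-project', location='us-central1',
                        pageSize=100)
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job.get('id'), job.get('currentState'))
    request = jobs_api.list_next(previous_request=request,
                                 previous_response=response)
</pre>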
2222
2223 <div class="method">
2224 <code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>
2225 <pre>Updates the state of an existing Cloud Dataflow job.
2226
2227 Args:
2228 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
2229 location: string, The location that contains this job. (required)
2230 jobId: string, The job ID. (required)
2231 body: object, The request body. (required)
2232 The object takes the form of:
2233
2234 { # Defines a job to be run by the Cloud Dataflow service.
2235 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2236 # If this field is set, the service will ensure its uniqueness.
2237 # The request to create a job will fail if the service has knowledge of a
2238 # previously submitted job with the same client's ID and job name.
2239 # The caller may use this field to ensure idempotence of job
2240 # creation across retried attempts to create a job.
2241 # By default, the field is empty and, in that case, the service ignores it.
2242 "requestedState": "A String", # The job's requested state.
2243 #
2244 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2245 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2246 # also be used to directly set a job's requested state to
2247 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2248 # job if it has not already reached a terminal state.
2249 "name": "A String", # The user-specified Cloud Dataflow job name.
2250 #
2251 # Only one Job with a given name may exist in a project at any
2252 # given time. If a caller attempts to create a Job with the same
2253 # name as an already-existing Job, the attempt returns the
2254 # existing Job.
2255 #
2256 # The name must match the regular expression
2257 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2258 "location": "A String", # The location that contains this job.
2259 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2260 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2261 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2262 "currentState": "A String", # The current state of the job.
2263 #
2264 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2265 # specified.
2266 #
2267 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2268 # terminal state. After a job has reached a terminal state, no
2269 # further state updates may be made.
2270 #
2271 # This field may be mutated by the Cloud Dataflow service;
2272 # callers cannot mutate it.
2273 "labels": { # User-defined labels for this job.
2274 #
2275 # The labels map can contain no more than 64 entries. Entries of the labels
2276 # map are UTF8 strings that comply with the following restrictions:
2277 #
2278 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2279 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2280 # * Both keys and values are additionally constrained to be <= 128 bytes in
2281 # size.
2282 "a_key": "A String",
2283 },
2284 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2285 # corresponding name prefixes of the new job.
2286 "a_key": "A String",
2287 },
2288 "id": "A String", # The unique ID of this job.
2289 #
2290 # This field is set by the Cloud Dataflow service when the Job is
2291 # created, and is immutable for the life of the job.
2292 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2293 "version": { # A structure describing which components and their versions of the service
2294 # are required in order to run the job.
2295 "a_key": "", # Properties of the object.
2296 },
2297 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2298 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2299 # this resource prefix, where {JOBNAME} is the value of the
2300 # job_name field. The resulting bucket and object prefix is used
2301 # as the prefix of the resources used to store temporary data
2302 # needed during the job execution. NOTE: This will override the
2303 # value in taskrunner_settings.
2304 # The supported resource type is:
2305 #
2306 # Google Cloud Storage:
2307 #
2308 # storage.googleapis.com/{bucket}/{object}
2309 # bucket.storage.googleapis.com/{object}
2310 "internalExperiments": { # Experimental settings.
2311 "a_key": "", # Properties of the object. Contains field @type with type URL.
2312 },
2313 "dataset": "A String", # The dataset for the current project where various workflow
2314 # related tables are stored.
2315 #
2316 # The supported resource type is:
2317 #
2318 # Google BigQuery:
2319 # bigquery.googleapis.com/{dataset}
2320 "experiments": [ # The list of experiments to enable.
2321 "A String",
2322 ],
2323 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2324 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2325 # options are passed through the service and are used to recreate the
2326 # SDK pipeline options on the worker in a language agnostic and platform
2327 # independent way.
2328 "a_key": "", # Properties of the object.
2329 },
2330 "userAgent": { # A description of the process that generated the request.
2331 "a_key": "", # Properties of the object.
2332 },
2333 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2334 # unspecified, the service will attempt to choose a reasonable
2335 # default. This should be in the form of the API service name,
2336 # e.g. "compute.googleapis.com".
2337 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2338 # specified in order for the job to have workers.
2339 { # Describes one particular pool of Cloud Dataflow workers to be
2340 # instantiated by the Cloud Dataflow service in order to perform the
2341 # computations required by a job. Note that a workflow job may use
2342 # multiple pools, in order to match the various computational
2343 # requirements of the various stages of the job.
2344 "diskSourceImage": "A String", # Fully qualified source image for disks.
2345 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2346 # using the standard Dataflow task runner. Users should ignore
2347 # this field.
2348 "workflowFileName": "A String", # The file to store the workflow in.
2349 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2350 # will not be uploaded.
2351 #
2352 # The supported resource type is:
2353 #
2354 # Google Cloud Storage:
2355 # storage.googleapis.com/{bucket}/{object}
2356 # bucket.storage.googleapis.com/{object}
2357 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2358 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2359 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2360 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2361 # "shuffle/v1beta1".
2362 "workerId": "A String", # The ID of the worker running this pipeline.
2363 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2364 #
2365 # When workers access Google Cloud APIs, they logically do so via
2366 # relative URLs. If this field is specified, it supplies the base
2367 # URL to use for resolving these relative URLs. The normative
2368 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2369 # Locators".
2370 #
2371 # If not specified, the default value is "http://www.googleapis.com/"
2372 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2373 # "dataflow/v1b3/projects".
2374 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2375 # storage.
2376 #
2377 # The supported resource type is:
2378 #
2379 # Google Cloud Storage:
2380 #
2381 # storage.googleapis.com/{bucket}/{object}
2382 # bucket.storage.googleapis.com/{object}
2383 },
2384 "vmId": "A String", # The ID string of the VM.
2385 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2386 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2387 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2388 # access the Cloud Dataflow API.
2389 "A String",
2390 ],
2391 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2392 # taskrunner; e.g. "root".
2393 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2394 #
2395 # When workers access Google Cloud APIs, they logically do so via
2396 # relative URLs. If this field is specified, it supplies the base
2397 # URL to use for resolving these relative URLs. The normative
2398 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2399 # Locators".
2400 #
2401 # If not specified, the default value is "http://www.googleapis.com/"
2402 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2403 # taskrunner; e.g. "wheel".
2404 "languageHint": "A String", # The suggested backend language.
2405 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2406 # console.
2407 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2408 "logDir": "A String", # The directory on the VM to store logs.
2409 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2410 "harnessCommand": "A String", # The command to launch the worker harness.
2411 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2412 # temporary storage.
2413 #
2414 # The supported resource type is:
2415 #
2416 # Google Cloud Storage:
2417 # storage.googleapis.com/{bucket}/{object}
2418 # bucket.storage.googleapis.com/{object}
2419 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2420 },
2421 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2422 # are supported.
2423 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
2424 # service will attempt to choose a reasonable default.
2425 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
2426 # the service will use the network "default".
2427 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
2428 # will attempt to choose a reasonable default.
2429 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
2430 # attempt to choose a reasonable default.
2431 "dataDisks": [ # Data disks that are used by a VM in this workflow.
2432 { # Describes the data disk used by a workflow job.
2433 "mountPoint": "A String", # Directory in a VM where disk is mounted.
2434 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
2435 # attempt to choose a reasonable default.
2436 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
2437 # must be a disk type appropriate to the project and zone in which
2438 # the workers will run. If unknown or unspecified, the service
2439 # will attempt to choose a reasonable default.
2440 #
2441 # For example, the standard persistent disk type is a resource name
2442 # typically ending in "pd-standard". If SSD persistent disks are
2443 # available, the resource name typically ends with "pd-ssd". The
2444 # actual valid values are defined by the Google Compute Engine API,
2445 # not by the Cloud Dataflow API; consult the Google Compute Engine
2446 # documentation for more information about determining the set of
2447 # available disk types for a particular project and zone.
2448 #
2449 # Google Compute Engine Disk types are local to a particular
2450 # project in a particular zone, and so the resource name will
2451 # typically look something like this:
2452 #
2453 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2454 },
2455 ],
2456 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2457 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2458 # `TEARDOWN_NEVER`.
2459 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2460 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2461 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2462 # down.
2463 #
2464 # If the workers are not torn down by the service, they will
2465 # continue to run and use Google Compute Engine VM resources in the
2466 # user's project until they are explicitly terminated by the user.
2467 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2468 # policy except for small, manually supervised test jobs.
2469 #
2470 # If unknown or unspecified, the service will attempt to choose a reasonable
2471 # default.
2472 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2473 # Compute Engine API.
2474 "ipConfiguration": "A String", # Configuration for VM IPs.
2475 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2476 # service will choose a number of threads (according to the number of cores
2477 # on the selected machine type for batch, or 1 by convention for streaming).
2478 "poolArgs": { # Extra arguments for this worker pool.
2479 "a_key": "", # Properties of the object. Contains field @type with type URL.
2480 },
2481 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2482 # execute the job. If zero or unspecified, the service will
2483 # attempt to choose a reasonable default.
2484 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2485 # harness, residing in Google Container Registry.
2486 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
2487 # the form "regions/REGION/subnetworks/SUBNETWORK".
2488 "packages": [ # Packages to be installed on workers.
2489 { # The packages that must be installed in order for a worker to run the
2490 # steps of the Cloud Dataflow job that will be assigned to its worker
2491 # pool.
2492 #
2493 # This is the mechanism by which the Cloud Dataflow SDK causes code to
2494 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2495 # might use this to install jars containing the user's code and all of the
2496 # various dependencies (libraries, data files, etc.) required in order
2497 # for that code to run.
2498 "location": "A String", # The resource to read the package from. The supported resource type is:
2499 #
2500 # Google Cloud Storage:
2501 #
2502 # storage.googleapis.com/{bucket}
2503 # bucket.storage.googleapis.com/
2504 "name": "A String", # The name of the package.
2505 },
2506 ],
2507 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2508 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2509 "algorithm": "A String", # The algorithm to use for autoscaling.
2510 },
2511 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
2512 # select a default set of packages which are useful to worker
2513 # harnesses written in a particular language.
2514 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
2515 # attempt to choose a reasonable default.
2516 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2517 "a_key": "A String",
2518 },
2519 },
2520 ],
2521 },
2522 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2523 # A description of the user pipeline and stages through which it is executed.
2524 # Created by Cloud Dataflow service. Only retrieved with
2525 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2526 # form. This data is provided by the Dataflow service for ease of visualizing
2527 # the pipeline and interpretting Dataflow provided metrics.
2528 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2529 { # Description of the type, names/ids, and input/outputs for a transform.
2530 "kind": "A String", # Type of transform.
2531 "name": "A String", # User provided name for this transform instance.
2532 "inputCollectionName": [ # User names for all collection inputs to this transform.
2533 "A String",
2534 ],
2535 "displayData": [ # Transform-specific display data.
2536 { # Data provided with a pipeline or transform to provide descriptive info.
2537 "shortStrValue": "A String", # A possible additional shorter value to display.
2538 # For example a java_class_name_value of com.mypackage.MyDoFn
2539 # will be stored with MyDoFn as the short_str_value and
2540 # com.mypackage.MyDoFn as the java_class_name value.
2541 # short_str_value can be displayed and java_class_name_value
2542 # will be displayed as a tooltip.
2543 "durationValue": "A String", # Contains value if the data is of duration type.
2544 "url": "A String", # An optional full URL.
2545 "floatValue": 3.14, # Contains value if the data is of float type.
2546 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2547 # language namespace (i.e. python module) which defines the display data.
2548 # This allows a dax monitoring system to specially handle the data
2549 # and perform custom rendering.
2550 "javaClassValue": "A String", # Contains value if the data is of java class type.
2551 "label": "A String", # An optional label to display in a dax UI for the element.
2552 "boolValue": True or False, # Contains value if the data is of a boolean type.
2553 "strValue": "A String", # Contains value if the data is of string type.
2554 "key": "A String", # The key identifying the display data.
2555 # This is intended to be used as a label for the display data
2556 # when viewed in a dax monitoring system.
2557 "int64Value": "A String", # Contains value if the data is of int64 type.
2558 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2559 },
2560 ],
2561 "outputCollectionName": [ # User names for all collection outputs to this transform.
2562 "A String",
2563 ],
2564 "id": "A String", # SDK generated id of this transform instance.
2565 },
2566 ],
2567 "displayData": [ # Pipeline level display data.
2568 { # Data provided with a pipeline or transform to provide descriptive info.
2569 "shortStrValue": "A String", # A possible additional shorter value to display.
2570 # For example a java_class_name_value of com.mypackage.MyDoFn
2571 # will be stored with MyDoFn as the short_str_value and
2572 # com.mypackage.MyDoFn as the java_class_name value.
2573 # short_str_value can be displayed and java_class_name_value
2574 # will be displayed as a tooltip.
2575 "durationValue": "A String", # Contains value if the data is of duration type.
2576 "url": "A String", # An optional full URL.
2577 "floatValue": 3.14, # Contains value if the data is of float type.
2578 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2579 # language namespace (i.e. python module) which defines the display data.
2580 # This allows a dax monitoring system to specially handle the data
2581 # and perform custom rendering.
2582 "javaClassValue": "A String", # Contains value if the data is of java class type.
2583 "label": "A String", # An optional label to display in a dax UI for the element.
2584 "boolValue": True or False, # Contains value if the data is of a boolean type.
2585 "strValue": "A String", # Contains value if the data is of string type.
2586 "key": "A String", # The key identifying the display data.
2587 # This is intended to be used as a label for the display data
2588 # when viewed in a dax monitoring system.
2589 "int64Value": "A String", # Contains value if the data is of int64 type.
2590 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2591 },
2592 ],
2593 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2594 { # Description of the composing transforms, names/ids, and input/outputs of a
2595 # stage of execution. Some composing transforms and sources may have been
2596 # generated by the Dataflow service during execution planning.
2597 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2598 { # Description of an interstitial value between transforms in an execution
2599 # stage.
2600 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2601 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2602 # source is most closely associated.
2603 "name": "A String", # Dataflow service generated name for this source.
2604 },
2605 ],
2606 "kind": "A String", # Type of tranform this stage is executing.
2607 "name": "A String", # Dataflow service generated name for this stage.
2608 "outputSource": [ # Output sources for this stage.
2609 { # Description of an input or output of an execution stage.
2610 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2611 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2612 # source is most closely associated.
2613 "name": "A String", # Dataflow service generated name for this source.
2614 "sizeBytes": "A String", # Size of the source, if measurable.
2615 },
2616 ],
2617 "inputSource": [ # Input sources for this stage.
2618 { # Description of an input or output of an execution stage.
2619 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2620 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2621 # source is most closely associated.
2622 "name": "A String", # Dataflow service generated name for this source.
2623 "sizeBytes": "A String", # Size of the source, if measurable.
2624 },
2625 ],
2626 "componentTransform": [ # Transforms that comprise this execution stage.
2627 { # Description of a transform executed as part of an execution stage.
2628 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2629 "originalTransform": "A String", # User name for the original user transform with which this transform is
2630 # most closely associated.
2631 "name": "A String", # Dataflow service generated name for this source.
2632 },
2633 ],
2634 "id": "A String", # Dataflow service generated id for this stage.
2635 },
2636 ],
2637 },
2638 "steps": [ # The top-level steps that constitute the entire job.
2639 { # Defines a particular step within a Cloud Dataflow job.
2640 #
2641 # A job consists of multiple steps, each of which performs some
2642 # specific operation as part of the overall job. Data is typically
2643 # passed from one step to another as part of the job.
2644 #
2645 # Here's an example of a sequence of steps which together implement a
2646 # Map-Reduce job:
2647 #
2648 # * Read a collection of data from some source, parsing the
2649 # collection's elements.
2650 #
2651 # * Validate the elements.
2652 #
2653 # * Apply a user-defined function to map each element to some value
2654 # and extract an element-specific key value.
2655 #
2656 # * Group elements with the same key into a single element with
2657 # that key, transforming a multiply-keyed collection into a
2658 # uniquely-keyed collection.
2659 #
2660 # * Write the elements out to some data sink.
2661 #
2662 # Note that the Cloud Dataflow service may be used to run many different
2663 # types of jobs, not just Map-Reduce.
2664 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2665 "properties": { # Named properties associated with the step. Each kind of
2666 # predefined step has its own required set of properties.
2667 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
2668 "a_key": "", # Properties of the object.
2669 },
2670 "name": "A String", # The name that identifies the step. This must be unique for each
2671 # step with respect to all other steps in the Cloud Dataflow job.
2672 },
2673 ],
2674 "currentStateTime": "A String", # The timestamp associated with the current state.
2675 "tempFiles": [ # A set of files the system should be aware of that are used
2676 # for temporary storage. These temporary files will be
2677 # removed on job completion.
2678 # No duplicates are allowed.
2679 # No file patterns are supported.
2680 #
2681 # The supported files are:
2682 #
2683 # Google Cloud Storage:
2684 #
2685 # storage.googleapis.com/{bucket}/{object}
2686 # bucket.storage.googleapis.com/{object}
2687 "A String",
2688 ],
2689 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2690 # callers cannot mutate it.
2691 { # A message describing the state of a particular execution stage.
2692 "executionStageName": "A String", # The name of the execution stage.
2693 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
2694 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2695 },
2696 ],
2697 "type": "A String", # The type of Cloud Dataflow job.
2698 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2699 # Cloud Dataflow service.
2700 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2701 # of the job it replaced.
2702 #
2703 # When sending a `CreateJobRequest`, you can update a job by specifying it
2704 # here. The job named here is stopped, and its intermediate state is
2705 # transferred to this job.
2706 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2707 # isn't contained in the submitted job.
2708 "stages": { # A mapping from each stage to the information about that stage.
2709 "a_key": { # Contains information about how a particular
2710 # google.dataflow.v1beta3.Step will be executed.
2711 "stepName": [ # The steps associated with the execution stage.
2712 # Note that stages may have several steps, and that a given step
2713 # might be run by more than one stage.
2714 "A String",
2715 ],
2716 },
2717 },
2718 },
2719 }
2720
2721 x__xgafv: string, V1 error format.
2722 Allowed values
2723 1 - v1 error format
2724 2 - v2 error format
2725
2726 Returns:
2727 An object of the form:
2728
2729 { # Defines a job to be run by the Cloud Dataflow service.
2730 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2731 # If this field is set, the service will ensure its uniqueness.
2732 # The request to create a job will fail if the service has knowledge of a
2733 # previously submitted job with the same client's ID and job name.
2734 # The caller may use this field to ensure idempotence of job
2735 # creation across retried attempts to create a job.
2736 # By default, the field is empty and, in that case, the service ignores it.
2737 "requestedState": "A String", # The job's requested state.
2738 #
2739 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2740 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2741 # also be used to directly set a job's requested state to
2742 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2743 # job if it has not already reached a terminal state.
2744 "name": "A String", # The user-specified Cloud Dataflow job name.
2745 #
2746 # Only one Job with a given name may exist in a project at any
2747 # given time. If a caller attempts to create a Job with the same
2748 # name as an already-existing Job, the attempt returns the
2749 # existing Job.
2750 #
2751 # The name must match the regular expression
2752 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2753 "location": "A String", # The location that contains this job.
2754 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2755 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2756 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2757 "currentState": "A String", # The current state of the job.
2758 #
2759 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2760 # specified.
2761 #
2762 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2763 # terminal state. After a job has reached a terminal state, no
2764 # further state updates may be made.
2765 #
2766 # This field may be mutated by the Cloud Dataflow service;
2767 # callers cannot mutate it.
2768 "labels": { # User-defined labels for this job.
2769 #
2770 # The labels map can contain no more than 64 entries. Entries of the labels
2771 # map are UTF8 strings that comply with the following restrictions:
2772 #
2773 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2774 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2775 # * Both keys and values are additionally constrained to be <= 128 bytes in
2776 # size.
2777 "a_key": "A String",
2778 },
2779 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2780 # corresponding name prefixes of the new job.
2781 "a_key": "A String",
2782 },
2783 "id": "A String", # The unique ID of this job.
2784 #
2785 # This field is set by the Cloud Dataflow service when the Job is
2786 # created, and is immutable for the life of the job.
2787 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2788 "version": { # A structure describing which components and their versions of the service
2789 # are required in order to run the job.
2790 "a_key": "", # Properties of the object.
2791 },
2792 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2793 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2794 # this resource prefix, where {JOBNAME} is the value of the
2795 # job_name field. The resulting bucket and object prefix is used
2796 # as the prefix of the resources used to store temporary data
2797 # needed during the job execution. NOTE: This will override the
2798 # value in taskrunner_settings.
2799 # The supported resource type is:
2800 #
2801 # Google Cloud Storage:
2802 #
2803 # storage.googleapis.com/{bucket}/{object}
2804 # bucket.storage.googleapis.com/{object}
2805 "internalExperiments": { # Experimental settings.
2806 "a_key": "", # Properties of the object. Contains field @type with type URL.
2807 },
2808 "dataset": "A String", # The dataset for the current project where various workflow
2809 # related tables are stored.
2810 #
2811 # The supported resource type is:
2812 #
2813 # Google BigQuery:
2814 # bigquery.googleapis.com/{dataset}
2815 "experiments": [ # The list of experiments to enable.
2816 "A String",
2817 ],
2818 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2819 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2820 # options are passed through the service and are used to recreate the
2821 # SDK pipeline options on the worker in a language agnostic and platform
2822 # independent way.
2823 "a_key": "", # Properties of the object.
2824 },
2825 "userAgent": { # A description of the process that generated the request.
2826 "a_key": "", # Properties of the object.
2827 },
2828 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2829 # unspecified, the service will attempt to choose a reasonable
2830 # default. This should be in the form of the API service name,
2831 # e.g. "compute.googleapis.com".
2832 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2833 # specified in order for the job to have workers.
2834 { # Describes one particular pool of Cloud Dataflow workers to be
2835 # instantiated by the Cloud Dataflow service in order to perform the
2836 # computations required by a job. Note that a workflow job may use
2837 # multiple pools, in order to match the various computational
2838 # requirements of the various stages of the job.
2839 "diskSourceImage": "A String", # Fully qualified source image for disks.
2840 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2841 # using the standard Dataflow task runner. Users should ignore
2842 # this field.
2843 "workflowFileName": "A String", # The file to store the workflow in.
2844 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2845 # will not be uploaded.
2846 #
2847 # The supported resource type is:
2848 #
2849 # Google Cloud Storage:
2850 # storage.googleapis.com/{bucket}/{object}
2851 # bucket.storage.googleapis.com/{object}
2852 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2853 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2854 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2855 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2856 # "shuffle/v1beta1".
2857 "workerId": "A String", # The ID of the worker running this pipeline.
2858 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2859 #
2860 # When workers access Google Cloud APIs, they logically do so via
2861 # relative URLs. If this field is specified, it supplies the base
2862 # URL to use for resolving these relative URLs. The normative
2863 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2864 # Locators".
2865 #
2866 # If not specified, the default value is "http://www.googleapis.com/"
2867 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2868 # "dataflow/v1b3/projects".
2869 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2870 # storage.
2871 #
2872 # The supported resource type is:
2873 #
2874 # Google Cloud Storage:
2875 #
2876 # storage.googleapis.com/{bucket}/{object}
2877 # bucket.storage.googleapis.com/{object}
2878 },
2879 "vmId": "A String", # The ID string of the VM.
2880 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2881 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2882 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2883 # access the Cloud Dataflow API.
2884 "A String",
2885 ],
2886 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2887 # taskrunner; e.g. "root".
2888 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2889 #
2890 # When workers access Google Cloud APIs, they logically do so via
2891 # relative URLs. If this field is specified, it supplies the base
2892 # URL to use for resolving these relative URLs. The normative
2893 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2894 # Locators".
2895 #
2896 # If not specified, the default value is "http://www.googleapis.com/"
2897 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2898 # taskrunner; e.g. "wheel".
2899 "languageHint": "A String", # The suggested backend language.
2900 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2901 # console.
2902 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2903 "logDir": "A String", # The directory on the VM to store logs.
2904 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2905 "harnessCommand": "A String", # The command to launch the worker harness.
2906 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2907 # temporary storage.
2908 #
2909 # The supported resource type is:
2910 #
2911 # Google Cloud Storage:
2912 # storage.googleapis.com/{bucket}/{object}
2913 # bucket.storage.googleapis.com/{object}
2914 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2915 },
2916 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2917 # are supported.
2918 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
2919 # service will attempt to choose a reasonable default.
2920 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
2921 # the service will use the network "default".
2922 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
2923 # will attempt to choose a reasonable default.
2924 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
2925 # attempt to choose a reasonable default.
2926 "dataDisks": [ # Data disks that are used by a VM in this workflow.
2927 { # Describes the data disk used by a workflow job.
2928 "mountPoint": "A String", # Directory in a VM where disk is mounted.
2929 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
2930 # attempt to choose a reasonable default.
2931 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
2932 # must be a disk type appropriate to the project and zone in which
2933 # the workers will run. If unknown or unspecified, the service
2934 # will attempt to choose a reasonable default.
2935 #
2936 # For example, the standard persistent disk type is a resource name
2937 # typically ending in "pd-standard". If SSD persistent disks are
2938 # available, the resource name typically ends with "pd-ssd". The
2939 # actual valid values are defined by the Google Compute Engine API,
2940 # not by the Cloud Dataflow API; consult the Google Compute Engine
2941 # documentation for more information about determining the set of
2942 # available disk types for a particular project and zone.
2943 #
2944 # Google Compute Engine Disk types are local to a particular
2945 # project in a particular zone, and so the resource name will
2946 # typically look something like this:
2947 #
2948 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2949 },
2950 ],
2951 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2952 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2953 # `TEARDOWN_NEVER`.
2954 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2955 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2956 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2957 # down.
2958 #
2959 # If the workers are not torn down by the service, they will
2960 # continue to run and use Google Compute Engine VM resources in the
2961 # user's project until they are explicitly terminated by the user.
2962 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2963 # policy except for small, manually supervised test jobs.
2964 #
2965 # If unknown or unspecified, the service will attempt to choose a reasonable
2966 # default.
2967 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2968 # Compute Engine API.
2969 "ipConfiguration": "A String", # Configuration for VM IPs.
2970 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2971 # service will choose a number of threads (according to the number of cores
2972 # on the selected machine type for batch, or 1 by convention for streaming).
2973 "poolArgs": { # Extra arguments for this worker pool.
2974 "a_key": "", # Properties of the object. Contains field @type with type URL.
2975 },
2976 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2977 # execute the job. If zero or unspecified, the service will
2978 # attempt to choose a reasonable default.
2979 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2980 # harness, residing in Google Container Registry.
2981 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
2982 # the form "regions/REGION/subnetworks/SUBNETWORK".
2983 "packages": [ # Packages to be installed on workers.
2984 { # The packages that must be installed in order for a worker to run the
2985 # steps of the Cloud Dataflow job that will be assigned to its worker
2986 # pool.
2987 #
2988 # This is the mechanism by which the Cloud Dataflow SDK causes code to
2989 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2990 # might use this to install jars containing the user's code and all of the
2991 # various dependencies (libraries, data files, etc.) required in order
2992 # for that code to run.
2993 "location": "A String", # The resource to read the package from. The supported resource type is:
2994 #
2995 # Google Cloud Storage:
2996 #
2997 # storage.googleapis.com/{bucket}
2998 # bucket.storage.googleapis.com/
2999 "name": "A String", # The name of the package.
3000 },
3001 ],
3002 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
3003 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
3004 "algorithm": "A String", # The algorithm to use for autoscaling.
3005 },
3006 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
3007 # select a default set of packages which are useful to worker
3008 # harnesses written in a particular language.
3009 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
3010 # attempt to choose a reasonable default.
3011 "metadata": { # Metadata to set on the Google Compute Engine VMs.
3012 "a_key": "A String",
3013 },
3014 },
3015 ],
3016 },
3017 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
3018 # A description of the user pipeline and stages through which it is executed.
3019 # Created by Cloud Dataflow service. Only retrieved with
3020 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
3021 # form. This data is provided by the Dataflow service for ease of visualizing
3022 # the pipeline and interpretting Dataflow provided metrics.
3023 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
3024 { # Description of the type, names/ids, and input/outputs for a transform.
3025 "kind": "A String", # Type of transform.
3026 "name": "A String", # User provided name for this transform instance.
3027 "inputCollectionName": [ # User names for all collection inputs to this transform.
3028 "A String",
3029 ],
3030 "displayData": [ # Transform-specific display data.
3031 { # Data provided with a pipeline or transform to provide descriptive info.
3032 "shortStrValue": "A String", # A possible additional shorter value to display.
3033 # For example a java_class_name_value of com.mypackage.MyDoFn
3034 # will be stored with MyDoFn as the short_str_value and
3035 # com.mypackage.MyDoFn as the java_class_name value.
3036 # short_str_value can be displayed and java_class_name_value
3037 # will be displayed as a tooltip.
3038 "durationValue": "A String", # Contains value if the data is of duration type.
3039 "url": "A String", # An optional full URL.
3040 "floatValue": 3.14, # Contains value if the data is of float type.
3041 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3042 # language namespace (i.e. python module) which defines the display data.
3043 # This allows a dax monitoring system to specially handle the data
3044 # and perform custom rendering.
3045 "javaClassValue": "A String", # Contains value if the data is of java class type.
3046 "label": "A String", # An optional label to display in a dax UI for the element.
3047 "boolValue": True or False, # Contains value if the data is of a boolean type.
3048 "strValue": "A String", # Contains value if the data is of string type.
3049 "key": "A String", # The key identifying the display data.
3050 # This is intended to be used as a label for the display data
3051 # when viewed in a dax monitoring system.
3052 "int64Value": "A String", # Contains value if the data is of int64 type.
3053 "timestampValue": "A String", # Contains value if the data is of timestamp type.
3054 },
3055 ],
3056 "outputCollectionName": [ # User names for all collection outputs to this transform.
3057 "A String",
3058 ],
3059 "id": "A String", # SDK generated id of this transform instance.
3060 },
3061 ],
3062 "displayData": [ # Pipeline level display data.
3063 { # Data provided with a pipeline or transform to provide descriptive info.
3064 "shortStrValue": "A String", # A possible additional shorter value to display.
3065 # For example a java_class_name_value of com.mypackage.MyDoFn
3066 # will be stored with MyDoFn as the short_str_value and
3067 # com.mypackage.MyDoFn as the java_class_name value.
3068 # short_str_value can be displayed and java_class_name_value
3069 # will be displayed as a tooltip.
3070 "durationValue": "A String", # Contains value if the data is of duration type.
3071 "url": "A String", # An optional full URL.
3072 "floatValue": 3.14, # Contains value if the data is of float type.
3073 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3074 # language namespace (i.e. python module) which defines the display data.
3075 # This allows a dax monitoring system to specially handle the data
3076 # and perform custom rendering.
3077 "javaClassValue": "A String", # Contains value if the data is of java class type.
3078 "label": "A String", # An optional label to display in a dax UI for the element.
3079 "boolValue": True or False, # Contains value if the data is of a boolean type.
3080 "strValue": "A String", # Contains value if the data is of string type.
3081 "key": "A String", # The key identifying the display data.
3082 # This is intended to be used as a label for the display data
3083 # when viewed in a dax monitoring system.
3084 "int64Value": "A String", # Contains value if the data is of int64 type.
3085 "timestampValue": "A String", # Contains value if the data is of timestamp type.
3086 },
3087 ],
3088 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3089 { # Description of the composing transforms, names/ids, and input/outputs of a
3090 # stage of execution. Some composing transforms and sources may have been
3091 # generated by the Dataflow service during execution planning.
3092 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3093 { # Description of an interstitial value between transforms in an execution
3094 # stage.
3095 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3096 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3097 # source is most closely associated.
3098 "name": "A String", # Dataflow service generated name for this source.
3099 },
3100 ],
3101 "kind": "A String", # Type of tranform this stage is executing.
3102 "name": "A String", # Dataflow service generated name for this stage.
3103 "outputSource": [ # Output sources for this stage.
3104 { # Description of an input or output of an execution stage.
3105 "userName": "A String", # Human-readable name for this source; may be user or system generated.
3106 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3107 # source is most closely associated.
3108 "name": "A String", # Dataflow service generated name for this source.
3109 "sizeBytes": "A String", # Size of the source, if measurable.
3110 },
3111 ],
3112 "inputSource": [ # Input sources for this stage.
3113 { # Description of an input or output of an execution stage.
3114 "userName": "A String", # Human-readable name for this source; may be user or system generated.
3115 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3116 # source is most closely associated.
3117 "name": "A String", # Dataflow service generated name for this source.
3118 "sizeBytes": "A String", # Size of the source, if measurable.
3119 },
3120 ],
3121 "componentTransform": [ # Transforms that comprise this execution stage.
3122 { # Description of a transform executed as part of an execution stage.
3123 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3124 "originalTransform": "A String", # User name for the original user transform with which this transform is
3125 # most closely associated.
3126 "name": "A String", # Dataflow service generated name for this source.
3127 },
3128 ],
3129 "id": "A String", # Dataflow service generated id for this stage.
3130 },
3131 ],
3132 },
3133 "steps": [ # The top-level steps that constitute the entire job.
3134 { # Defines a particular step within a Cloud Dataflow job.
3135 #
3136 # A job consists of multiple steps, each of which performs some
3137 # specific operation as part of the overall job. Data is typically
3138 # passed from one step to another as part of the job.
3139 #
3140 # Here's an example of a sequence of steps which together implement a
3141 # Map-Reduce job:
3142 #
3143 # * Read a collection of data from some source, parsing the
3144 # collection's elements.
3145 #
3146 # * Validate the elements.
3147 #
3148 # * Apply a user-defined function to map each element to some value
3149 # and extract an element-specific key value.
3150 #
3151 # * Group elements with the same key into a single element with
3152 # that key, transforming a multiply-keyed collection into a
3153 # uniquely-keyed collection.
3154 #
3155 # * Write the elements out to some data sink.
3156 #
3157 # Note that the Cloud Dataflow service may be used to run many different
3158 # types of jobs, not just Map-Reduce.
3159 "kind": "A String", # The kind of step in the Cloud Dataflow job.
3160 "properties": { # Named properties associated with the step. Each kind of
3161 # predefined step has its own required set of properties.
3162 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
3163 "a_key": "", # Properties of the object.
3164 },
3165 "name": "A String", # The name that identifies the step. This must be unique for each
3166 # step with respect to all other steps in the Cloud Dataflow job.
3167 },
3168 ],
3169 "currentStateTime": "A String", # The timestamp associated with the current state.
3170 "tempFiles": [ # A set of files the system should be aware of that are used
3171 # for temporary storage. These temporary files will be
3172 # removed on job completion.
3173 # No duplicates are allowed.
3174 # No file patterns are supported.
3175 #
3176 # The supported files are:
3177 #
3178 # Google Cloud Storage:
3179 #
3180 # storage.googleapis.com/{bucket}/{object}
3181 # bucket.storage.googleapis.com/{object}
3182 "A String",
3183 ],
3184 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
3185 # callers cannot mutate it.
3186 { # A message describing the state of a particular execution stage.
3187 "executionStageName": "A String", # The name of the execution stage.
3188 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
3189 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
3190 },
3191 ],
3192 "type": "A String", # The type of Cloud Dataflow job.
3193 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
3194 # Cloud Dataflow service.
3195 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3196 # of the job it replaced.
3197 #
3198 # When sending a `CreateJobRequest`, you can update a job by specifying it
3199 # here. The job named here is stopped, and its intermediate state is
3200 # transferred to this job.
3201 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
3202 # isn't contained in the submitted job.
3203 "stages": { # A mapping from each stage to the information about that stage.
3204 "a_key": { # Contains information about how a particular
3205 # google.dataflow.v1beta3.Step will be executed.
3206 "stepName": [ # The steps associated with the execution stage.
3207 # Note that stages may have several steps, and that a given step
3208 # might be run by more than one stage.
3209 "A String",
3210 ],
3211 },
3212 },
3213 },
3214 }</pre>
3215 </div>
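<p>As a worked example of the <code>requestedState</code> semantics described above, here is a hypothetical sketch that requests cancellation of a running job through <code>update()</code>; the project, location, and job IDs are placeholders, and only the field being changed is populated in the body.</p>
<pre>
# Ask the service to cancel the job; JOB_STATE_CANCELLED is one of the
# terminal states that may be requested via update().
body = {'requestedState': 'JOB_STATE_CANCELLED'}

result = dataflow.projects().locations().jobs().update(
    projectId='my-project',
    location='us-central1',
    jobId='2017-01-01_00_00_00-0000000000000000000',
    body=body).execute()

print(result.get('currentState'), result.get('requestedState'))
</pre>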
3216
3217 </body></html>