1 <html><body>
2 <style>
3
4 body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13 }
14
15 body {
16 font-size: 13px;
17 padding: 1em;
18 }
19
20 h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23 }
24
25 h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28 }
29
30 h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34 }
35
36 pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39 }
40
41 pre {
42 margin-top: 0.5em;
43 }
44
45 h1, h2, h3, p {
46 font-family: Arial, sans-serif;
47 }
48
49 h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51 }
52
53 .toc_element {
54 margin-top: 0.5em;
55 }
56
57 .firstline {
58 margin-left: 2em;
59 }
60
61 .method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66 }
67
68 .details {
69 font-weight: bold;
70 font-size: 14px;
71 }
72
73 </style>
74
75 <h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
76 <h2>Instance Methods</h2>
77 <p class="toc_element">
78 <code><a href="#create">create(projectId, body, x__xgafv=None)</a></code></p>
79 <p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80 <p class="toc_element">
81 <code><a href="#get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</a></code></p>
82 <p class="firstline">Get the template associated with a template.</p>
83 <p class="toc_element">
84 <code><a href="#launch">launch(projectId, body, gcsPath=None, location=None, validateOnly=None, x__xgafv=None)</a></code></p>
85 <p class="firstline">Launch a template.</p>
86 <h3>Method Details</h3>
87 <div class="method">
88 <code class="details" id="create">create(projectId, body, x__xgafv=None)</code>
89 <pre>Creates a Cloud Dataflow job from a template.
90
91 Args:
92 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
93 body: object, The request body. (required)
94 The object takes the form of:
95
96 { # A request to create a Cloud Dataflow job from a template.
97 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
98 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
99 # template if not specified.
100 "zone": "A String", # The Compute Engine [availability
101 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
102 # for launching worker instances to run your pipeline.
103 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
104 # Use with caution.
105 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
106 # Must be a valid Cloud Storage URL, beginning with `gs://`.
107 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
108 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
109 # available to your pipeline during execution, from 1 to 1000.
110 },
111 "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
112 # create the job.
113 # Must be a valid Cloud Storage URL, beginning with `gs://`.
114 "location": "A String", # The location to which to direct the request.
115 "parameters": { # The runtime parameters to pass to the job.
116 "a_key": "A String",
117 },
118 "jobName": "A String", # Required. The job name to use for the created job.
119 }
120
121 x__xgafv: string, V1 error format.
122 Allowed values
123 1 - v1 error format
124 2 - v2 error format
125
126 Returns:
127 An object of the form:
128
129 { # Defines a job to be run by the Cloud Dataflow service.
130 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
131 # If this field is set, the service will ensure its uniqueness.
132 # The request to create a job will fail if the service has knowledge of a
133 # previously submitted job with the same client's ID and job name.
134 # The caller may use this field to ensure idempotence of job
135 # creation across retried attempts to create a job.
136 # By default, the field is empty and, in that case, the service ignores it.
137 "requestedState": "A String", # The job's requested state.
138 #
139 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
140 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
141 # also be used to directly set a job's requested state to
142 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
143 # job if it has not already reached a terminal state.
144 "name": "A String", # The user-specified Cloud Dataflow job name.
145 #
146 # Only one Job with a given name may exist in a project at any
147 # given time. If a caller attempts to create a Job with the same
148 # name as an already-existing Job, the attempt returns the
149 # existing Job.
150 #
151 # The name must match the regular expression
152 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
153 "location": "A String", # The location that contains this job.
154 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
155 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
156 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
157 "currentState": "A String", # The current state of the job.
158 #
159 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
160 # specified.
161 #
162 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
163 # terminal state. After a job has reached a terminal state, no
164 # further state updates may be made.
165 #
166 # This field may be mutated by the Cloud Dataflow service;
167 # callers cannot mutate it.
168 "labels": { # User-defined labels for this job.
169 #
170 # The labels map can contain no more than 64 entries. Entries of the labels
171 # map are UTF8 strings that comply with the following restrictions:
172 #
173 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
174 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
175 # * Both keys and values are additionally constrained to be <= 128 bytes in
176 # size.
177 "a_key": "A String",
178 },
179 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
180 # corresponding name prefixes of the new job.
181 "a_key": "A String",
182 },
183 "id": "A String", # The unique ID of this job.
184 #
185 # This field is set by the Cloud Dataflow service when the Job is
186 # created, and is immutable for the life of the job.
187 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
188 "version": { # A structure describing which components and their versions of the service
189 # are required in order to run the job.
190 "a_key": "", # Properties of the object.
191 },
192 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
193 # storage. The system will append the suffix "/temp-{JOBNAME}" to
194 # this resource prefix, where {JOBNAME} is the value of the
195 # job_name field. The resulting bucket and object prefix is used
196 # as the prefix of the resources used to store temporary data
197 # needed during the job execution. NOTE: This will override the
198 # value in taskrunner_settings.
199 # The supported resource type is:
200 #
201 # Google Cloud Storage:
202 #
203 # storage.googleapis.com/{bucket}/{object}
204 # bucket.storage.googleapis.com/{object}
205 "internalExperiments": { # Experimental settings.
206 "a_key": "", # Properties of the object. Contains field @type with type URL.
207 },
208 "dataset": "A String", # The dataset for the current project where various workflow
209 # related tables are stored.
210 #
211 # The supported resource type is:
212 #
213 # Google BigQuery:
214 # bigquery.googleapis.com/{dataset}
215 "experiments": [ # The list of experiments to enable.
216 "A String",
217 ],
218 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
219 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
220 # options are passed through the service and are used to recreate the
221 # SDK pipeline options on the worker in a language agnostic and platform
222 # independent way.
223 "a_key": "", # Properties of the object.
224 },
225 "userAgent": { # A description of the process that generated the request.
226 "a_key": "", # Properties of the object.
227 },
228 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
229 # unspecified, the service will attempt to choose a reasonable
230 # default. This should be in the form of the API service name,
231 # e.g. "compute.googleapis.com".
232 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
233 # specified in order for the job to have workers.
234 { # Describes one particular pool of Cloud Dataflow workers to be
235 # instantiated by the Cloud Dataflow service in order to perform the
236 # computations required by a job. Note that a workflow job may use
237 # multiple pools, in order to match the various computational
238 # requirements of the various stages of the job.
239 "diskSourceImage": "A String", # Fully qualified source image for disks.
240 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
241 # using the standard Dataflow task runner. Users should ignore
242 # this field.
243 "workflowFileName": "A String", # The file to store the workflow in.
244 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
245 # will not be uploaded.
246 #
247 # The supported resource type is:
248 #
249 # Google Cloud Storage:
250 # storage.googleapis.com/{bucket}/{object}
251 # bucket.storage.googleapis.com/{object}
252 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
253 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
254 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
255 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
256 # "shuffle/v1beta1".
257 "workerId": "A String", # The ID of the worker running this pipeline.
258 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
259 #
260 # When workers access Google Cloud APIs, they logically do so via
261 # relative URLs. If this field is specified, it supplies the base
262 # URL to use for resolving these relative URLs. The normative
263 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
264 # Locators".
265 #
266 # If not specified, the default value is "http://www.googleapis.com/"
267 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
268 # "dataflow/v1b3/projects".
269 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
270 # storage.
271 #
272 # The supported resource type is:
273 #
274 # Google Cloud Storage:
275 #
276 # storage.googleapis.com/{bucket}/{object}
277 # bucket.storage.googleapis.com/{object}
278 },
279 "vmId": "A String", # The ID string of the VM.
280 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
281 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
282 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
283 # access the Cloud Dataflow API.
284 "A String",
285 ],
286 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
287 # taskrunner; e.g. "root".
288 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
289 #
290 # When workers access Google Cloud APIs, they logically do so via
291 # relative URLs. If this field is specified, it supplies the base
292 # URL to use for resolving these relative URLs. The normative
293 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
294 # Locators".
295 #
296 # If not specified, the default value is "http://www.googleapis.com/"
297 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
298 # taskrunner; e.g. "wheel".
299 "languageHint": "A String", # The suggested backend language.
300 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
301 # console.
302 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
303 "logDir": "A String", # The directory on the VM to store logs.
304 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
305 "harnessCommand": "A String", # The command to launch the worker harness.
306 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
307 # temporary storage.
308 #
309 # The supported resource type is:
310 #
311 # Google Cloud Storage:
312 # storage.googleapis.com/{bucket}/{object}
313 # bucket.storage.googleapis.com/{object}
314 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
315 },
316 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
317 # are supported.
318 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
319 # service will attempt to choose a reasonable default.
320 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
321 # the service will use the network "default".
322 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
323 # will attempt to choose a reasonable default.
324 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
325 # attempt to choose a reasonable default.
326 "dataDisks": [ # Data disks that are used by a VM in this workflow.
327 { # Describes the data disk used by a workflow job.
328 "mountPoint": "A String", # Directory in a VM where disk is mounted.
329 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
330 # attempt to choose a reasonable default.
331 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
332 # must be a disk type appropriate to the project and zone in which
333 # the workers will run. If unknown or unspecified, the service
334 # will attempt to choose a reasonable default.
335 #
336 # For example, the standard persistent disk type is a resource name
337 # typically ending in "pd-standard". If SSD persistent disks are
338 # available, the resource name typically ends with "pd-ssd". The
339 # actual valid values are defined by the Google Compute Engine API,
340 # not by the Cloud Dataflow API; consult the Google Compute Engine
341 # documentation for more information about determining the set of
342 # available disk types for a particular project and zone.
343 #
344 # Google Compute Engine Disk types are local to a particular
345 # project in a particular zone, and so the resource name will
346 # typically look something like this:
347 #
348 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
349 },
350 ],
351 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
352 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
353 # `TEARDOWN_NEVER`.
354 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
355 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
356 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
357 # down.
358 #
359 # If the workers are not torn down by the service, they will
360 # continue to run and use Google Compute Engine VM resources in the
361 # user's project until they are explicitly terminated by the user.
362 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
363 # policy except for small, manually supervised test jobs.
364 #
365 # If unknown or unspecified, the service will attempt to choose a reasonable
366 # default.
367 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
368 # Compute Engine API.
369 "ipConfiguration": "A String", # Configuration for VM IPs.
370 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
371 # service will choose a number of threads (according to the number of cores
372 # on the selected machine type for batch, or 1 by convention for streaming).
373 "poolArgs": { # Extra arguments for this worker pool.
374 "a_key": "", # Properties of the object. Contains field @type with type URL.
375 },
376 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
377 # execute the job. If zero or unspecified, the service will
378 # attempt to choose a reasonable default.
379 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
380 # harness, residing in Google Container Registry.
381 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
382 # the form "regions/REGION/subnetworks/SUBNETWORK".
383 "packages": [ # Packages to be installed on workers.
384 { # The packages that must be installed in order for a worker to run the
385 # steps of the Cloud Dataflow job that will be assigned to its worker
386 # pool.
387 #
388 # This is the mechanism by which the Cloud Dataflow SDK causes code to
389 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
390 # might use this to install jars containing the user's code and all of the
391 # various dependencies (libraries, data files, etc.) required in order
392 # for that code to run.
393 "location": "A String", # The resource to read the package from. The supported resource type is:
394 #
395 # Google Cloud Storage:
396 #
397 # storage.googleapis.com/{bucket}
398 # bucket.storage.googleapis.com/
399 "name": "A String", # The name of the package.
400 },
401 ],
402 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
403 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
404 "algorithm": "A String", # The algorithm to use for autoscaling.
405 },
406 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
407 # select a default set of packages which are useful to worker
408 # harnesses written in a particular language.
409 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
410 # attempt to choose a reasonable default.
411 "metadata": { # Metadata to set on the Google Compute Engine VMs.
412 "a_key": "A String",
413 },
414 },
415 ],
416 },
417 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
418 # A description of the user pipeline and stages through which it is executed.
419 # Created by Cloud Dataflow service. Only retrieved with
420 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
421 # form. This data is provided by the Dataflow service for ease of visualizing
422 # the pipeline and interpreting Dataflow-provided metrics.
423 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
424 { # Description of the type, names/ids, and input/outputs for a transform.
425 "kind": "A String", # Type of transform.
426 "name": "A String", # User provided name for this transform instance.
427 "inputCollectionName": [ # User names for all collection inputs to this transform.
428 "A String",
429 ],
430 "displayData": [ # Transform-specific display data.
431 { # Data provided with a pipeline or transform to provide descriptive info.
432 "shortStrValue": "A String", # A possible additional shorter value to display.
433 # For example a java_class_name_value of com.mypackage.MyDoFn
434 # will be stored with MyDoFn as the short_str_value and
435 # com.mypackage.MyDoFn as the java_class_name value.
436 # short_str_value can be displayed and java_class_name_value
437 # will be displayed as a tooltip.
438 "durationValue": "A String", # Contains value if the data is of duration type.
439 "url": "A String", # An optional full URL.
440 "floatValue": 3.14, # Contains value if the data is of float type.
441 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
442 # language namespace (i.e. python module) which defines the display data.
443 # This allows a dax monitoring system to specially handle the data
444 # and perform custom rendering.
445 "javaClassValue": "A String", # Contains value if the data is of java class type.
446 "label": "A String", # An optional label to display in a dax UI for the element.
447 "boolValue": True or False, # Contains value if the data is of a boolean type.
448 "strValue": "A String", # Contains value if the data is of string type.
449 "key": "A String", # The key identifying the display data.
450 # This is intended to be used as a label for the display data
451 # when viewed in a dax monitoring system.
452 "int64Value": "A String", # Contains value if the data is of int64 type.
453 "timestampValue": "A String", # Contains value if the data is of timestamp type.
454 },
455 ],
456 "outputCollectionName": [ # User names for all collection outputs to this transform.
457 "A String",
458 ],
459 "id": "A String", # SDK generated id of this transform instance.
460 },
461 ],
462 "displayData": [ # Pipeline level display data.
463 { # Data provided with a pipeline or transform to provide descriptive info.
464 "shortStrValue": "A String", # A possible additional shorter value to display.
465 # For example a java_class_name_value of com.mypackage.MyDoFn
466 # will be stored with MyDoFn as the short_str_value and
467 # com.mypackage.MyDoFn as the java_class_name value.
468 # short_str_value can be displayed and java_class_name_value
469 # will be displayed as a tooltip.
470 "durationValue": "A String", # Contains value if the data is of duration type.
471 "url": "A String", # An optional full URL.
472 "floatValue": 3.14, # Contains value if the data is of float type.
473 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
474 # language namespace (i.e. python module) which defines the display data.
475 # This allows a dax monitoring system to specially handle the data
476 # and perform custom rendering.
477 "javaClassValue": "A String", # Contains value if the data is of java class type.
478 "label": "A String", # An optional label to display in a dax UI for the element.
479 "boolValue": True or False, # Contains value if the data is of a boolean type.
480 "strValue": "A String", # Contains value if the data is of string type.
481 "key": "A String", # The key identifying the display data.
482 # This is intended to be used as a label for the display data
483 # when viewed in a dax monitoring system.
484 "int64Value": "A String", # Contains value if the data is of int64 type.
485 "timestampValue": "A String", # Contains value if the data is of timestamp type.
486 },
487 ],
488 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
489 { # Description of the composing transforms, names/ids, and input/outputs of a
490 # stage of execution. Some composing transforms and sources may have been
491 # generated by the Dataflow service during execution planning.
492 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
493 { # Description of an interstitial value between transforms in an execution
494 # stage.
495 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
496 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
497 # source is most closely associated.
498 "name": "A String", # Dataflow service generated name for this source.
499 },
500 ],
501 "kind": "A String", # Type of tranform this stage is executing.
502 "name": "A String", # Dataflow service generated name for this stage.
503 "outputSource": [ # Output sources for this stage.
504 { # Description of an input or output of an execution stage.
505 "userName": "A String", # Human-readable name for this source; may be user or system generated.
506 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
507 # source is most closely associated.
508 "name": "A String", # Dataflow service generated name for this source.
509 "sizeBytes": "A String", # Size of the source, if measurable.
510 },
511 ],
512 "inputSource": [ # Input sources for this stage.
513 { # Description of an input or output of an execution stage.
514 "userName": "A String", # Human-readable name for this source; may be user or system generated.
515 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
516 # source is most closely associated.
517 "name": "A String", # Dataflow service generated name for this source.
518 "sizeBytes": "A String", # Size of the source, if measurable.
519 },
520 ],
521 "componentTransform": [ # Transforms that comprise this execution stage.
522 { # Description of a transform executed as part of an execution stage.
523 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
524 "originalTransform": "A String", # User name for the original user transform with which this transform is
525 # most closely associated.
526 "name": "A String", # Dataflow service generated name for this source.
527 },
528 ],
529 "id": "A String", # Dataflow service generated id for this stage.
530 },
531 ],
532 },
533 "steps": [ # The top-level steps that constitute the entire job.
534 { # Defines a particular step within a Cloud Dataflow job.
535 #
536 # A job consists of multiple steps, each of which performs some
537 # specific operation as part of the overall job. Data is typically
538 # passed from one step to another as part of the job.
539 #
540 # Here's an example of a sequence of steps which together implement a
541 # Map-Reduce job:
542 #
543 # * Read a collection of data from some source, parsing the
544 # collection's elements.
545 #
546 # * Validate the elements.
547 #
548 # * Apply a user-defined function to map each element to some value
549 # and extract an element-specific key value.
550 #
551 # * Group elements with the same key into a single element with
552 # that key, transforming a multiply-keyed collection into a
553 # uniquely-keyed collection.
554 #
555 # * Write the elements out to some data sink.
556 #
557 # Note that the Cloud Dataflow service may be used to run many different
558 # types of jobs, not just Map-Reduce.
559 "kind": "A String", # The kind of step in the Cloud Dataflow job.
560 "properties": { # Named properties associated with the step. Each kind of
561 # predefined step has its own required set of properties.
562 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
563 "a_key": "", # Properties of the object.
564 },
565 "name": "A String", # The name that identifies the step. This must be unique for each
566 # step with respect to all other steps in the Cloud Dataflow job.
567 },
568 ],
569 "currentStateTime": "A String", # The timestamp associated with the current state.
570 "tempFiles": [ # A set of files the system should be aware of that are used
571 # for temporary storage. These temporary files will be
572 # removed on job completion.
573 # No duplicates are allowed.
574 # No file patterns are supported.
575 #
576 # The supported files are:
577 #
578 # Google Cloud Storage:
579 #
580 # storage.googleapis.com/{bucket}/{object}
581 # bucket.storage.googleapis.com/{object}
582 "A String",
583 ],
584 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
585 # callers cannot mutate it.
586 { # A message describing the state of a particular execution stage.
587 "executionStageName": "A String", # The name of the execution stage.
588 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
589 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
590 },
591 ],
592 "type": "A String", # The type of Cloud Dataflow job.
593 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
594 # Cloud Dataflow service.
595 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
596 # of the job it replaced.
597 #
598 # When sending a `CreateJobRequest`, you can update a job by specifying it
599 # here. The job named here is stopped, and its intermediate state is
600 # transferred to this job.
601 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
602 # isn't contained in the submitted job.
603 "stages": { # A mapping from each stage to the information about that stage.
604 "a_key": { # Contains information about how a particular
605 # google.dataflow.v1beta3.Step will be executed.
606 "stepName": [ # The steps associated with the execution stage.
607 # Note that stages may have several steps, and that a given step
608 # might be run by more than one stage.
609 "A String",
610 ],
611 },
612 },
613 },
614 }</pre>
615 </div>
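<p>A minimal sketch of calling this method (same client-setup assumptions as above; the project ID, bucket, template path, and parameter names are placeholders, not values defined by this API):</p>
<pre>
# Hypothetical example: create a job from a staged template.
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')
body = {
    'jobName': 'example-template-job',                        # placeholder
    'gcsPath': 'gs://example-bucket/templates/my-template',   # placeholder
    'parameters': {'inputFile': 'gs://example-bucket/input.txt'},
    'environment': {'tempLocation': 'gs://example-bucket/temp'},
}
job = dataflow.projects().templates().create(
    projectId='example-project', body=body).execute()
print(job.get('id'), job.get('currentState'))
</pre>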
616
617 <div class="method">
618 <code class="details" id="get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</code>
619 <pre>Get information about the template stored at the given Cloud Storage path.
620
621 Args:
622 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
623 gcsPath: string, Required. A Cloud Storage path to the template from which to
624 create the job.
625 Must be a valid Cloud Storage URL, beginning with `gs://`.
626 location: string, The location to which to direct the request.
627 x__xgafv: string, V1 error format.
628 Allowed values
629 1 - v1 error format
630 2 - v2 error format
631 view: string, The view to retrieve. Defaults to METADATA_ONLY.
632
633 Returns:
634 An object of the form:
635
636 { # The response to a GetTemplate request.
637 "status": { # The `Status` type defines a logical error model that is suitable for different # The status of the get template request. Any problems with the
638 # request will be indicated in the error_details.
639 # programming environments, including REST APIs and RPC APIs. It is used by
640 # [gRPC](https://github.com/grpc). The error model is designed to be:
641 #
642 # - Simple to use and understand for most users
643 # - Flexible enough to meet unexpected needs
644 #
645 # # Overview
646 #
647 # The `Status` message contains three pieces of data: error code, error message,
648 # and error details. The error code should be an enum value of
649 # google.rpc.Code, but it may accept additional error codes if needed. The
650 # error message should be a developer-facing English message that helps
651 # developers *understand* and *resolve* the error. If a localized user-facing
652 # error message is needed, put the localized message in the error details or
653 # localize it in the client. The optional error details may contain arbitrary
654 # information about the error. There is a predefined set of error detail types
655 # in the package `google.rpc` that can be used for common error conditions.
656 #
657 # # Language mapping
658 #
659 # The `Status` message is the logical representation of the error model, but it
660 # is not necessarily the actual wire format. When the `Status` message is
661 # exposed in different client libraries and different wire protocols, it can be
662 # mapped differently. For example, it will likely be mapped to some exceptions
663 # in Java, but more likely mapped to some error codes in C.
664 #
665 # # Other uses
666 #
667 # The error model and the `Status` message can be used in a variety of
668 # environments, either with or without APIs, to provide a
669 # consistent developer experience across different environments.
670 #
671 # Example uses of this error model include:
672 #
673 # - Partial errors. If a service needs to return partial errors to the client,
674 # it may embed the `Status` in the normal response to indicate the partial
675 # errors.
676 #
677 # - Workflow errors. A typical workflow has multiple steps. Each step may
678 # have a `Status` message for error reporting.
679 #
680 # - Batch operations. If a client uses batch request and batch response, the
681 # `Status` message should be used directly inside batch response, one for
682 # each error sub-response.
683 #
684 # - Asynchronous operations. If an API call embeds asynchronous operation
685 # results in its response, the status of those operations should be
686 # represented directly using the `Status` message.
687 #
688 # - Logging. If some API errors are stored in logs, the message `Status` could
689 # be used directly after any stripping needed for security/privacy reasons.
690 "message": "A String", # A developer-facing error message, which should be in English. Any
691 # user-facing error message should be localized and sent in the
692 # google.rpc.Status.details field, or localized by the client.
693 "code": 42, # The status code, which should be an enum value of google.rpc.Code.
694 "details": [ # A list of messages that carry the error details. There will be a
695 # common set of message types for APIs to use.
696 {
697 "a_key": "", # Properties of the object. Contains field @type with type URL.
698 },
699 ],
700 },
701 "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
702 # parameters, etc.
703 "name": "A String", # Required. The name of the template.
704 "parameters": [ # The parameters for the template.
705 { # Metadata for a specific parameter.
706 "regexes": [ # Optional. Regexes that the parameter must match.
707 "A String",
708 ],
709 "helpText": "A String", # Required. The help text to display for the parameter.
710 "name": "A String", # Required. The name of the parameter.
711 "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
712 "label": "A String", # Required. The label to display for the parameter.
713 },
714 ],
715 "description": "A String", # Optional. A description of the template.
716 },
717 }</pre>
718 </div>
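<p>A minimal sketch of calling this method (same assumptions as the create example; the gs:// path is a placeholder):</p>
<pre>
# Hypothetical example: fetch template metadata and list its parameters.
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')
response = dataflow.projects().templates().get(
    projectId='example-project',
    gcsPath='gs://example-bucket/templates/my-template',
    view='METADATA_ONLY').execute()       # METADATA_ONLY is the documented default
for param in response.get('metadata', {}).get('parameters', []):
    print(param['name'], '-', param.get('helpText'))
</pre>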
719
720 <div class="method">
721 <code class="details" id="launch">launch(projectId, body, gcsPath=None, location=None, validateOnly=None, x__xgafv=None)</code>
722 <pre>Launch a template.
723
724 Args:
725 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
726 body: object, The request body. (required)
727 The object takes the form of:
728
729 { # Parameters to provide to the template being launched.
730 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
731 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
732 # template if not specified.
733 "zone": "A String", # The Compute Engine [availability
734 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
735 # for launching worker instances to run your pipeline.
736 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
737 # Use with caution.
738 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
739 # Must be a valid Cloud Storage URL, beginning with `gs://`.
740 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
741 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
742 # available to your pipeline during execution, from 1 to 1000.
743 },
744 "parameters": { # The runtime parameters to pass to the job.
745 "a_key": "A String",
746 },
747 "jobName": "A String", # Required. The job name to use for the created job.
748 }
749
750 gcsPath: string, Required. A Cloud Storage path to the template from which to create
751 the job.
752 Must be a valid Cloud Storage URL, beginning with `gs://`.
753 location: string, The location to which to direct the request.
754 validateOnly: boolean, If true, the request is validated but not actually executed.
755 Defaults to false.
756 x__xgafv: string, V1 error format.
757 Allowed values
758 1 - v1 error format
759 2 - v2 error format
760
761 Returns:
762 An object of the form:
763
764 { # Response to the request to launch a template.
765 "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
766 # the job was successfully launched.
767 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
768 # If this field is set, the service will ensure its uniqueness.
769 # The request to create a job will fail if the service has knowledge of a
770 # previously submitted job with the same client's ID and job name.
771 # The caller may use this field to ensure idempotence of job
772 # creation across retried attempts to create a job.
773 # By default, the field is empty and, in that case, the service ignores it.
774 "requestedState": "A String", # The job's requested state.
775 #
776 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
777 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
778 # also be used to directly set a job's requested state to
779 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
780 # job if it has not already reached a terminal state.
781 "name": "A String", # The user-specified Cloud Dataflow job name.
782 #
783 # Only one Job with a given name may exist in a project at any
784 # given time. If a caller attempts to create a Job with the same
785 # name as an already-existing Job, the attempt returns the
786 # existing Job.
787 #
788 # The name must match the regular expression
789 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
790 "location": "A String", # The location that contains this job.
791 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
792 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
793 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
794 "currentState": "A String", # The current state of the job.
795 #
796 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
797 # specified.
798 #
799 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
800 # terminal state. After a job has reached a terminal state, no
801 # further state updates may be made.
802 #
803 # This field may be mutated by the Cloud Dataflow service;
804 # callers cannot mutate it.
805 "labels": { # User-defined labels for this job.
806 #
807 # The labels map can contain no more than 64 entries. Entries of the labels
808 # map are UTF8 strings that comply with the following restrictions:
809 #
810 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
811 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
812 # * Both keys and values are additionally constrained to be <= 128 bytes in
813 # size.
814 "a_key": "A String",
815 },
816 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
817 # corresponding name prefixes of the new job.
818 "a_key": "A String",
819 },
820 "id": "A String", # The unique ID of this job.
821 #
822 # This field is set by the Cloud Dataflow service when the Job is
823 # created, and is immutable for the life of the job.
824 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
825 "version": { # A structure describing which components and their versions of the service
826 # are required in order to run the job.
827 "a_key": "", # Properties of the object.
828 },
829 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
830 # storage. The system will append the suffix "/temp-{JOBNAME}" to
831 # this resource prefix, where {JOBNAME} is the value of the
832 # job_name field. The resulting bucket and object prefix is used
833 # as the prefix of the resources used to store temporary data
834 # needed during the job execution. NOTE: This will override the
835 # value in taskrunner_settings.
836 # The supported resource type is:
837 #
838 # Google Cloud Storage:
839 #
840 # storage.googleapis.com/{bucket}/{object}
841 # bucket.storage.googleapis.com/{object}
842 "internalExperiments": { # Experimental settings.
843 "a_key": "", # Properties of the object. Contains field @type with type URL.
844 },
845 "dataset": "A String", # The dataset for the current project where various workflow
846 # related tables are stored.
847 #
848 # The supported resource type is:
849 #
850 # Google BigQuery:
851 # bigquery.googleapis.com/{dataset}
852 "experiments": [ # The list of experiments to enable.
853 "A String",
854 ],
855 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
856 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
857 # options are passed through the service and are used to recreate the
858 # SDK pipeline options on the worker in a language agnostic and platform
859 # independent way.
860 "a_key": "", # Properties of the object.
861 },
862 "userAgent": { # A description of the process that generated the request.
863 "a_key": "", # Properties of the object.
864 },
865 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
866 # unspecified, the service will attempt to choose a reasonable
867 # default. This should be in the form of the API service name,
868 # e.g. "compute.googleapis.com".
869 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
870 # specified in order for the job to have workers.
871 { # Describes one particular pool of Cloud Dataflow workers to be
872 # instantiated by the Cloud Dataflow service in order to perform the
873 # computations required by a job. Note that a workflow job may use
874 # multiple pools, in order to match the various computational
875 # requirements of the various stages of the job.
876 "diskSourceImage": "A String", # Fully qualified source image for disks.
877 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
878 # using the standard Dataflow task runner. Users should ignore
879 # this field.
880 "workflowFileName": "A String", # The file to store the workflow in.
881 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
882 # will not be uploaded.
883 #
884 # The supported resource type is:
885 #
886 # Google Cloud Storage:
887 # storage.googleapis.com/{bucket}/{object}
888 # bucket.storage.googleapis.com/{object}
889 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
890 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
891 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
892 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
893 # "shuffle/v1beta1".
894 "workerId": "A String", # The ID of the worker running this pipeline.
895 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
896 #
897 # When workers access Google Cloud APIs, they logically do so via
898 # relative URLs. If this field is specified, it supplies the base
899 # URL to use for resolving these relative URLs. The normative
900 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
901 # Locators".
902 #
903 # If not specified, the default value is "http://www.googleapis.com/"
904 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
905 # "dataflow/v1b3/projects".
906 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
907 # storage.
908 #
909 # The supported resource type is:
910 #
911 # Google Cloud Storage:
912 #
913 # storage.googleapis.com/{bucket}/{object}
914 # bucket.storage.googleapis.com/{object}
915 },
916 "vmId": "A String", # The ID string of the VM.
917 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
918 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
919 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
920 # access the Cloud Dataflow API.
921 "A String",
922 ],
923 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
924 # taskrunner; e.g. "root".
925 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
926 #
927 # When workers access Google Cloud APIs, they logically do so via
928 # relative URLs. If this field is specified, it supplies the base
929 # URL to use for resolving these relative URLs. The normative
930 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
931 # Locators".
932 #
933 # If not specified, the default value is "http://www.googleapis.com/"
934 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
935 # taskrunner; e.g. "wheel".
936 "languageHint": "A String", # The suggested backend language.
937 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
938 # console.
939 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
940 "logDir": "A String", # The directory on the VM to store logs.
941 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
942 "harnessCommand": "A String", # The command to launch the worker harness.
943 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
944 # temporary storage.
945 #
946 # The supported resource type is:
947 #
948 # Google Cloud Storage:
949 # storage.googleapis.com/{bucket}/{object}
950 # bucket.storage.googleapis.com/{object}
951 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
952 },
953 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
954 # are supported.
955 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
956 # service will attempt to choose a reasonable default.
957 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
958 # the service will use the network "default".
959 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
960 # will attempt to choose a reasonable default.
961 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
962 # attempt to choose a reasonable default.
963 "dataDisks": [ # Data disks that are used by a VM in this workflow.
964 { # Describes the data disk used by a workflow job.
965 "mountPoint": "A String", # Directory in a VM where disk is mounted.
966 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
967 # attempt to choose a reasonable default.
968 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
969 # must be a disk type appropriate to the project and zone in which
970 # the workers will run. If unknown or unspecified, the service
971 # will attempt to choose a reasonable default.
972 #
973 # For example, the standard persistent disk type is a resource name
974 # typically ending in "pd-standard". If SSD persistent disks are
975 # available, the resource name typically ends with "pd-ssd". The
976 # actual valid values are defined by the Google Compute Engine API,
977 # not by the Cloud Dataflow API; consult the Google Compute Engine
978 # documentation for more information about determining the set of
979 # available disk types for a particular project and zone.
980 #
981 # Google Compute Engine Disk types are local to a particular
982 # project in a particular zone, and so the resource name will
983 # typically look something like this:
984 #
985 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
986 },
987 ],
988 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
989 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
990 # `TEARDOWN_NEVER`.
991 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
992 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
993 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
994 # down.
995 #
996 # If the workers are not torn down by the service, they will
997 # continue to run and use Google Compute Engine VM resources in the
998 # user's project until they are explicitly terminated by the user.
999 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1000 # policy except for small, manually supervised test jobs.
1001 #
1002 # If unknown or unspecified, the service will attempt to choose a reasonable
1003 # default.
1004 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1005 # Compute Engine API.
1006 "ipConfiguration": "A String", # Configuration for VM IPs.
1007 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1008 # service will choose a number of threads (according to the number of cores
1009 # on the selected machine type for batch, or 1 by convention for streaming).
1010 "poolArgs": { # Extra arguments for this worker pool.
1011 "a_key": "", # Properties of the object. Contains field @type with type URL.
1012 },
1013 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1014 # execute the job. If zero or unspecified, the service will
1015 # attempt to choose a reasonable default.
1016 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1017 # harness, residing in Google Container Registry.
1018 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1019 # the form "regions/REGION/subnetworks/SUBNETWORK".
1020 "packages": [ # Packages to be installed on workers.
1021 { # The packages that must be installed in order for a worker to run the
1022 # steps of the Cloud Dataflow job that will be assigned to its worker
1023 # pool.
1024 #
1025 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1026 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1027 # might use this to install jars containing the user's code and all of the
1028 # various dependencies (libraries, data files, etc.) required in order
1029 # for that code to run.
1030 "location": "A String", # The resource to read the package from. The supported resource type is:
1031 #
1032 # Google Cloud Storage:
1033 #
1034 # storage.googleapis.com/{bucket}
1035 # bucket.storage.googleapis.com/
1036 "name": "A String", # The name of the package.
1037 },
1038 ],
1039 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1040 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1041 "algorithm": "A String", # The algorithm to use for autoscaling.
1042 },
1043 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1044 # select a default set of packages which are useful to worker
1045 # harnesses written in a particular language.
1046 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1047 # attempt to choose a reasonable default.
1048 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1049 "a_key": "A String",
1050 },
1051 },
1052 ],
1053 },
1054 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1055 # A description of the user pipeline and stages through which it is executed.
1056 # Created by Cloud Dataflow service. Only retrieved with
1057 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1058 # form. This data is provided by the Dataflow service for ease of visualizing
1059 # the pipeline and interpreting Dataflow-provided metrics.
1060 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1061 { # Description of the type, names/ids, and input/outputs for a transform.
1062 "kind": "A String", # Type of transform.
1063 "name": "A String", # User provided name for this transform instance.
1064 "inputCollectionName": [ # User names for all collection inputs to this transform.
1065 "A String",
1066 ],
1067 "displayData": [ # Transform-specific display data.
1068 { # Data provided with a pipeline or transform to provide descriptive info.
1069 "shortStrValue": "A String", # A possible additional shorter value to display.
1070 # For example a java_class_name_value of com.mypackage.MyDoFn
1071 # will be stored with MyDoFn as the short_str_value and
1072 # com.mypackage.MyDoFn as the java_class_name value.
1073 # short_str_value can be displayed and java_class_name_value
1074 # will be displayed as a tooltip.
1075 "durationValue": "A String", # Contains value if the data is of duration type.
1076 "url": "A String", # An optional full URL.
1077 "floatValue": 3.14, # Contains value if the data is of float type.
1078 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1079 # language namespace (i.e. python module) which defines the display data.
1080 # This allows a dax monitoring system to specially handle the data
1081 # and perform custom rendering.
1082 "javaClassValue": "A String", # Contains value if the data is of java class type.
1083 "label": "A String", # An optional label to display in a dax UI for the element.
1084 "boolValue": True or False, # Contains value if the data is of a boolean type.
1085 "strValue": "A String", # Contains value if the data is of string type.
1086 "key": "A String", # The key identifying the display data.
1087 # This is intended to be used as a label for the display data
1088 # when viewed in a dax monitoring system.
1089 "int64Value": "A String", # Contains value if the data is of int64 type.
1090 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1091 },
1092 ],
1093 "outputCollectionName": [ # User names for all collection outputs to this transform.
1094 "A String",
1095 ],
1096 "id": "A String", # SDK generated id of this transform instance.
1097 },
1098 ],
1099 "displayData": [ # Pipeline level display data.
1100 { # Data provided with a pipeline or transform to provide descriptive info.
1101 "shortStrValue": "A String", # A possible additional shorter value to display.
1102 # For example a java_class_name_value of com.mypackage.MyDoFn
1103 # will be stored with MyDoFn as the short_str_value and
1104 # com.mypackage.MyDoFn as the java_class_name value.
1105 # short_str_value can be displayed and java_class_name_value
1106 # will be displayed as a tooltip.
1107 "durationValue": "A String", # Contains value if the data is of duration type.
1108 "url": "A String", # An optional full URL.
1109 "floatValue": 3.14, # Contains value if the data is of float type.
1110 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1111 # language namespace (i.e. python module) which defines the display data.
1112 # This allows a dax monitoring system to specially handle the data
1113 # and perform custom rendering.
1114 "javaClassValue": "A String", # Contains value if the data is of java class type.
1115 "label": "A String", # An optional label to display in a dax UI for the element.
1116 "boolValue": True or False, # Contains value if the data is of a boolean type.
1117 "strValue": "A String", # Contains value if the data is of string type.
1118 "key": "A String", # The key identifying the display data.
1119 # This is intended to be used as a label for the display data
1120 # when viewed in a dax monitoring system.
1121 "int64Value": "A String", # Contains value if the data is of int64 type.
1122 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1123 },
1124 ],
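# Pipeline-level display data follows the same conventions as the
# transform-level display data illustrated above.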
1125 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1126 { # Description of the composing transforms, names/ids, and input/outputs of a
1127 # stage of execution. Some composing transforms and sources may have been
1128 # generated by the Dataflow service during execution planning.
1129 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1130 { # Description of an interstitial value between transforms in an execution
1131 # stage.
1132 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1133 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1134 # source is most closely associated.
1135 "name": "A String", # Dataflow service generated name for this source.
1136 },
1137 ],
"kind": "A String", # Type of transform this stage is executing.
1139 "name": "A String", # Dataflow service generated name for this stage.
1140 "outputSource": [ # Output sources for this stage.
1141 { # Description of an input or output of an execution stage.
1142 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1143 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1144 # source is most closely associated.
1145 "name": "A String", # Dataflow service generated name for this source.
1146 "sizeBytes": "A String", # Size of the source, if measurable.
1147 },
1148 ],
1149 "inputSource": [ # Input sources for this stage.
1150 { # Description of an input or output of an execution stage.
1151 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1152 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1153 # source is most closely associated.
1154 "name": "A String", # Dataflow service generated name for this source.
1155 "sizeBytes": "A String", # Size of the source, if measurable.
1156 },
1157 ],
1158 "componentTransform": [ # Transforms that comprise this execution stage.
1159 { # Description of a transform executed as part of an execution stage.
1160 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1161 "originalTransform": "A String", # User name for the original user transform with which this transform is
1162 # most closely associated.
"name": "A String", # Dataflow service generated name for this transform.
1164 },
1165 ],
1166 "id": "A String", # Dataflow service generated id for this stage.
1167 },
1168 ],
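# Illustrative note: execution stages are produced by the service's execution
# planner, so a single stage may fuse several user transforms. A hypothetical
# stage could list multiple componentTransform entries whose originalTransform
# values point back to the user transform names listed in
# originalPipelineTransform above.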
1169 },
1170 "steps": [ # The top-level steps that constitute the entire job.
1171 { # Defines a particular step within a Cloud Dataflow job.
1172 #
1173 # A job consists of multiple steps, each of which performs some
1174 # specific operation as part of the overall job. Data is typically
1175 # passed from one step to another as part of the job.
1176 #
1177 # Here's an example of a sequence of steps which together implement a
1178 # Map-Reduce job:
1179 #
1180 # * Read a collection of data from some source, parsing the
1181 # collection's elements.
1182 #
1183 # * Validate the elements.
1184 #
1185 # * Apply a user-defined function to map each element to some value
1186 # and extract an element-specific key value.
1187 #
1188 # * Group elements with the same key into a single element with
1189 # that key, transforming a multiply-keyed collection into a
1190 # uniquely-keyed collection.
1191 #
1192 # * Write the elements out to some data sink.
1193 #
1194 # Note that the Cloud Dataflow service may be used to run many different
1195 # types of jobs, not just Map-Reduce.
1196 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1197 "properties": { # Named properties associated with the step. Each kind of
1198 # predefined step has its own required set of properties.
1199 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1200 "a_key": "", # Properties of the object.
1201 },
1202 "name": "A String", # The name that identifies the step. This must be unique for each
1203 # step with respect to all other steps in the Cloud Dataflow job.
1204 },
1205 ],
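# Illustrative note: for the Map-Reduce style pipeline sketched above, this
# list would typically contain a read step, one or more ParDo-style steps for
# the map and validation logic, a grouping step, and a write step, each with
# its own kind, a unique name, and kind-specific properties (the properties
# are only returned with JOB_VIEW_ALL).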
1206 "currentStateTime": "A String", # The timestamp associated with the current state.
1207 "tempFiles": [ # A set of files the system should be aware of that are used
1208 # for temporary storage. These temporary files will be
1209 # removed on job completion.
1210 # No duplicates are allowed.
1211 # No file patterns are supported.
1212 #
1213 # The supported files are:
1214 #
1215 # Google Cloud Storage:
1216 #
1217 # storage.googleapis.com/{bucket}/{object}
1218 # bucket.storage.googleapis.com/{object}
1219 "A String",
1220 ],
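# Illustrative example (placeholder bucket and object names): a valid entry
# would be "storage.googleapis.com/my-bucket/temp/shuffle-0001", matching the
# {bucket}/{object} pattern above; wildcard patterns such as "temp/*" are not
# accepted.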
1221 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1222 # callers cannot mutate it.
1223 { # A message describing the state of a particular execution stage.
1224 "executionStageName": "A String", # The name of the execution stage.
"executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1226 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1227 },
1228 ],
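# Illustrative example (the stage name is a placeholder): an entry such as
# {"executionStageName": "F12", "executionStageState": "JOB_STATE_RUNNING",
#  "currentStateTime": "2017-01-01T00:00:00Z"} reuses the JOB_STATE_* values
# of the job-level JobState enum.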
1229 "type": "A String", # The type of Cloud Dataflow job.
1230 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1231 # Cloud Dataflow service.
1232 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1233 # of the job it replaced.
1234 #
1235 # When sending a `CreateJobRequest`, you can update a job by specifying it
1236 # here. The job named here is stopped, and its intermediate state is
1237 # transferred to this job.
1238 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1239 # isn't contained in the submitted job.
1240 "stages": { # A mapping from each stage to the information about that stage.
1241 "a_key": { # Contains information about how a particular
1242 # google.dataflow.v1beta3.Step will be executed.
1243 "stepName": [ # The steps associated with the execution stage.
1244 # Note that stages may have several steps, and that a given step
1245 # might be run by more than one stage.
1246 "A String",
1247 ],
1248 },
1249 },
1250 },
1251 },
1252 }
1253
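# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the generated reference above). It shows
# one way the launch response documented here might be consumed with
# google-api-python-client. The project ID, bucket, template path, and job
# name are placeholders, and Application Default Credentials are assumed.

from googleapiclient.discovery import build

# Build a client for the Dataflow v1b3 API.
dataflow = build('dataflow', 'v1b3')

# Launch a job from a template stored in Cloud Storage.
response = dataflow.projects().templates().launch(
    projectId='my-project',                          # placeholder project
    gcsPath='gs://my-bucket/templates/my-template',  # placeholder template
    body={'jobName': 'example-launch', 'parameters': {}},
).execute()

# The response wraps the launched job, whose shape is documented above.
job = response.get('job', {})
print(job.get('type'), job.get('createTime'))

# Per-stage states are populated by the Cloud Dataflow service.
for stage in job.get('stageStates', []):
    print(stage['executionStageName'], stage['executionStageState'])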