1 <html><body>
2 <style>
3
4 body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13 }
14
15 body {
16 font-size: 13px;
17 padding: 1em;
18 }
19
20 h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23 }
24
25 h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28 }
29
30 h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34 }
35
36 pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39 }
40
41 pre {
42 margin-top: 0.5em;
43 }
44
45 h1, h2, h3, p {
46 font-family: Arial, sans-serif;
47 }
48
49 h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51 }
52
53 .toc_element {
54 margin-top: 0.5em;
55 }
56
57 .firstline {
58 margin-left: 2em;
59 }
60
61 .method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66 }
67
68 .details {
69 font-weight: bold;
70 font-size: 14px;
71 }
72
73 </style>
74
75 <h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.templates.html">templates</a></h1>
76 <h2>Instance Methods</h2>
77 <p class="toc_element">
78 <code><a href="#create">create(projectId, location, body, x__xgafv=None)</a></code></p>
79 <p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80 <p class="toc_element">
81 <code><a href="#get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</a></code></p>
82 <p class="firstline">Get the template metadata associated with a template.</p>
83 <p class="toc_element">
84 <code><a href="#launch">launch(projectId, location, body, gcsPath=None, validateOnly=None, x__xgafv=None)</a></code></p>
85 <p class="firstline">Launch a template.</p>
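<p>A minimal sketch of reaching this resource with the Google API Python client library, assuming application default credentials are configured in the environment; the service name and version match this page, and everything else is illustrative:</p>
<pre>
# Build the Dataflow client and navigate to the
# projects.locations.templates resource documented on this page.
# Assumes application default credentials are available.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
templates = service.projects().locations().templates()
</pre>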
86 <h3>Method Details</h3>
87 <div class="method">
88 <code class="details" id="create">create(projectId, location, body, x__xgafv=None)</code>
89 <pre>Creates a Cloud Dataflow job from a template.
90
91 Args:
92 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
93 location: string, The location to which to direct the request. (required)
94 body: object, The request body. (required)
95 The object takes the form of:
96
97 { # A request to create a Cloud Dataflow job from a template.
98 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
99 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
100 # template if not specified.
101 "zone": "A String", # The Compute Engine [availability
102 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
103 # for launching worker instances to run your pipeline.
104 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
105 # Use with caution.
106 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
107 # Must be a valid Cloud Storage URL, beginning with `gs://`.
108 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
109 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
110 # available to your pipeline during execution, from 1 to 1000.
111 },
112 "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
113 # create the job.
114 # Must be a valid Cloud Storage URL, beginning with `gs://`.
115 "location": "A String", # The location to which to direct the request.
116 "parameters": { # The runtime parameters to pass to the job.
117 "a_key": "A String",
118 },
119 "jobName": "A String", # Required. The job name to use for the created job.
120 }
121
122 x__xgafv: string, V1 error format.
123 Allowed values
124 1 - v1 error format
125 2 - v2 error format
126
127 Returns:
128 An object of the form:
129
130 { # Defines a job to be run by the Cloud Dataflow service.
131 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
132 # If this field is set, the service will ensure its uniqueness.
133 # The request to create a job will fail if the service has knowledge of a
134 # previously submitted job with the same client's ID and job name.
135 # The caller may use this field to ensure idempotence of job
136 # creation across retried attempts to create a job.
137 # By default, the field is empty and, in that case, the service ignores it.
138 "requestedState": "A String", # The job's requested state.
139 #
140 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
141 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
142 # also be used to directly set a job's requested state to
143 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
144 # job if it has not already reached a terminal state.
145 "name": "A String", # The user-specified Cloud Dataflow job name.
146 #
147 # Only one Job with a given name may exist in a project at any
148 # given time. If a caller attempts to create a Job with the same
149 # name as an already-existing Job, the attempt returns the
150 # existing Job.
151 #
152 # The name must match the regular expression
153 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
154 "location": "A String", # The location that contains this job.
155 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
156 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
157 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
158 "currentState": "A String", # The current state of the job.
159 #
160 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
161 # specified.
162 #
163 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
164 # terminal state. After a job has reached a terminal state, no
165 # further state updates may be made.
166 #
167 # This field may be mutated by the Cloud Dataflow service;
168 # callers cannot mutate it.
169 "labels": { # User-defined labels for this job.
170 #
171 # The labels map can contain no more than 64 entries. Entries of the labels
172 # map are UTF8 strings that comply with the following restrictions:
173 #
174 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
175 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
176 # * Both keys and values are additionally constrained to be <= 128 bytes in
177 # size.
178 "a_key": "A String",
179 },
180 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
181 # corresponding name prefixes of the new job.
182 "a_key": "A String",
183 },
184 "id": "A String", # The unique ID of this job.
185 #
186 # This field is set by the Cloud Dataflow service when the Job is
187 # created, and is immutable for the life of the job.
188 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
189 "version": { # A structure describing which components and their versions of the service
190 # are required in order to run the job.
191 "a_key": "", # Properties of the object.
192 },
193 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
194 # storage. The system will append the suffix "/temp-{JOBNAME}" to
195 # this resource prefix, where {JOBNAME} is the value of the
196 # job_name field. The resulting bucket and object prefix is used
197 # as the prefix of the resources used to store temporary data
198 # needed during the job execution. NOTE: This will override the
199 # value in taskrunner_settings.
200 # The supported resource type is:
201 #
202 # Google Cloud Storage:
203 #
204 # storage.googleapis.com/{bucket}/{object}
205 # bucket.storage.googleapis.com/{object}
206 "internalExperiments": { # Experimental settings.
207 "a_key": "", # Properties of the object. Contains field @type with type URL.
208 },
209 "dataset": "A String", # The dataset for the current project where various workflow
210 # related tables are stored.
211 #
212 # The supported resource type is:
213 #
214 # Google BigQuery:
215 # bigquery.googleapis.com/{dataset}
216 "experiments": [ # The list of experiments to enable.
217 "A String",
218 ],
219 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
220 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
221 # options are passed through the service and are used to recreate the
222 # SDK pipeline options on the worker in a language agnostic and platform
223 # independent way.
224 "a_key": "", # Properties of the object.
225 },
226 "userAgent": { # A description of the process that generated the request.
227 "a_key": "", # Properties of the object.
228 },
229 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
230 # unspecified, the service will attempt to choose a reasonable
231 # default. This should be in the form of the API service name,
232 # e.g. "compute.googleapis.com".
233 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
234 # specified in order for the job to have workers.
235 { # Describes one particular pool of Cloud Dataflow workers to be
236 # instantiated by the Cloud Dataflow service in order to perform the
237 # computations required by a job. Note that a workflow job may use
238 # multiple pools, in order to match the various computational
239 # requirements of the various stages of the job.
240 "diskSourceImage": "A String", # Fully qualified source image for disks.
241 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
242 # using the standard Dataflow task runner. Users should ignore
243 # this field.
244 "workflowFileName": "A String", # The file to store the workflow in.
245 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
246 # will not be uploaded.
247 #
248 # The supported resource type is:
249 #
250 # Google Cloud Storage:
251 # storage.googleapis.com/{bucket}/{object}
252 # bucket.storage.googleapis.com/{object}
253 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
254 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
255 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
256 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
257 # "shuffle/v1beta1".
258 "workerId": "A String", # The ID of the worker running this pipeline.
259 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
260 #
261 # When workers access Google Cloud APIs, they logically do so via
262 # relative URLs. If this field is specified, it supplies the base
263 # URL to use for resolving these relative URLs. The normative
264 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
265 # Locators".
266 #
267 # If not specified, the default value is "http://www.googleapis.com/"
268 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
269 # "dataflow/v1b3/projects".
270 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
271 # storage.
272 #
273 # The supported resource type is:
274 #
275 # Google Cloud Storage:
276 #
277 # storage.googleapis.com/{bucket}/{object}
278 # bucket.storage.googleapis.com/{object}
279 },
280 "vmId": "A String", # The ID string of the VM.
281 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
282 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
283 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
284 # access the Cloud Dataflow API.
285 "A String",
286 ],
287 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
288 # taskrunner; e.g. "root".
289 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
290 #
291 # When workers access Google Cloud APIs, they logically do so via
292 # relative URLs. If this field is specified, it supplies the base
293 # URL to use for resolving these relative URLs. The normative
294 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
295 # Locators".
296 #
297 # If not specified, the default value is "http://www.googleapis.com/"
298 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
299 # taskrunner; e.g. "wheel".
300 "languageHint": "A String", # The suggested backend language.
301 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
302 # console.
303 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
304 "logDir": "A String", # The directory on the VM to store logs.
305 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
306 "harnessCommand": "A String", # The command to launch the worker harness.
307 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
308 # temporary storage.
309 #
310 # The supported resource type is:
311 #
312 # Google Cloud Storage:
313 # storage.googleapis.com/{bucket}/{object}
314 # bucket.storage.googleapis.com/{object}
315 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
316 },
317 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
318 # are supported.
319 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
320 # service will attempt to choose a reasonable default.
321 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
322 # the service will use the network "default".
323 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
324 # will attempt to choose a reasonable default.
325 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
326 # attempt to choose a reasonable default.
327 "dataDisks": [ # Data disks that are used by a VM in this workflow.
328 { # Describes the data disk used by a workflow job.
329 "mountPoint": "A String", # Directory in a VM where disk is mounted.
330 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
331 # attempt to choose a reasonable default.
332 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
333 # must be a disk type appropriate to the project and zone in which
334 # the workers will run. If unknown or unspecified, the service
335 # will attempt to choose a reasonable default.
336 #
337 # For example, the standard persistent disk type is a resource name
338 # typically ending in "pd-standard". If SSD persistent disks are
339 # available, the resource name typically ends with "pd-ssd". The
340 # actual valid values are defined by the Google Compute Engine API,
341 # not by the Cloud Dataflow API; consult the Google Compute Engine
342 # documentation for more information about determining the set of
343 # available disk types for a particular project and zone.
344 #
345 # Google Compute Engine Disk types are local to a particular
346 # project in a particular zone, and so the resource name will
347 # typically look something like this:
348 #
349 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
350 },
351 ],
352 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
353 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
354 # `TEARDOWN_NEVER`.
355 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
356 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
357 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
358 # down.
359 #
360 # If the workers are not torn down by the service, they will
361 # continue to run and use Google Compute Engine VM resources in the
362 # user's project until they are explicitly terminated by the user.
363 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
364 # policy except for small, manually supervised test jobs.
365 #
366 # If unknown or unspecified, the service will attempt to choose a reasonable
367 # default.
368 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
369 # Compute Engine API.
370 "ipConfiguration": "A String", # Configuration for VM IPs.
371 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
372 # service will choose a number of threads (according to the number of cores
373 # on the selected machine type for batch, or 1 by convention for streaming).
374 "poolArgs": { # Extra arguments for this worker pool.
375 "a_key": "", # Properties of the object. Contains field @type with type URL.
376 },
377 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
378 # execute the job. If zero or unspecified, the service will
379 # attempt to choose a reasonable default.
380 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
381 # harness, residing in Google Container Registry.
382 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
383 # the form "regions/REGION/subnetworks/SUBNETWORK".
384 "packages": [ # Packages to be installed on workers.
385 { # The packages that must be installed in order for a worker to run the
386 # steps of the Cloud Dataflow job that will be assigned to its worker
387 # pool.
388 #
389 # This is the mechanism by which the Cloud Dataflow SDK causes code to
390 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
391 # might use this to install jars containing the user's code and all of the
392 # various dependencies (libraries, data files, etc.) required in order
393 # for that code to run.
394 "location": "A String", # The resource to read the package from. The supported resource type is:
395 #
396 # Google Cloud Storage:
397 #
398 # storage.googleapis.com/{bucket}
399 # bucket.storage.googleapis.com/
400 "name": "A String", # The name of the package.
401 },
402 ],
403 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
404 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
405 "algorithm": "A String", # The algorithm to use for autoscaling.
406 },
407 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
408 # select a default set of packages which are useful to worker
409 # harnesses written in a particular language.
410 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
411 # attempt to choose a reasonable default.
412 "metadata": { # Metadata to set on the Google Compute Engine VMs.
413 "a_key": "A String",
414 },
415 },
416 ],
417 },
418 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
419 # A description of the user pipeline and stages through which it is executed.
420 # Created by Cloud Dataflow service. Only retrieved with
421 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
422 # form. This data is provided by the Dataflow service for ease of visualizing
423 # the pipeline and interpretting Dataflow provided metrics.
424 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
425 { # Description of the type, names/ids, and input/outputs for a transform.
426 "kind": "A String", # Type of transform.
427 "name": "A String", # User provided name for this transform instance.
428 "inputCollectionName": [ # User names for all collection inputs to this transform.
429 "A String",
430 ],
431 "displayData": [ # Transform-specific display data.
432 { # Data provided with a pipeline or transform to provide descriptive info.
433 "shortStrValue": "A String", # A possible additional shorter value to display.
434 # For example a java_class_name_value of com.mypackage.MyDoFn
435 # will be stored with MyDoFn as the short_str_value and
436 # com.mypackage.MyDoFn as the java_class_name value.
437 # short_str_value can be displayed and java_class_name_value
438 # will be displayed as a tooltip.
439 "durationValue": "A String", # Contains value if the data is of duration type.
440 "url": "A String", # An optional full URL.
441 "floatValue": 3.14, # Contains value if the data is of float type.
442 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
443 # language namespace (i.e. python module) which defines the display data.
444 # This allows a dax monitoring system to specially handle the data
445 # and perform custom rendering.
446 "javaClassValue": "A String", # Contains value if the data is of java class type.
447 "label": "A String", # An optional label to display in a dax UI for the element.
448 "boolValue": True or False, # Contains value if the data is of a boolean type.
449 "strValue": "A String", # Contains value if the data is of string type.
450 "key": "A String", # The key identifying the display data.
451 # This is intended to be used as a label for the display data
452 # when viewed in a dax monitoring system.
453 "int64Value": "A String", # Contains value if the data is of int64 type.
454 "timestampValue": "A String", # Contains value if the data is of timestamp type.
455 },
456 ],
457 "outputCollectionName": [ # User names for all collection outputs to this transform.
458 "A String",
459 ],
460 "id": "A String", # SDK generated id of this transform instance.
461 },
462 ],
463 "displayData": [ # Pipeline level display data.
464 { # Data provided with a pipeline or transform to provide descriptive info.
465 "shortStrValue": "A String", # A possible additional shorter value to display.
466 # For example a java_class_name_value of com.mypackage.MyDoFn
467 # will be stored with MyDoFn as the short_str_value and
468 # com.mypackage.MyDoFn as the java_class_name value.
469 # short_str_value can be displayed and java_class_name_value
470 # will be displayed as a tooltip.
471 "durationValue": "A String", # Contains value if the data is of duration type.
472 "url": "A String", # An optional full URL.
473 "floatValue": 3.14, # Contains value if the data is of float type.
474 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
475 # language namespace (i.e. python module) which defines the display data.
476 # This allows a dax monitoring system to specially handle the data
477 # and perform custom rendering.
478 "javaClassValue": "A String", # Contains value if the data is of java class type.
479 "label": "A String", # An optional label to display in a dax UI for the element.
480 "boolValue": True or False, # Contains value if the data is of a boolean type.
481 "strValue": "A String", # Contains value if the data is of string type.
482 "key": "A String", # The key identifying the display data.
483 # This is intended to be used as a label for the display data
484 # when viewed in a dax monitoring system.
485 "int64Value": "A String", # Contains value if the data is of int64 type.
486 "timestampValue": "A String", # Contains value if the data is of timestamp type.
487 },
488 ],
489 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
490 { # Description of the composing transforms, names/ids, and input/outputs of a
491 # stage of execution. Some composing transforms and sources may have been
492 # generated by the Dataflow service during execution planning.
493 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
494 { # Description of an interstitial value between transforms in an execution
495 # stage.
496 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
497 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
498 # source is most closely associated.
499 "name": "A String", # Dataflow service generated name for this source.
500 },
501 ],
502 "kind": "A String", # Type of tranform this stage is executing.
503 "name": "A String", # Dataflow service generated name for this stage.
504 "outputSource": [ # Output sources for this stage.
505 { # Description of an input or output of an execution stage.
506 "userName": "A String", # Human-readable name for this source; may be user or system generated.
507 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
508 # source is most closely associated.
509 "name": "A String", # Dataflow service generated name for this source.
510 "sizeBytes": "A String", # Size of the source, if measurable.
511 },
512 ],
513 "inputSource": [ # Input sources for this stage.
514 { # Description of an input or output of an execution stage.
515 "userName": "A String", # Human-readable name for this source; may be user or system generated.
516 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
517 # source is most closely associated.
518 "name": "A String", # Dataflow service generated name for this source.
519 "sizeBytes": "A String", # Size of the source, if measurable.
520 },
521 ],
522 "componentTransform": [ # Transforms that comprise this execution stage.
523 { # Description of a transform executed as part of an execution stage.
524 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
525 "originalTransform": "A String", # User name for the original user transform with which this transform is
526 # most closely associated.
527 "name": "A String", # Dataflow service generated name for this source.
528 },
529 ],
530 "id": "A String", # Dataflow service generated id for this stage.
531 },
532 ],
533 },
534 "steps": [ # The top-level steps that constitute the entire job.
535 { # Defines a particular step within a Cloud Dataflow job.
536 #
537 # A job consists of multiple steps, each of which performs some
538 # specific operation as part of the overall job. Data is typically
539 # passed from one step to another as part of the job.
540 #
541 # Here's an example of a sequence of steps which together implement a
542 # Map-Reduce job:
543 #
544 # * Read a collection of data from some source, parsing the
545 # collection's elements.
546 #
547 # * Validate the elements.
548 #
549 # * Apply a user-defined function to map each element to some value
550 # and extract an element-specific key value.
551 #
552 # * Group elements with the same key into a single element with
553 # that key, transforming a multiply-keyed collection into a
554 # uniquely-keyed collection.
555 #
556 # * Write the elements out to some data sink.
557 #
558 # Note that the Cloud Dataflow service may be used to run many different
559 # types of jobs, not just Map-Reduce.
560 "kind": "A String", # The kind of step in the Cloud Dataflow job.
561 "properties": { # Named properties associated with the step. Each kind of
562 # predefined step has its own required set of properties.
563 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
564 "a_key": "", # Properties of the object.
565 },
566 "name": "A String", # The name that identifies the step. This must be unique for each
567 # step with respect to all other steps in the Cloud Dataflow job.
568 },
569 ],
570 "currentStateTime": "A String", # The timestamp associated with the current state.
571 "tempFiles": [ # A set of files the system should be aware of that are used
572 # for temporary storage. These temporary files will be
573 # removed on job completion.
574 # No duplicates are allowed.
575 # No file patterns are supported.
576 #
577 # The supported files are:
578 #
579 # Google Cloud Storage:
580 #
581 # storage.googleapis.com/{bucket}/{object}
582 # bucket.storage.googleapis.com/{object}
583 "A String",
584 ],
585 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
586 # callers cannot mutate it.
587 { # A message describing the state of a particular execution stage.
588 "executionStageName": "A String", # The name of the execution stage.
589 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
590 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
591 },
592 ],
593 "type": "A String", # The type of Cloud Dataflow job.
594 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
595 # Cloud Dataflow service.
596 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
597 # of the job it replaced.
598 #
599 # When sending a `CreateJobRequest`, you can update a job by specifying it
600 # here. The job named here is stopped, and its intermediate state is
601 # transferred to this job.
602 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
603 # isn't contained in the submitted job.
604 "stages": { # A mapping from each stage to the information about that stage.
605 "a_key": { # Contains information about how a particular
606 # google.dataflow.v1beta3.Step will be executed.
607 "stepName": [ # The steps associated with the execution stage.
608 # Note that stages may have several steps, and that a given step
609 # might be run by more than one stage.
610 "A String",
611 ],
612 },
613 },
614 },
615 }</pre>
616 </div>
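<p>As a usage sketch for create() above: the call shape and body fields follow the documentation on this page, while the project, location, bucket, and job name values are placeholders to substitute with your own.</p>
<pre>
# Hypothetical invocation of templates.create(); all concrete values are illustrative.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
templates = service.projects().locations().templates()

body = {
    'jobName': 'example-wordcount',                   # Required. Name for the created job.
    'gcsPath': 'gs://my-bucket/templates/wordcount',  # Required. Cloud Storage path to the template.
    'parameters': {'inputFile': 'gs://my-bucket/input.txt'},
    'environment': {'tempLocation': 'gs://my-bucket/temp', 'maxWorkers': 3},
}
job = templates.create(projectId='my-project', location='us-central1', body=body).execute()
print(job.get('id'), job.get('currentState'))
</pre>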
617
618 <div class="method">
619 <code class="details" id="get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</code>
620 <pre>Get the template metadata associated with a template.
621
622 Args:
623 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
624 location: string, The location to which to direct the request. (required)
625 gcsPath: string, Required. A Cloud Storage path to the template from which to
626 create the job.
627 Must be a valid Cloud Storage URL, beginning with `gs://`.
628 x__xgafv: string, V1 error format.
629 Allowed values
630 1 - v1 error format
631 2 - v2 error format
632 view: string, The view to retrieve. Defaults to METADATA_ONLY.
633
634 Returns:
635 An object of the form:
636
637 { # The response to a GetTemplate request.
638 "status": { # The `Status` type defines a logical error model that is suitable for different # The status of the get template request. Any problems with the
639 # request will be indicated in the error_details.
640 # programming environments, including REST APIs and RPC APIs. It is used by
641 # [gRPC](https://github.com/grpc). The error model is designed to be:
642 #
643 # - Simple to use and understand for most users
644 # - Flexible enough to meet unexpected needs
645 #
646 # # Overview
647 #
648 # The `Status` message contains three pieces of data: error code, error message,
649 # and error details. The error code should be an enum value of
650 # google.rpc.Code, but it may accept additional error codes if needed. The
651 # error message should be a developer-facing English message that helps
652 # developers *understand* and *resolve* the error. If a localized user-facing
653 # error message is needed, put the localized message in the error details or
654 # localize it in the client. The optional error details may contain arbitrary
655 # information about the error. There is a predefined set of error detail types
656 # in the package `google.rpc` that can be used for common error conditions.
657 #
658 # # Language mapping
659 #
660 # The `Status` message is the logical representation of the error model, but it
661 # is not necessarily the actual wire format. When the `Status` message is
662 # exposed in different client libraries and different wire protocols, it can be
663 # mapped differently. For example, it will likely be mapped to some exceptions
664 # in Java, but more likely mapped to some error codes in C.
665 #
666 # # Other uses
667 #
668 # The error model and the `Status` message can be used in a variety of
669 # environments, either with or without APIs, to provide a
670 # consistent developer experience across different environments.
671 #
672 # Example uses of this error model include:
673 #
674 # - Partial errors. If a service needs to return partial errors to the client,
675 # it may embed the `Status` in the normal response to indicate the partial
676 # errors.
677 #
678 # - Workflow errors. A typical workflow has multiple steps. Each step may
679 # have a `Status` message for error reporting.
680 #
681 # - Batch operations. If a client uses batch request and batch response, the
682 # `Status` message should be used directly inside batch response, one for
683 # each error sub-response.
684 #
685 # - Asynchronous operations. If an API call embeds asynchronous operation
686 # results in its response, the status of those operations should be
687 # represented directly using the `Status` message.
688 #
689 # - Logging. If some API errors are stored in logs, the message `Status` could
690 # be used directly after any stripping needed for security/privacy reasons.
691 "message": "A String", # A developer-facing error message, which should be in English. Any
692 # user-facing error message should be localized and sent in the
693 # google.rpc.Status.details field, or localized by the client.
694 "code": 42, # The status code, which should be an enum value of google.rpc.Code.
695 "details": [ # A list of messages that carry the error details. There will be a
696 # common set of message types for APIs to use.
697 {
698 "a_key": "", # Properties of the object. Contains field @type with type URL.
699 },
700 ],
701 },
702 "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
703 # parameters, etc.
704 "name": "A String", # Required. The name of the template.
705 "parameters": [ # The parameters for the template.
706 { # Metadata for a specific parameter.
707 "regexes": [ # Optional. Regexes that the parameter must match.
708 "A String",
709 ],
710 "helpText": "A String", # Required. The help text to display for the parameter.
711 "name": "A String", # Required. The name of the parameter.
712 "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
713 "label": "A String", # Required. The label to display for the parameter.
714 },
715 ],
716 "description": "A String", # Optional. A description of the template.
717 },
718 }</pre>
719 </div>
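<p>A corresponding sketch for get() above, which retrieves the metadata of a template stored in Cloud Storage; the project, location, and template path are placeholders.</p>
<pre>
# Hypothetical invocation of templates.get(); all concrete values are illustrative.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
templates = service.projects().locations().templates()

response = templates.get(
    projectId='my-project',
    location='us-central1',
    gcsPath='gs://my-bucket/templates/wordcount',
    view='METADATA_ONLY',  # The default view, per the documentation above.
).execute()

for param in response.get('metadata', {}).get('parameters', []):
    print(param['name'], '-', param.get('helpText', ''))
</pre>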
720
721 <div class="method">
722 <code class="details" id="launch">launch(projectId, location, body, gcsPath=None, validateOnly=None, x__xgafv=None)</code>
723 <pre>Launch a template.
724
725 Args:
726 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
727 location: string, The location to which to direct the request. (required)
728 body: object, The request body. (required)
729 The object takes the form of:
730
731 { # Parameters to provide to the template being launched.
732 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
733 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
734 # template if not specified.
735 "zone": "A String", # The Compute Engine [availability
736 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
737 # for launching worker instances to run your pipeline.
738 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
739 # Use with caution.
740 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
741 # Must be a valid Cloud Storage URL, beginning with `gs://`.
742 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
743 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
744 # available to your pipeline during execution, from 1 to 1000.
745 },
746 "parameters": { # The runtime parameters to pass to the job.
747 "a_key": "A String",
748 },
749 "jobName": "A String", # Required. The job name to use for the created job.
750 }
751
752 gcsPath: string, Required. A Cloud Storage path to the template from which to create
753 the job.
754 Must be a valid Cloud Storage URL, beginning with `gs://`.
755 validateOnly: boolean, If true, the request is validated but not actually executed.
756 Defaults to false.
757 x__xgafv: string, V1 error format.
758 Allowed values
759 1 - v1 error format
760 2 - v2 error format
761
762 Returns:
763 An object of the form:
764
765 { # Response to the request to launch a template.
766 "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
767 # the job was successfully launched.
768 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
769 # If this field is set, the service will ensure its uniqueness.
770 # The request to create a job will fail if the service has knowledge of a
771 # previously submitted job with the same client's ID and job name.
772 # The caller may use this field to ensure idempotence of job
773 # creation across retried attempts to create a job.
774 # By default, the field is empty and, in that case, the service ignores it.
775 "requestedState": "A String", # The job's requested state.
776 #
777 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
778 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
779 # also be used to directly set a job's requested state to
780 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
781 # job if it has not already reached a terminal state.
782 "name": "A String", # The user-specified Cloud Dataflow job name.
783 #
784 # Only one Job with a given name may exist in a project at any
785 # given time. If a caller attempts to create a Job with the same
786 # name as an already-existing Job, the attempt returns the
787 # existing Job.
788 #
789 # The name must match the regular expression
790 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
791 "location": "A String", # The location that contains this job.
792 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
793 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
794 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
795 "currentState": "A String", # The current state of the job.
796 #
797 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
798 # specified.
799 #
800 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
801 # terminal state. After a job has reached a terminal state, no
802 # further state updates may be made.
803 #
804 # This field may be mutated by the Cloud Dataflow service;
805 # callers cannot mutate it.
806 "labels": { # User-defined labels for this job.
807 #
808 # The labels map can contain no more than 64 entries. Entries of the labels
809 # map are UTF8 strings that comply with the following restrictions:
810 #
811 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
812 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
813 # * Both keys and values are additionally constrained to be <= 128 bytes in
814 # size.
815 "a_key": "A String",
816 },
817 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
818 # corresponding name prefixes of the new job.
819 "a_key": "A String",
820 },
821 "id": "A String", # The unique ID of this job.
822 #
823 # This field is set by the Cloud Dataflow service when the Job is
824 # created, and is immutable for the life of the job.
825 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
826 "version": { # A structure describing which components and their versions of the service
827 # are required in order to run the job.
828 "a_key": "", # Properties of the object.
829 },
830 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
831 # storage. The system will append the suffix "/temp-{JOBNAME}" to
832 # this resource prefix, where {JOBNAME} is the value of the
833 # job_name field. The resulting bucket and object prefix is used
834 # as the prefix of the resources used to store temporary data
835 # needed during the job execution. NOTE: This will override the
836 # value in taskrunner_settings.
837 # The supported resource type is:
838 #
839 # Google Cloud Storage:
840 #
841 # storage.googleapis.com/{bucket}/{object}
842 # bucket.storage.googleapis.com/{object}
843 "internalExperiments": { # Experimental settings.
844 "a_key": "", # Properties of the object. Contains field @type with type URL.
845 },
846 "dataset": "A String", # The dataset for the current project where various workflow
847 # related tables are stored.
848 #
849 # The supported resource type is:
850 #
851 # Google BigQuery:
852 # bigquery.googleapis.com/{dataset}
853 "experiments": [ # The list of experiments to enable.
854 "A String",
855 ],
856 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
857 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
858 # options are passed through the service and are used to recreate the
859 # SDK pipeline options on the worker in a language agnostic and platform
860 # independent way.
861 "a_key": "", # Properties of the object.
862 },
863 "userAgent": { # A description of the process that generated the request.
864 "a_key": "", # Properties of the object.
865 },
866 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
867 # unspecified, the service will attempt to choose a reasonable
868 # default. This should be in the form of the API service name,
869 # e.g. "compute.googleapis.com".
870 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
871 # specified in order for the job to have workers.
872 { # Describes one particular pool of Cloud Dataflow workers to be
873 # instantiated by the Cloud Dataflow service in order to perform the
874 # computations required by a job. Note that a workflow job may use
875 # multiple pools, in order to match the various computational
876 # requirements of the various stages of the job.
877 "diskSourceImage": "A String", # Fully qualified source image for disks.
878 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
879 # using the standard Dataflow task runner. Users should ignore
880 # this field.
881 "workflowFileName": "A String", # The file to store the workflow in.
882 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
883 # will not be uploaded.
884 #
885 # The supported resource type is:
886 #
887 # Google Cloud Storage:
888 # storage.googleapis.com/{bucket}/{object}
889 # bucket.storage.googleapis.com/{object}
890 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
891 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
892 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
893 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
894 # "shuffle/v1beta1".
895 "workerId": "A String", # The ID of the worker running this pipeline.
896 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
897 #
898 # When workers access Google Cloud APIs, they logically do so via
899 # relative URLs. If this field is specified, it supplies the base
900 # URL to use for resolving these relative URLs. The normative
901 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
902 # Locators".
903 #
904 # If not specified, the default value is "http://www.googleapis.com/"
905 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
906 # "dataflow/v1b3/projects".
907 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
908 # storage.
909 #
910 # The supported resource type is:
911 #
912 # Google Cloud Storage:
913 #
914 # storage.googleapis.com/{bucket}/{object}
915 # bucket.storage.googleapis.com/{object}
916 },
917 "vmId": "A String", # The ID string of the VM.
918 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
919 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
920 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
921 # access the Cloud Dataflow API.
922 "A String",
923 ],
924 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
925 # taskrunner; e.g. "root".
926 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
927 #
928 # When workers access Google Cloud APIs, they logically do so via
929 # relative URLs. If this field is specified, it supplies the base
930 # URL to use for resolving these relative URLs. The normative
931 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
932 # Locators".
933 #
934 # If not specified, the default value is "http://www.googleapis.com/"
935 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
936 # taskrunner; e.g. "wheel".
937 "languageHint": "A String", # The suggested backend language.
938 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
939 # console.
940 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
941 "logDir": "A String", # The directory on the VM to store logs.
942 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
943 "harnessCommand": "A String", # The command to launch the worker harness.
944 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
945 # temporary storage.
946 #
947 # The supported resource type is:
948 #
949 # Google Cloud Storage:
950 # storage.googleapis.com/{bucket}/{object}
951 # bucket.storage.googleapis.com/{object}
952 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
953 },
954 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
955 # are supported.
956 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
957 # service will attempt to choose a reasonable default.
958 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
959 # the service will use the network "default".
960 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
961 # will attempt to choose a reasonable default.
962 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
963 # attempt to choose a reasonable default.
964 "dataDisks": [ # Data disks that are used by a VM in this workflow.
965 { # Describes the data disk used by a workflow job.
966 "mountPoint": "A String", # Directory in a VM where disk is mounted.
967 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
968 # attempt to choose a reasonable default.
969 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
970 # must be a disk type appropriate to the project and zone in which
971 # the workers will run. If unknown or unspecified, the service
972 # will attempt to choose a reasonable default.
973 #
974 # For example, the standard persistent disk type is a resource name
975 # typically ending in "pd-standard". If SSD persistent disks are
976 # available, the resource name typically ends with "pd-ssd". The
977 # actual valid values are defined by the Google Compute Engine API,
978 # not by the Cloud Dataflow API; consult the Google Compute Engine
979 # documentation for more information about determining the set of
980 # available disk types for a particular project and zone.
981 #
982 # Google Compute Engine Disk types are local to a particular
983 # project in a particular zone, and so the resource name will
984 # typically look something like this:
985 #
986 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
987 },
988 ],
989 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
990 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
991 # `TEARDOWN_NEVER`.
992 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
993 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
994 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
995 # down.
996 #
997 # If the workers are not torn down by the service, they will
998 # continue to run and use Google Compute Engine VM resources in the
999 # user's project until they are explicitly terminated by the user.
1000 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1001 # policy except for small, manually supervised test jobs.
1002 #
1003 # If unknown or unspecified, the service will attempt to choose a reasonable
1004 # default.
1005 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1006 # Compute Engine API.
1007 "ipConfiguration": "A String", # Configuration for VM IPs.
1008 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1009 # service will choose a number of threads (according to the number of cores
1010 # on the selected machine type for batch, or 1 by convention for streaming).
1011 "poolArgs": { # Extra arguments for this worker pool.
1012 "a_key": "", # Properties of the object. Contains field @type with type URL.
1013 },
1014 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1015 # execute the job. If zero or unspecified, the service will
1016 # attempt to choose a reasonable default.
1017 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1018 # harness, residing in Google Container Registry.
1019 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1020 # the form "regions/REGION/subnetworks/SUBNETWORK".
1021 "packages": [ # Packages to be installed on workers.
1022 { # The packages that must be installed in order for a worker to run the
1023 # steps of the Cloud Dataflow job that will be assigned to its worker
1024 # pool.
1025 #
1026 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1027 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1028 # might use this to install jars containing the user's code and all of the
1029 # various dependencies (libraries, data files, etc.) required in order
1030 # for that code to run.
1031 "location": "A String", # The resource to read the package from. The supported resource type is:
1032 #
1033 # Google Cloud Storage:
1034 #
1035 # storage.googleapis.com/{bucket}
1036 # bucket.storage.googleapis.com/
1037 "name": "A String", # The name of the package.
1038 },
1039 ],
1040 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1041 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1042 "algorithm": "A String", # The algorithm to use for autoscaling.
1043 },
1044 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1045 # select a default set of packages which are useful to worker
1046 # harnesses written in a particular language.
1047 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1048 # attempt to choose a reasonable default.
1049 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1050 "a_key": "A String",
1051 },
1052 },
1053 ],
1054 },
1055 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1056 # A description of the user pipeline and stages through which it is executed.
1057 # Created by Cloud Dataflow service. Only retrieved with
1058 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1059 # form. This data is provided by the Dataflow service for ease of visualizing
1060 # the pipeline and interpretting Dataflow provided metrics.
1061 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1062 { # Description of the type, names/ids, and input/outputs for a transform.
1063 "kind": "A String", # Type of transform.
1064 "name": "A String", # User provided name for this transform instance.
1065 "inputCollectionName": [ # User names for all collection inputs to this transform.
1066 "A String",
1067 ],
1068 "displayData": [ # Transform-specific display data.
1069 { # Data provided with a pipeline or transform to provide descriptive info.
1070 "shortStrValue": "A String", # A possible additional shorter value to display.
1071 # For example a java_class_name_value of com.mypackage.MyDoFn
1072 # will be stored with MyDoFn as the short_str_value and
1073 # com.mypackage.MyDoFn as the java_class_name value.
1074 # short_str_value can be displayed and java_class_name_value
1075 # will be displayed as a tooltip.
1076 "durationValue": "A String", # Contains value if the data is of duration type.
1077 "url": "A String", # An optional full URL.
1078 "floatValue": 3.14, # Contains value if the data is of float type.
1079 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1080 # language namespace (i.e. python module) which defines the display data.
1081 # This allows a dax monitoring system to specially handle the data
1082 # and perform custom rendering.
1083 "javaClassValue": "A String", # Contains value if the data is of java class type.
1084 "label": "A String", # An optional label to display in a dax UI for the element.
1085 "boolValue": True or False, # Contains value if the data is of a boolean type.
1086 "strValue": "A String", # Contains value if the data is of string type.
1087 "key": "A String", # The key identifying the display data.
1088 # This is intended to be used as a label for the display data
1089 # when viewed in a dax monitoring system.
1090 "int64Value": "A String", # Contains value if the data is of int64 type.
1091 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1092 },
1093 ],
1094 "outputCollectionName": [ # User names for all collection outputs to this transform.
1095 "A String",
1096 ],
1097 "id": "A String", # SDK generated id of this transform instance.
1098 },
1099 ],
1100 "displayData": [ # Pipeline level display data.
1101 { # Data provided with a pipeline or transform to provide descriptive info.
1102 "shortStrValue": "A String", # A possible additional shorter value to display.
1103 # For example a java_class_name_value of com.mypackage.MyDoFn
1104 # will be stored with MyDoFn as the short_str_value and
1105 # com.mypackage.MyDoFn as the java_class_name value.
1106 # short_str_value can be displayed and java_class_name_value
1107 # will be displayed as a tooltip.
1108 "durationValue": "A String", # Contains value if the data is of duration type.
1109 "url": "A String", # An optional full URL.
1110 "floatValue": 3.14, # Contains value if the data is of float type.
1111 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1112 # language namespace (i.e. python module) which defines the display data.
1113 # This allows a dax monitoring system to specially handle the data
1114 # and perform custom rendering.
1115 "javaClassValue": "A String", # Contains value if the data is of java class type.
1116 "label": "A String", # An optional label to display in a dax UI for the element.
1117 "boolValue": True or False, # Contains value if the data is of a boolean type.
1118 "strValue": "A String", # Contains value if the data is of string type.
1119 "key": "A String", # The key identifying the display data.
1120 # This is intended to be used as a label for the display data
1121 # when viewed in a dax monitoring system.
1122 "int64Value": "A String", # Contains value if the data is of int64 type.
1123 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1124 },
1125 ],
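# Example (illustrative only, not part of the schema): each display data
# entry typically populates exactly one of the typed *Value fields above,
# so a small helper like the hypothetical `display_value` below can pick
# whichever value is present:
#
#   _TYPED_FIELDS = ("strValue", "int64Value", "floatValue", "boolValue",
#                    "javaClassValue", "durationValue", "timestampValue")
#
#   def display_value(entry):
#       for field in _TYPED_FIELDS:
#           if field in entry:
#               return entry[field]
#       return entry.get("shortStrValue")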
1126 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1127 { # Description of the composing transforms, names/ids, and input/outputs of a
1128 # stage of execution. Some composing transforms and sources may have been
1129 # generated by the Dataflow service during execution planning.
1130 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1131 { # Description of an interstitial value between transforms in an execution
1132 # stage.
1133 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1134 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1135 # source is most closely associated.
1136 "name": "A String", # Dataflow service generated name for this source.
1137 },
1138 ],
1139 "kind": "A String", # Type of tranform this stage is executing.
1140 "name": "A String", # Dataflow service generated name for this stage.
1141 "outputSource": [ # Output sources for this stage.
1142 { # Description of an input or output of an execution stage.
1143 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1144 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1145 # source is most closely associated.
1146 "name": "A String", # Dataflow service generated name for this source.
1147 "sizeBytes": "A String", # Size of the source, if measurable.
1148 },
1149 ],
1150 "inputSource": [ # Input sources for this stage.
1151 { # Description of an input or output of an execution stage.
1152 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1153 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1154 # source is most closely associated.
1155 "name": "A String", # Dataflow service generated name for this source.
1156 "sizeBytes": "A String", # Size of the source, if measurable.
1157 },
1158 ],
1159 "componentTransform": [ # Transforms that comprise this execution stage.
1160 { # Description of a transform executed as part of an execution stage.
1161 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1162 "originalTransform": "A String", # User name for the original user transform with which this transform is
1163 # most closely associated.
1164 "name": "A String", # Dataflow service generated name for this source.
1165 },
1166 ],
1167 "id": "A String", # Dataflow service generated id for this stage.
1168 },
1169 ],
1170 },
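# Example (illustrative only, assuming `job` holds the job metadata shown
# here): walking the pipeline description above to list transforms and
# execution stages.
#
#   desc = job.get("pipelineDescription", {})
#   for transform in desc.get("originalPipelineTransform", []):
#       print(transform.get("kind"), transform.get("name"))
#   for stage in desc.get("executionPipelineStage", []):
#       print(stage.get("id"), stage.get("name"), stage.get("kind"))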
1171 "steps": [ # The top-level steps that constitute the entire job.
1172 { # Defines a particular step within a Cloud Dataflow job.
1173 #
1174 # A job consists of multiple steps, each of which performs some
1175 # specific operation as part of the overall job. Data is typically
1176 # passed from one step to another as part of the job.
1177 #
1178 # Here's an example of a sequence of steps which together implement a
1179 # Map-Reduce job:
1180 #
1181 # * Read a collection of data from some source, parsing the
1182 # collection's elements.
1183 #
1184 # * Validate the elements.
1185 #
1186 # * Apply a user-defined function to map each element to some value
1187 # and extract an element-specific key value.
1188 #
1189 # * Group elements with the same key into a single element with
1190 # that key, transforming a multiply-keyed collection into a
1191 # uniquely-keyed collection.
1192 #
1193 # * Write the elements out to some data sink.
1194 #
1195 # Note that the Cloud Dataflow service may be used to run many different
1196 # types of jobs, not just Map-Reduce.
1197 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1198 "properties": { # Named properties associated with the step. Each kind of
1199 # predefined step has its own required set of properties.
1200 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1201 "a_key": "", # Properties of the object.
1202 },
1203 "name": "A String", # The name that identifies the step. This must be unique for each
1204 # step with respect to all other steps in the Cloud Dataflow job.
1205 },
1206 ],
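# Example (illustrative only, assuming `job` holds the job metadata shown
# here): listing the top-level steps when they are returned (the per-step
# properties are only retrieved with JOB_VIEW_ALL, per the note above).
#
#   for step in job.get("steps", []):
#       print(step.get("kind"), step.get("name"))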
1207 "currentStateTime": "A String", # The timestamp associated with the current state.
1208 "tempFiles": [ # A set of files the system should be aware of that are used
1209 # for temporary storage. These temporary files will be
1210 # removed on job completion.
1211 # No duplicates are allowed.
1212 # No file patterns are supported.
1213 #
1214 # The supported files are:
1215 #
1216 # Google Cloud Storage:
1217 #
1218 # storage.googleapis.com/{bucket}/{object}
1219 # bucket.storage.googleapis.com/{object}
1220 "A String",
1221 ],
1222 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1223 # callers cannot mutate it.
1224 { # A message describing the state of a particular execution stage.
1225 "executionStageName": "A String", # The name of the execution stage.
1226 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
1227 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1228 },
1229 ],
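# Example (illustrative only, assuming `job` holds the job metadata shown
# here): summarizing the per-stage execution states above into a dict keyed
# by stage name.
#
#   states = {s.get("executionStageName"): s.get("executionStageState")
#             for s in job.get("stageStates", [])}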
1230 "type": "A String", # The type of Cloud Dataflow job.
1231 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1232 # Cloud Dataflow service.
1233 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1234 # of the job it replaced.
1235 #
1236 # When sending a `CreateJobRequest`, you can update a job by specifying it
1237 # here. The job named here is stopped, and its intermediate state is
1238 # transferred to this job.
1239 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1240 # isn't contained in the submitted job.
1241 "stages": { # A mapping from each stage to the information about that stage.
1242 "a_key": { # Contains information about how a particular
1243 # google.dataflow.v1beta3.Step will be executed.
1244 "stepName": [ # The steps associated with the execution stage.
1245 # Note that stages may have several steps, and that a given step
1246 # might be run by more than one stage.
1247 "A String",
1248 ],
1249 },
1250 },
1251 },
1252 },
1253 }
1254
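Example (illustrative sketch, not part of the generated reference): launching a
template with this resource and reading a few of the job fields described
above. The project, location, template path, and parameters below are
placeholders; authentication via google-auth application default credentials
is assumed.

  from googleapiclient.discovery import build

  # Build the Dataflow API client (uses application default credentials).
  dataflow = build("dataflow", "v1b3")

  # Launch a template; gcsPath points at a template staged in Cloud Storage.
  response = dataflow.projects().locations().templates().launch(
      projectId="my-project",          # placeholder project ID
      location="us-central1",          # placeholder regional endpoint
      gcsPath="gs://my-bucket/templates/my-template",  # placeholder path
      body={
          "jobName": "example-job",
          "parameters": {},            # template-specific parameters
      },
  ).execute()

  # The launched job's metadata is returned in the form shown above (assumed
  # here to be nested under the "job" key of the response).
  job = response.get("job", {})
  print(job.get("type"), job.get("createTime"), job.get("currentStateTime"))
  for stage in job.get("stageStates", []):
      print(stage.get("executionStageName"), stage.get("executionStageState"))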