0%

How TensorFlow Python APIs are generated

TensorFlow's Python APIs are generated automatically by Pybind11 and a set of utility scripts. This post walks through how that generation works.

1. bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package

This command builds the TensorFlow Python package, which includes automatically generating the Python APIs. The corresponding target is defined in tensorflow/tools/pip_package/BUILD as follows:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Shell-binary target that runs build_pip_package.sh; everything the script
# needs at run time is supplied through `data`.
sh_binary(
    name = "build_pip_package",
    srcs = ["build_pip_package.sh"],
    # `data` is COMMON_PIP_DEPS plus three conditional additions:
    #   1. a console target chosen by platform (Windows vs. default),
    #   2. DYNAMIC_LOADED_KERNELS when //tensorflow:dynamic_loaded_kernels
    #      matches, otherwise nothing,
    #   3. whatever if_mkl_ml() returns for the MKL binary blob label
    #      (presumably empty for non-MKL builds; confirm in if_mkl_ml).
    data = COMMON_PIP_DEPS +
           select({
               "//tensorflow:windows": [
                   ":simple_console_for_windows",
               ],
               "//conditions:default": [
                   ":simple_console",
               ],
           }) +
           select({
               "//tensorflow:dynamic_loaded_kernels": DYNAMIC_LOADED_KERNELS,
               "//conditions:default": [],
           }) + if_mkl_ml(["//third_party/mkl:intel_binary_blob"]),
)

Here, COMMON_PIP_DEPS contains all the Python dependencies.

COMMON_PIP_DEPS looks like this (abridged; `...` marks omitted entries):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Labels and files used as the base of the `data` attribute of the
# build_pip_package target.
# NOTE: this listing is abridged; each `...` line stands for entries omitted
# from the quotation and is not valid Starlark on its own.
COMMON_PIP_DEPS = [
    ":licenses",
    "MANIFEST.in",
    "README",
    "setup.py",
    ":included_headers",
    ":xla_compiled_cpu_runtime_srcs.txt_file",
    ":xla_cmake",
    # Top-level Python library target; the API generation described in this
    # article is reached through this dependency.
    "//tensorflow:tensorflow_py",
    "//tensorflow/compiler/tf2xla:xla_compiled_cpu_runtime_hdrs",
    ...
    "//tensorflow/python/data/service:server_lib",
    "//tensorflow/python:distributed_framework_test_lib",
    "//tensorflow/python/distribute:distribute_test_lib_pip",
    "//tensorflow/python:loss_scale",
    "//tensorflow/python:loss_scale_optimizer",
    "//tensorflow/python:meta_graph_testdata",
    "//tensorflow/python:util_example_parser_configuration",
    "//tensorflow/python/data/benchmarks:benchmark_base",
    ...
    "//tensorflow/python/data/experimental/ops:testing",
    "//tensorflow/python/data/kernel_tests:test_base",
    "//tensorflow/python/debug:debug_pip",
    "//tensorflow/python/distribute:combinations",
    "//tensorflow/python/distribute:multi_process_runner",
    "//tensorflow/python/eager:eager_pip",
    "//tensorflow/python/keras:combinations",
    "//tensorflow/python/keras/layers/preprocessing:preprocessing_test_utils",
    "//tensorflow/python/keras/distribute:distribute_strategy_test_lib",
    "//tensorflow/python/keras/distribute:multi_worker_testing_utils",
    "//tensorflow/python/keras/mixed_precision/experimental:test_util",
    ...
    "//tensorflow/tools/docs:py_guide_parser",
]

Here, //tensorflow:tensorflow_py contains the low-level Python APIs.

2. //tensorflow:tensorflow_py

It is located at tensorflow/BUILD and defined as below

1
2
3
4
5
6
7
8
9
10
11
12
# Publicly visible top-level Python library target for TensorFlow.
py_library(
    name = "tensorflow_py",
    srcs_version = "PY2AND3",
    visibility = ["//visibility:public"],
    # Both select() branches are empty lists, so the effective dependencies are
    # exactly the two labels appended after it.
    deps = select({
        "api_version_2": [],
        "//conditions:default": [],
    }) + [
        ":tensorflow_py_no_contrib",
        "//tensorflow/python/estimator:estimator_py",
    ],
)

Where, :tensorflow_py_no_contrib is defined as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Python library whose sources are generated API files and whose only
# dependency is //tensorflow/python:no_contrib.
py_library(
    name = "tensorflow_py_no_contrib",
    # The first source is picked by configuration: the v2 API generator target
    # when "api_version_2" matches, otherwise the v1 one. The root __init__
    # generator and the three Keras API generator targets are always included.
    srcs = select({
        "api_version_2": [":tf_python_api_gen_v2"],
        "//conditions:default": [":tf_python_api_gen_v1"],
    }) + [
        ":root_init_gen",
        "//tensorflow/python/keras/api:keras_python_api_gen",
        "//tensorflow/python/keras/api:keras_python_api_gen_compat_v1",
        "//tensorflow/python/keras/api:keras_python_api_gen_compat_v2",
    ],
    srcs_version = "PY2AND3",
    visibility = ["//visibility:public"],
    deps = ["//tensorflow/python:no_contrib"],
)

It contains the dependency //tensorflow/python:no_contrib.

3. //tensorflow/python:no_contrib

It is defined at tensorflow/python/BUILD as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Aggregate Python library for the tensorflow/python package: its only source
# is __init__.py, and everything else arrives through `deps`.
# The deps mix three kinds of labels:
#   - `:_pywrap_*` / `:pywrap_*` targets (":_pywrap_python_op_gen" is a
#     tf_python_pybind_extension; the others are presumably similar native
#     extension wrappers; confirm in tensorflow/python/BUILD),
#   - per-area op libraries such as ":array_ops", and `*_ops_gen` targets
#     (":array_ops_gen" is produced by tf_gen_op_wrapper_private_py),
#   - sub-package targets under //tensorflow/python/... plus numpy.
py_library(
    name = "no_contrib",
    srcs = ["__init__.py"],
    srcs_version = "PY2AND3",
    # Restricted to an explicit list of packages rather than public.
    visibility = [
        "//tensorflow:__pkg__",
        "//tensorflow/python/estimator:__subpackages__",
        "//tensorflow/python/keras:__subpackages__",
        "//tensorflow/python/tools:__pkg__",
        "//tensorflow/python/tools/api/generator:__pkg__",
        "//tensorflow/tools/api/tests:__pkg__",
        "//tensorflow/tools/compatibility/update:__pkg__",
        "//third_party/py/tensorflow_core:__subpackages__",
    ],
    deps = [
        ":_pywrap_checkpoint_reader",
        ":_pywrap_events_writer",
        ":_pywrap_kernel_registry",
        ":_pywrap_py_exception_registry",
        ":_pywrap_python_op_gen",
        ":_pywrap_quantize_training",
        ":_pywrap_stacktrace_handler",
        ":_pywrap_stat_summarizer",
        ":_pywrap_tfprof",
        ":_pywrap_transform_graph",
        ":_pywrap_util_port",
        ":_pywrap_utils",
        ":array_ops",
        ":audio_ops_gen",
        ":bitwise_ops",
        ":boosted_trees_ops",
        ":check_ops",
        ":client",
        ":client_testlib",
        ":clustering_ops",
        ":collective_ops",
        ":cond_v2",
        ":config",
        ":confusion_matrix",
        ":control_flow_ops",
        ":cudnn_rnn_ops_gen",
        ":errors",
        ":framework",
        ":framework_combinations",
        ":framework_for_generated_wrappers",
        ":functional_ops",
        ":gradient_checker",
        ":gradient_checker_v2",
        ":graph_util",
        ":histogram_ops",
        ":image_ops",
        ":initializers_ns",
        ":io_ops",
        ":keras_lib",
        ":kernels",
        ":lib",
        ":list_ops",
        ":manip_ops",
        ":map_fn",
        ":math_ops",
        ":metrics",
        ":nccl_ops",
        ":nn",
        ":ops",
        ":platform",
        ":proto_ops",
        ":pywrap_tensorflow",
        ":pywrap_tf_session",
        ":pywrap_tfe",
        ":rnn_ops_gen",
        ":saver_test_utils",
        ":script_ops",
        ":sendrecv_ops_gen",
        ":session_ops",
        ":sets",
        ":sparse_ops",
        ":standard_ops",
        ":state_ops",
        ":string_ops",
        ":subscribe",
        ":summary",
        ":tensor_array_ops",
        ":tensor_forest_ops",
        ":test_ops", # TODO: Break testing code out into separate rule.
        ":tf_cluster",
        ":tf_item",
        ":tf_optimizer",
        ":training",
        ":util",
        ":weights_broadcast_ops",
        ":while_v2",
        "//tensorflow/core:protos_all_py",
        "//tensorflow/lite/python:lite",
        "//tensorflow/python/compat",
        "//tensorflow/python/compat:v2_compat",
        "//tensorflow/python/compiler",
        "//tensorflow/python/data",
        "//tensorflow/python/debug:debug_py",
        "//tensorflow/python/distribute",
        "//tensorflow/python/distribute:combinations",
        "//tensorflow/python/distribute:distribute_config",
        "//tensorflow/python/distribute:estimator_training",
        "//tensorflow/python/distribute:multi_worker_test_base",
        "//tensorflow/python/distribute:strategy_combinations",
        "//tensorflow/python/dlpack",
        "//tensorflow/python/eager:def_function",
        "//tensorflow/python/eager:monitoring",
        "//tensorflow/python/eager:profiler",
        "//tensorflow/python/eager:profiler_client",
        "//tensorflow/python/eager:remote",
        "//tensorflow/python/module",
        "//tensorflow/python/ops/distributions",
        "//tensorflow/python/ops/linalg",
        "//tensorflow/python/ops/linalg/sparse",
        "//tensorflow/python/ops/losses",
        "//tensorflow/python/ops/parallel_for",
        "//tensorflow/python/ops/ragged",
        "//tensorflow/python/ops/signal",
        "//tensorflow/python/profiler",
        "//tensorflow/python/profiler:profiler_client",
        "//tensorflow/python/profiler:profiler_v2",
        "//tensorflow/python/profiler:trace",
        "//tensorflow/python/saved_model",
        "//tensorflow/python/tools:module_util",
        "//tensorflow/python/tools/api/generator:create_python_api",
        "//tensorflow/python/tpu:tpu_noestimator",
        "//third_party/py/numpy",
    ],
)

Here, :_pywrap_python_op_gen and :pywrap_tensorflow are used to automatically generate the Python APIs.

  • _pywrap_python_op_gen is defined as below:
1
2
3
4
5
6
7
8
9
10
11
12
# Python extension module "_pywrap_python_op_gen", built from
# framework/python_op_gen_wrapper.cc via the tf_python_pybind_extension macro.
tf_python_pybind_extension(
    name = "_pywrap_python_op_gen",
    srcs = ["framework/python_op_gen_wrapper.cc"],
    # Target name and module name are kept identical.
    module_name = "_pywrap_python_op_gen",
    deps = [
        ":pybind11_absl",
        ":pybind11_lib",
        # Headers-only dependency on the op generator (per the target name;
        # confirm against the python_op_gen_headers_lib definition).
        ":python_op_gen_headers_lib",
        "//third_party/python_runtime:headers",
        "@pybind11",
    ],
)

More details about how it is executed can be found in def tf_python_pybind_extension(...) and def pybind_extension(...) in the file tensorflow/tensorflow.bzl.

The Python APIs are generated separately for different groups of operations. For example, the :array_ops target listed above is defined as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Python library for the array ops, built from the two source files listed in
# `srcs` and depending on the generated-wrapper target :array_ops_gen.
py_library(
    name = "array_ops",
    srcs = [
        "ops/array_ops.py",
        "ops/inplace_ops.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        # Wrapper library declared with tf_gen_op_wrapper_private_py.
        ":array_ops_gen",
        ":common_shapes",
        ":constant_op",
        ":dtypes",
        ":framework_ops",
        # Presumably the generated wrappers for math ops, by analogy with
        # :array_ops_gen; confirm in tensorflow/python/BUILD.
        ":math_ops_gen",
        ":sparse_tensor",
        ":tensor_shape",
        ":tensor_util",
        ":util",
        "//third_party/py/numpy",
        "@six_archive//:six",
    ],
)

Where :array_ops_gen is included in the dependencies and defined as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Declares the generated Python wrappers for the array ops.
# tf_gen_op_wrapper_private_py strips the "_gen" suffix and forwards
# name = "array_ops" to tf_gen_op_wrapper_py, whose default output path is
# "ops/gen_" + name + ".py", i.e. ops/gen_array_ops.py, while the resulting
# py_library keeps the name "array_ops_gen".
tf_gen_op_wrapper_private_py(
    name = "array_ops_gen",
    visibility = [
        # To pass open source testing in the pip Kokoros.
        "//bazel_pip/tensorflow/compiler/tests:__pkg__",
        "//learning/brain/python/ops:__pkg__",
        "//tensorflow/compiler/tests:__pkg__",
        "//tensorflow/python/kernel_tests:__pkg__",
    ],
    # Op libraries linked into the generator tool; per the macro's own
    # documentation these are not applied to the final Python library.
    deps = [
        "//tensorflow/c/kernels:bitcast_op_lib",
        "//tensorflow/core:array_ops_op_lib",
    ],
)

4. tf_gen_op_wrapper_private_py

It is defined at tensorflow/python/build_defs.bzl as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")

# Intended only for use within this directory.
# Generated python wrappers are "private" visibility, users should depend on the
# full python code that incorporates the wrappers. The generated targets have
# a _gen suffix, so that the full python version can use the bare name.
# We also hard code the hidden_file here to reduce duplication.
# NOTE(review): no hidden_file argument is passed in the call below, so the
# sentence above looks stale; confirm against upstream history.
#
# We should consider moving the "out" default pattern into here, many other
# consumers of the tf_gen_op_wrapper_py rule would be simplified if we don't
# hard code the ops/ directory.
#
# Args:
#   name: target name; must end in "_gen", otherwise the macro fails.
#   out: forwarded unchanged to tf_gen_op_wrapper_py.
#   deps: forwarded unchanged to tf_gen_op_wrapper_py.
#   require_shape_functions: forwarded unchanged to tf_gen_op_wrapper_py.
#   visibility: visibility of the generated target; an empty value is replaced
#     by ["//visibility:private"].

def tf_gen_op_wrapper_private_py(
        name,
        out = None,
        deps = [],
        require_shape_functions = False,
        visibility = []):
    # Enforce the naming convention before deriving the bare op name from it.
    if not name.endswith("_gen"):
        fail("name must end in _gen")

    # Default to private visibility when the caller supplies none.
    if not visibility:
        visibility = ["//visibility:private"]
    bare_op_name = name[:-4] # Strip off the _gen

    # The bare name drives the intermediate tool/file names, while the original
    # "_gen" name is kept as the name of the generated py_library. Both ApiDef
    # file groups (base and Python-specific) are always supplied.
    tf_gen_op_wrapper_py(
        name = bare_op_name,
        out = out,
        visibility = visibility,
        deps = deps,
        require_shape_functions = require_shape_functions,
        generated_target_name = name,
        api_def_srcs = [
            "//tensorflow/core/api_def:base_api_def",
            "//tensorflow/core/api_def:python_api_def",
        ],
    )

It calls the function tf_gen_op_wrapper_py internally.

5. tf_gen_op_wrapper_py

It is defined at tensorflow/tensorflow.bzl as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Generates a Python library target wrapping the ops registered in "deps".
#
# The macro emits three targets: a C++ generator binary
# ("gen_<name>_py_wrappers_cc"), a genrule ("<name>_pygenrule") that runs the
# binary and captures its stdout as the output .py file, and a py_library
# exposing that file.
#
# Args:
#   name: used as the name of the generated target and as a name component of
#     the intermediate files.
#   out: name of the python file created by this rule. If None, then
#     "ops/gen_{name}.py" is used.
#   hidden: Optional list of ops names to make private in the Python module.
#     It is invalid to specify both "hidden" and "op_whitelist".
#   visibility: passed to py_library.
#   deps: list of dependencies for the intermediate tool used to generate the
#     python target. NOTE these `deps` are not applied to the final python
#     library target itself. If empty, "//tensorflow/core:{name}_op_lib" is
#     used.
#   require_shape_functions: Unused. Leave this as False.
#   hidden_file: optional file that contains a list of op names to make private
#     in the generated Python module. Each op name should be on a line by
#     itself. Lines that start with characters that are invalid op name
#     starting characters are treated as comments and ignored.
#   generated_target_name: name of the generated target (overrides the
#     "name" arg)
#   op_whitelist: if not empty, only op names in this list will be wrapped. It
#     is invalid to specify both "hidden" and "op_whitelist".
#   cc_linkopts: Optional linkopts to be added to tf_cc_binary that contains the
#     specified ops.
#   api_def_srcs: optional list of labels providing ApiDef files. For each
#     label, the directory of its first file is passed to the generator tool.

def tf_gen_op_wrapper_py(
        name,
        out = None,
        hidden = None,
        visibility = None,
        deps = [],
        require_shape_functions = False,
        hidden_file = None,
        generated_target_name = None,
        op_whitelist = [],
        cc_linkopts = lrt_if_needed(),
        api_def_srcs = []):
    _ = require_shape_functions # Unused.

    # A hide-list (inline or from a file) and a whitelist are mutually exclusive.
    # NOTE(review): the message reads "Cannot pass specify both"; it looks like
    # a wording slip but is a runtime string, so it is left unchanged here.
    if (hidden or hidden_file) and op_whitelist:
        fail("Cannot pass specify both hidden and op_whitelist.")

    # Construct a cc_binary containing the specified ops.
    tool_name = "gen_" + name + "_py_wrappers_cc"
    if not deps:
        # Fall back to the conventional "<name>_op_lib" target in core.
        deps = [str(Label("//tensorflow/core:" + name + "_op_lib"))]

    # NOTE(review): the inline comment on `linkstatic` says "dynamically" while
    # the attribute value requests static linking; confirm which is intended.
    tf_cc_binary(
        name = tool_name,
        copts = tf_copts(),
        linkopts = if_not_windows(["-lm", "-Wl,-ldl"]) + cc_linkopts,
        linkstatic = 1, # Faster to link this one-time-use binary dynamically
        visibility = [clean_dep("//tensorflow:internal")],
        deps = ([
            clean_dep("//tensorflow/core:framework"),
            clean_dep("//tensorflow/python:python_op_gen_main"),
        ] + deps),
    )

    # Invoke the previous cc_binary to generate a python file.
    if not out:
        out = "ops/gen_" + name + ".py"

    # Build the op-list argument and the flag telling the tool how to read it.
    # These two values are only consumed by the genrule in the `else` branch
    # further down (the one without `hidden_file`).
    if hidden:
        op_list_arg = ",".join(hidden)
        op_list_is_whitelist = False
    elif op_whitelist:
        op_list_arg = ",".join(op_whitelist)
        op_list_is_whitelist = True
    else:
        # A quoted empty string keeps the positional argument present in the
        # shell command even when there is no list.
        op_list_arg = "''"
        op_list_is_whitelist = False

    # Prepare ApiDef directories to pass to the genrule.
    if not api_def_srcs:
        # With no ApiDef sources a lone "," is passed in that position.
        api_def_args_str = ","
    else:
        api_def_args = []
        for api_def_src in api_def_srcs:
            # Add directory of the first ApiDef source to args.
            # We are assuming all ApiDefs in a single api_def_src are in the
            # same directory.
            # `$$` yields a literal `$` for the shell, so the dirname/cut
            # pipeline runs when the genrule command executes.
            api_def_args.append(
                "$$(dirname $$(echo $(locations " + api_def_src +
                ") | cut -d\" \" -f1))",
            )
        api_def_args_str = ",".join(api_def_args)

    if hidden_file:
        # `hidden_file` is file containing a list of op names to be hidden in the
        # generated module.
        # Command shape: <tool> <api_def_dirs> @<hidden_file> > <out>
        native.genrule(
            name = name + "_pygenrule",
            outs = [out],
            srcs = api_def_srcs + [hidden_file],
            exec_tools = [tool_name] + tf_binary_additional_srcs(),
            cmd = ("$(location " + tool_name + ") " + api_def_args_str +
                   " @$(location " + hidden_file + ") > $@"),
        )
    else:
        # Command shape: <tool> <api_def_dirs> <op_list> <0|1> > <out>
        # where the last argument is "1" only when op_list is a whitelist.
        native.genrule(
            name = name + "_pygenrule",
            outs = [out],
            srcs = api_def_srcs,
            exec_tools = [tool_name] + tf_binary_additional_srcs(),
            cmd = ("$(location " + tool_name + ") " + api_def_args_str + " " +
                   op_list_arg + " " +
                   ("1" if op_list_is_whitelist else "0") + " > $@"),
        )

    # Make a py_library out of the generated python file.
    if not generated_target_name:
        generated_target_name = name
    native.py_library(
        name = generated_target_name,
        srcs = [out],
        srcs_version = "PY2AND3",
        visibility = visibility,
        # Fixed dependency only; the caller-supplied `deps` went to the
        # generator binary above, not to this library.
        deps = [
            clean_dep("//tensorflow/python:framework_for_generated_wrappers_v2"),
        ],
        # Instruct build_cleaner to try to avoid using this rule; typically ops
        # creators will provide their own tf_custom_op_py_library based target
        # that wraps this one.
        tags = ["avoid_dep"],
    )

The above function defines a tf_cc_binary as a util tool for generating the Python APIs for the input Ops as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
.
.
.
tool_name = "gen_" + name + "_py_wrappers_cc"
.
.
.
tf_cc_binary(
name = tool_name,
copts = tf_copts(),
linkopts = if_not_windows(["-lm", "-Wl,-ldl"]) + cc_linkopts,
linkstatic = 1, # Faster to link this one-time-use binary dynamically
visibility = [clean_dep("//tensorflow:internal")],
deps = ([
clean_dep("//tensorflow/core:framework"),
clean_dep("//tensorflow/python:python_op_gen_main"),
] + deps),
)

That binary is then invoked by the genrule below, which redirects its output into the generated Python file:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
if hidden_file:
# `hidden_file` is file containing a list of op names to be hidden in the
# generated module.
native.genrule(
name = name + "_pygenrule",
outs = [out],
srcs = api_def_srcs + [hidden_file],
exec_tools = [tool_name] + tf_binary_additional_srcs(),
cmd = ("$(location " + tool_name + ") " + api_def_args_str +
" @$(location " + hidden_file + ") > $@"),
)
else:
native.genrule(
name = name + "_pygenrule",
outs = [out],
srcs = api_def_srcs,
exec_tools = [tool_name] + tf_binary_additional_srcs(),
cmd = ("$(location " + tool_name + ") " + api_def_args_str + " " +
op_list_arg + " " +
("1" if op_list_is_whitelist else "0") + " > $@"),
)

The actual code used to generate Python code is defined at //tensorflow/python:python_op_gen_main.

6. //tensorflow/python:python_op_gen_main

It is located at tensorflow/python/BUILD as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# C++ library built from framework/python_op_gen_main.cc; it is linked into
# every "gen_<name>_py_wrappers_cc" tool created by tf_gen_op_wrapper_py.
cc_library(
    name = "python_op_gen_main",
    srcs = ["framework/python_op_gen_main.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":python_op_gen",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core:op_gen_lib",
        "//tensorflow/core:protos_all_cc",
    ],
    # Forces this library's object files into any binary that depends on it,
    # even if no symbol is referenced (presumably because it supplies the
    # tool's main(); confirm in python_op_gen_main.cc).
    alwayslink = 1,
)
  • tensorflow/python/framework/python_op_gen_main.cc is the entry point for Python code generation. It takes the API definition files under tensorflow/core/api_def/base_api as its inputs.

  • tensorflow/python/framework/python_op_gen.[h|cc] are used to automatically generate the Python files for the TensorFlow Python ops based on the op definitions. The automatically generated files are located at bazel-bin/tensorflow/python/ops, and their names start with gen_.

BTW: C++ APIs

C++ APIs are generated in a similar way to the Python APIs. The code is located at tensorflow/cc/framework/cc_op_gen_main.cc, and the corresponding BUILD target is defined in tensorflow/cc/BUILD as below:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# C++ library containing the C++ op-wrapper generator: the generator itself
# (cc_op_gen.cc/.h) together with its cc_op_gen_main.cc entry source.
cc_library(
    name = "cc_op_gen_main",
    srcs = [
        "framework/cc_op_gen.cc",
        "framework/cc_op_gen.h",
        "framework/cc_op_gen_main.cc",
    ],
    copts = tf_copts(),
    # The base ApiDef files are made available as runtime data.
    data = [
        "//tensorflow/core/api_def:base_api_def",
    ],
    deps = [
        "//tensorflow/core:framework_headers_lib",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core:op_gen_lib",
        "//tensorflow/core:protos_all_cc",
        "@com_google_absl//absl/strings",
    ],
)

Here, all the API definition files under tensorflow/core/api_def/base_api are used as the input for cc_op_gen_main.