使用tf.py_func产生输入数据
Python 版本 = 3.6.3,TensorFlow 版本 = 1.3.0
我之前一直使用 Keras,但现在想直接使用 TensorFlow。我试图实现与 Keras 的 fit_generator 等效的功能,这样就不必在一开始就把所有训练数据加载到内存中,而是可以按需把数据送入网络进行训练。下面的代码是我朝这个方向所做的初步尝试;如果我的整体思路就是错的,希望有人能告诉我应该查阅哪些文档,以及应该用什么关键字来搜索相关内容。
我目前的系统基于一个生成器:它读取 sqlite 数据库文件、提取出 np.array,然后把它们转换成我需要的数据形状(向前预测一步的时间序列)。我正在尝试把这个系统迁移到 TensorFlow 的 Dataset 上,但在使用 tf.py_func 时遇到了困难。下面是我目前尝试的代码:
import tensorflow as tf
import os
from tensorflow.contrib.data import Dataset, Iterator
import sqlite3
import pandas as pd
import numpy as np
# Number of consecutive rows that make up one input window.
LOOKBACK_ROWS = 600
# Directory that holds the input files.
DATA_DIR = '/mnt/derived_data/processedData'
# Names of the entries in DATA_DIR; each one is later handed to data_from_files.
files = os.listdir(DATA_DIR)
def data_from_files(f):
    """Read one sqlite file and cut its rows into look-back windows.

    The query result supplies the window inputs from its first column and the
    targets from its second column. Each window covers ``LOOKBACK_ROWS``
    consecutive rows and is paired with the target of its last row.

    NOTE: this is the failing version discussed in the post. The log recorded
    with it reports ``TypeError: must be str, not bytes`` from the PyFunc
    node -- presumably raised by ``DATA_DIR + f`` below, because
    ``tf.py_func`` passes the file name as ``bytes``.

    Returns:
        Two tensors named ``'X'`` and ``'Y'`` built from float32 arrays of
        shape ``(num_obs, LOOKBACK_ROWS, 1)`` and ``(num_obs, 1)``.
    """
    with sqlite3.connect(DATA_DIR + f) as conn:
        cursor = conn.execute("SELECT col1, col2, FROM tbl")
        column_names = [column[0] for column in cursor.description]
        table = np.array(cursor.fetchall())

    window_count = table.shape[0] - LOOKBACK_ROWS + 1
    inputs = np.zeros((window_count, LOOKBACK_ROWS, 1), dtype=np.float32)
    targets = np.zeros((window_count, 1), dtype=np.float32)
    for start in range(window_count):
        stop = start + LOOKBACK_ROWS
        inputs[start, :, 0] = table[start:stop, 0]
        # The target belongs to the last row of the window.
        targets[start, 0] = table[stop - 1, 1]

    return (tf.convert_to_tensor(inputs, name='X'),
            tf.convert_to_tensor(targets, name='Y'))
# String tensor with every file name; the dataset yields them one at a time.
filenames = tf.constant(files)


def _load_arrays(filename):
    """Run `data_from_files` on one file name through `tf.py_func`."""
    return tuple(tf.py_func(
        data_from_files,
        [filename],
        [tf.float32, tf.float32]))


dataset = Dataset.from_tensor_slices(filenames).map(_load_arrays)

# Build the iterator from the dataset's structure, then bind it to the dataset.
iterator = Iterator.from_structure(dataset.output_types, dataset.output_shapes)
next_element = iterator.get_next()
dataset_init_op = iterator.make_initializer(dataset)

with tf.Session() as sess:
    sess.run(dataset_init_op)
    while True:
        try:
            elem = sess.run(next_element)
        except tf.errors.OutOfRangeError:
            # Raised once the iterator has no more elements.
            print('End of dataset.')
            break
        else:
            print('Success')
初始化部分运行正常,但随后当我启动会话并运行时,收到了以下错误:
2017-10-16 16:58:45.227612: I tensorflow/core/common_runtime/gpu/gpu_device.cc:976] DMA: 0
2017-10-16 16:58:45.227615: I tensorflow/core/common_runtime/gpu/gpu_device.cc:986] 0: Y
2017-10-16 16:58:45.227620: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0)
2017-10-16 16:58:45.276138: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: TypeError: must be str, not bytes
2017-10-16 16:58:45.276306: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: TypeError: must be str, not bytes
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
Traceback (most recent call last):
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1327, in _do_call
return fn(*args)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1306, in _run_fn
status, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/contextlib.py", line 88, in __exit__
next(self.gen)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: TypeError: must be str, not bytes
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/usr/code/nn/data_folder/pipeline.py", line 51, in <module>
elem = sess.run(next_element)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: TypeError: must be str, not bytes
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
>>> python.el: native completion setup loaded
>>>
问题
(1)这看起来正是 py_func 的适用场景,我的理解有错吗?如果没错,有没有人能给我推荐一些比 TensorFlow 文档更深入的资料?(我注意到 GitHub 上有一个可能相关的问题:https://github.com/tensorflow/tensorflow/issues/12396 ,但其中"用 tuple 把所有东西包起来"的修复方法对我没有帮助。)
(2)我应该遵循怎样的一般流程?尤其是当我想从一批文件名出发,并且让每个文件名输出多个训练 Example 的时候。
谢谢。
下面我重写了脚本,使它成为一个可以独立运行的示例。我认为问题仍然与上面的代码相同,但为了确认,我也重新列出了错误信息。
结合了 @mrry 回答中的改动、可独立运行的代码示例:
import tensorflow as tf
import os
import numpy as np
# Number of consecutive rows that make up one input window.
LOOKBACK_ROWS = 600
# Save a 2000x2 array of random samples so the example needs no external data.
arr = np.random.random_sample((2000, 2))
np.save("npfile.npy", arr)
def data_from_files(f):
    """Load the array stored at ``f`` and cut it into look-back windows.

    Column 0 of the loaded array supplies the window inputs and column 1 the
    targets. Each window covers ``LOOKBACK_ROWS`` consecutive rows and is
    paired with the target of its last row.

    NOTE: this is the failing repro discussed in the post. The log recorded
    with it reports ``AttributeError: 'bytes' object has no attribute
    'read'`` from the PyFunc node, i.e. the file name reaches ``np.load`` as
    ``bytes`` when the function is called through ``tf.py_func``.

    Returns:
        A pair of float32 arrays with shapes ``(num_obs, LOOKBACK_ROWS, 1)``
        and ``(num_obs, 1)``.
    """
    data = np.load(f)
    window_count = data.shape[0] - LOOKBACK_ROWS + 1
    inputs = np.zeros((window_count, LOOKBACK_ROWS, 1), dtype=np.float32)
    targets = np.zeros((window_count, 1), dtype=np.float32)
    for start in range(window_count):
        stop = start + LOOKBACK_ROWS
        inputs[start, :, 0] = data[start:stop, 0]
        # The target belongs to the last row of the window.
        targets[start, 0] = data[stop - 1, 1]
    return inputs, targets
files = ["npfile.npy"]
# String tensor with every file name; the dataset yields them one at a time.
filenames = tf.constant(files)


def _load_arrays(filename):
    """Run `data_from_files` on one file name through `tf.py_func`."""
    # NOTE: In TensorFlow 1.4, the `tuple` is no longer needed.
    return tuple(tf.py_func(
        data_from_files,
        [filename],
        [tf.float32, tf.float32]))


# NOTE: In TensorFlow 1.4, `tf.contrib.data` is now `tf.data`.
dataset = tf.contrib.data.Dataset.from_tensor_slices(filenames).map(_load_arrays)

# NOTE: If you only have one `Dataset`, you do not need to use
# `Iterator.from_structure()`.
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    while True:
        try:
            elem = sess.run(next_element)
        except tf.errors.OutOfRangeError:
            # Raised once the iterator has no more elements.
            print('End of dataset.')
            break
        else:
            print('Success')
错误:
2017-10-16 18:30:44.143668: I tensorflow/core/common_runtime/gpu/gpu_device.cc:976] DMA: 0
2017-10-16 18:30:44.143672: I tensorflow/core/common_runtime/gpu/gpu_device.cc:986] 0: Y
2017-10-16 18:30:44.143679: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0)
2017-10-16 18:30:44.190852: W tensorflow/core/framework/op_kernel.cc:1192] Unknown: AttributeError: 'bytes' object has no attribute 'read'
2017-10-16 18:30:44.190959: W tensorflow/core/framework/op_kernel.cc:1192] Unknown: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
Traceback (most recent call last):
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1327, in _do_call
return fn(*args)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1306, in _run_fn
status, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/contextlib.py", line 88, in __exit__
next(self.gen)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.UnknownError: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "demo.py", line 48, in <module>
elem = sess.run(next_element)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/opt/python/3.6.3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnknownError: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT, DT_FLOAT], token="pyfunc_0"](arg0)]]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
按相反的顺序来回答你的问题:
What is the general flow I should be following, particularly where I want to start with something like a bunch of filenames and output more than one training Example per file name?
要把一个元素转换成多个元素,请使用 Dataset.flat_map(f) 转换。通过这种转换,你可以定义一个函数 f(x),把单个元素 x 映射为一个嵌套的 Dataset 对象,随后该转换会负责把嵌套的数据集展平。
This seems like exactly a use case for py_func but am I wrong about that?
这确实是 tf.py_func() 的一个适用场景,但你的程序有一个小错误:tf.py_func() 操作期望你的函数(data_from_files())返回 NumPy 数组,而不是 tf.Tensor 对象。直接返回 X 和 Y 应该就可以了。
回答完这两点之后,让我们来看看可以如何重写你的代码:
import tensorflow as tf
import os
import sqlite3
import pandas as pd
import numpy as np
# Number of consecutive rows that make up one input window.
LOOKBACK_ROWS = 600
# Directory that holds the input files.
DATA_DIR = '/mnt/derived_data/processedData'
# Names of the entries in DATA_DIR; each one is later handed to data_from_files.
files = os.listdir(DATA_DIR)
def data_from_files(f):
    """Read one sqlite file and cut its rows into look-back windows.

    Args:
        f: Name of a file inside ``DATA_DIR``, given as ``str`` or ``bytes``.
            ``tf.py_func`` hands string tensors to the Python function as
            ``bytes`` under Python 3, so both forms are accepted.

    Returns:
        A pair ``(X, Y)`` of float32 NumPy arrays. ``X`` has shape
        ``(num_obs, LOOKBACK_ROWS, 1)`` and holds sliding windows over
        ``col1``. ``Y`` has shape ``(num_obs, 1)`` and holds the ``col2``
        value of the last row of each window. ``num_obs`` is 0 when the
        table has fewer than ``LOOKBACK_ROWS`` rows.

    Raises:
        sqlite3.Error: If the file cannot be opened or queried.
    """
    # os.fsdecode turns bytes into str and leaves str untouched; joining with
    # os.path.join also supplies the separator that DATA_DIR does not end with.
    path = os.path.join(DATA_DIR, os.fsdecode(f))

    # `with connection:` only commits or rolls back, it never closes the
    # handle, so close it explicitly to avoid leaking one handle per file.
    conn = sqlite3.connect(path)
    try:
        rows = conn.execute("SELECT col1, col2 FROM tbl").fetchall()
    finally:
        conn.close()

    # reshape keeps the array two-dimensional even when the table is empty.
    arr = np.asarray(rows, dtype=np.float32).reshape(-1, 2)
    # Clamp at zero so a short table yields empty outputs, not a ValueError.
    num_obs = max(arr.shape[0] - LOOKBACK_ROWS + 1, 0)

    X = np.zeros((num_obs, LOOKBACK_ROWS, 1), dtype=np.float32)
    Y = np.zeros((num_obs, 1), dtype=np.float32)
    for i in range(num_obs):
        end = i + LOOKBACK_ROWS
        X[i, :, 0] = arr[i:end, 0]
        # The target belongs to the last row of the window.
        Y[i, 0] = arr[end - 1, 1]
    return X, Y
# String tensor with every file name; the dataset yields them one at a time.
filenames = tf.constant(files)


def _load_arrays(filename):
    """Run `data_from_files` on one file name through `tf.py_func`."""
    # NOTE: In TensorFlow 1.4, the `tuple` is no longer needed.
    return tuple(tf.py_func(
        data_from_files,
        [filename],
        [tf.float32, tf.float32]))


# NOTE: In TensorFlow 1.4, `tf.contrib.data` is now `tf.data`.
dataset = tf.contrib.data.Dataset.from_tensor_slices(filenames).map(_load_arrays)

# NOTE: If you only have one `Dataset`, you do not need to use
# `Iterator.from_structure()`.
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    while True:
        try:
            elem = sess.run(next_element)
        except tf.errors.OutOfRangeError:
            # Raised once the iterator has no more elements.
            print('End of dataset.')
            break
        else:
            print('Success')
我遇到了和你同样的问题。下面是我的代码,问题出现在同时使用 tf.py_func() 和 numpy 的时候。
import tensorflow as tf
import numpy as np
# Path of the saved array, relative to the working directory.
myname = ".\mags\LJ001-0002.npy"
# Load the file directly, outside of any TensorFlow op.
print(np.load(myname))


def printsomthing(name):
    """Print ``name`` and return the array that ``np.load`` reads from it.

    NOTE: when called through ``tf.py_func`` the recorded output shows
    ``name`` printed as a ``bytes`` value, followed by ``AttributeError:
    'bytes' object has no attribute 'read'``.
    """
    print(name)
    loaded = np.load(name)
    return loaded


# Wrap the Python function as an op with a single float32 output.
op = tf.py_func(printsomthing, [myname], [tf.float32])
session = tf.Session()
result = session.run(op)
print(result)
输出:
2018-03-10 20:03:24.722478: I C:\tf_jenkins\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\platform\cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2018-03-10 20:03:24.973617: I C:\tf_jenkins\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:1212] Found device 0 with properties:
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.8095
pciBusID: 0000:01:00.0
totalMemory: 8.00GiB freeMemory: 6.59GiB
2018-03-10 20:03:24.977676: I C:\tf_jenkins\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:1312] Adding visible gpu devices: 0
2018-03-10 20:03:25.427432: I C:\tf_jenkins\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:993] Creating TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6372 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080, pci bus id: 0000:01:00.0, compute capability: 6.1)
b'.\\mags\\LJ001-0002.npy'
2018-03-10 20:03:25.666649: W C:\tf_jenkins\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\framework\op_kernel.cc:1190] Unknown: AttributeError: 'bytes' object has no attribute 'read'
Traceback (most recent call last):
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1361, in _do_call
return fn(*args)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _run_fn
target_list, status, run_metadata)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 516, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.UnknownError: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT], token="pyfunc_0", _device="/job:localhost/replica:0/task:0/device:CPU:0"](PyFunc/input_0)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "d:/Dev2018/tacotron/tacotron/test.py", line 13, in <module>
print(session.run(op))
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 905, in run
run_metadata_ptr)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1137, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1355, in _do_run
options, run_metadata)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1374, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnknownError: AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT], token="pyfunc_0", _device="/job:localhost/replica:0/task:0/device:CPU:0"](PyFunc/input_0)]]
Caused by op 'PyFunc', defined at:
File "d:/Dev2018/tacotron/tacotron/test.py", line 11, in <module>
op = tf.py_func(printsomthing,[myname],[tf.float32])
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\ops\script_ops.py", line 317, in py_func
func=func, inp=inp, Tout=Tout, stateful=stateful, eager=False, name=name)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\ops\script_ops.py", line 225, in _internal_py_func
input=inp, token=token, Tout=Tout, name=name)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_script_ops.py", line 95, in _py_func
"PyFunc", input=input, token=token, Tout=Tout, name=name)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3271, in create_op
op_def=op_def)
File "C:\Users\lichao\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1650, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
UnknownError (see above for traceback): AttributeError: 'bytes' object has no attribute 'read'
[[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_FLOAT], token="pyfunc_0", _device="/job:localhost/replica:0/task:0/device:CPU:0"](PyFunc/input_0)]]
这并没有真正回答这个问题。如果您有不同的问题,可以通过单击[提问](https://stackoverflow.com/questions/ask)来提问。一旦您有了足够的[声誉](https://stackoverflow.com/help/whats-reputation),也可以[添加赏金](https://stackoverflow.com/help/privileges/set-bounties)来吸引更多人关注这个问题。 - [来自评论](/review/low-quality-posts/19070078) – Blastfurnace
感谢这个非常详细且有用的回答。我可能漏掉了什么,但我发现即使加入了你的修改,仍然会出现同样的错误。我已经在上面的问题中补充了可独立运行的代码,以及这种情况下出现的错误信息。 – TFdoe