gpt4 book ai didi

python - 如何保存 Tensorflow 估计器模型以在 Google ML Engine 上提供服务

转载 作者:太空宇宙 更新时间:2023-11-03 20:53:07 25 4
gpt4 key购买 nike

我是 Tensorflow 新手。我正在尝试使用 Google ML Engine 上的 Estimator 构建并提供模型。但是,在尝试了几种方法后,我不确定如何保存模型以供服务。

我已经以可接受的精度成功训练了模型。当我尝试保存模型以供服务时,我四处搜索并找到了几种方法。然而,我仍然遇到了一些问题......

根据针对发布的其他一些问题提出的建议,我尝试了 3 种导出方法:

1) 获取序列化示例作为输入 - 我遇到错误“TypeError:字节类型的对象不是 JSON 可序列化”。另外,我找不到一种好方法来提供序列化示例以实现有效服务。由于我使用 ML Engine 进行服务,因此使用 JSON 输入似乎会更容易。

2) 通过“基本”预处理获取 JSON 作为输入 - 我能够成功导出模型。将模型加载到 ML Engine 后,我尝试做出一些预测。虽然返回了预测结果,但我发现无论我如何更改 JSON 输入,都会返回相同的结果。我查看了培训期间获得的验证结果。该模型应该能够返回各种结果。我认为服务函数中的预处理有问题,所以我尝试了第三种方法......

3)具有“相同”预处理的 JSON 输入 - 我无法理解这一点,但我认为可能需要进行与我在模型训练期间处理数据的方式完全相同的预处理。然而,由于服务输入函数使用 tf.placeholders,我不知道如何复制相同的预处理以使导出的模型起作用......

(请原谅我糟糕的编码风格......)

<小时/>

训练代码:

col_names = ['featureA','featureB','featureC']
target_name = 'langIntel'

col_def = {}
col_def['featureA'] = {'type':'float','tfType':tf.float32,'len':'fixed'}
col_def['featureB'] = {'type':'int','tfType':tf.int64,'len':'fixed'}
col_def['featureC'] = {'type':'bytes','tfType':tf.string,'len':'var'}


def _float_feature(value):
if not isinstance(value, list): value = [value]
return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def _int_feature(value):
if not isinstance(value, list): value = [value]
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def _bytes_feature(value):
if not isinstance(value, list): value = [value]
return tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[p.encode('utf-8') for p in value]
)
)

functDict = {'float':_float_feature,
'int':_int_feature,'bytes':_bytes_feature
}

training_targets = []
# Omitted validatin partition


with open('[JSON FILE PATH]') as jfile:
json_data_input = json.load(jfile)

random.shuffle(json_data_input)


with tf.python_io.TFRecordWriter('savefile1.tfrecord') as writer:
for item in json_data_input:
if item[target_name] > 0:
feature = {}

for col in col_names:
feature[col] = functDict[col_def[col]['type']](item[col])

training_targets.append(item[target_name])

example = tf.train.Example(
features=tf.train.Features(feature=feature)
)
writer.write(example.SerializeToString())


def _parse_function(example_proto):
example = {}

for col in col_names:
if col_def[col]['len'] == 'fixed':
example[col] = tf.FixedLenFeature([], col_def[col]['tfType'])
else:
example[col] = tf.VarLenFeature(col_def[col]['tfType'])

parsed_example = tf.parse_single_example(example_proto, example)

features = {}

for col in col_names:
features[col] = parsed_example[col]

labels = parsed_example.get(target_name)

return features, labels


def my_input_fn(batch_size=1,num_epochs=None):
dataset = tf.data.TFRecordDataset('savefile1.tfrecord')

dataset = dataset.map(_parse_function)
dataset = dataset.shuffle(10000)
dataset = dataset.repeat(num_epochs)
dataset = dataset.batch(batch_size)
iterator = dataset.make_one_shot_iterator()
features, labels = iterator.get_next()

return features, labels

allColumns = None

def train_model(
learning_rate,
n_trees,
n_batchespl,
batch_size):

periods = 10

vocab_list = ('vocab1', 'vocab2', 'vocab3')

featureA_bucket = tf.feature_column.bucketized_column(
tf.feature_column.numeric_column(
key="featureA",dtype=tf.int64
), [5,10,15]
)
featureB_bucket = tf.feature_column.bucketized_column(
tf.feature_column.numeric_column(
key="featureB",dtype=tf.float32
), [0.25,0.5,0.75]
)
featureC_cat = tf.feature_column.indicator_column(
tf.feature_column.categorical_column_with_vocabulary_list(
key="featureC",vocabulary_list=vocab_list,
num_oov_buckets=1
)
)


theColumns = [featureA_bucket,featureB_bucket,featureC_cat]

global allColumns
allColumns = theColumns

regressor = tf.estimator.BoostedTreesRegressor(
feature_columns=theColumns,
n_batches_per_layer=n_batchespl,
n_trees=n_trees,
learning_rate=learning_rate
)

training_input_fn = lambda: my_input_fn(batch_size=batch_size,num_epochs=5)
predict_input_fn = lambda: my_input_fn(num_epochs=1)

regressor.train(
input_fn=training_input_fn
)

# omitted evaluation part

return regressor

regressor = train_model(
learning_rate=0.05,
n_trees=100,
n_batchespl=50,
batch_size=20)

导出试用 1:

def _serving_input_receiver_fn():
serialized_tf_example = tf.placeholder(dtype=tf.string, shape=None,
name='input_example_tensor'
)

receiver_tensors = {'examples': serialized_tf_example}
features = tf.parse_example(serialized_tf_example, feature_spec)
return tf.estimator.export.ServingInputReceiver(features,
receiver_tensors
)

servable_model_dir = "[OUT PATH]"
servable_model_path = regressor.export_savedmodel(servable_model_dir,
_serving_input_receiver_fn
)

导出试用 2:

def serving_input_fn():
feature_placeholders = {
'featureA': tf.placeholder(tf.int64, [None]),
'featureB': tf.placeholder(tf.float32, [None]),
'featureC': tf.placeholder(tf.string, [None, None])
}

receiver_tensors = {'inputs': feature_placeholders}

feature_spec = tf.feature_column.make_parse_example_spec(allColumns)

features = tf.parse_example(feature_placeholders, feature_spec)
return tf.estimator.export.ServingInputReceiver(features,
feature_placeholders
)

servable_model_dir = "[OUT PATH]"
servable_model_path = regressor.export_savedmodel(
servable_model_dir, serving_input_fn
)

导出试用 3:

def serving_input_fn():
feature_placeholders = {
'featureA': tf.placeholder(tf.int64, [None]),
'featureB': tf.placeholder(tf.float32, [None]),
'featureC': tf.placeholder(tf.string, [None, None])
}

def toBytes(t):
t = str(t)
return t.encode('utf-8')

tmpFeatures = {}

tmpFeatures['featureA'] = tf.train.Feature(
int64_list=feature_placeholders['featureA']
)
# TypeError: Parameter to MergeFrom() must be instance
# of same class: expected tensorflow.Int64List got Tensor.
tmpFeatures['featureB'] = tf.train.Feature(
float_list=feature_placeholders['featureB']
)
tmpFeatures['featureC'] = tf.train.Feature(
bytes_list=feature_placeholders['featureC']
)

tmpExample = tf.train.Example(
features=tf.train.Features(feature=tmpFeatures)
)
tmpExample_proto = tmpExample.SerializeToString()

example = {}

for key, tensor in feature_placeholders.items():
if col_def[key]['len'] == 'fixed':
example[key] = tf.FixedLenFeature(
[], col_def[key]['tfType']
)
else:
example[key] = tf.VarLenFeature(
col_def[key]['tfType']
)

parsed_example = tf.parse_single_example(
tmpExample_proto, example
)

features = {}

for key in tmpFeatures.keys():
features[key] = parsed_example[key]

return tf.estimator.export.ServingInputReceiver(
features, feature_placeholders
)

servable_model_dir = "[OUT PATH]"
servable_model_path = regressor.export_savedmodel(
servable_model_dir, serving_input_fn
)

应该如何构建服务输入函数才能输入 JSON 文件进行预测?非常感谢您的任何见解!

最佳答案

只是提供更新 - 我仍然无法完成导出。然后,我使用 Keras 重建了训练模型,并成功导出了用于服务的模型(重建模型可能花费了更少的时间来弄清楚如何在我的案例中导出估计器模型......)

关于python - 如何保存 Tensorflow 估计器模型以在 Google ML Engine 上提供服务,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56190772/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com