This guide is a part of a series of articles that show you how to save a Keras model and use it for predictions in a Visual Studio C program.
Assuming you’ve set up your Visual Studio project correctly, we are now ready to use the frozen model (.pb file) to make a prediction in C.
If you do not have the .pb file, you can download it here.
This code is adapted from Patrick Wieschollek’s inference code and is similar, except that some parts have been removed (his code does not use a frozen graph, and thus also loads a checkpoint).
| // Code to use a frozen pb model and run inferences in C using libtensorflow | |
| // Adapted from https://github.com/PatWie/tensorflow-cmake/blob/master/inference/c/inference_c.c | |
#include "pch.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <c_api.h>
| /* Functions to help read pb file */ | |
/*
 * Buffer deallocator callback: releases `data` with free().
 * `length` is accepted only to satisfy the callback signature and is ignored.
 */
void free_buffer(void* data, size_t length)
{
    (void)length;
    free(data);
}
/*
 * Tensor deallocator callback: releases `ptr` with free().
 * `len` and `arg` are part of the callback signature but are not used.
 */
void deallocator(void* ptr, size_t len, void* arg)
{
    (void)len;
    (void)arg;
    free(ptr);
}
| TF_Buffer* read_file(const char* file) { | |
| FILE* f = fopen(file, "rb"); | |
| fseek(f, 0, SEEK_END); | |
| long fsize = ftell(f); | |
| fseek(f, 0, SEEK_SET); // same as rewind(f); | |
| void* data = malloc(fsize); | |
| fread(data, fsize, 1, f); | |
| fclose(f); | |
| TF_Buffer* buf = TF_NewBuffer(); | |
| buf->data = data; | |
| buf->length = fsize; | |
| buf->data_deallocator = free_buffer; | |
| return buf; | |
| } | |
| int main() | |
| { | |
| printf("Hello from TensorFlow C library version %s\n", TF_Version()); | |
| /* Define filename where the frozen graph is stored */ | |
| char filename[320]; | |
| strcpy(filename, "C:\\Users\\s\\Dropbox\\Tensorflowgroup\\plus-2017\\slopemodel.pb"); | |
| // load graph | |
| TF_Buffer* graph_def = read_file(filename); | |
| TF_Graph* graph = TF_NewGraph(); | |
| TF_Status* status = TF_NewStatus(); | |
| TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions(); | |
| TF_GraphImportGraphDef(graph, graph_def, opts, status); | |
| TF_DeleteImportGraphDefOptions(opts); | |
| if (TF_GetCode(status) != TF_OK) { | |
| printf("ERROR: Unable to import graph %s\n", filename); | |
| return 1; | |
| } | |
| // create session | |
| TF_SessionOptions* opt = TF_NewSessionOptions(); | |
| TF_Session* sess = TF_NewSession(graph, opt, status); | |
| TF_DeleteSessionOptions(opt); | |
| if (TF_GetCode(status) != TF_OK) { | |
| printf("Unable to Create Session \n"); | |
| return 1; | |
| } | |
| // run restore | |
| TF_Operation* checkpoint_op = TF_GraphOperationByName(graph, "save/Const"); | |
| TF_Operation* restore_op = TF_GraphOperationByName(graph, "save/restore_all"); | |
| const char* checkpoint_path_str = "./exported/my_model"; | |
| size_t checkpoint_path_str_len = strlen(checkpoint_path_str); | |
| size_t encoded_size = TF_StringEncodedSize(checkpoint_path_str_len); | |
| // The format for TF_STRING tensors is: | |
| // start_offset: array[uint64] | |
| // data: byte[...] | |
| size_t total_size = sizeof(int64_t) + encoded_size; | |
| char* input_encoded = (char*)malloc(total_size); | |
| memset(input_encoded, 0, total_size); | |
| TF_StringEncode(checkpoint_path_str, checkpoint_path_str_len, | |
| input_encoded + sizeof(int64_t), encoded_size, status); | |
| if (TF_GetCode(status) != TF_OK) { | |
| fprintf(stderr, "ERROR: something wrong with encoding: %s", | |
| TF_Message(status)); | |
| return 1; | |
| } | |
| // First two samples, normalized values | |
| int SEQ_LENGTH = 21; | |
| float sampledata0[21] = { 1., 0.96713615, 0.88262911, 0.74178404, 0.69953052, | |
| 0.66666667, 0.6713615, 0.57746479, 0.48826291, 0.50234742, | |
| 0.5399061, 0.43661972, 0.33333333, 0.342723, 0.25821596, | |
| 0.24882629, 0.19248826, 0.16901408, 0.07511737, 0., | |
| 0.03286385 }; | |
| float sampledata1[21] = { 0. , 0.0257732 , 0.01030928, 0.1185567 , 0.20618557, | |
| 0.28865979, 0.30412371, 0.33505155, 0.36597938, 0.42268041, | |
| 0.43814433, 0.55670103, 0.73195876, 0.69587629, 0.77835052, | |
| 0.85051546, 0.8814433 , 0.81443299, 0.92783505, 0.98969072, | |
| 1. }; | |
| // gerenate input | |
| TF_Operation* input_op = TF_GraphOperationByName(graph, "dense_1_input"); | |
| if (input_op == NULL) { | |
| printf("operattion not found\n"); | |
| exit(0); | |
| } | |
| printf("input_op has %i inputs\n", TF_OperationNumOutputs(input_op)); | |
| float* raw_input_data = (float*)malloc(SEQ_LENGTH * sizeof(float)); | |
| for (int i = 0; i < 21; i++) raw_input_data[i] = sampledata1[i]; | |
| //raw_input_data[1] = 1.f; | |
| int64_t* raw_input_dims = (int64_t*)malloc(2 * sizeof(int64_t)); | |
| raw_input_dims[0] = 1; | |
| raw_input_dims[1] = 21; | |
| // prepare inputs | |
| TF_Tensor* input_tensor = | |
| TF_NewTensor(TF_FLOAT, raw_input_dims, 2, raw_input_data, | |
| SEQ_LENGTH * sizeof(float), &deallocator, NULL); | |
| TF_Output* run_inputs = (TF_Output*)malloc(1 * sizeof(TF_Output)); | |
| run_inputs[0].oper = input_op; | |
| run_inputs[0].index = 0; | |
| TF_Tensor** run_inputs_tensors = (TF_Tensor**)malloc(1 * sizeof(TF_Tensor*)); | |
| run_inputs_tensors[0] = input_tensor; | |
| // prepare outputs | |
| TF_Operation* output_op = TF_GraphOperationByName(graph, "dense_2/Sigmoid"); | |
| if (output_op == NULL) { | |
| printf("oepration not found\n"); | |
| exit(0); | |
| } | |
| TF_Output* run_outputs = (TF_Output*)malloc(1 * sizeof(TF_Output)); | |
| run_outputs[0].oper = output_op; | |
| run_outputs[0].index = 0; | |
| TF_Tensor** run_output_tensors = (TF_Tensor**)malloc(1 * sizeof(TF_Tensor*)); | |
| float* raw_output_data = (float*)malloc(1 * sizeof(float)); | |
| raw_output_data[0] = 1.f; | |
| int64_t* raw_output_dims = (int64_t*)malloc(1 * sizeof(int64_t)); | |
| raw_output_dims[0] = 1; | |
| TF_Tensor* output_tensor = | |
| TF_NewTensor(TF_FLOAT, raw_output_dims, 1, raw_output_data, | |
| 1 * sizeof(float), &deallocator, NULL); | |
| run_output_tensors[0] = output_tensor; | |
| // run network | |
| TF_SessionRun(sess, | |
| /* RunOptions */ NULL, | |
| /* Input tensors */ run_inputs, run_inputs_tensors, 1, | |
| /* Output tensors */ run_outputs, run_output_tensors, 1, | |
| /* Target operations */ NULL, 0, | |
| /* RunMetadata */ NULL, | |
| /* Output status */ status); | |
| if (TF_GetCode(status) != TF_OK) { | |
| fprintf(stderr, "ERROR: Unable to run output_op: %s\n", TF_Message(status)); | |
| return 1; | |
| } | |
| void * output_data = TF_TensorData(run_output_tensors[0]); | |
| float data1 = ((float*)output_data)[0]; | |
| printf("Prediction: %.4f\n", data1); | |
| TF_CloseSession(sess, status); | |
| TF_DeleteSession(sess, status); | |
| TF_DeleteStatus(status); | |
| TF_DeleteBuffer(graph_def); | |
| TF_DeleteGraph(graph); | |
| getchar(); | |
| } |
Replace the code in the Visual Studio project you created in the last guide with this code, and run it. You should see the value 0.6984, which is the same as the output of the Keras model.

Removing log messages
If things worked as expected, your program will print a lot of messages (above) that you don’t really need.
To change this, go back to the environment variable settings in Windows and set the TF_CPP_MIN_LOG_LEVEL variable to 3. (You might need to restart Visual Studio or Windows for the change to take effect.)

Moving on
And that’s it! You are now ready to work with more complicated models in Keras, or to run inference on multiple samples at a time. The libtensorflow C API and PatWie’s GitHub repository should have more examples if you need them.