Surya Sharma

Machine Learning Applications | Computer Vision | This website may be my recipe book.

Inference in C using tensorflow

This guide is a part of a series of articles that show you how to save a Keras model and use it for predictions in a Visual Studio C program.

Assuming you’ve set up your Visual Studio project correctly, we are now ready to use the frozen model (.pb file) to make a prediction in C.

If you do not have the .pb file, you can download it here.

This code is adapted from Patrick Wieschollek’s inference code. It is similar, but some parts have been removed: his code does not use a frozen graph, and therefore also has to load a checkpoint.

// Code to use a frozen pb model and run inferences in C using libtensorflow
// Adapted from https://github.com/PatWie/tensorflow-cmake/blob/master/inference/c/inference_c.c
#include "pch.h"
#include <stdlib.h>
#include <iostream>
#include <c_api.h>
/* Functions to help read pb file */
/*
 * Releases a block that was obtained from malloc().
 * The signature (pointer plus byte count) is what read_file() stores in
 * TF_Buffer::data_deallocator; the byte count is not needed by free().
 */
void free_buffer(void* data, size_t length)
{
    (void)length; /* unused */
    free(data);
}
/*
 * Tensor deallocator callback: frees malloc()-allocated tensor storage.
 * Passed to TF_NewTensor() in main(); neither the length nor the user
 * argument is used.
 */
void deallocator(void* ptr, size_t len, void* arg)
{
    (void)len; /* unused */
    (void)arg; /* unused */
    free(ptr);
}
/*
 * Reads an entire file into a newly created TF_Buffer.
 *
 * file: path of the file to load (opened in binary mode).
 *
 * Returns a buffer whose data was allocated with malloc() and whose
 * data_deallocator is free_buffer, so TF_DeleteBuffer() releases everything.
 * Returns NULL (after printing a diagnostic to stderr) if the file cannot be
 * opened, is empty, cannot be sized or fully read, or if memory runs out.
 */
TF_Buffer* read_file(const char* file) {
    FILE* f = NULL;
    long fsize = 0;
    void* data = NULL;
    TF_Buffer* buf = NULL;

    f = fopen(file, "rb");
    if (f == NULL) {
        fprintf(stderr, "ERROR: Unable to open %s\n", file);
        return NULL;
    }

    /* Seek to the end to learn the size, then rewind for the actual read. */
    if (fseek(f, 0, SEEK_END) != 0 ||
        (fsize = ftell(f)) <= 0 ||
        fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "ERROR: Unable to determine the size of %s (or it is empty)\n", file);
        fclose(f);
        return NULL;
    }

    data = malloc((size_t)fsize);
    if (data == NULL) {
        fprintf(stderr, "ERROR: Out of memory reading %s\n", file);
        fclose(f);
        return NULL;
    }

    /* Read byte-wise so a short read is detectable by the returned count. */
    if (fread(data, 1, (size_t)fsize, f) != (size_t)fsize) {
        fprintf(stderr, "ERROR: Unable to read all of %s\n", file);
        free(data);
        fclose(f);
        return NULL;
    }
    fclose(f);

    buf = TF_NewBuffer();
    if (buf == NULL) {
        free(data);
        return NULL;
    }
    buf->data = data;
    buf->length = (size_t)fsize;
    buf->data_deallocator = free_buffer; /* buffer now owns `data` */
    return buf;
}
int main()
{
printf("Hello from TensorFlow C library version %s\n", TF_Version());
/* Define filename where the frozen graph is stored */
char filename[320];
strcpy(filename, "C:\\Users\\s\\Dropbox\\Tensorflowgroup\\plus-2017\\slopemodel.pb");
// load graph
TF_Buffer* graph_def = read_file(filename);
TF_Graph* graph = TF_NewGraph();
TF_Status* status = TF_NewStatus();
TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions();
TF_GraphImportGraphDef(graph, graph_def, opts, status);
TF_DeleteImportGraphDefOptions(opts);
if (TF_GetCode(status) != TF_OK) {
printf("ERROR: Unable to import graph %s\n", filename);
return 1;
}
// create session
TF_SessionOptions* opt = TF_NewSessionOptions();
TF_Session* sess = TF_NewSession(graph, opt, status);
TF_DeleteSessionOptions(opt);
if (TF_GetCode(status) != TF_OK) {
printf("Unable to Create Session \n");
return 1;
}
// run restore
TF_Operation* checkpoint_op = TF_GraphOperationByName(graph, "save/Const");
TF_Operation* restore_op = TF_GraphOperationByName(graph, "save/restore_all");
const char* checkpoint_path_str = "./exported/my_model";
size_t checkpoint_path_str_len = strlen(checkpoint_path_str);
size_t encoded_size = TF_StringEncodedSize(checkpoint_path_str_len);
// The format for TF_STRING tensors is:
// start_offset: array[uint64]
// data: byte[...]
size_t total_size = sizeof(int64_t) + encoded_size;
char* input_encoded = (char*)malloc(total_size);
memset(input_encoded, 0, total_size);
TF_StringEncode(checkpoint_path_str, checkpoint_path_str_len,
input_encoded + sizeof(int64_t), encoded_size, status);
if (TF_GetCode(status) != TF_OK) {
fprintf(stderr, "ERROR: something wrong with encoding: %s",
TF_Message(status));
return 1;
}
// First two samples, normalized values
int SEQ_LENGTH = 21;
float sampledata0[21] = { 1., 0.96713615, 0.88262911, 0.74178404, 0.69953052,
0.66666667, 0.6713615, 0.57746479, 0.48826291, 0.50234742,
0.5399061, 0.43661972, 0.33333333, 0.342723, 0.25821596,
0.24882629, 0.19248826, 0.16901408, 0.07511737, 0.,
0.03286385 };
float sampledata1[21] = { 0. , 0.0257732 , 0.01030928, 0.1185567 , 0.20618557,
0.28865979, 0.30412371, 0.33505155, 0.36597938, 0.42268041,
0.43814433, 0.55670103, 0.73195876, 0.69587629, 0.77835052,
0.85051546, 0.8814433 , 0.81443299, 0.92783505, 0.98969072,
1. };
// gerenate input
TF_Operation* input_op = TF_GraphOperationByName(graph, "dense_1_input");
if (input_op == NULL) {
printf("operattion not found\n");
exit(0);
}
printf("input_op has %i inputs\n", TF_OperationNumOutputs(input_op));
float* raw_input_data = (float*)malloc(SEQ_LENGTH * sizeof(float));
for (int i = 0; i < 21; i++) raw_input_data[i] = sampledata1[i];
//raw_input_data[1] = 1.f;
int64_t* raw_input_dims = (int64_t*)malloc(2 * sizeof(int64_t));
raw_input_dims[0] = 1;
raw_input_dims[1] = 21;
// prepare inputs
TF_Tensor* input_tensor =
TF_NewTensor(TF_FLOAT, raw_input_dims, 2, raw_input_data,
SEQ_LENGTH * sizeof(float), &deallocator, NULL);
TF_Output* run_inputs = (TF_Output*)malloc(1 * sizeof(TF_Output));
run_inputs[0].oper = input_op;
run_inputs[0].index = 0;
TF_Tensor** run_inputs_tensors = (TF_Tensor**)malloc(1 * sizeof(TF_Tensor*));
run_inputs_tensors[0] = input_tensor;
// prepare outputs
TF_Operation* output_op = TF_GraphOperationByName(graph, "dense_2/Sigmoid");
if (output_op == NULL) {
printf("oepration not found\n");
exit(0);
}
TF_Output* run_outputs = (TF_Output*)malloc(1 * sizeof(TF_Output));
run_outputs[0].oper = output_op;
run_outputs[0].index = 0;
TF_Tensor** run_output_tensors = (TF_Tensor**)malloc(1 * sizeof(TF_Tensor*));
float* raw_output_data = (float*)malloc(1 * sizeof(float));
raw_output_data[0] = 1.f;
int64_t* raw_output_dims = (int64_t*)malloc(1 * sizeof(int64_t));
raw_output_dims[0] = 1;
TF_Tensor* output_tensor =
TF_NewTensor(TF_FLOAT, raw_output_dims, 1, raw_output_data,
1 * sizeof(float), &deallocator, NULL);
run_output_tensors[0] = output_tensor;
// run network
TF_SessionRun(sess,
/* RunOptions */ NULL,
/* Input tensors */ run_inputs, run_inputs_tensors, 1,
/* Output tensors */ run_outputs, run_output_tensors, 1,
/* Target operations */ NULL, 0,
/* RunMetadata */ NULL,
/* Output status */ status);
if (TF_GetCode(status) != TF_OK) {
fprintf(stderr, "ERROR: Unable to run output_op: %s\n", TF_Message(status));
return 1;
}
void * output_data = TF_TensorData(run_output_tensors[0]);
float data1 = ((float*)output_data)[0];
printf("Prediction: %.4f\n", data1);
TF_CloseSession(sess, status);
TF_DeleteSession(sess, status);
TF_DeleteStatus(status);
TF_DeleteBuffer(graph_def);
TF_DeleteGraph(graph);
getchar();
}
view raw TFinference.c hosted with ❤ by GitHub

Replace the code in the Visual Studio project you created in the last guide with this code, and run it. You should see a prediction of 0.6984, which matches the output of the Keras model.

Removing log messages

If things worked as expected, your program will print a lot of messages (above) that you don’t really need.

To change this, go back to the environment variable settings in Windows, and set the TF_CPP_MIN_LOG_LEVEL flag to 3. (You might need to restart Visual Studio or Windows for the change to take effect.)

Moving on

And that’s it! You are now ready to work with more complicated models in Keras, or run inferences on multiple samples at a time. The libtensorflow C API and PatWie’s GitHub should have more examples if you need them.

Next Post

Previous Post

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.

© 2026 Surya Sharma

Theme by Anders Norén