summaryrefslogtreecommitdiffstats
path: root/libavfilter/dnn
diff options
context:
space:
mode:
author Guo, Yejun <yejun.guo@intel.com> 2019-08-29 13:53:33 +0800
committer Pedro Arthur <bygrandao@gmail.com> 2019-08-30 11:41:30 -0300
commit 09a455a24649af36e8eea81029be7a410201be4c (patch)
tree 70d0e8e1575068d010bc229864cf5badb5595fb8 /libavfilter/dnn
parent 20a12448aaf140cf1ec06ee22e1ab4ebaea5c9ba (diff)
download ffmpeg-streaming-09a455a24649af36e8eea81029be7a410201be4c.zip
ffmpeg-streaming-09a455a24649af36e8eea81029be7a410201be4c.tar.gz
dnn: introduce dnn operand (in c code) to hold operand infos within network
The info can be saved in the dnn operand object without being regenerated again and again; it is also needed for layer split/merge and for memory reuse. To take things step by step, this patch focuses only on the C code; the corresponding change to the Python script will be added later.
Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
Diffstat (limited to 'libavfilter/dnn')
-rw-r--r-- libavfilter/dnn/dnn_backend_native.c 226
-rw-r--r-- libavfilter/dnn/dnn_backend_native.h 54
-rw-r--r-- libavfilter/dnn/dnn_backend_native_layer_pad.c 24
-rw-r--r-- libavfilter/dnn/dnn_backend_native_layer_pad.h 4
4 files changed, 187 insertions, 121 deletions
diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index d52abc6..daa4f50 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -30,77 +30,30 @@
static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
{
ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;
- InputParams *input_params;
- ConvolutionalParams *conv_params;
- DepthToSpaceParams *depth_to_space_params;
- LayerPadParams *pad_params;
- int cur_width, cur_height, cur_channels;
- int32_t layer;
- if (network->layers_num <= 0 || network->layers[0].type != INPUT){
+ if (network->layers_num <= 0 || network->operands_num <= 0)
return DNN_ERROR;
- }
- else{
- input_params = (InputParams *)network->layers[0].params;
- input_params->width = cur_width = input->width;
- input_params->height = cur_height = input->height;
- input_params->channels = cur_channels = input->channels;
- if (input->data){
- av_freep(&input->data);
- }
- av_assert0(input->dt == DNN_FLOAT);
- network->layers[0].output = input->data = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
- if (!network->layers[0].output){
- return DNN_ERROR;
- }
- }
-
- for (layer = 1; layer < network->layers_num; ++layer){
- switch (network->layers[layer].type){
- case CONV:
- conv_params = (ConvolutionalParams *)network->layers[layer].params;
- if (conv_params->input_num != cur_channels){
- return DNN_ERROR;
- }
- cur_channels = conv_params->output_num;
-
- if (conv_params->padding_method == VALID) {
- int pad_size = (conv_params->kernel_size - 1) * conv_params->dilation;
- cur_height -= pad_size;
- cur_width -= pad_size;
- }
- break;
- case DEPTH_TO_SPACE:
- depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
- if (cur_channels % (depth_to_space_params->block_size * depth_to_space_params->block_size) != 0){
- return DNN_ERROR;
- }
- cur_channels = cur_channels / (depth_to_space_params->block_size * depth_to_space_params->block_size);
- cur_height *= depth_to_space_params->block_size;
- cur_width *= depth_to_space_params->block_size;
- break;
- case MIRROR_PAD:
- pad_params = (LayerPadParams *)network->layers[layer].params;
- cur_height = cur_height + pad_params->paddings[1][0] + pad_params->paddings[1][1];
- cur_width = cur_width + pad_params->paddings[2][0] + pad_params->paddings[2][1];
- cur_channels = cur_channels + pad_params->paddings[3][0] + pad_params->paddings[3][1];
- break;
- default:
- return DNN_ERROR;
- }
- if (network->layers[layer].output){
- av_freep(&network->layers[layer].output);
- }
-
- if (cur_height <= 0 || cur_width <= 0)
- return DNN_ERROR;
- network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
- if (!network->layers[layer].output){
- return DNN_ERROR;
- }
- }
+ av_assert0(input->dt == DNN_FLOAT);
+
+ /**
+ * as the first step, suppose network->operands[0] is the input operand.
+ */
+ network->operands[0].dims[0] = 1;
+ network->operands[0].dims[1] = input->height;
+ network->operands[0].dims[2] = input->width;
+ network->operands[0].dims[3] = input->channels;
+ network->operands[0].type = DOT_INPUT;
+ network->operands[0].data_type = DNN_FLOAT;
+ network->operands[0].isNHWC = 1;
+
+ av_freep(&network->operands[0].data);
+ network->operands[0].length = calculate_operand_data_length(&network->operands[0]);
+ network->operands[0].data = av_malloc(network->operands[0].length);
+ if (!network->operands[0].data)
+ return DNN_ERROR;
+ input->data = network->operands[0].data;
return DNN_SUCCESS;
}
@@ -119,6 +72,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
ConvolutionalParams *conv_params;
DepthToSpaceParams *depth_to_space_params;
LayerPadParams *pad_params;
+ int32_t operand_index = 0;
model = av_malloc(sizeof(DNNModel));
if (!model){
@@ -131,7 +85,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
}
file_size = avio_size(model_file_context);
- network = av_malloc(sizeof(ConvolutionalNetwork));
+ network = av_mallocz(sizeof(ConvolutionalNetwork));
if (!network){
avio_closep(&model_file_context);
av_freep(&model);
@@ -139,32 +93,33 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
}
model->model = (void *)network;
- network->layers_num = 1 + (int32_t)avio_rl32(model_file_context);
+ network->layers_num = (int32_t)avio_rl32(model_file_context);
dnn_size = 4;
- network->layers = av_malloc(network->layers_num * sizeof(Layer));
+ network->layers = av_mallocz(network->layers_num * sizeof(Layer));
if (!network->layers){
- av_freep(&network);
avio_closep(&model_file_context);
- av_freep(&model);
+ ff_dnn_free_model_native(&model);
return NULL;
}
- for (layer = 0; layer < network->layers_num; ++layer){
- network->layers[layer].output = NULL;
- network->layers[layer].params = NULL;
- }
- network->layers[0].type = INPUT;
- network->layers[0].params = av_malloc(sizeof(InputParams));
- if (!network->layers[0].params){
+ /**
+ * Operands should be read from model file, the whole change will be huge.
+ * to make things step by step, we first mock the operands, instead of reading from model file.
+ */
+ network->operands_num = network->layers_num + 1;
+ network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand));
+ if (!network->operands){
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
- for (layer = 1; layer < network->layers_num; ++layer){
+ for (layer = 0; layer < network->layers_num; ++layer){
layer_type = (int32_t)avio_rl32(model_file_context);
dnn_size += 4;
+ network->layers[layer].input_operand_indexes[0] = operand_index++;
+ network->layers[layer].output_operand_index = operand_index;
switch (layer_type){
case CONV:
conv_params = av_malloc(sizeof(ConvolutionalParams));
@@ -258,14 +213,35 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
-static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height)
+static int convolve(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const ConvolutionalParams *conv_params)
{
+ float *output;
+ int32_t input_operand_index = input_operand_indexes[0];
+ int number = operands[input_operand_index].dims[0];
+ int height = operands[input_operand_index].dims[1];
+ int width = operands[input_operand_index].dims[2];
+ int channel = operands[input_operand_index].dims[3];
+ const float *input = operands[input_operand_index].data;
+
int radius = conv_params->kernel_size >> 1;
int src_linesize = width * conv_params->input_num;
int filter_linesize = conv_params->kernel_size * conv_params->input_num;
int filter_size = conv_params->kernel_size * filter_linesize;
int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
+ DnnOperand *output_operand = &operands[output_operand_index];
+ output_operand->dims[0] = number;
+ output_operand->dims[1] = height - pad_size * 2;
+ output_operand->dims[2] = width - pad_size * 2;
+ output_operand->dims[3] = conv_params->output_num;
+ output_operand->length = calculate_operand_data_length(output_operand);
+ output_operand->data = av_realloc(output_operand->data, output_operand->length);
+ if (!output_operand->data)
+ return -1;
+ output = output_operand->data;
+
+ av_assert0(channel == conv_params->input_num);
+
for (int y = pad_size; y < height - pad_size; ++y) {
for (int x = pad_size; x < width - pad_size; ++x) {
for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
@@ -311,16 +287,36 @@ static void convolve(const float *input, float *output, const ConvolutionalParam
output += conv_params->output_num;
}
}
+ return 0;
}
-static void depth_to_space(const float *input, float *output, int block_size, int width, int height, int channels)
+static int depth_to_space(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, int block_size)
{
+ float *output;
+ int32_t input_operand_index = input_operand_indexes[0];
+ int number = operands[input_operand_index].dims[0];
+ int height = operands[input_operand_index].dims[1];
+ int width = operands[input_operand_index].dims[2];
+ int channels = operands[input_operand_index].dims[3];
+ const float *input = operands[input_operand_index].data;
+
int y, x, by, bx, ch;
int new_channels = channels / (block_size * block_size);
int output_linesize = width * channels;
int by_linesize = output_linesize / block_size;
int x_linesize = new_channels * block_size;
+ DnnOperand *output_operand = &operands[output_operand_index];
+ output_operand->dims[0] = number;
+ output_operand->dims[1] = height * block_size;
+ output_operand->dims[2] = width * block_size;
+ output_operand->dims[3] = new_channels;
+ output_operand->length = calculate_operand_data_length(output_operand);
+ output_operand->data = av_realloc(output_operand->data, output_operand->length);
+ if (!output_operand->data)
+ return -1;
+ output = output_operand->data;
+
for (y = 0; y < height; ++y){
for (x = 0; x < width; ++x){
for (by = 0; by < block_size; ++by){
@@ -334,58 +330,38 @@ static void depth_to_space(const float *input, float *output, int block_size, in
}
output += output_linesize;
}
+ return 0;
}
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
{
ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
- int cur_width, cur_height, cur_channels;
int32_t layer;
- InputParams *input_params;
ConvolutionalParams *conv_params;
DepthToSpaceParams *depth_to_space_params;
LayerPadParams *pad_params;
- if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
+ if (network->layers_num <= 0 || network->operands_num <= 0)
+ return DNN_ERROR;
+ if (!network->operands[0].data)
return DNN_ERROR;
- }
- else{
- input_params = (InputParams *)network->layers[0].params;
- cur_width = input_params->width;
- cur_height = input_params->height;
- cur_channels = input_params->channels;
- }
- for (layer = 1; layer < network->layers_num; ++layer){
- if (!network->layers[layer].output){
- return DNN_ERROR;
- }
+ for (layer = 0; layer < network->layers_num; ++layer){
switch (network->layers[layer].type){
case CONV:
conv_params = (ConvolutionalParams *)network->layers[layer].params;
- convolve(network->layers[layer - 1].output, network->layers[layer].output, conv_params, cur_width, cur_height);
- cur_channels = conv_params->output_num;
- if (conv_params->padding_method == VALID) {
- int pad_size = (conv_params->kernel_size - 1) * conv_params->dilation;
- cur_height -= pad_size;
- cur_width -= pad_size;
- }
+ convolve(network->operands, network->layers[layer].input_operand_indexes,
+ network->layers[layer].output_operand_index, conv_params);
break;
case DEPTH_TO_SPACE:
depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
- depth_to_space(network->layers[layer - 1].output, network->layers[layer].output,
- depth_to_space_params->block_size, cur_width, cur_height, cur_channels);
- cur_height *= depth_to_space_params->block_size;
- cur_width *= depth_to_space_params->block_size;
- cur_channels /= depth_to_space_params->block_size * depth_to_space_params->block_size;
+ depth_to_space(network->operands, network->layers[layer].input_operand_indexes,
+ network->layers[layer].output_operand_index, depth_to_space_params->block_size);
break;
case MIRROR_PAD:
pad_params = (LayerPadParams *)network->layers[layer].params;
- dnn_execute_layer_pad(network->layers[layer - 1].output, network->layers[layer].output,
- pad_params, 1, cur_height, cur_width, cur_channels);
- cur_height = cur_height + pad_params->paddings[1][0] + pad_params->paddings[1][1];
- cur_width = cur_width + pad_params->paddings[2][0] + pad_params->paddings[2][1];
- cur_channels = cur_channels + pad_params->paddings[3][0] + pad_params->paddings[3][1];
+ dnn_execute_layer_pad(network->operands, network->layers[layer].input_operand_indexes,
+ network->layers[layer].output_operand_index, pad_params);
break;
case INPUT:
return DNN_ERROR;
@@ -395,14 +371,24 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
// native mode does not support multiple outputs yet
if (nb_output > 1)
return DNN_ERROR;
- outputs[0].data = network->layers[network->layers_num - 1].output;
- outputs[0].height = cur_height;
- outputs[0].width = cur_width;
- outputs[0].channels = cur_channels;
+
+ /**
+ * as the first step, suppose network->operands[network->operands_num - 1] is the output operand.
+ */
+ outputs[0].data = network->operands[network->operands_num - 1].data;
+ outputs[0].height = network->operands[network->operands_num - 1].dims[1];
+ outputs[0].width = network->operands[network->operands_num - 1].dims[2];
+ outputs[0].channels = network->operands[network->operands_num - 1].dims[3];
return DNN_SUCCESS;
}
+int32_t calculate_operand_data_length(DnnOperand* operand)
+{
+ // currently, we just support DNN_FLOAT
+ return operand->dims[0] * operand->dims[1] * operand->dims[2] * operand->dims[3] * sizeof(float);
+}
+
void ff_dnn_free_model_native(DNNModel **model)
{
ConvolutionalNetwork *network;
@@ -413,7 +399,6 @@ void ff_dnn_free_model_native(DNNModel **model)
{
network = (ConvolutionalNetwork *)(*model)->model;
for (layer = 0; layer < network->layers_num; ++layer){
- av_freep(&network->layers[layer].output);
if (network->layers[layer].type == CONV){
conv_params = (ConvolutionalParams *)network->layers[layer].params;
av_freep(&conv_params->kernel);
@@ -422,6 +407,11 @@ void ff_dnn_free_model_native(DNNModel **model)
av_freep(&network->layers[layer].params);
}
av_freep(&network->layers);
+
+ for (uint32_t operand = 0; operand < network->operands_num; ++operand)
+ av_freep(&network->operands[operand].data);
+ av_freep(&network->operands);
+
av_freep(&network);
av_freep(model);
}
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index b6f9533..87b4394 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -36,12 +36,60 @@ typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNConvPaddingParam;
+typedef enum {DOT_INPUT, DOT_INTERMEDIATE, DOT_OUTPUT} DNNOperandType;
+
typedef struct Layer{
DNNLayerType type;
- float *output;
+ /**
+ * a layer can have multiple inputs and one output.
+ * 4 is just a big enough number for input operands (increase it if necessary),
+ * do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks.
+ */
+ int32_t input_operand_indexes[4];
+ int32_t output_operand_index;
void *params;
} Layer;
+typedef struct DnnOperand{
+ /**
+ * there are two memory layouts, NHWC or NCHW, so we use dims,
+ * dims[0] is Number.
+ */
+ int32_t dims[4];
+
+ /**
+ * input/output/intermediate operand of the network
+ */
+ DNNOperandType type;
+
+ /**
+ * support different kinds of data type such as float, half float, int8 etc,
+ * first support float now.
+ */
+ DNNDataType data_type;
+
+ /**
+ * NHWC if 1, otherwise NCHW.
+ * let's first support NHWC only, this flag is for extensive usage.
+ */
+ int8_t isNHWC;
+
+ /**
+ * to avoid possible memory leak, do not use char *name
+ */
+ char name[128];
+
+ /**
+ * data pointer with data length in bytes.
+ * usedNumbersLeft is only valid for intermediate operand,
+ * it means how many layers still depend on this operand,
+ * todo: the memory can be reused when usedNumbersLeft is zero.
+ */
+ void *data;
+ int32_t length;
+ int32_t usedNumbersLeft;
+}DnnOperand;
+
typedef struct ConvolutionalParams{
int32_t input_num, output_num, kernel_size;
DNNActivationFunc activation;
@@ -63,6 +111,8 @@ typedef struct DepthToSpaceParams{
typedef struct ConvolutionalNetwork{
Layer *layers;
int32_t layers_num;
+ DnnOperand *operands;
+ int32_t operands_num;
} ConvolutionalNetwork;
DNNModel *ff_dnn_load_model_native(const char *model_filename);
@@ -71,4 +121,6 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
void ff_dnn_free_model_native(DNNModel **model);
+int32_t calculate_operand_data_length(DnnOperand *operand);
+
#endif
diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.c b/libavfilter/dnn/dnn_backend_native_layer_pad.c
index 5417d73..c2905a7 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_pad.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_pad.c
@@ -48,12 +48,21 @@ static int after_get_buddy(int given, int border, LayerPadModeParam mode)
}
}
-void dnn_execute_layer_pad(const float *input, float *output, const LayerPadParams *params, int number, int height, int width, int channel)
+int dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index,
+ const LayerPadParams *params)
{
int32_t before_paddings;
int32_t after_paddings;
+ float* output;
// suppose format is <N, H, W, C>
+ int32_t input_operand_index = input_operand_indexes[0];
+ int number = operands[input_operand_index].dims[0];
+ int height = operands[input_operand_index].dims[1];
+ int width = operands[input_operand_index].dims[2];
+ int channel = operands[input_operand_index].dims[3];
+ const float *input = operands[input_operand_index].data;
+
int new_number = number + params->paddings[0][0] + params->paddings[0][1];
int new_height = height + params->paddings[1][0] + params->paddings[1][1];
int new_width = width + params->paddings[2][0] + params->paddings[2][1];
@@ -67,6 +76,17 @@ void dnn_execute_layer_pad(const float *input, float *output, const LayerPadPara
int new_wc_stride = new_c_stride * new_width;
int new_hwc_stride = new_wc_stride * new_height;
+ DnnOperand *output_operand = &operands[output_operand_index];
+ output_operand->dims[0] = new_number;
+ output_operand->dims[1] = new_height;
+ output_operand->dims[2] = new_width;
+ output_operand->dims[3] = new_channel;
+ output_operand->length = calculate_operand_data_length(output_operand);
+ output_operand->data = av_realloc(output_operand->data, output_operand->length);
+ if (!output_operand->data)
+ return -1;
+ output = output_operand->data;
+
// copy the original data
for (int n = 0; n < number; n++) {
for (int h = 0; h < height; h++) {
@@ -208,4 +228,6 @@ void dnn_execute_layer_pad(const float *input, float *output, const LayerPadPara
}
}
}
+
+ return 0;
}
diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.h b/libavfilter/dnn/dnn_backend_native_layer_pad.h
index 0fbe652..7cc8213 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_pad.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_pad.h
@@ -26,6 +26,7 @@
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_PAD_H
#include <stdint.h>
+#include "dnn_backend_native.h"
typedef enum {LPMP_CONSTANT, LPMP_REFLECT, LPMP_SYMMETRIC} LayerPadModeParam;
@@ -35,6 +36,7 @@ typedef struct LayerPadParams{
float constant_values;
} LayerPadParams;
-void dnn_execute_layer_pad(const float *input, float *output, const LayerPadParams *params, int number, int height, int width, int channel);
+int dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index,
+ const LayerPadParams *params);
#endif
OpenPOWER on IntegriCloud