diff options
author | Guo, Yejun <yejun.guo@intel.com> | 2019-08-29 13:53:33 +0800 |
---|---|---|
committer | Pedro Arthur <bygrandao@gmail.com> | 2019-08-30 11:41:30 -0300 |
commit | 09a455a24649af36e8eea81029be7a410201be4c (patch) | |
tree | 70d0e8e1575068d010bc229864cf5badb5595fb8 /libavfilter/dnn | |
parent | 20a12448aaf140cf1ec06ee22e1ab4ebaea5c9ba (diff) | |
download | ffmpeg-streaming-09a455a24649af36e8eea81029be7a410201be4c.zip ffmpeg-streaming-09a455a24649af36e8eea81029be7a410201be4c.tar.gz |
dnn: introduce dnn operand (in C code) to hold operand info within the network
The info can be saved in the dnn operand object instead of being regenerated again and again,
and it is also needed for layer split/merge and for memory reuse.
To take things step by step, this patch focuses only on the C code;
the corresponding change to the python script will be added later.
Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
Diffstat (limited to 'libavfilter/dnn')
-rw-r--r-- | libavfilter/dnn/dnn_backend_native.c | 226 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_native.h | 54 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_native_layer_pad.c | 24 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_native_layer_pad.h | 4 |
4 files changed, 187 insertions, 121 deletions
diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index d52abc6..daa4f50 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -30,77 +30,30 @@ static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output) { ConvolutionalNetwork *network = (ConvolutionalNetwork *)model; - InputParams *input_params; - ConvolutionalParams *conv_params; - DepthToSpaceParams *depth_to_space_params; - LayerPadParams *pad_params; - int cur_width, cur_height, cur_channels; - int32_t layer; - if (network->layers_num <= 0 || network->layers[0].type != INPUT){ + if (network->layers_num <= 0 || network->operands_num <= 0) return DNN_ERROR; - } - else{ - input_params = (InputParams *)network->layers[0].params; - input_params->width = cur_width = input->width; - input_params->height = cur_height = input->height; - input_params->channels = cur_channels = input->channels; - if (input->data){ - av_freep(&input->data); - } - av_assert0(input->dt == DNN_FLOAT); - network->layers[0].output = input->data = av_malloc(cur_height * cur_width * cur_channels * sizeof(float)); - if (!network->layers[0].output){ - return DNN_ERROR; - } - } - - for (layer = 1; layer < network->layers_num; ++layer){ - switch (network->layers[layer].type){ - case CONV: - conv_params = (ConvolutionalParams *)network->layers[layer].params; - if (conv_params->input_num != cur_channels){ - return DNN_ERROR; - } - cur_channels = conv_params->output_num; - - if (conv_params->padding_method == VALID) { - int pad_size = (conv_params->kernel_size - 1) * conv_params->dilation; - cur_height -= pad_size; - cur_width -= pad_size; - } - break; - case DEPTH_TO_SPACE: - depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params; - if (cur_channels % (depth_to_space_params->block_size * depth_to_space_params->block_size) != 0){ - return DNN_ERROR; - } - 
cur_channels = cur_channels / (depth_to_space_params->block_size * depth_to_space_params->block_size); - cur_height *= depth_to_space_params->block_size; - cur_width *= depth_to_space_params->block_size; - break; - case MIRROR_PAD: - pad_params = (LayerPadParams *)network->layers[layer].params; - cur_height = cur_height + pad_params->paddings[1][0] + pad_params->paddings[1][1]; - cur_width = cur_width + pad_params->paddings[2][0] + pad_params->paddings[2][1]; - cur_channels = cur_channels + pad_params->paddings[3][0] + pad_params->paddings[3][1]; - break; - default: - return DNN_ERROR; - } - if (network->layers[layer].output){ - av_freep(&network->layers[layer].output); - } - - if (cur_height <= 0 || cur_width <= 0) - return DNN_ERROR; - network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float)); - if (!network->layers[layer].output){ - return DNN_ERROR; - } - } + av_assert0(input->dt == DNN_FLOAT); + + /** + * as the first step, suppose network->operands[0] is the input operand. 
+ */ + network->operands[0].dims[0] = 1; + network->operands[0].dims[1] = input->height; + network->operands[0].dims[2] = input->width; + network->operands[0].dims[3] = input->channels; + network->operands[0].type = DOT_INPUT; + network->operands[0].data_type = DNN_FLOAT; + network->operands[0].isNHWC = 1; + + av_freep(&network->operands[0].data); + network->operands[0].length = calculate_operand_data_length(&network->operands[0]); + network->operands[0].data = av_malloc(network->operands[0].length); + if (!network->operands[0].data) + return DNN_ERROR; + input->data = network->operands[0].data; return DNN_SUCCESS; } @@ -119,6 +72,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename) ConvolutionalParams *conv_params; DepthToSpaceParams *depth_to_space_params; LayerPadParams *pad_params; + int32_t operand_index = 0; model = av_malloc(sizeof(DNNModel)); if (!model){ @@ -131,7 +85,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename) } file_size = avio_size(model_file_context); - network = av_malloc(sizeof(ConvolutionalNetwork)); + network = av_mallocz(sizeof(ConvolutionalNetwork)); if (!network){ avio_closep(&model_file_context); av_freep(&model); @@ -139,32 +93,33 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename) } model->model = (void *)network; - network->layers_num = 1 + (int32_t)avio_rl32(model_file_context); + network->layers_num = (int32_t)avio_rl32(model_file_context); dnn_size = 4; - network->layers = av_malloc(network->layers_num * sizeof(Layer)); + network->layers = av_mallocz(network->layers_num * sizeof(Layer)); if (!network->layers){ - av_freep(&network); avio_closep(&model_file_context); - av_freep(&model); + ff_dnn_free_model_native(&model); return NULL; } - for (layer = 0; layer < network->layers_num; ++layer){ - network->layers[layer].output = NULL; - network->layers[layer].params = NULL; - } - network->layers[0].type = INPUT; - network->layers[0].params = av_malloc(sizeof(InputParams)); - if 
(!network->layers[0].params){ + /** + * Operands should be read from model file, the whole change will be huge. + * to make things step by step, we first mock the operands, instead of reading from model file. + */ + network->operands_num = network->layers_num + 1; + network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand)); + if (!network->operands){ avio_closep(&model_file_context); ff_dnn_free_model_native(&model); return NULL; } - for (layer = 1; layer < network->layers_num; ++layer){ + for (layer = 0; layer < network->layers_num; ++layer){ layer_type = (int32_t)avio_rl32(model_file_context); dnn_size += 4; + network->layers[layer].input_operand_indexes[0] = operand_index++; + network->layers[layer].output_operand_index = operand_index; switch (layer_type){ case CONV: conv_params = av_malloc(sizeof(ConvolutionalParams)); @@ -258,14 +213,35 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename) #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x))) -static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height) +static int convolve(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const ConvolutionalParams *conv_params) { + float *output; + int32_t input_operand_index = input_operand_indexes[0]; + int number = operands[input_operand_index].dims[0]; + int height = operands[input_operand_index].dims[1]; + int width = operands[input_operand_index].dims[2]; + int channel = operands[input_operand_index].dims[3]; + const float *input = operands[input_operand_index].data; + int radius = conv_params->kernel_size >> 1; int src_linesize = width * conv_params->input_num; int filter_linesize = conv_params->kernel_size * conv_params->input_num; int filter_size = conv_params->kernel_size * filter_linesize; int pad_size = (conv_params->padding_method == VALID) ? 
(conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0; + DnnOperand *output_operand = &operands[output_operand_index]; + output_operand->dims[0] = number; + output_operand->dims[1] = height - pad_size * 2; + output_operand->dims[2] = width - pad_size * 2; + output_operand->dims[3] = conv_params->output_num; + output_operand->length = calculate_operand_data_length(output_operand); + output_operand->data = av_realloc(output_operand->data, output_operand->length); + if (!output_operand->data) + return -1; + output = output_operand->data; + + av_assert0(channel == conv_params->input_num); + for (int y = pad_size; y < height - pad_size; ++y) { for (int x = pad_size; x < width - pad_size; ++x) { for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) { @@ -311,16 +287,36 @@ static void convolve(const float *input, float *output, const ConvolutionalParam output += conv_params->output_num; } } + return 0; } -static void depth_to_space(const float *input, float *output, int block_size, int width, int height, int channels) +static int depth_to_space(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, int block_size) { + float *output; + int32_t input_operand_index = input_operand_indexes[0]; + int number = operands[input_operand_index].dims[0]; + int height = operands[input_operand_index].dims[1]; + int width = operands[input_operand_index].dims[2]; + int channels = operands[input_operand_index].dims[3]; + const float *input = operands[input_operand_index].data; + int y, x, by, bx, ch; int new_channels = channels / (block_size * block_size); int output_linesize = width * channels; int by_linesize = output_linesize / block_size; int x_linesize = new_channels * block_size; + DnnOperand *output_operand = &operands[output_operand_index]; + output_operand->dims[0] = number; + output_operand->dims[1] = height * block_size; + output_operand->dims[2] = width * block_size; + output_operand->dims[3] = new_channels; + 
output_operand->length = calculate_operand_data_length(output_operand); + output_operand->data = av_realloc(output_operand->data, output_operand->length); + if (!output_operand->data) + return -1; + output = output_operand->data; + for (y = 0; y < height; ++y){ for (x = 0; x < width; ++x){ for (by = 0; by < block_size; ++by){ @@ -334,58 +330,38 @@ static void depth_to_space(const float *input, float *output, int block_size, in } output += output_linesize; } + return 0; } DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output) { ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model; - int cur_width, cur_height, cur_channels; int32_t layer; - InputParams *input_params; ConvolutionalParams *conv_params; DepthToSpaceParams *depth_to_space_params; LayerPadParams *pad_params; - if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){ + if (network->layers_num <= 0 || network->operands_num <= 0) + return DNN_ERROR; + if (!network->operands[0].data) return DNN_ERROR; - } - else{ - input_params = (InputParams *)network->layers[0].params; - cur_width = input_params->width; - cur_height = input_params->height; - cur_channels = input_params->channels; - } - for (layer = 1; layer < network->layers_num; ++layer){ - if (!network->layers[layer].output){ - return DNN_ERROR; - } + for (layer = 0; layer < network->layers_num; ++layer){ switch (network->layers[layer].type){ case CONV: conv_params = (ConvolutionalParams *)network->layers[layer].params; - convolve(network->layers[layer - 1].output, network->layers[layer].output, conv_params, cur_width, cur_height); - cur_channels = conv_params->output_num; - if (conv_params->padding_method == VALID) { - int pad_size = (conv_params->kernel_size - 1) * conv_params->dilation; - cur_height -= pad_size; - cur_width -= pad_size; - } + convolve(network->operands, network->layers[layer].input_operand_indexes, + 
network->layers[layer].output_operand_index, conv_params); break; case DEPTH_TO_SPACE: depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params; - depth_to_space(network->layers[layer - 1].output, network->layers[layer].output, - depth_to_space_params->block_size, cur_width, cur_height, cur_channels); - cur_height *= depth_to_space_params->block_size; - cur_width *= depth_to_space_params->block_size; - cur_channels /= depth_to_space_params->block_size * depth_to_space_params->block_size; + depth_to_space(network->operands, network->layers[layer].input_operand_indexes, + network->layers[layer].output_operand_index, depth_to_space_params->block_size); break; case MIRROR_PAD: pad_params = (LayerPadParams *)network->layers[layer].params; - dnn_execute_layer_pad(network->layers[layer - 1].output, network->layers[layer].output, - pad_params, 1, cur_height, cur_width, cur_channels); - cur_height = cur_height + pad_params->paddings[1][0] + pad_params->paddings[1][1]; - cur_width = cur_width + pad_params->paddings[2][0] + pad_params->paddings[2][1]; - cur_channels = cur_channels + pad_params->paddings[3][0] + pad_params->paddings[3][1]; + dnn_execute_layer_pad(network->operands, network->layers[layer].input_operand_indexes, + network->layers[layer].output_operand_index, pad_params); break; case INPUT: return DNN_ERROR; @@ -395,14 +371,24 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output // native mode does not support multiple outputs yet if (nb_output > 1) return DNN_ERROR; - outputs[0].data = network->layers[network->layers_num - 1].output; - outputs[0].height = cur_height; - outputs[0].width = cur_width; - outputs[0].channels = cur_channels; + + /** + * as the first step, suppose network->operands[network->operands_num - 1] is the output operand. 
+ */ + outputs[0].data = network->operands[network->operands_num - 1].data; + outputs[0].height = network->operands[network->operands_num - 1].dims[1]; + outputs[0].width = network->operands[network->operands_num - 1].dims[2]; + outputs[0].channels = network->operands[network->operands_num - 1].dims[3]; return DNN_SUCCESS; } +int32_t calculate_operand_data_length(DnnOperand* operand) +{ + // currently, we just support DNN_FLOAT + return operand->dims[0] * operand->dims[1] * operand->dims[2] * operand->dims[3] * sizeof(float); +} + void ff_dnn_free_model_native(DNNModel **model) { ConvolutionalNetwork *network; @@ -413,7 +399,6 @@ void ff_dnn_free_model_native(DNNModel **model) { network = (ConvolutionalNetwork *)(*model)->model; for (layer = 0; layer < network->layers_num; ++layer){ - av_freep(&network->layers[layer].output); if (network->layers[layer].type == CONV){ conv_params = (ConvolutionalParams *)network->layers[layer].params; av_freep(&conv_params->kernel); @@ -422,6 +407,11 @@ void ff_dnn_free_model_native(DNNModel **model) av_freep(&network->layers[layer].params); } av_freep(&network->layers); + + for (uint32_t operand = 0; operand < network->operands_num; ++operand) + av_freep(&network->operands[operand].data); + av_freep(&network->operands); + av_freep(&network); av_freep(model); } diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h index b6f9533..87b4394 100644 --- a/libavfilter/dnn/dnn_backend_native.h +++ b/libavfilter/dnn/dnn_backend_native.h @@ -36,12 +36,60 @@ typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc; typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNConvPaddingParam; +typedef enum {DOT_INPUT, DOT_INTERMEDIATE, DOT_OUTPUT} DNNOperandType; + typedef struct Layer{ DNNLayerType type; - float *output; + /** + * a layer can have multiple inputs and one output. 
+ * 4 is just a big enough number for input operands (increase it if necessary), + * do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks. + */ + int32_t input_operand_indexes[4]; + int32_t output_operand_index; void *params; } Layer; +typedef struct DnnOperand{ + /** + * there are two memory layouts, NHWC or NCHW, so we use dims, + * dims[0] is Number. + */ + int32_t dims[4]; + + /** + * input/output/intermediate operand of the network + */ + DNNOperandType type; + + /** + * support different kinds of data type such as float, half float, int8 etc, + * first support float now. + */ + DNNDataType data_type; + + /** + * NHWC if 1, otherwise NCHW. + * let's first support NHWC only, this flag is for extensive usage. + */ + int8_t isNHWC; + + /** + * to avoid possible memory leak, do not use char *name + */ + char name[128]; + + /** + * data pointer with data length in bytes. + * usedNumbersLeft is only valid for intermediate operand, + * it means how many layers still depend on this operand, + * todo: the memory can be reused when usedNumbersLeft is zero. 
+ */ + void *data; + int32_t length; + int32_t usedNumbersLeft; +}DnnOperand; + typedef struct ConvolutionalParams{ int32_t input_num, output_num, kernel_size; DNNActivationFunc activation; @@ -63,6 +111,8 @@ typedef struct DepthToSpaceParams{ typedef struct ConvolutionalNetwork{ Layer *layers; int32_t layers_num; + DnnOperand *operands; + int32_t operands_num; } ConvolutionalNetwork; DNNModel *ff_dnn_load_model_native(const char *model_filename); @@ -71,4 +121,6 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output void ff_dnn_free_model_native(DNNModel **model); +int32_t calculate_operand_data_length(DnnOperand *operand); + #endif diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.c b/libavfilter/dnn/dnn_backend_native_layer_pad.c index 5417d73..c2905a7 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_pad.c +++ b/libavfilter/dnn/dnn_backend_native_layer_pad.c @@ -48,12 +48,21 @@ static int after_get_buddy(int given, int border, LayerPadModeParam mode) } } -void dnn_execute_layer_pad(const float *input, float *output, const LayerPadParams *params, int number, int height, int width, int channel) +int dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, + const LayerPadParams *params) { int32_t before_paddings; int32_t after_paddings; + float* output; // suppose format is <N, H, W, C> + int32_t input_operand_index = input_operand_indexes[0]; + int number = operands[input_operand_index].dims[0]; + int height = operands[input_operand_index].dims[1]; + int width = operands[input_operand_index].dims[2]; + int channel = operands[input_operand_index].dims[3]; + const float *input = operands[input_operand_index].data; + int new_number = number + params->paddings[0][0] + params->paddings[0][1]; int new_height = height + params->paddings[1][0] + params->paddings[1][1]; int new_width = width + params->paddings[2][0] + params->paddings[2][1]; @@ -67,6 +76,17 @@ void 
dnn_execute_layer_pad(const float *input, float *output, const LayerPadPara int new_wc_stride = new_c_stride * new_width; int new_hwc_stride = new_wc_stride * new_height; + DnnOperand *output_operand = &operands[output_operand_index]; + output_operand->dims[0] = new_number; + output_operand->dims[1] = new_height; + output_operand->dims[2] = new_width; + output_operand->dims[3] = new_channel; + output_operand->length = calculate_operand_data_length(output_operand); + output_operand->data = av_realloc(output_operand->data, output_operand->length); + if (!output_operand->data) + return -1; + output = output_operand->data; + // copy the original data for (int n = 0; n < number; n++) { for (int h = 0; h < height; h++) { @@ -208,4 +228,6 @@ void dnn_execute_layer_pad(const float *input, float *output, const LayerPadPara } } } + + return 0; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.h b/libavfilter/dnn/dnn_backend_native_layer_pad.h index 0fbe652..7cc8213 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_pad.h +++ b/libavfilter/dnn/dnn_backend_native_layer_pad.h @@ -26,6 +26,7 @@ #define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_PAD_H #include <stdint.h> +#include "dnn_backend_native.h" typedef enum {LPMP_CONSTANT, LPMP_REFLECT, LPMP_SYMMETRIC} LayerPadModeParam; @@ -35,6 +36,7 @@ typedef struct LayerPadParams{ float constant_values; } LayerPadParams; -void dnn_execute_layer_pad(const float *input, float *output, const LayerPadParams *params, int number, int height, int width, int channel); +int dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, + const LayerPadParams *params); #endif |