#include "nnstreamer-orc.h"

#define DBG (!filter->silent)

#define GST_CAT_DEFAULT gst_tensor_transform_debug

#define CAPS_STRING GST_TENSOR_CAP_DEFAULT ";" GST_TENSORS_CAP_MAKE ("{ static, flexible }")

#define REGEX_DIMCHG_OPTION "^([0-9]|1[0-5]):([0-9]|1[0-5])$"
#define REGEX_TYPECAST_OPTION "(^[u]?int(8|16|32|64)$|^float(16|32|64)$)"
#define REGEX_TRANSPOSE_OPTION "^(?:([0-2]):(?!.*\\1)){3}3$"
#define REGEX_STAND_OPTION "^(default|dc-average)(:([u]?int(8|16|32|64)|float(16|32|64)))?(,per-channel:(true|false))?$"
#define REGEX_CLAMP_OPTION "^((([-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?))):"\
    "((([-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?)))$"
#define REGEX_PADDING_OPTION "^((left|right|top|bottom|front|back):(\\d)(,)?)+(layout:(NCHW|NHWC))?$"
#define REGEX_ARITH_OPTION "^(typecast:([u]?int(8|16|32|64)|float(16|32|64)),)?"\
    "(per-channel:(false|true@[0-9]+),)?"\
    "(((add|mul|div)(:([-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?))+(@[0-9]+)?)(,|))+$"

#define REGEX_ARITH_OPTION_TYPECAST "(typecast:([u]?int(8|16|32|64)|float(16|32|64)))"

#define NNS_TENSOR_TRANSPOSE_RANK_LIMIT (4)
#define NNS_TENSOR_PADDING_RANK_LIMIT (3)
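/*
 * Illustrative option strings accepted by the regexes above (examples added
 * here for readability; verify against the patterns):
 *   dimchg     : "0:2"
 *   typecast   : "float32"
 *   transpose  : "1:2:0:3"
 *   stand      : "default:float32,per-channel:true"
 *   clamp      : "-1.0:1.0"
 *   padding    : "left:1,right:1,layout:NCHW"
 *   arithmetic : "typecast:float32,add:-127.5,div:127.5"
 */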
/* The two defaults below are mutually exclusive; the HAVE_ORC guard is
 * assumed here from the build configuration. */
#ifdef HAVE_ORC
#define DEFAULT_ACCELERATION TRUE
#else
#define DEFAULT_ACCELERATION FALSE
#endif
/* Always-available static pads on CAPS_STRING. */
static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS (CAPS_STRING));

static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS (CAPS_STRING));

#define gst_tensor_transform_parent_class parent_class
G_DEFINE_TYPE (GstTensorTransform, gst_tensor_transform, GST_TYPE_BASE_TRANSFORM);
static void gst_tensor_transform_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec);
static void gst_tensor_transform_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec);
static GstFlowReturn gst_tensor_transform_transform (GstBaseTransform * trans,
    GstBuffer * inbuf, GstBuffer * outbuf);
static GstCaps *gst_tensor_transform_transform_caps (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * caps, GstCaps * filter);
static GstCaps *gst_tensor_transform_fixate_caps (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * caps, GstCaps * othercaps);
static gboolean gst_tensor_transform_set_caps (GstBaseTransform * trans,
    GstCaps * incaps, GstCaps * outcaps);
static gboolean gst_tensor_transform_transform_size (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * caps, gsize size,
    GstCaps * othercaps, gsize * othersize);
static gboolean gst_tensor_transform_convert_dimension (GstTensorTransform *
    filter, GstPadDirection direction, guint idx,
    const GstTensorInfo * in_info, GstTensorInfo * out_info);
#define GST_TYPE_TENSOR_TRANSFORM_MODE (gst_tensor_transform_mode_get_type ())

  static GType mode_type = 0;

  if (mode_type == 0) {
    static GEnumValue mode_types[] = {
      {GTT_DIMCHG, "Mode for changing tensor dimensions, "
          ", where NNS_TENSOR_RANK_LIMIT is 16)",
          "option=[typecast:TYPE,][per-channel:(false|true@DIM),]add|mul|div:NUMBER[@CH_IDX], ...",
          "option=D1\':D2\':D3\':D4 (fixed to 3)",
      {GTT_STAND, "Mode for statistical standardization of tensor, "
          "option=(default|dc-average)[:TYPE][,per-channel:(false|true)]",
      {GTT_CLAMP, "Mode for clamping all elements of tensor into the range, "
          "option=CLAMP_MIN:CLAMP_MAX",
          "option=left|right|top|bottom|front|back:NUMBER[,layout:(NCHW|NHWC)]",
      {GTT_UNKNOWN, "Unknown or not-implemented-yet mode",

    mode_type = g_enum_register_static ("gtt_mode_type", mode_types);
  GObjectClass *gobject_class;
  GstElementClass *gstelement_class;
  GstBaseTransformClass *trans_class;

  GST_DEBUG_CATEGORY_INIT (gst_tensor_transform_debug, "tensor_transform", 0,
      "Element to transform tensor dimension or type");

  trans_class = (GstBaseTransformClass *) klass;
  gstelement_class = (GstElementClass *) trans_class;
  gobject_class = (GObjectClass *) gstelement_class;
  g_object_class_install_property (gobject_class, PROP_SILENT,
      g_param_spec_boolean ("silent", "Silent", "Produce verbose output ?",
          FALSE, G_PARAM_READWRITE));
  g_object_class_install_property (gobject_class, PROP_MODE,
      g_param_spec_enum ("mode", "Mode", "Mode used for transforming tensor",
          GST_TYPE_TENSOR_TRANSFORM_MODE, GTT_UNKNOWN,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
  g_object_class_install_property (gobject_class, PROP_OPTION,
      g_param_spec_string ("option", "Option",
          "Option for the tensor transform mode ?", "", G_PARAM_READWRITE));
  g_object_class_install_property (gobject_class, PROP_ACCELERATION,
      g_param_spec_boolean ("acceleration", "Acceleration", "Orc acceleration",
          DEFAULT_ACCELERATION, G_PARAM_READWRITE));
  g_object_class_install_property (gobject_class, PROP_APPLY,
      g_param_spec_string ("apply", "Apply",
          "Select tensors to apply, separated by ',' in case of multiple tensors. "
          "By default, apply to all tensors.",
          "", G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
      g_param_spec_uint ("transpose-rank-limit", "Transpose rank limit",
          "The rank limit of transpose; it varies with the nnstreamer version "
          "and may be lower than the global rank limit if that limit exceeds 4.",
          G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));
  gst_element_class_set_details_simple (gstelement_class,
      "Transforms other/tensor dimensions for different models or frameworks",
      "MyungJoo Ham <myungjoo.ham@samsung.com>");
  gst_element_class_add_pad_template (gstelement_class,
      gst_static_pad_template_get (&src_factory));
  gst_element_class_add_pad_template (gstelement_class,
      gst_static_pad_template_get (&sink_factory));

  trans_class->passthrough_on_same_caps = FALSE;

  trans_class->transform_caps =
      GST_DEBUG_FUNCPTR (gst_tensor_transform_transform_caps);
  trans_class->fixate_caps =
      GST_DEBUG_FUNCPTR (gst_tensor_transform_fixate_caps);
  trans_class->transform_size =
      GST_DEBUG_FUNCPTR (gst_tensor_transform_transform_size);
  filter->apply = NULL;
#ifndef FLOAT16_SUPPORT
      ("Tensor_transform does not support float16 operators. Apply -Denable-float16=true to the meson build options if your architecture supports float16. Note that tensor-transform's float16 support is ad hoc and does NOT perform well (slow!).\n");
#ifdef FLOAT16_SUPPORT
refrain_from_heavy_op_on_float16 (gulong n)
  static int warned = 0;
      ("Tensor_transform implementation for float16 does not support SIMD. Heavy tensor-transform operations on float16 are not recommended. Try to apply heavy ops with other types (e.g., float32) and convert to float16 only when it is really needed.\n");
376 #define _conv_to_f16(intype, o, i, n) \
378 float16 *op = (gpointer) (o); \
379 intype *ip = (gpointer) (i); \
381 refrain_from_heavy_op_on_float16 (n); \
382 for (idx = 0; idx < n; idx++) \
383 *(op + idx) = (float16) *(ip + idx); \
387 #define _conv_from_f16_action(n, op, ip, otypename) \
390 for (idx = 0; idx < n; idx++) \
391 *(op + idx) = (otypename) *(ip + idx); \
395 #define _conv_from_f16(otype, o, i, n) \
397 float16 *ip = (gpointer) (i); \
398 refrain_from_heavy_op_on_float16 (n); \
401 int32_t *op = (gpointer) (o); \
402 _conv_from_f16_action (n, op, ip, int32_t); \
404 case _NNS_UINT32: { \
405 uint32_t *op = (gpointer) (o); \
406 _conv_from_f16_action (n, op, ip, uint32_t); \
409 int16_t *op = (gpointer) (o); \
410 _conv_from_f16_action (n, op, ip, int16_t); \
412 case _NNS_UINT16: { \
413 uint16_t *op = (gpointer) (o); \
414 _conv_from_f16_action (n, op, ip, uint16_t); \
417 int8_t *op = (gpointer) (o); \
418 _conv_from_f16_action (n, op, ip, int8_t); \
421 uint8_t *op = (gpointer) (o); \
422 _conv_from_f16_action (n, op, ip, uint8_t); \
424 case _NNS_FLOAT64: { \
425 double *op = (gpointer) (o); \
426 _conv_from_f16_action (n, op, ip, double); \
428 case _NNS_FLOAT32: { \
429 float *op = (gpointer) (o); \
430 _conv_from_f16_action (n, op, ip, float); \
432 case _NNS_FLOAT16: { \
433 float16 *op = (gpointer) (o); \
434 _conv_from_f16_action (n, op, ip, float16); \
436 default: GST_ERROR_OBJECT (filter, "Unsupported type %d", (otype)); g_assert (0); \
441 #define _op_float16(i, n, v, op) \
444 float16 *data_in = (float16 *) (i); \
445 refrain_from_heavy_op_on_float16 (n); \
448 for (idx = 0; idx < n; idx++) \
449 data_in[idx] = data_in[idx] + (v); \
452 for (idx = 0; idx < n; idx++) \
453 data_in[idx] = data_in[idx] * (v); \
456 for (idx = 0; idx < n; idx++) \
457 data_in[idx] = data_in[idx] / (v); \
459 default: GST_ERROR_OBJECT (filter, "Unknown operator for float16: %d", op); break; \
464 #define _conv_to_f16(intype, o, i, n) do { float16_not_supported (); } while (0)
465 #define _conv_from_f16(otype, o, i, n) do { float16_not_supported (); } while (0)
466 #define _op_float16(i, n, v, op) do { float16_not_supported (); } while (0)
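/*
 * Note on the helpers above (descriptive only): when FLOAT16_SUPPORT is
 * enabled, _conv_to_f16 / _conv_from_f16 cast element-by-element between
 * float16 and the other tensor types, and _op_float16 applies add/mul/div
 * in a plain scalar loop (no SIMD), after warning once via
 * refrain_from_heavy_op_on_float16 (). Without FLOAT16_SUPPORT the stubs
 * simply call float16_not_supported ().
 */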
#define type_64bit_integer(t) ((t) == _NNS_INT64 || (t) == _NNS_UINT64)
#define orc_supported(f,itype,otype) ((f)->acceleration && !(type_64bit_integer (itype) || type_64bit_integer (otype)))

#define orc_func_conv(intype,outtype) nns_orc_conv_ ## intype ## _to_ ## outtype
#define orc_func_add(intype) nns_orc_add_c_ ## intype
#define orc_func_mul(intype) nns_orc_mul_c_ ## intype
#define orc_func_div(intype) nns_orc_div_c_ ## intype
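/*
 * These wrappers paste type tags into the generated nnstreamer-orc kernel
 * names. For example (assuming the token-pasted definitions above),
 * orc_func_conv (s8, f32) expands to nns_orc_conv_s8_to_f32 and
 * orc_func_add (u8) expands to nns_orc_add_c_u8.
 */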
480 #define orc_typecast_to(i,o,n,intype,otype,intypename) do { \
482 case _NNS_INT32: orc_func_conv (intype, s32) ((gpointer) o, (gpointer) i, n); break; \
483 case _NNS_UINT32: orc_func_conv (intype, u32) ((gpointer) o, (gpointer) i, n); break; \
484 case _NNS_INT16: orc_func_conv (intype, s16) ((gpointer) o, (gpointer) i, n); break; \
485 case _NNS_UINT16: orc_func_conv (intype, u16) ((gpointer) o, (gpointer) i, n); break; \
486 case _NNS_INT8: orc_func_conv (intype, s8) ((gpointer) o, (gpointer) i, n); break; \
487 case _NNS_UINT8: orc_func_conv (intype, u8) ((gpointer) o, (gpointer) i, n); break; \
488 case _NNS_FLOAT64: orc_func_conv (intype, f64) ((gpointer) o, (gpointer) i, n); break; \
489 case _NNS_FLOAT32: orc_func_conv (intype, f32) ((gpointer) o, (gpointer) i, n); break; \
490 case _NNS_FLOAT16: _conv_to_f16 (intypename, o, i, n); break; \
491 default: GST_ERROR_OBJECT (filter, "Unsupported output type %d", otype); g_assert (0); break; \
495 #define orc_typecast(i,o,n,itype,otype) do { \
497 case _NNS_INT32: orc_typecast_to (i, o, n, s32, otype, int32_t); break; \
498 case _NNS_UINT32: orc_typecast_to (i, o, n, u32, otype, uint32_t); break; \
499 case _NNS_INT16: orc_typecast_to (i, o, n, s16, otype, int16_t); break; \
500 case _NNS_UINT16: orc_typecast_to (i, o, n, u16, otype, uint16_t); break; \
501 case _NNS_INT8: orc_typecast_to (i, o, n, s8, otype, int8_t); break; \
502 case _NNS_UINT8: orc_typecast_to (i, o, n, u8, otype, uint8_t); break; \
503 case _NNS_FLOAT64: orc_typecast_to (i, o, n, f64, otype, double); break; \
504 case _NNS_FLOAT32: orc_typecast_to (i, o, n, f32, otype, float); break; \
505 case _NNS_FLOAT16: _conv_from_f16 (otype, o, i, n); break; \
506 default: GST_ERROR_OBJECT (filter, "Unsupported input type %d", itype); g_assert (0); break; \
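/*
 * orc_typecast dispatches twice: the outer switch (above) picks the input
 * type tag, then orc_typecast_to picks the output type tag, so every
 * (input, output) pair maps to one nns_orc_conv_* kernel. float16 is the
 * exception on both sides and is routed to the scalar _conv_to_f16 /
 * _conv_from_f16 helpers instead of an ORC kernel.
 */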
510 #define orc_typesize(size, type) do { \
512 case _NNS_INT32: size = sizeof(int32_t); break; \
513 case _NNS_UINT32: size = sizeof(uint32_t); break; \
514 case _NNS_INT16: size = sizeof(int16_t); break; \
515 case _NNS_UINT16: size = sizeof(uint16_t); break; \
516 case _NNS_INT8: size = sizeof(int8_t); break; \
517 case _NNS_UINT8: size = sizeof(uint8_t); break; \
518 case _NNS_FLOAT64: size = sizeof(double); break; \
519 case _NNS_FLOAT32: size = sizeof(float); break; \
520 default: GST_ERROR_OBJECT (filter, "Unsupported type %d", type); g_assert (0); break; \
524 #define orc_operator_func(i,n,v,opfunc,op) do { \
525 switch ((v)->type) { \
526 case _NNS_INT32: opfunc (s32) ((gpointer) i, (v)->data._int32_t, n); break; \
527 case _NNS_UINT32: opfunc (u32) ((gpointer) i, (v)->data._uint32_t, n); break; \
528 case _NNS_INT16: opfunc (s16) ((gpointer) i, (v)->data._int16_t, n); break; \
529 case _NNS_UINT16: opfunc (u16) ((gpointer) i, (v)->data._uint16_t, n); break; \
530 case _NNS_INT8: opfunc (s8) ((gpointer) i, (v)->data._int8_t, n); break; \
531 case _NNS_UINT8: opfunc (u8) ((gpointer) i, (v)->data._uint8_t, n); break; \
532 case _NNS_FLOAT64: opfunc (f64) ((gpointer) i, (v)->data._double, n); break; \
533 case _NNS_FLOAT32: opfunc (f32) ((gpointer) i, (v)->data._float, n); break; \
534 case _NNS_FLOAT16: _op_float16 (i, n, (v)->data._float16, op); break; \
535 default: GST_ERROR_OBJECT (filter, "Unsupported type %d", (v)->type); g_assert (0); break; \
539 #define orc_operator_div_loop(i,n,val,typename) do { \
541 typename *data_in = (typename *) (i); \
542 for (idx_div = 0; idx_div < (n); ++idx_div) { \
543 data_in[idx_div] = data_in[idx_div] / (val); \
547 #define orc_operator(i,n,v,op) do { \
549 case GTT_OP_ADD: orc_operator_func (i, n, v, orc_func_add, op); break; \
550 case GTT_OP_MUL: orc_operator_func (i, n, v, orc_func_mul, op); break; \
552 switch ((v)->type) { \
553 case _NNS_INT32: orc_operator_div_loop (i, n, (v)->data._int32_t, int32_t); break; \
554 case _NNS_UINT32: orc_operator_div_loop (i, n, (v)->data._uint32_t, uint32_t); break; \
555 case _NNS_INT16: orc_operator_div_loop (i, n, (v)->data._int16_t, int16_t); break; \
556 case _NNS_UINT16: orc_operator_div_loop (i, n, (v)->data._uint16_t, uint16_t); break; \
557 case _NNS_INT8: orc_operator_div_loop (i, n, (v)->data._int8_t, int8_t); break; \
558 case _NNS_UINT8: orc_operator_div_loop (i, n, (v)->data._uint8_t, uint8_t); break; \
559 case _NNS_FLOAT64: orc_func_div (f64) ((gpointer) i, (v)->data._double, n); break; \
560 case _NNS_FLOAT32: orc_func_div (f32) ((gpointer) i, (v)->data._float, n); break; \
561 case _NNS_FLOAT16: _op_float16 (i, n, (v)->data._float16, op); break; \
562 default: GST_ERROR_OBJECT (filter, "Unsupported type %d", (v)->type); g_assert (0); break; \
565 default: GST_ERROR_OBJECT (filter, "Unknown operator %d", op); break; \
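/*
 * In orc_operator, add and mul dispatch through orc_operator_func to the
 * per-type ORC kernels (float16 again falls back to _op_float16). div uses
 * the ORC kernel only for float32/float64 and falls back to the scalar
 * orc_operator_div_loop for the integer types; as far as this file shows,
 * the nnstreamer-orc kernel set provides constant division only for floats.
 */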
573 #define handle_operator(d,v,oper,vtype) do { \
583 GST_ERROR_OBJECT (filter, "Invalid state, denominator is 0."); \
589 GST_ERROR_OBJECT (filter, "Unknown operator %d", oper); \
  g_return_val_if_fail (desc != NULL, FALSE);
  g_return_val_if_fail (val != NULL, FALSE);

  switch (desc->type) {
#ifdef FLOAT16_SUPPORT
      GST_ERROR_OBJECT (filter, "Unknown tensor type %d", desc->type);
  gboolean ret = FALSE;
  filter_name = gst_object_get_name ((GstObject *) filter);

  switch (filter->mode) {
              G_REGEX_CASELESS, 0)) {
            ("%s: dimchg: \'%s\' is not a valid option string: it should be in the form of IDX_DIM_FROM:IDX_DIM_TO: with a regex, "
      strv = g_strsplit (filter->option, ":", 2);
        filter->data_dimchg.from = (int) g_ascii_strtoll (strv[0], NULL, 10);
        filter->data_dimchg.to = (int) g_ascii_strtoll (strv[1], NULL, 10);
        ret = filter->loaded = TRUE;
              G_REGEX_CASELESS, 0)) {
        ret = filter->loaded = TRUE;
            ("%s: typecast: \'%s\' is not a valid data type for tensor: the data type should be one of %s\n",
      gchar **str_operators;
      guint i, num_operators, num_op;
      GRegex *regex_option_tc;

      filter->data_arithmetic.out_type = _NNS_END;
      filter->data_arithmetic.per_channel_arith = FALSE;

      if (filter->operators) {
        GST_WARNING_OBJECT (filter,
            "Pre-defined operators already exist (%d in total); resetting them now.",
            g_slist_length (filter->operators));

        g_slist_free_full (filter->operators, g_free);
        filter->operators = NULL;

          G_REGEX_CASELESS, 0, 0);

      if (!regex_option_tc) {
        GST_ERROR_OBJECT (filter,
            "arithmetic: failed to create a GRegex structure for %s\n",

      if (g_regex_match_full (regex_option_tc, filter->option, -1,
        str_option = g_regex_replace (regex_option_tc, filter->option, -1, 1,
            ("%s: arithmetic: [typecast:TYPE,] should be located first to prevent memory re-allocation: any typecast in the middle of \'%s\' will be ignored\n",
            filter_name, filter->option);
        str_option = g_strdup (filter->option);
      g_regex_unref (regex_option_tc);

              G_REGEX_CASELESS, 0)) {
            ("%s: arithmetic: \'%s\' is not a valid option string: it should be in the form of [typecast:TYPE,][per-channel:(false|true@DIM),]add|mul|div:NUMBER[@CH_IDX]..., ...\n",
            filter_name, str_option);

      str_operators = g_strsplit (str_option, ",", -1);
      num_operators = g_strv_length (str_operators);

      for (i = 0; i < num_operators; ++i) {
        str_op = g_strsplit (str_operators[i], ":", -1);
        num_op = g_strv_length (str_op);

          gchar **values = g_strsplit (str_op[1], "@", -1);
          guint num_values = g_strv_length (values);

          if (g_ascii_strcasecmp (str_op[0], "per-channel") == 0) {
            if (num_values > 1 && g_ascii_strcasecmp (values[0], "true") == 0) {
                  ("Per-channel arithmetic is enabled; assuming the %s-th dim is the channel",
              filter->data_arithmetic.per_channel_arith = TRUE;
              filter->data_arithmetic.ch_dim =
                  (guint) g_ascii_strtoull (values[1], NULL, 10);

              if (num_op > 1 && str_op[1]) {
                  filter->data_arithmetic.out_type = op_s->value.type;
                  GST_WARNING_OBJECT (filter, "Invalid option for typecast %s",

              if (num_op > 1 && str_op[1]) {
                if (strchr (values[0], '.') || strchr (values[0], 'e') ||
                    strchr (values[0], 'E')) {
                  val = g_ascii_strtod (values[0], NULL);
                  val = g_ascii_strtoll (values[0], NULL, 10);
                if (filter->data_arithmetic.per_channel_arith && num_values > 1) {
                  op_s->applying_ch = g_ascii_strtoll (values[1], NULL, 10);
                GST_WARNING_OBJECT (filter,
                    "Invalid option for arithmetic %s", str_operators[i]);
              GST_WARNING_OBJECT (filter, "Unknown operator %s", str_op[0]);

          filter->operators = g_slist_append (filter->operators, op_s);
          GST_WARNING_OBJECT (filter, "Invalid option %s", str_operators[i]);

      ret = filter->loaded = (filter->operators != NULL);
      g_strfreev (str_operators);
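      /*
       * After this parse loop, each entry appended to filter->operators is a
       * small record describing one step: the operator kind, the constant
       * operand in op_s->value (typed per the parsed string), and, when
       * per-channel arithmetic is enabled, the channel index in
       * op_s->applying_ch. This summarizes the fields used above, not the
       * full structure definition.
       */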
              G_REGEX_CASELESS, 0)) {
            ("%s: transpose: \'%s\' is not a valid option string: it should be in the form of NEW_IDX_DIM0:NEW_IDX_DIM1:NEW_IDX_DIM2:3 (the transpose rank is currently fixed to 3, and the index of the last dim is always 3)\n",
            filter_name, filter->option);
        filter->data_transpose.trans_order[i] =
            (uint8_t) g_ascii_strtoull (strv[i], NULL, 10);
      ret = filter->loaded = TRUE;
      gchar **options = NULL;
      guint i, num_options;

              G_REGEX_CASELESS, 0)) {
            ("%s: stand: \'%s\' is not a valid option string: it should be in the form of (default|dc-average)[:TYPE][,per-channel:(false|true)]\n",
            filter_name, filter->option);

      filter->data_stand.out_type = _NNS_END;
      filter->data_stand.per_channel = FALSE;

      options = g_strsplit (filter->option, ",", -1);
      num_options = g_strv_length (options);

      for (i = 0; i < num_options; i++) {
        gchar **strv = g_strsplit (options[i], ":", -1);

        if (g_ascii_strcasecmp (strv[0], "default") == 0 ||
            g_ascii_strcasecmp (strv[0], "dc-average") == 0) {
          filter->data_stand.mode =
          if (g_strv_length (strv) > 1)
        } else if (g_ascii_strcasecmp (strv[0], "per-channel") == 0) {
          if (g_strv_length (strv) > 1 &&
              g_ascii_strcasecmp (strv[1], "true") == 0)
            filter->data_stand.per_channel = TRUE;
          ml_logw ("Unknown option for stand mode: %s", strv[0]);

      g_strfreev (options);
      ret = filter->loaded = TRUE;
              G_REGEX_CASELESS, 0)) {
            ("%s: clamp: \'%s\' is not a valid option string: it should be in the form of CLAMP_MIN:CLAMP_MAX\n",
            filter_name, filter->option);

      strv = g_strsplit (filter->option, ":", 2);

      filter->data_clamp.min = g_ascii_strtod (strv[0], NULL);
      if (errno == ERANGE) {
        ml_loge ("%s: clamp: CLAMP_MIN value has an invalid range\n",

      filter->data_clamp.max = g_ascii_strtod (strv[1], NULL);
      if (errno == ERANGE) {
        ml_loge ("%s: clamp: CLAMP_MAX value has an invalid range\n",

      if (filter->data_clamp.min > filter->data_clamp.max) {
        ml_loge ("%s: clamp: CLAMP_MIN is larger than CLAMP_MAX\n",

      ret = filter->loaded = TRUE;
      gchar **options = NULL;
      guint i, num_options;

              G_REGEX_CASELESS, 0)) {
            ("%s: padding: \'%s\' is not a valid option string: it should be in the form of left|right|top|bottom|front|back:PADDING,[layout:(NCHW|NHWC)]\n",
            filter_name, filter->option);

        filter->data_padding.pad[i] = 0;

      options = g_strsplit (filter->option, ",", -1);
      num_options = g_strv_length (options);

      for (i = 0; i < num_options; i++) {
        gchar **strv = g_strsplit (options[i], ":", 2);
        if (g_ascii_strcasecmp (strv[0], "left") == 0) {
              (guint) g_ascii_strtoull (strv[1], NULL, 10);
        } else if (g_ascii_strcasecmp (strv[0], "right") == 0) {
              (guint) g_ascii_strtoull (strv[1], NULL, 10);
        } else if (g_ascii_strcasecmp (strv[0], "top") == 0) {
              (guint) g_ascii_strtoull (strv[1], NULL, 10);
        } else if (g_ascii_strcasecmp (strv[0], "bottom") == 0) {
              (guint) g_ascii_strtoull (strv[1], NULL, 10);
        } else if (g_ascii_strcasecmp (strv[0], "front") == 0) {
              (guint) g_ascii_strtoull (strv[1], NULL, 10);
        } else if (g_ascii_strcasecmp (strv[0], "back") == 0) {
              (guint) g_ascii_strtoull (strv[1], NULL, 10);
        } else if (g_ascii_strcasecmp (strv[0], "layout") == 0) {
          if (g_ascii_strcasecmp (strv[1], "NHWC") == 0)
          ml_logw ("Unknown option for padding mode: %s", strv[0]);

      g_strfreev (options);

      guint prev_left = filter->data_padding.pad[PADDING_LEFT],

      ret = filter->loaded = TRUE;
      GST_ERROR_OBJECT (filter, "Cannot identify mode\n");
    const GValue * value, GParamSpec * pspec)
      filter->silent = g_value_get_boolean (value);
      filter->mode = g_value_get_enum (value);
      gchar *backup_option = filter->option;
      filter->option = g_value_dup_string (value);
      silent_debug (filter, "Option = %s --> %s\n", backup_option,
        filter->option = backup_option;
      GST_WARNING_OBJECT (filter, "Orc acceleration is not supported");
      const gchar *param = g_value_get_string (value);
      gchar **strv = g_strsplit_set (param, ",", -1);
      guint i, num = g_strv_length (strv);
      gchar *endptr = NULL;

      for (i = 0; i < num; i++) {
        val = g_ascii_strtoll (strv[i], &endptr, 10);
        if (errno == ERANGE || errno == EINVAL || (endptr == strv[i])) {
          ml_loge ("Cannot convert string %s to a gint64 value", strv[i]);
        filter->apply = g_list_append (filter->apply, GINT_TO_POINTER (val));
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
    GValue * value, GParamSpec * pspec)
      g_value_set_boolean (value, filter->silent);
      g_value_set_enum (value, filter->mode);
      if (filter->apply == NULL) {
      arr = g_ptr_array_new ();
      for (list = filter->apply; list != NULL; list = list->next) {
        g_ptr_array_add (arr, g_strdup_printf ("%i",
                GPOINTER_TO_INT (list->data)));
      g_ptr_array_add (arr, NULL);
      strings = (gchar **) g_ptr_array_free (arr, FALSE);
      p = g_strjoinv (",", strings);
      g_strfreev (strings);
      g_value_take_string (value, p);
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
  if (filter->apply) {
    g_list_free (filter->apply);
    filter->apply = NULL;

  G_OBJECT_CLASS (parent_class)->finalize (object);
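/*
 * The per-mode workers below each take the negotiated in/out GstTensorInfo
 * plus raw input/output pointers and return a GstFlowReturn. The first one
 * handles the dimchg mode: it reshuffles memory blocks so that the dimension
 * at index 'from' of the input appears at index 'to' of the output, using the
 * loop-block and copy-block sizes computed from the two dimension arrays.
 * (Descriptive comment added for readability; see the code that follows.)
 */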
static GstFlowReturn
    const uint8_t * inptr, uint8_t * outptr)
  unsigned int i, j, k;
  unsigned int loopLimit = 1;
  gsize loopBlockSize, copyblocksize, copyblocklimit;

    GST_WARNING_OBJECT (filter,
        "Calling tensor_transform with high memcpy overhead WITHOUT any effect! Check whether your stream really needs tensor_transform.\n");

  g_assert (fromDim[from] == toDim[to]);

      loopLimit *= toDim[i];
    for (i = 0; i < to; i++) {
      loopBlockSize *= toDim[i];

    for (i = 0; i < from; i++) {
      if (fromDim[i] == 0)
      copyblocksize *= fromDim[i];
    for (i = 0; i < to; i++) {
      copyblocklimit *= toDim[i];

    for (i = 0; i < loopLimit; i++) {
      uint8_t *destptr = outptr + loopBlockSize * toDim[to] * i;
      const uint8_t *srcptr = inptr + loopBlockSize * toDim[to] * i;

      for (j = 0; j < toDim[to]; j++) {
        uint8_t *j_destptr = destptr + loopBlockSize * j;
        for (k = 0; k < copyblocklimit; k++) {
              srcptr + k * copyblocksize * toDim[to] + j * copyblocksize,

        ("tensor-transform/dimchg operation is not permitted if from >= to.\n");
    return GST_FLOW_ERROR;
static GstFlowReturn
    const uint8_t * inptr, uint8_t * outptr)
  gsize in_element_size, out_element_size;

  if (orc_supported (filter, in_info->type, out_info->type)) {
    orc_typecast (inptr, outptr, num, in_info->type, out_info->type);

  for (i = 0; i < num; ++i) {
        (gpointer) (inptr + in_element_size * i), in_info->type,
        (gpointer) (outptr + out_element_size * i), out_info->type);
static GstFlowReturn
    const uint8_t * inptr, uint8_t * outptr)
  gulong i, num, j, ch;
  gsize in_element_size, out_element_size;

  if (orc_supported (filter, in_info->type, out_info->type)) {
      orc_typecast (inptr, outptr, num, in_info->type, out_info->type);

        orc_operator (outptr, num, &op_s->value, op_s->op);
        walk = g_slist_next (walk);

      gsize ch_offset, ch_size = 1;
      uint8_t *tmp_outptr = NULL;

      for (i = 0; i < ch_dim; ++i) {
      ch_offset = ch_size * in_info->dimension[ch_dim];
      orc_typesize (typesize, out_info->type);

        walk = g_slist_next (walk);
          orc_operator (outptr, num, &op_s->value, op_s->op);
          for (i = 0; i < num / ch_offset; ++i) {
                ch_offset * i) * typesize;
            orc_operator (tmp_outptr, ch_size, &op_s->value, op_s->op);
        walk = g_slist_next (walk);

    gsize ch_offset, ch_size = 1;
    for (i = 0; i < ch_dim; ++i) {
    ch_offset = ch_size * in_info->dimension[ch_dim];

    for (i = 0; i < num / ch_offset; ++i) {
      for (ch = 0; ch < in_info->dimension[ch_dim]; ++ch) {
        for (j = 0; j < ch_size; ++j) {
          gulong data_idx = (i * ch_offset) + (ch * ch_size) + j;
              (gpointer) (inptr + in_element_size * data_idx));

            gst_tensor_transform_do_operator (filter, &value,
              return GST_FLOW_ERROR;
            walk = g_slist_next (walk);

    for (i = 0; i < num; ++i) {
          (gpointer) (inptr + in_element_size * i));
        gst_tensor_transform_do_operator (filter, &value, &op_s->value,
          return GST_FLOW_ERROR;
        walk = g_slist_next (walk);
1506 #define transposeloop(cl,ck,cj,ci,sl,sk,sj,si,typesize) do { \
1507 size_t i, j, k, l; \
1508 int inidx = 0, outidx=0; \
1509 for(cl=0;cl<sl;cl++) \
1510 for(ci=0;ci<si;ci++) \
1511 for(cj=0;cj<sj;cj++) \
1512 for(ck=0;ck<sk;ck++){ \
1513 const uint8_t *_in; \
1515 outidx = si*sj*sk*cl + sj*sk*ci + sk*cj + ck; \
1516 inidx = SK*SJ*SI*l + SJ*SI*k + SI*j + i; \
1517 _in = inptr + inidx * typesize; \
1518 _out = outptr + outidx * typesize; \
1519 nns_memcpy(_out, _in, typesize); \
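/*
 * transposeloop (above) walks the output in (cl, ci, cj, ck) order with the
 * output extents (sl, si, sj, sk) and pulls each element from the input via
 * the original strides SI/SJ/SK, copying one element of 'typesize' bytes per
 * iteration. The mapping of the canonical i/j/k/l counters to those loop
 * variables is decided by the macro arguments at each call site, which follow
 * the requested trans_order.
 */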
static GstFlowReturn
    const uint8_t * inptr, uint8_t * outptr)
  gboolean checkdim = FALSE;
  gsize indexI, indexJ, SL, SI, SJ, SK;

    GST_WARNING_OBJECT (filter,
        "Calling tensor_transform with high memcpy overhead WITHOUT any effect!");
1562 SL = fromDim[3] > 0 ? fromDim[3] : 1;
1563 SI = fromDim[0] > 0 ? fromDim[0] : 1;
1564 SJ = fromDim[1] > 0 ? fromDim[1] : 1;
1565 SK = fromDim[2] > 0 ? fromDim[2] : 1;
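  /*
   * Dimensions reported as 0 (unused ranks of a lower-rank tensor) are
   * treated as size 1 here, so the fixed four-level transpose loops still
   * iterate exactly once over the missing axes.
   */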
static GstFlowReturn
    const uint8_t * inptr, uint8_t * outptr)
  GstFlowReturn ret = GST_FLOW_OK;
  gsize in_element_size, out_element_size, data_size, ch_size;
  gulong i, num, data_idx, ch;
  gdouble tmp, *average, *std;

  average = std = NULL;
          in_info->type, &average);

        for (i = 0; i < num; i++) {
          data_idx = in_element_size * i;
          tmp = fabs ((tmp - *average) / *std);
          data_idx = out_element_size * i;
              (gpointer) (outptr + data_idx), out_info->type);

        for (ch = 0; ch < ch_size; ++ch) {
          for (i = 0; i < num / ch_size; i++) {
            data_idx = in_element_size * ((i * ch_size) + ch);
            tmp = fabs ((tmp - average[ch]) / std[ch]);
            data_idx = out_element_size * ((i * ch_size) + ch);
                (gpointer) (outptr + data_idx), out_info->type);

        for (i = 0; i < num; i++) {
          data_idx = in_element_size * i;
          data_idx = out_element_size * i;
              (gpointer) (outptr + data_idx), out_info->type);

        for (ch = 0; ch < ch_size; ++ch) {
          for (i = 0; i < num / ch_size; i++) {
            data_idx = in_element_size * ((i * ch_size) + ch);
            data_idx = out_element_size * ((i * ch_size) + ch);
                (gpointer) (outptr + data_idx), out_info->type);

      GST_ERROR_OBJECT (filter, "Cannot identify mode\n");
      ret = GST_FLOW_ERROR;
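      /*
       * Summary of the stand sub-modes as far as this excerpt shows:
       * "default" rewrites each element as fabs ((x - average) / std), either
       * with one global average/std pair or per channel; the second branch
       * corresponds to "dc-average" from the option parser (its arithmetic is
       * elided in this excerpt). Anything else hits the error path above.
       */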
1724 static GstFlowReturn
1727 const uint8_t * inptr, uint8_t * outptr)
1729 gsize in_element_size, out_element_size;
1730 gulong i, num, data_idx;
1737 for (i = 0; i < num; ++i) {
1738 data_idx = in_element_size * i;
1744 data_idx = out_element_size * i;
static GstFlowReturn
  gsize element_size, in_loop_size, out_loop_size, copy_block_size;
  guint i, j, k, left, top, front, loop_limit = 1;

      * out_info->dimension[0] * element_size;
  copy_block_size = in_info->dimension[0] * element_size;

  memset (outptr, 0, out_loop_size * loop_limit);

  for (i = 0; i < loop_limit; i++)
    for (j = 0; j < in_info->dimension[2]; j++)
      for (k = 0; k < in_info->dimension[1]; k++) {
        out_idx += left + top * out_info->dimension[0]
        memcpy (outptr + out_idx * element_size + out_loop_size * i,
            inptr + in_idx * element_size + in_loop_size * i, copy_block_size);
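/*
 * The padding worker zero-fills the whole output first (memset above) and
 * then copies each innermost input row into its shifted location, offset by
 * the left and top paddings (and front, per the locals above); the padded
 * regions therefore stay zero.
 */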
static GstFlowReturn
    GstBuffer * inbuf, GstBuffer * outbuf)
  GstFlowReturn res = GST_FLOW_ERROR;
  uint8_t *inptr, *outptr;
  guint i, num_tensors, num_mems;
  gsize buf_size, hsize;
  gboolean in_flexible, out_flexible;

  g_return_val_if_fail (filter->loaded, GST_FLOW_ERROR);

    num_tensors = num_mems;
    g_return_val_if_fail (out_flexible, GST_FLOW_ERROR);
    g_return_val_if_fail (num_mems == num_tensors, GST_FLOW_ERROR);

  for (i = 0; i < num_tensors; i++) {
    if (filter->apply && !g_list_find (filter->apply, GINT_TO_POINTER (i))) {
      if (!in_flexible && out_flexible) {
        GstMemory *old = mem;
        gst_memory_unref (old);

    if (!gst_memory_map (in_mem[i], &in_map[i], GST_MAP_READ)) {
      ml_loge ("Cannot map input buffer to gst-buf at tensor-transform.\n");
      res = GST_FLOW_ERROR;
    inptr = in_map[i].data;

      in_info = &in_flex_info;
      out_info = &out_flex_info;
        res = GST_FLOW_ERROR;
          i, in_info, out_info);

    out_mem[i] = gst_allocator_alloc (NULL, buf_size, NULL);

    if (!gst_memory_map (out_mem[i], &out_map[i], GST_MAP_WRITE)) {
      ml_loge ("Cannot map output buffer to gst-buf at tensor-transform.\n");
      res = GST_FLOW_ERROR;
    outptr = out_map[i].data;

    switch (filter->mode) {
        ml_loge ("Unsupported tensor-transform mode");
        res = GST_FLOW_NOT_SUPPORTED;

  for (i = 0; i < num_tensors; i++) {
      gst_memory_unmap (in_mem[i], &in_map[i]);
      gst_memory_unref (in_mem[i]);
      gst_memory_unmap (out_mem[i], &out_map[i]);
  GstStructure *structure;

  g_return_val_if_fail (config != NULL, FALSE);

  structure = gst_caps_get_structure (caps, 0);
    GST_WARNING_OBJECT (filter, "caps is not tensor %s\n",
        gst_structure_get_name (structure));
    GstPadDirection direction, guint idx, const GstTensorInfo * in_info,
  if (filter->apply && !g_list_find (filter->apply, GINT_TO_POINTER (idx)))

  switch (filter->mode) {
      if (direction == GST_PAD_SINK) {
          if ((i < from && i < to) || (i > from && i > to) || from == to) {
          } else if (i == to) {
          } else if (from > to) {
          if ((i < from && i < to) || (i > from && i > to) || from == to) {
          } else if (i == from) {
          } else if (from > to) {
      if (direction == GST_PAD_SINK) {
      if (direction == GST_PAD_SINK) {
      if (direction == GST_PAD_SINK) {
      if (direction == GST_PAD_SINK) {
      if (direction == GST_PAD_SINK) {
    GstPadDirection direction, GstCaps * caps, GstCaps * filtercap)
  GstStructure *structure;

  silent_debug (filter, "Calling TransformCaps, direction = %d\n", direction);

  result = gst_caps_new_empty ();
  for (i = 0; i < gst_caps_get_size (caps); i++) {
    gboolean is_types_not_fixed = FALSE;
    GstCaps *result_aux = gst_caps_new_empty ();

    structure = gst_caps_get_structure (caps, i);
            j, in_info, out_info);
        is_types_not_fixed = TRUE;

    if (is_types_not_fixed) {
      GstStructure *s = gst_caps_get_structure (result_aux, 0);
      gst_structure_remove_field (s, "types");

    gst_caps_append (result, result_aux);

  if (filtercap && gst_caps_get_size (filtercap) > 0) {
    GstCaps *intersection;
        gst_caps_intersect_full (result, filtercap, GST_CAPS_INTERSECT_FIRST);
    GstPadDirection direction, GstCaps * caps, GstCaps * othercaps)
  silent_debug (filter, "Calling FixateCaps, direction = %d\n", direction);
  gst_caps_unref (othercaps);
    GstCaps * incaps, GstCaps * outcaps)
  gboolean in_flexible, out_flexible;
  gboolean allowed = FALSE;

    GST_ERROR_OBJECT (filter, "Cannot read cap of incaps\n");
    GST_ERROR_OBJECT (filter, "Cannot read cap of outcaps\n");

            i, in_info, out_info)) {
      GST_ERROR_OBJECT (filter,
          "Tensor info does not match the given properties.");
      GST_INFO_OBJECT (filter, "Output tensor is flexible.");
    out_config = config;
      GST_ERROR_OBJECT (filter,
          "Tensor info does not match the given properties.\n");
    GST_ERROR_OBJECT (filter, "Set Caps Failed!\n");
2330 GstPadDirection direction, GstCaps * caps, gsize size, GstCaps * othercaps,