* v4.16.0
This commit is contained in:
HailoRT-Automation
2023-12-28 16:14:01 +02:00
committed by GitHub
parent 459eaf0234
commit 35d9282f36
276 changed files with 14513 additions and 8725 deletions

View File

@@ -64,9 +64,10 @@ typedef struct {
uint16_t periph_bytes_per_buffer; uint16_t periph_bytes_per_buffer;
uint16_t periph_buffers_per_frame; uint16_t periph_buffers_per_frame;
uint16_t feature_padding_payload; uint16_t feature_padding_payload;
uint16_t buffer_padding_payload; uint32_t buffer_padding_payload;
uint16_t buffer_padding; uint16_t buffer_padding;
bool is_periph_calculated_in_hailort; bool is_periph_calculated_in_hailort;
bool is_core_hw_padding_config_in_dfc;
} CONTEXT_SWITCH_DEFS__stream_reg_info_t; } CONTEXT_SWITCH_DEFS__stream_reg_info_t;
#if defined(_MSC_VER) #if defined(_MSC_VER)

View File

@@ -439,9 +439,10 @@ typedef struct {
uint16_t periph_bytes_per_buffer; uint16_t periph_bytes_per_buffer;
uint16_t periph_buffers_per_frame; uint16_t periph_buffers_per_frame;
uint16_t feature_padding_payload; uint16_t feature_padding_payload;
uint16_t buffer_padding_payload; uint32_t buffer_padding_payload;
uint16_t buffer_padding; uint16_t buffer_padding;
bool is_periph_calculated_in_hailort; bool is_periph_calculated_in_hailort;
bool is_core_hw_padding_config_in_dfc;
} CONTROL_PROTOCOL__nn_stream_config_t; } CONTROL_PROTOCOL__nn_stream_config_t;
typedef struct { typedef struct {

View File

@@ -919,6 +919,7 @@ Updating rules:
FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_EDGE_LAYER_INDEX)\ FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_EDGE_LAYER_INDEX)\
FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_PARAMETER)\ FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_PARAMETER)\
FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_PADDING_NOT_SUPPORTED_FOR_ARCH)\ FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_PADDING_NOT_SUPPORTED_FOR_ARCH)\
FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_MAX_BUFFER_PADDING_VALUE)\
\ \
FIRMWARE_MODULE__X(FIRMWARE_MODULE__RESET_HANDLER)\ FIRMWARE_MODULE__X(FIRMWARE_MODULE__RESET_HANDLER)\
FIRMWARE_STATUS__X(RESET_HANDLER_CHIP_RESET_FAILED)\ FIRMWARE_STATUS__X(RESET_HANDLER_CHIP_RESET_FAILED)\

13
hailort/.gitignore vendored
View File

@@ -1,12 +1,3 @@
/external/ /external/
cmake/external/benchmark/ cmake/external/*/
cmake/external/catch2/ prepare_externals/build/
cmake/external/dotwriter/
cmake/external/json/
cmake/external/pybind11/
cmake/external/readerwriterqueue/
cmake/external/spdlog/
pre_build/external/build/
pre_build/tools/build_protoc/
pre_build/install/

View File

@@ -30,55 +30,25 @@ endif()
# Set firmware version # Set firmware version
add_definitions( -DFIRMWARE_VERSION_MAJOR=4 ) add_definitions( -DFIRMWARE_VERSION_MAJOR=4 )
add_definitions( -DFIRMWARE_VERSION_MINOR=15 ) add_definitions( -DFIRMWARE_VERSION_MINOR=16 )
add_definitions( -DFIRMWARE_VERSION_REVISION=0 ) add_definitions( -DFIRMWARE_VERSION_REVISION=0 )
if(HAILO_BUILD_SERVICE) if(HAILO_BUILD_SERVICE)
add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS ) add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS )
endif() endif()
# The logic of prepare_externals is executed in a sperate module so that it can be run externally (via cmake -P prepare_externals.cmake) # TODO: temporary hack to support offline builds. Remove HAILO_OFFLINE_COMPILATION and use FETCHCONTENT_FULLY_DISCONNECTED
include(prepare_externals.cmake) if(HAILO_OFFLINE_COMPILATION)
set(FETCHCONTENT_FULLY_DISCONNECTED ON CACHE INTERNAL "")
# Include host protobuf for protoc (https://stackoverflow.com/questions/53651181/cmake-find-protobuf-package-in-custom-directory) set(HAILO_OFFLINE_COMPILATION OFF CACHE INTERNAL "")
if(CMAKE_HOST_UNIX)
include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/lib/cmake/protobuf/protobuf-config.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/lib/cmake/protobuf/protobuf-module.cmake)
else()
include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/cmake/protobuf-config.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/cmake/protobuf-module.cmake)
endif() endif()
# TODO: move protobuf and grpc to inner cmake files
set(HAILO_EXTERNAL_DIR ${CMAKE_CURRENT_LIST_DIR}/external)
set(HAILO_EXTERNALS_CMAKE_SCRIPTS ${CMAKE_CURRENT_LIST_DIR}/cmake/external/) set(HAILO_EXTERNALS_CMAKE_SCRIPTS ${CMAKE_CURRENT_LIST_DIR}/cmake/external/)
include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/protobuf.cmake)
# Add target protobuf directory and exclude its targets from all
# Disable protobuf tests, protoc and MSVC static runtime unless they are already defined
# NOTE: we can also force - set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE)
if(NOT protobuf_BUILD_TESTS)
set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests")
endif()
if(NOT protobuf_BUILD_PROTOC_BINARIES)
set(protobuf_BUILD_PROTOC_BINARIES OFF CACHE BOOL "Build libprotoc and protoc compiler")
endif()
if(MSVC AND NOT protobuf_MSVC_STATIC_RUNTIME)
set(protobuf_MSVC_STATIC_RUNTIME OFF CACHE BOOL "Protobuf MSVC static runtime")
endif()
if(NOT protobuf_WITH_ZLIB)
set(protobuf_WITH_ZLIB OFF CACHE BOOL "Compile protobuf with zlib")
endif()
add_subdirectory(external/protobuf/cmake EXCLUDE_FROM_ALL)
if(NOT MSVC)
set_target_properties(libprotobuf PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(libprotobuf-lite PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
if(HAILO_BUILD_SERVICE) if(HAILO_BUILD_SERVICE)
if(CMAKE_HOST_UNIX) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/grpc.cmake)
set(HAILO_GRPC_CPP_PLUGIN_EXECUTABLE "${HAILO_PRE_BUILD_BUILD_TOOLS}/build_grpc/grpc_cpp_plugin")
else()
set(HAILO_GRPC_CPP_PLUGIN_EXECUTABLE "${HAILO_PRE_BUILD_BUILD_TOOLS}/build_grpc/${PRE_BUILD_BUILD_TYPE}/grpc_cpp_plugin.exe")
endif()
endif() endif()
set(HAILO_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
set(HAILORT_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/libhailort/include) set(HAILORT_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/libhailort/include)
set(HAILORT_SRC_DIR ${PROJECT_SOURCE_DIR}/hailort/libhailort/src) set(HAILORT_SRC_DIR ${PROJECT_SOURCE_DIR}/hailort/libhailort/src)
@@ -87,19 +57,11 @@ set(COMMON_INC_DIR ${PROJECT_SOURCE_DIR}/common/include)
set(DRIVER_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/drivers/common) set(DRIVER_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/drivers/common)
set(RPC_DIR ${PROJECT_SOURCE_DIR}/hailort/rpc) set(RPC_DIR ${PROJECT_SOURCE_DIR}/hailort/rpc)
add_subdirectory(external/CLI11 EXCLUDE_FROM_ALL)
if(CMAKE_SYSTEM_NAME STREQUAL QNX) if(CMAKE_SYSTEM_NAME STREQUAL QNX)
include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/pevents.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/pevents.cmake)
endif() endif()
if(HAILO_BUILD_SERVICE) if(HAILO_BUILD_SERVICE)
set(BUILD_TESTING OFF) # disabe abseil tests
set(gRPC_ZLIB_PROVIDER "module" CACHE STRING "Provider of zlib library")
# The following is an awful hack needed in order to force grpc to use our libprotobuf+liborotoc targets
# ('formal' options are to let grpc recompile it which causes a name conflict,
# or let it use find_package and take the risk it will use a different installed lib)
set(gRPC_PROTOBUF_PROVIDER "hack" CACHE STRING "Provider of protobuf library")
add_subdirectory(external/grpc EXCLUDE_FROM_ALL)
add_subdirectory(rpc) add_subdirectory(rpc)
endif() endif()

View File

@@ -2,7 +2,7 @@
|:---------------------------------|:----------------------------------|:-------------------|:---------------|:----------------------------------------------|:------------------------------------------------------------------------------| |:---------------------------------|:----------------------------------|:-------------------|:---------------|:----------------------------------------------|:------------------------------------------------------------------------------|
| CLI11 | University of Cincinnati | 3-Clause BSD | 2.2.0 | Fork | https://github.com/hailo-ai/CLI11 | | CLI11 | University of Cincinnati | 3-Clause BSD | 2.2.0 | Fork | https://github.com/hailo-ai/CLI11 |
| Catch2 | Catch2 Authors | BSL-1.0 | 2.13.7 | Cloned entire package | https://github.com/catchorg/Catch2 | | Catch2 | Catch2 Authors | BSL-1.0 | 2.13.7 | Cloned entire package | https://github.com/catchorg/Catch2 |
| protobuf | Google Inc. | BSD | 3.19.4 | Cloned entire package | https://github.com/protocolbuffers/protobuf | | protobuf | Google Inc. | BSD | 21.12 | Cloned entire package | https://github.com/protocolbuffers/protobuf |
| pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 | | pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 |
| spdlog | Gabi Melman | MIT | 1.6.1 | Cloned entire package | https://github.com/gabime/spdlog | | spdlog | Gabi Melman | MIT | 1.6.1 | Cloned entire package | https://github.com/gabime/spdlog |
| folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly | | folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly |
@@ -12,5 +12,5 @@
| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git | | benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git |
| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 | | md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 |
| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git | | pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git |
| grpc | Google Inc. | Apache License 2.0 | 1.46.0 | Cloned entire package | https://github.com/grpc/grpc | | grpc | Google Inc. | Apache License 2.0 | 1.46.3 | Cloned entire package | https://github.com/grpc/grpc |
| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb | | stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb |

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
function(execute_process_in_clean_env) function(execute_process_in_clean_env)
cmake_parse_arguments(execute_process_in_clean_env "" "RESULT_VARIABLE" "" ${ARGN}) cmake_parse_arguments(execute_process_in_clean_env "" "RESULT_VARIABLE" "" ${ARGN})
if(CMAKE_HOST_UNIX) if(CMAKE_HOST_UNIX)
string(REPLACE ";" " " cmdline "${execute_process_in_clean_env_UNPARSED_ARGUMENTS}") string(REPLACE ";" "' '" cmdline "'${execute_process_in_clean_env_UNPARSED_ARGUMENTS}'")
execute_process(COMMAND env -i HOME=$ENV{HOME} PATH=/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin bash -l -c "${cmdline}" OUTPUT_QUIET RESULT_VARIABLE result) execute_process(COMMAND env -i HOME=$ENV{HOME} PATH=/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin bash -l -c "${cmdline}" OUTPUT_QUIET RESULT_VARIABLE result)
else() else()
# TODO: make it clean env for cross compile # TODO: make it clean env for cross compile
@@ -22,10 +22,11 @@ function(execute_cmake)
cmake_parse_arguments(execute_cmake "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(execute_cmake "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
execute_process_in_clean_env( execute_process_in_clean_env(
"${CMAKE_COMMAND}" ${CMAKE_COMMAND}
"${execute_cmake_SOURCE_DIR}" ${execute_cmake_SOURCE_DIR}
"-B${execute_cmake_BUILD_DIR}" -B ${execute_cmake_BUILD_DIR}
"${execute_cmake_CONFIGURE_ARGS}" -G "${CMAKE_GENERATOR}"
${execute_cmake_CONFIGURE_ARGS}
RESULT_VARIABLE result RESULT_VARIABLE result
) )
if(result) if(result)
@@ -34,6 +35,7 @@ function(execute_cmake)
if(${execute_cmake_PARALLEL_BUILD} AND (CMAKE_GENERATOR MATCHES "Unix Makefiles")) if(${execute_cmake_PARALLEL_BUILD} AND (CMAKE_GENERATOR MATCHES "Unix Makefiles"))
execute_process(COMMAND grep -c ^processor /proc/cpuinfo OUTPUT_VARIABLE cores_count RESULT_VARIABLE result) execute_process(COMMAND grep -c ^processor /proc/cpuinfo OUTPUT_VARIABLE cores_count RESULT_VARIABLE result)
string(STRIP ${cores_count} cores_count)
if(result) if(result)
message(FATAL_ERROR "Failed getting the amount of cores") message(FATAL_ERROR "Failed getting the amount of cores")
endif() endif()

View File

@@ -11,17 +11,15 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/google/benchmark.git GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG f91b6b42b1b9854772a90ae9501464a161707d1e # Version 1.6.0 GIT_TAG f91b6b42b1b9854772a90ae9501464a161707d1e # Version 1.6.0
GIT_SHALLOW TRUE GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/benchmark" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/benchmark-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/benchmark" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/benchmark-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(benchmark)
FetchContent_GetProperties(benchmark) if(NOT benchmark_POPULATED)
if(NOT benchmark_POPULATED) FetchContent_Populate(benchmark)
FetchContent_Populate(benchmark) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_subdirectory(${benchmark_SOURCE_DIR} ${benchmark_BINARY_DIR} EXCLUDE_FROM_ALL) add_subdirectory(${benchmark_SOURCE_DIR} ${benchmark_BINARY_DIR} EXCLUDE_FROM_ALL)
endif() endif()
else()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/benchmark EXCLUDE_FROM_ALL)
endif() endif()

View File

@@ -7,17 +7,15 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/catchorg/Catch2.git GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG c4e3767e265808590986d5db6ca1b5532a7f3d13 # Version 2.13.7 GIT_TAG c4e3767e265808590986d5db6ca1b5532a7f3d13 # Version 2.13.7
GIT_SHALLOW TRUE GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/catch2" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/catch2-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/catch2" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/catch2-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(catch2)
FetchContent_GetProperties(catch2) if(NOT catch2_POPULATED)
if(NOT catch2_POPULATED) FetchContent_Populate(catch2)
FetchContent_Populate(catch2) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_subdirectory(${catch2_SOURCE_DIR} ${catch2_BINARY_DIR} EXCLUDE_FROM_ALL) add_subdirectory(${catch2_SOURCE_DIR} ${catch2_BINARY_DIR} EXCLUDE_FROM_ALL)
endif() endif()
else()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/catch2 EXCLUDE_FROM_ALL)
endif() endif()

21
hailort/cmake/external/cli11.cmake vendored Normal file
View File

@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.11.0)
include(FetchContent)
FetchContent_Declare(
cli11
GIT_REPOSITORY https://github.com/hailo-ai/CLI11.git
GIT_TAG ae78ac41cf225706e83f57da45117e3e90d4a5b4 # Version 2.2.0 + hailo completion
GIT_SHALLOW TRUE
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/cli11-src
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/cli11-subbuild
)
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
FetchContent_GetProperties(cli11)
if(NOT cli11_POPULATED)
FetchContent_Populate(cli11)
if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_subdirectory(${cli11_SOURCE_DIR} ${cli11_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
endif()

View File

@@ -7,17 +7,15 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/hailo-ai/DotWriter GIT_REPOSITORY https://github.com/hailo-ai/DotWriter
GIT_TAG e5fa8f281adca10dd342b1d32e981499b8681daf # Version master GIT_TAG e5fa8f281adca10dd342b1d32e981499b8681daf # Version master
GIT_SHALLOW TRUE GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/dotwriter" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/dotwriter-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/dotwriter" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/dotwriter-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(dotwriter)
FetchContent_GetProperties(dotwriter) if(NOT dotwriter_POPULATED)
if(NOT dotwriter_POPULATED) FetchContent_Populate(dotwriter)
FetchContent_Populate(dotwriter) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_subdirectory(${dotwriter_SOURCE_DIR} ${dotwriter_BINARY_DIR} EXCLUDE_FROM_ALL) add_subdirectory(${dotwriter_SOURCE_DIR} ${dotwriter_BINARY_DIR} EXCLUDE_FROM_ALL)
endif() endif()
else()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/dotwriter EXCLUDE_FROM_ALL)
endif() endif()

50
hailort/cmake/external/grpc.cmake vendored Normal file
View File

@@ -0,0 +1,50 @@
cmake_minimum_required(VERSION 3.11.0)
include(${CMAKE_CURRENT_LIST_DIR}/protobuf.cmake)
include(FetchContent)
FetchContent_Declare(
grpc
GIT_REPOSITORY https://github.com/grpc/grpc
GIT_TAG 53d69cc581c5b7305708587f4f1939278477c28a # v1.46.3
GIT_SHALLOW TRUE
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/grpc-src
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/grpc-subbuild
)
FetchContent_GetProperties(grpc)
if(NOT grpc_POPULATED)
FetchContent_Populate(grpc)
if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
message(STATUS "Building grpc...")
include(${CMAKE_CURRENT_LIST_DIR}/../execute_cmake.cmake)
set(TOOL_BUILD_TYPE "Release")
execute_cmake(
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/grpc-src
BUILD_DIR ${HAILO_EXTERNAL_DIR}/grpc-build
CONFIGURE_ARGS
-DCMAKE_BUILD_TYPE=${TOOL_BUILD_TYPE}
-DgRPC_BUILD_TESTS:BOOL=OFF
# TODO: check flag on Windows
# -DgRPC_BUILD_MSVC_MP_COUNT:STRING=-1
-DgRPC_PROTOBUF_PROVIDER:STRING=package
-DgRPC_PROTOBUF_PACKAGE_TYPE:STRING=CONFIG
-DProtobuf_DIR:PATH=${PROTOBUF_CONFIG_DIR}
BUILD_ARGS
--config ${TOOL_BUILD_TYPE} --target grpc_cpp_plugin ${CMAKE_EXTRA_BUILD_ARGS}
PARALLEL_BUILD
)
if(HAILO_BUILD_SERVICE)
# TODO: go over BUILD_TESTING vs gRPC_BUILD_TESTS. what about avoiding the hack the same way we did for grpc_cpp_plugin?
set(BUILD_TESTING OFF) # disabe abseil tests
set(gRPC_ZLIB_PROVIDER "module" CACHE STRING "Provider of zlib library")
# The following is an awful hack needed in order to force grpc to use our libprotobuf+liborotoc targets
# ('formal' options are to let grpc recompile it which causes a name conflict,
# or let it use find_package and take the risk it will use a different installed lib)
set(gRPC_PROTOBUF_PROVIDER "hack" CACHE STRING "Provider of protobuf library")
add_subdirectory(${grpc_SOURCE_DIR} ${grpc_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
endif()
endif()

View File

@@ -7,17 +7,15 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent.git GIT_REPOSITORY https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent.git
GIT_TAG 391786c6c3abdd3eeb993a3154f1f2a4cfe137a0 # Version 3.9.1 GIT_TAG 391786c6c3abdd3eeb993a3154f1f2a4cfe137a0 # Version 3.9.1
GIT_SHALLOW TRUE GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/json" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/json-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/json" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/json-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(json)
FetchContent_GetProperties(json) if(NOT json_POPULATED)
if(NOT json_POPULATED) FetchContent_Populate(json)
FetchContent_Populate(json) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL) add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif() endif()
else()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/json EXCLUDE_FROM_ALL)
endif() endif()

View File

@@ -7,20 +7,17 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/neosmart/pevents.git GIT_REPOSITORY https://github.com/neosmart/pevents.git
GIT_TAG 1209b1fd1bd2e75daab4380cf43d280b90b45366 # Master GIT_TAG 1209b1fd1bd2e75daab4380cf43d280b90b45366 # Master
#GIT_SHALLOW TRUE #GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/pevents" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/pevents-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/pevents" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/pevents-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(pevents)
FetchContent_GetProperties(pevents) if(NOT pevents_POPULATED)
if(NOT pevents_POPULATED) FetchContent_Populate(pevents)
FetchContent_Populate(pevents) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_library(pevents STATIC EXCLUDE_FROM_ALL ${pevents_SOURCE_DIR}/src/pevents.cpp)
target_include_directories(pevents PUBLIC ${pevents_SOURCE_DIR}/src)
target_compile_definitions(pevents PRIVATE -DWFMO)
endif() endif()
endif()
if(NOT TARGET pevents)
add_library(pevents STATIC EXCLUDE_FROM_ALL ${pevents_SOURCE_DIR}/src/pevents.cpp)
target_include_directories(pevents PUBLIC ${pevents_SOURCE_DIR}/src)
target_compile_definitions(pevents PRIVATE -DWFMO)
endif() endif()

67
hailort/cmake/external/protobuf.cmake vendored Normal file
View File

@@ -0,0 +1,67 @@
cmake_minimum_required(VERSION 3.11.0)
include(FetchContent)
# TODO: support cross generators - https://gitlab.kitware.com/cmake/cmake/-/issues/20536
FetchContent_Declare(
protobuf
GIT_REPOSITORY https://github.com/protocolbuffers/protobuf.git
GIT_TAG f0dc78d7e6e331b8c6bb2d5283e06aa26883ca7c # v21.12
GIT_SHALLOW TRUE
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/protobuf-src
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/protobuf-subbuild
)
FetchContent_GetProperties(protobuf)
if(NOT protobuf_POPULATED)
FetchContent_Populate(protobuf)
if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
message(STATUS "Building protobuf::protoc...")
include(${CMAKE_CURRENT_LIST_DIR}/../execute_cmake.cmake)
set(TOOL_BUILD_TYPE "Release")
set(PROTOBUF_INSTALL_DIR ${HAILO_EXTERNAL_DIR}/protobuf-install)
execute_cmake(
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/protobuf-src
BUILD_DIR ${HAILO_EXTERNAL_DIR}/protobuf-build
CONFIGURE_ARGS
-DCMAKE_BUILD_TYPE=${TOOL_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-Dprotobuf_BUILD_TESTS:BOOL=OFF
-Dprotobuf_WITH_ZLIB:BOOL=OFF
-Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF
BUILD_ARGS
# NOTE: We are installing instead of building protoc because "hailort\external\protobuf-build\cmake\protobuf-targets.cmake" (in Windows) is based on config type.
# TODO: consider importing protobuf_generate_cpp instead? will it solve it?
--config ${TOOL_BUILD_TYPE} --target install ${CMAKE_EXTRA_BUILD_ARGS}
PARALLEL_BUILD
)
if(WIN32)
set(PROTOBUF_CONFIG_DIR ${PROTOBUF_INSTALL_DIR}/cmake)
else()
set(PROTOBUF_CONFIG_DIR ${PROTOBUF_INSTALL_DIR}/lib/cmake/protobuf)
endif()
# Include host protobuf for protoc (https://stackoverflow.com/questions/53651181/cmake-find-protobuf-package-in-custom-directory)
include(${PROTOBUF_CONFIG_DIR}/protobuf-config.cmake)
include(${PROTOBUF_CONFIG_DIR}/protobuf-module.cmake)
set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE)
set(protobuf_BUILD_PROTOC_BINARIES OFF CACHE BOOL "Build libprotoc and protoc compiler" FORCE)
set(protobuf_MSVC_STATIC_RUNTIME OFF CACHE BOOL "Protobuf MSVC static runtime" FORCE)
set(protobuf_WITH_ZLIB OFF CACHE BOOL "Compile protobuf with zlib" FORCE)
add_subdirectory(${protobuf_SOURCE_DIR} ${protobuf_BINARY_DIR} EXCLUDE_FROM_ALL)
if(NOT MSVC)
set_target_properties(libprotobuf PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(libprotobuf-lite PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
endif()
endif()

View File

@@ -2,34 +2,31 @@ cmake_minimum_required(VERSION 3.11.0)
include(FetchContent) include(FetchContent)
if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION)
# venv version is prioritized (instead of PYBIND11_PYTHON_VERSION) if PYTHON_EXECUTABLE is not set.
# See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020
if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32))
find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED)
set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
else()
find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development)
set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
endif()
endif()
FetchContent_Declare( FetchContent_Declare(
pybind11 pybind11
GIT_REPOSITORY https://github.com/pybind/pybind11.git GIT_REPOSITORY https://github.com/pybind/pybind11.git
GIT_TAG 80dc998efced8ceb2be59756668a7e90e8bef917 # Version 2.10.1 GIT_TAG 80dc998efced8ceb2be59756668a7e90e8bef917 # Version 2.10.1
GIT_SHALLOW TRUE GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/pybind11-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/pybind11-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(pybind11)
FetchContent_GetProperties(pybind11) if(NOT pybind11_POPULATED)
if(NOT pybind11_POPULATED) FetchContent_Populate(pybind11)
FetchContent_Populate(pybind11) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION)
# venv version is prioritized (instead of PYBIND11_PYTHON_VERSION) if PYTHON_EXECUTABLE is not set.
# See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020
if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32))
find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED)
set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
else()
find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development)
set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
endif()
endif()
add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL) add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL)
endif() endif()
else()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/pybind11 EXCLUDE_FROM_ALL)
endif() endif()

View File

@@ -7,20 +7,17 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/cameron314/readerwriterqueue GIT_REPOSITORY https://github.com/cameron314/readerwriterqueue
GIT_TAG 435e36540e306cac40fcfeab8cc0a22d48464509 # Version 1.0.3 GIT_TAG 435e36540e306cac40fcfeab8cc0a22d48464509 # Version 1.0.3
GIT_SHALLOW TRUE GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/readerwriterqueue" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/readerwriterqueue-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/readerwriterqueue" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/readerwriterqueue-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(readerwriterqueue)
FetchContent_GetProperties(readerwriterqueue) if(NOT readerwriterqueue_POPULATED)
if(NOT readerwriterqueue_POPULATED) FetchContent_Populate(readerwriterqueue)
FetchContent_Populate(readerwriterqueue) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
# Add readerwriterqueue as a header-only library
add_library(readerwriterqueue INTERFACE)
target_include_directories(readerwriterqueue INTERFACE ${readerwriterqueue_SOURCE_DIR})
endif() endif()
endif()
if(NOT TARGET readerwriterqueue)
# Add readerwriterqueue as a header-only library
add_library(readerwriterqueue INTERFACE)
target_include_directories(readerwriterqueue INTERFACE ${readerwriterqueue_SOURCE_DIR})
endif() endif()

View File

@@ -7,18 +7,16 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/gabime/spdlog GIT_REPOSITORY https://github.com/gabime/spdlog
GIT_TAG 22a169bc319ac06948e7ee0be6b9b0ac81386604 GIT_TAG 22a169bc319ac06948e7ee0be6b9b0ac81386604
GIT_SHALLOW TRUE GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/spdlog" SOURCE_DIR ${HAILO_EXTERNAL_DIR}/spdlog-src
BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/spdlog" SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/spdlog-subbuild
) )
if(NOT HAILO_OFFLINE_COMPILATION) # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent FetchContent_GetProperties(spdlog)
FetchContent_GetProperties(spdlog) if(NOT spdlog_POPULATED)
if(NOT spdlog_POPULATED) FetchContent_Populate(spdlog)
FetchContent_Populate(spdlog) if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_subdirectory(${spdlog_SOURCE_DIR} ${spdlog_BINARY_DIR} EXCLUDE_FROM_ALL) add_subdirectory(${spdlog_SOURCE_DIR} ${spdlog_BINARY_DIR} EXCLUDE_FROM_ALL)
set_target_properties(spdlog PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif() endif()
else()
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/spdlog EXCLUDE_FROM_ALL)
endif() endif()
set_target_properties(spdlog PROPERTIES POSITION_INDEPENDENT_CODE ON)

View File

@@ -123,23 +123,36 @@ hailo_status PowerMeasurement::sanity_check()
hailo_status PowerMeasurement::start_measurement() hailo_status PowerMeasurement::start_measurement()
{ {
auto status = m_device.stop_power_measurement();
CHECK_SUCCESS(status, "Failed to stop power measurement");
status = m_device.set_power_measurement(HAILO_MEASUREMENT_BUFFER_INDEX_0, HAILO_DVM_OPTIONS_AUTO, m_measurement_type);
CHECK_SUCCESS(status, "Failed to start power measurement");
//Note: important to keep the chip sampling period lower than the interval between measurements (DEFAULT_MEASUREMENTS_INTERVAL)
status = m_device.start_power_measurement(HAILO_AVERAGE_FACTOR_1, HAILO_SAMPLING_PERIOD_140US);
CHECK_SUCCESS(status, "Failed to start power measurement");
m_is_thread_running = true; m_is_thread_running = true;
m_thread = std::thread([this] () { m_thread = std::thread([this] () {
while (m_is_thread_running.load()) { const bool clear_power_measurement_history = true;
auto power_info = m_device.power_measurement(HAILO_DVM_OPTIONS_AUTO, m_measurement_type); while (m_is_thread_running.load()) {
if (HAILO_SUCCESS != power_info.status()) { std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL);
LOGGER__ERROR("Failed to get chip's power, status = {}", power_info.status()); auto power_data = m_device.get_power_measurement(HAILO_MEASUREMENT_BUFFER_INDEX_0, clear_power_measurement_history);
if (HAILO_SUCCESS != power_data.status()) {
LOGGER__ERROR("Failed to get chip's power, status = {}", power_data.status());
m_is_thread_running = false; m_is_thread_running = false;
break; break;
} }
{ {
std::unique_lock<std::mutex> lock(m_mutex); std::unique_lock<std::mutex> lock(m_mutex);
m_acc->add_data_point(*power_info); m_acc->add_data_point(power_data->average_value);
} }
std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL);
} }
auto status = m_device.stop_power_measurement();
CHECK_SUCCESS(status, "Failed to start power measurement");
return HAILO_SUCCESS;
}); });
return HAILO_SUCCESS; return HAILO_SUCCESS;

View File

@@ -34,7 +34,7 @@ Expected<size_t> get_istream_size(std::ifstream &s)
return Expected<size_t>(static_cast<size_t>(total_size)); return Expected<size_t>(static_cast<size_t>(total_size));
} }
Expected<Buffer> read_binary_file(const std::string &file_path) Expected<Buffer> read_binary_file(const std::string &file_path, const BufferStorageParams &output_buffer_params)
{ {
std::ifstream file(file_path, std::ios::in | std::ios::binary); std::ifstream file(file_path, std::ios::in | std::ios::binary);
CHECK_AS_EXPECTED(file.good(), HAILO_OPEN_FILE_FAILURE, "Error opening file {}", file_path); CHECK_AS_EXPECTED(file.good(), HAILO_OPEN_FILE_FAILURE, "Error opening file {}", file_path);
@@ -42,7 +42,7 @@ Expected<Buffer> read_binary_file(const std::string &file_path)
auto file_size = get_istream_size(file); auto file_size = get_istream_size(file);
CHECK_EXPECTED(file_size, "Failed to get file size"); CHECK_EXPECTED(file_size, "Failed to get file size");
auto buffer = Buffer::create(file_size.value()); auto buffer = Buffer::create(file_size.value(), output_buffer_params);
CHECK_EXPECTED(buffer, "Failed to allocate file buffer ({} bytes}", file_size.value()); CHECK_EXPECTED(buffer, "Failed to allocate file buffer ({} bytes}", file_size.value());
// Read the data // Read the data

View File

@@ -24,7 +24,8 @@ Expected<size_t> get_istream_size(std::ifstream &s);
/** /**
* Reads full file content into a `Buffer` * Reads full file content into a `Buffer`
*/ */
Expected<Buffer> read_binary_file(const std::string &file_path); Expected<Buffer> read_binary_file(const std::string &file_path,
const BufferStorageParams &output_buffer_params = {});
} /* namespace hailort */ } /* namespace hailort */

View File

@@ -16,7 +16,9 @@
#include <signal.h> #include <signal.h>
#include <sched.h> #include <sched.h>
#if defined(__QNX__)
#define OS_UTILS__QNX_PAGE_SIZE (4096)
#endif /* defined(__QNX__) */
namespace hailort namespace hailort
{ {
@@ -74,6 +76,34 @@ size_t OsUtils::get_page_size()
return page_size; return page_size;
} }
size_t OsUtils::get_dma_able_alignment()
{
#if defined(__linux__)
// TODO: HRT-12494 after supporting in linux, restore this code
// Return value if was saved already
// if (0 != DMA_ABLE_ALIGNMENT) {
// return Expected<size_t>(DMA_ABLE_ALIGNMENT);
// }
// static const auto cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
// if (-1 == cacheline_size) {
// return make_unexpected(HAILO_INTERNAL_FAILURE);
// }
// // Set static variable to value - so dont need to fetch actual value every function call
// // TODO HRT-12459: Currently use DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION as minimum until after debug - seeing as all
// // Funtions currently calling this function are for write
// DMA_ABLE_ALIGNMENT = std::max(HailoRTCommon::DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION, static_cast<size_t>(cacheline_size));
// return Expected<size_t>(DMA_ABLE_ALIGNMENT);
return get_page_size();
// TODO: implement on qnx (HRT-12356) - only needed when async api is implemented on qnx
// TODO - URT-13534 - use sys call for QNX OS to get page size
#elif defined(__QNX__)
return OS_UTILS__QNX_PAGE_SIZE
#endif
}
CursorAdjustment::CursorAdjustment(){} CursorAdjustment::CursorAdjustment(){}
CursorAdjustment::~CursorAdjustment(){} CursorAdjustment::~CursorAdjustment(){}

View File

@@ -14,6 +14,8 @@
#include <windows.h> #include <windows.h>
#include "spdlog/sinks/win_eventlog_sink.h" #include "spdlog/sinks/win_eventlog_sink.h"
#define CACHE_LEVEL_INDEX (1)
namespace hailort namespace hailort
{ {
@@ -78,6 +80,45 @@ size_t OsUtils::get_page_size()
return page_size; return page_size;
} }
size_t OsUtils::get_dma_able_alignment()
{
// // Return value if was saved already
// if (0 != DMA_ABLE_ALIGNMENT) {
// return Expected<size_t>(DMA_ABLE_ALIGNMENT);
// }
// size_t cacheline_size = 0;
// DWORD proc_info_struct_size = 0;
// // We call this function to fail and get the size needed for SYSTEM_LOGICAL_PROCESSOR_INFORMATION struct
// BOOL ret_val = GetLogicalProcessorInformation(0, &proc_info_struct_size);
// CHECK_AS_EXPECTED((FALSE == ret_val) && (ERROR_INSUFFICIENT_BUFFER == GetLastError()), HAILO_INTERNAL_FAILURE,
// "GetLogicalProcessorInformation Failed with error {}", GetLastError());
// std::shared_ptr<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> proc_info(
// static_cast<SYSTEM_LOGICAL_PROCESSOR_INFORMATION *>(malloc(proc_info_struct_size)), free);
// ret_val = GetLogicalProcessorInformation(static_cast<SYSTEM_LOGICAL_PROCESSOR_INFORMATION *>(proc_info.get()),
// &proc_info_struct_size);
// CHECK_AS_EXPECTED(ret_val, HAILO_INTERNAL_FAILURE, "GetLogicalProcessorInformation Failed with error {}",
// GetLastError());
// for (DWORD i = 0; i < proc_info_struct_size; i++) {
// // Assume same cache line for all processors
// if ((RelationCache == proc_info.get()[i].Relationship) && (CACHE_LEVEL_INDEX == proc_info.get()[i].Cache.Level)) {
// cacheline_size = proc_info.get()[i].Cache.LineSize;
// break;
// }
// }
// // Set static variable to value - so dont need to fetch actual value every function call
// // TODO HRT-12459: Currently use DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION as minimum until after debug - seeing as all
// // Funtions currently calling this function are for write
// DMA_ABLE_ALIGNMENT = std::max(HailoRTCommon::DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION, static_cast<size_t>(cacheline_size));
// return Expected<size_t>(DMA_ABLE_ALIGNMENT);
// TODO: HRT-12495 support page-aligned address on windows
return get_page_size();
}
CursorAdjustment::CursorAdjustment() CursorAdjustment::CursorAdjustment()
{ {
// Enables Vitual Terminal Processing - enables ANSI Escape Sequences on Windows // Enables Vitual Terminal Processing - enables ANSI Escape Sequences on Windows

View File

@@ -11,6 +11,8 @@
#define _HAILO_OS_UTILS_HPP_ #define _HAILO_OS_UTILS_HPP_
#include "hailo/hailort.h" #include "hailo/hailort.h"
#include "hailo/expected.hpp"
#include "hailo/hailort_common.hpp"
#include "common/logger_macros.hpp" #include "common/logger_macros.hpp"
@@ -63,6 +65,7 @@ public:
static void set_current_thread_name(const std::string &name); static void set_current_thread_name(const std::string &name);
static hailo_status set_current_thread_affinity(uint8_t cpu_index); static hailo_status set_current_thread_affinity(uint8_t cpu_index);
static size_t get_page_size(); static size_t get_page_size();
static size_t get_dma_able_alignment();
}; };
} /* namespace hailort */ } /* namespace hailort */

View File

@@ -244,13 +244,13 @@ _ISEMPTY( \
} while(0) } while(0)
#define CHECK_AS_RPC_STATUS(cond, reply, ret_val, ...) _CHECK_AS_RPC_STATUS((cond), (reply), (ret_val), ISEMPTY(__VA_ARGS__), "" __VA_ARGS__) #define CHECK_AS_RPC_STATUS(cond, reply, ret_val, ...) _CHECK_AS_RPC_STATUS((cond), (reply), (ret_val), ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
#define _CHECK_GRPC_STATUS(status, ret_val, warning_msg) \ #define _CHECK_GRPC_STATUS(status, ret_val, warning_msg) \
do { \ do { \
if (!status.ok()) { \ if (!status.ok()) { \
LOGGER__ERROR("CHECK_GRPC_STATUS failed with error massage: {}.", status.error_message()); \ LOGGER__ERROR("CHECK_GRPC_STATUS failed with error code: {}.", status.error_code()); \
LOGGER__WARNING(warning_msg); \ LOGGER__WARNING(warning_msg); \
return ret_val; \ return ret_val; \
} \ } \
} while(0) } while(0)
#define SERVICE_WARNING_MSG ("Make sure HailoRT service is enabled and active!") #define SERVICE_WARNING_MSG ("Make sure HailoRT service is enabled and active!")

View File

@@ -24,6 +24,7 @@ target_link_libraries(hailort_service
spdlog::spdlog spdlog::spdlog
grpc++_unsecure grpc++_unsecure
hailort_rpc_grpc_proto hailort_rpc_grpc_proto
readerwriterqueue
) )
if(WIN32) if(WIN32)
# Needed in order to compile eth utils (we compile here ${HAILORT_COMMON_CPP_SOURCES}, consider removing) # Needed in order to compile eth utils (we compile here ${HAILORT_COMMON_CPP_SOURCES}, consider removing)
@@ -35,6 +36,7 @@ target_include_directories(hailort_service
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${HAILORT_INC_DIR} ${HAILORT_INC_DIR}
${HAILORT_COMMON_DIR} ${HAILORT_COMMON_DIR}
${HAILORT_SRC_DIR}
${COMMON_INC_DIR} ${COMMON_INC_DIR}
${RPC_DIR} ${RPC_DIR}
) )

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,7 @@
#else #else
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion" #pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif #endif
#include <grpcpp/grpcpp.h> #include <grpcpp/grpcpp.h>
#include "hailort_rpc.grpc.pb.h" #include "hailort_rpc.grpc.pb.h"
@@ -25,8 +26,11 @@
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#endif #endif
#include <thread>
#include "hailo/hailort.h" #include "hailo/hailort.h"
#include "hailo/network_group.hpp"
#include "vdevice_callbacks_queue.hpp"
#include <thread>
namespace hailort namespace hailort
{ {
@@ -50,6 +54,10 @@ public:
VDevice_get_physical_devices_ids_Reply* reply) override; VDevice_get_physical_devices_ids_Reply* reply) override;
virtual grpc::Status VDevice_get_default_streams_interface(grpc::ServerContext*, const VDevice_get_default_streams_interface_Request* request, virtual grpc::Status VDevice_get_default_streams_interface(grpc::ServerContext*, const VDevice_get_default_streams_interface_Request* request,
VDevice_get_default_streams_interface_Reply* reply) override; VDevice_get_default_streams_interface_Reply* reply) override;
virtual grpc::Status VDevice_get_callback_id(grpc::ServerContext*, const VDevice_get_callback_id_Request* request,
VDevice_get_callback_id_Reply* reply) override;
virtual grpc::Status VDevice_finish_callback_listener(grpc::ServerContext*, const VDevice_finish_callback_listener_Request* request,
VDevice_finish_callback_listener_Reply* reply) override;
virtual grpc::Status InputVStreams_create(grpc::ServerContext *, const VStream_create_Request *request, virtual grpc::Status InputVStreams_create(grpc::ServerContext *, const VStream_create_Request *request,
VStreams_create_Reply *reply) override; VStreams_create_Reply *reply) override;
@@ -141,6 +149,9 @@ public:
virtual grpc::Status ConfiguredNetworkGroup_get_default_stream_interface(grpc::ServerContext*, virtual grpc::Status ConfiguredNetworkGroup_get_default_stream_interface(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_default_stream_interface_Request *request, const ConfiguredNetworkGroup_get_default_stream_interface_Request *request,
ConfiguredNetworkGroup_get_default_stream_interface_Reply *reply) override; ConfiguredNetworkGroup_get_default_stream_interface_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_shutdown(grpc::ServerContext*,
const ConfiguredNetworkGroup_shutdown_Request *request,
ConfiguredNetworkGroup_shutdown_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_output_vstream_groups(grpc::ServerContext*, virtual grpc::Status ConfiguredNetworkGroup_get_output_vstream_groups(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_output_vstream_groups_Request *request, const ConfiguredNetworkGroup_get_output_vstream_groups_Request *request,
ConfiguredNetworkGroup_get_output_vstream_groups_Reply *reply) override; ConfiguredNetworkGroup_get_output_vstream_groups_Reply *reply) override;
@@ -177,22 +188,39 @@ public:
virtual grpc::Status ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*, virtual grpc::Status ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_sorted_output_names_Request *request, const ConfiguredNetworkGroup_get_sorted_output_names_Request *request,
ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) override; ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request,
ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_layer_info(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_layer_info_Request *request,
ConfiguredNetworkGroup_get_layer_info_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_ops_metadata(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_ops_metadata_Request *request,
ConfiguredNetworkGroup_get_ops_metadata_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_set_nms_score_threshold(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_score_threshold_Request *request,
ConfiguredNetworkGroup_set_nms_score_threshold_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_set_nms_iou_threshold(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_iou_threshold_Request *request,
ConfiguredNetworkGroup_set_nms_iou_threshold_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request *request,
ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, virtual grpc::Status ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request,
ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) override; ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_vstream_names_from_stream_name(grpc::ServerContext*, virtual grpc::Status ConfiguredNetworkGroup_get_vstream_names_from_stream_name(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request *request, const ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request *request,
ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply) override; ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_infer_async(grpc::ServerContext*,
const ConfiguredNetworkGroup_infer_async_Request *request,
ConfiguredNetworkGroup_infer_async_Reply *reply) override;
private: private:
void keep_alive(); void keep_alive();
hailo_status flush_input_vstream(uint32_t handle); hailo_status flush_input_vstream(uint32_t handle);
hailo_status abort_input_vstream(uint32_t handle); hailo_status abort_input_vstream(uint32_t handle);
hailo_status abort_output_vstream(uint32_t handle); hailo_status abort_output_vstream(uint32_t handle);
hailo_status resume_input_vstream(uint32_t handle);
hailo_status resume_output_vstream(uint32_t handle);
bool is_input_vstream_aborted(uint32_t handle);
bool is_output_vstream_aborted(uint32_t handle);
void abort_vstreams_by_pids(std::set<uint32_t> &pids); void abort_vstreams_by_pids(std::set<uint32_t> &pids);
void remove_disconnected_clients(); void remove_disconnected_clients();
void update_client_id_timestamp(uint32_t pid); void update_client_id_timestamp(uint32_t pid);
@@ -200,6 +228,8 @@ private:
std::mutex m_mutex; std::mutex m_mutex;
std::map<uint32_t, std::chrono::time_point<std::chrono::high_resolution_clock>> m_clients_pids; std::map<uint32_t, std::chrono::time_point<std::chrono::high_resolution_clock>> m_clients_pids;
std::unique_ptr<std::thread> m_keep_alive; std::unique_ptr<std::thread> m_keep_alive;
std::mutex m_vdevice_creation_mutex;
}; };
} }

View File

@@ -49,13 +49,29 @@ public:
{ {
std::unique_lock<std::mutex> lock(m_mutex); std::unique_lock<std::mutex> lock(m_mutex);
auto resource_expected = resource_lookup(handle); auto resource_expected = resource_lookup(handle);
assert(resource_expected); CHECK_EXPECTED(resource_expected);
auto resource = resource_expected.release(); auto resource = resource_expected.release();
assert(contains(m_resources_mutexes, handle)); assert(contains(m_resources_mutexes, handle));
std::shared_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]); std::shared_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
lock.unlock(); lock.unlock();
K ret = lambda(resource->resource, args...); auto ret = lambda(resource->resource, args...);
return ret;
}
template<class Func, typename... Args>
hailo_status execute(uint32_t handle, Func &lambda, Args... args)
{
std::unique_lock<std::mutex> lock(m_mutex);
auto resource_expected = resource_lookup(handle);
CHECK_EXPECTED_AS_STATUS(resource_expected);
auto resource = resource_expected.release();
assert(contains(m_resources_mutexes, handle));
std::shared_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
lock.unlock();
auto ret = lambda(resource->resource, args...);
return ret; return ret;
} }
@@ -71,18 +87,18 @@ public:
return index; return index;
} }
uint32_t dup_handle(uint32_t handle, uint32_t pid) Expected<uint32_t> dup_handle(uint32_t handle, uint32_t pid)
{ {
std::unique_lock<std::mutex> lock(m_mutex); std::unique_lock<std::mutex> lock(m_mutex);
auto resource_expected = resource_lookup(handle); auto resource_expected = resource_lookup(handle);
assert(resource_expected); CHECK_EXPECTED(resource_expected);
auto resource = resource_expected.release(); auto resource = resource_expected.release();
assert(contains(m_resources_mutexes, handle)); assert(contains(m_resources_mutexes, handle));
std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]); std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
resource->pids.insert(pid); resource->pids.insert(pid);
return handle; return Expected<uint32_t>(handle);
} }
std::shared_ptr<T> release_resource(uint32_t handle, uint32_t pid) std::shared_ptr<T> release_resource(uint32_t handle, uint32_t pid)

View File

@@ -0,0 +1,88 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file vdevice_callbacks_queue.hpp
* @brief Queue used for the callbacks in infer async over service.
* enqueue callback id means the transfer is done.
* dequeue a callback id means the client is signaled to call the callback on his side.
**/
#ifndef _HAILO_VDEVICE_CALLBACKS_QUEUE_HPP_
#define _HAILO_VDEVICE_CALLBACKS_QUEUE_HPP_
#include "hailort_rpc_service.hpp"
#include "hailo/hailort.h"
#include "hailo/network_group.hpp"
#include "hailo/hailort_common.hpp"
#include "utils/thread_safe_queue.hpp"
namespace hailort
{
#define MAX_QUEUE_SIZE (512) // Max inner reader-writer queue size
class VDeviceCallbacksQueue final
{
public:
static Expected<std::unique_ptr<VDeviceCallbacksQueue>> create(uint32_t max_queue_size)
{
auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled);
CHECK_EXPECTED(shutdown_event_exp);
auto shutdown_event = shutdown_event_exp.release();
auto cb_ids_queue = SpscQueue<ProtoCallbackIdentifier>::create(max_queue_size, shutdown_event, HAILO_INFINITE_TIMEOUT);
CHECK_EXPECTED(cb_ids_queue);
auto queue_ptr = make_unique_nothrow<VDeviceCallbacksQueue>(cb_ids_queue.release(), shutdown_event);
CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY);
return queue_ptr;
}
VDeviceCallbacksQueue(SpscQueue<ProtoCallbackIdentifier> &&cb_ids_queue, EventPtr shutdown_event) :
m_callbacks_ids_queue(std::move(cb_ids_queue)), m_shutdown_event(shutdown_event)
{}
hailo_status enqueue(ProtoCallbackIdentifier &&callback_id)
{
std::unique_lock<std::mutex> lock(m_mutex);
auto status = m_callbacks_ids_queue.enqueue(std::move(callback_id));
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
Expected<ProtoCallbackIdentifier> dequeue()
{
auto callback_id = m_callbacks_ids_queue.dequeue();
if (HAILO_SHUTDOWN_EVENT_SIGNALED == callback_id.status()) {
return make_unexpected(callback_id.status());
}
else if (HAILO_TIMEOUT == callback_id.status()) {
LOGGER__WARNING("Failed to dequeue callback_id because the queue is empty, status={}", HAILO_TIMEOUT);
return make_unexpected(callback_id.status());
}
CHECK_EXPECTED(callback_id);
return callback_id;
}
hailo_status shutdown()
{
return m_shutdown_event->signal();
}
private:
std::mutex m_mutex;
uint32_t m_vdevice_handle;
// TODO: HRT-12346 - Use folly's MPMC? (for multiple devices)
SpscQueue<ProtoCallbackIdentifier> m_callbacks_ids_queue;
EventPtr m_shutdown_event;
};
} /* namespace hailort */
#endif /* _HAILO_VDEVICE_CALLBACKS_QUEUE_HPP_ */

View File

@@ -1,8 +1,9 @@
cmake_minimum_required(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
include(GNUInstallDirs) include(GNUInstallDirs)
include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/cli11.cmake)
include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/json.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/json.cmake)
include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake)
include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake)
include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/dotwriter.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/dotwriter.cmake)

View File

@@ -142,6 +142,8 @@ static std::string identity_arch_string(const hailo_device_identity_t &identity)
return "HAILO15H"; return "HAILO15H";
case HAILO_ARCH_PLUTO: case HAILO_ARCH_PLUTO:
return "PLUTO"; return "PLUTO";
case HAILO_ARCH_HAILO15M:
return "HAILO15M";
default: default:
return "Unknown"; return "Unknown";
} }
@@ -222,7 +224,7 @@ hailo_status FwControlTestMemoriesCommand::execute_on_device(Device &device)
auto status = device.test_chip_memories(); auto status = device.test_chip_memories();
CHECK_SUCCESS(status, "Failed memory test"); CHECK_SUCCESS(status, "Failed memory test");
std::cout << "Memory test has completed succesfully" << std::endl; std::cout << "Memory test has completed successfully" << std::endl;
return HAILO_SUCCESS; return HAILO_SUCCESS;
} }

View File

@@ -17,11 +17,13 @@ size_t NetworkLiveTrack::max_ng_name = 0;
std::mutex NetworkLiveTrack::mutex; std::mutex NetworkLiveTrack::mutex;
NetworkLiveTrack::NetworkLiveTrack(const std::string &name, std::shared_ptr<ConfiguredNetworkGroup> cng, NetworkLiveTrack::NetworkLiveTrack(const std::string &name, std::shared_ptr<ConfiguredNetworkGroup> cng,
LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path) : std::shared_ptr<ConfiguredInferModel> configured_infer_model, LatencyMeterPtr overall_latency_meter,
bool measure_fps, const std::string &hef_path) :
m_name(name), m_name(name),
m_count(0), m_count(0),
m_last_get_time(), m_last_get_time(),
m_cng(cng), m_cng(cng),
m_configured_infer_model(configured_infer_model),
m_overall_latency_meter(overall_latency_meter), m_overall_latency_meter(overall_latency_meter),
m_measure_fps(measure_fps), m_measure_fps(measure_fps),
m_hef_path(hef_path), m_hef_path(hef_path),
@@ -70,12 +72,22 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
ss << fmt::format("{}fps: {:.2f}", get_separator(), fps); ss << fmt::format("{}fps: {:.2f}", get_separator(), fps);
} }
auto hw_latency_measurement = m_cng->get_latency_measurement(); if (m_cng) {
if (hw_latency_measurement) { auto hw_latency_measurement = m_cng->get_latency_measurement();
ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency)); if (hw_latency_measurement) {
ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
} else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
ss << fmt::format("{}hw latency: NaN (err)", get_separator());
}
} }
else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it else {
ss << fmt::format("{}hw latency: NaN (err)", get_separator()); auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement();
if (hw_latency_measurement) {
ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
}
else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
ss << fmt::format("{}hw latency: NaN (err)", get_separator());
}
} }
if (m_overall_latency_meter) { if (m_overall_latency_meter) {
@@ -112,10 +124,19 @@ void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json)
network_group_json["FPS"] = std::to_string(fps); network_group_json["FPS"] = std::to_string(fps);
} }
auto hw_latency_measurement = m_cng->get_latency_measurement(); if (m_cng) {
if (hw_latency_measurement){ auto hw_latency_measurement = m_cng->get_latency_measurement();
network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); if (hw_latency_measurement){
network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency);
}
} }
else {
auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement();
if (hw_latency_measurement){
network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency);
}
}
if (m_overall_latency_meter){ if (m_overall_latency_meter){
auto overall_latency_measurement = m_overall_latency_meter->get_latency(false); auto overall_latency_measurement = m_overall_latency_meter->get_latency(false);

View File

@@ -11,6 +11,7 @@
#define _HAILO_HAILORTCLI_RUN2_NETWORK_LIVE_TRACK_HPP_ #define _HAILO_HAILORTCLI_RUN2_NETWORK_LIVE_TRACK_HPP_
#include "hailo/hailort.h" #include "hailo/hailort.h"
#include "hailo/infer_model.hpp"
#include "hailo/network_group.hpp" #include "hailo/network_group.hpp"
#include "common/latency_meter.hpp" #include "common/latency_meter.hpp"
@@ -24,7 +25,8 @@ class NetworkLiveTrack : public LiveStats::Track
{ {
public: public:
NetworkLiveTrack(const std::string &name, std::shared_ptr<hailort::ConfiguredNetworkGroup> cng, NetworkLiveTrack(const std::string &name, std::shared_ptr<hailort::ConfiguredNetworkGroup> cng,
hailort::LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path); std::shared_ptr<hailort::ConfiguredInferModel> configured_infer_model,
hailort::LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path);
virtual ~NetworkLiveTrack() = default; virtual ~NetworkLiveTrack() = default;
virtual hailo_status start_impl() override; virtual hailo_status start_impl() override;
virtual uint32_t push_text_impl(std::stringstream &ss) override; virtual uint32_t push_text_impl(std::stringstream &ss) override;
@@ -44,6 +46,7 @@ private:
std::atomic<uint32_t> m_count; std::atomic<uint32_t> m_count;
std::chrono::time_point<std::chrono::steady_clock> m_last_get_time; std::chrono::time_point<std::chrono::steady_clock> m_last_get_time;
std::shared_ptr<hailort::ConfiguredNetworkGroup> m_cng; std::shared_ptr<hailort::ConfiguredNetworkGroup> m_cng;
std::shared_ptr<hailort::ConfiguredInferModel> m_configured_infer_model;
hailort::LatencyMeterPtr m_overall_latency_meter; hailort::LatencyMeterPtr m_overall_latency_meter;
const bool m_measure_fps; const bool m_measure_fps;
const std::string &m_hef_path; const std::string &m_hef_path;

View File

@@ -87,9 +87,9 @@ StreamParams::StreamParams() : IoParams(), flags(HAILO_STREAM_FLAGS_NONE)
} }
NetworkParams::NetworkParams() : hef_path(), net_group_name(), vstream_params(), stream_params(), NetworkParams::NetworkParams() : hef_path(), net_group_name(), vstream_params(), stream_params(),
scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), batch_size(HAILO_DEFAULT_BATCH_SIZE), scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), multi_process_service(false),
scheduler_threshold(0), scheduler_timeout_ms(0), framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false), batch_size(HAILO_DEFAULT_BATCH_SIZE), scheduler_threshold(0), scheduler_timeout_ms(0),
measure_overall_latency(false) framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false),measure_overall_latency(false)
{ {
} }
@@ -99,112 +99,216 @@ NetworkRunner::NetworkRunner(const NetworkParams &params, const std::string &nam
m_params(params), m_params(params),
m_name(name), m_name(name),
m_cng(cng), m_cng(cng),
m_infer_model(nullptr),
m_configured_infer_model(nullptr),
m_overall_latency_meter(nullptr), m_overall_latency_meter(nullptr),
m_latency_barrier(nullptr), m_latency_barrier(nullptr),
m_last_measured_fps(0) m_last_measured_fps(0)
{ {
} }
NetworkRunner::NetworkRunner(const NetworkParams &params, const std::string &name, VDevice &vdevice,
std::shared_ptr<InferModel> infer_model, std::shared_ptr<ConfiguredInferModel> configured_infer_model) :
m_vdevice(vdevice),
m_params(params),
m_name(name),
m_cng(nullptr),
m_infer_model(infer_model),
m_configured_infer_model(configured_infer_model),
m_overall_latency_meter(nullptr),
m_latency_barrier(nullptr)
{
}
Expected<std::string> NetworkRunner::get_network_group_name(const NetworkParams &params, const Hef &hef)
{
// Get NG's name if single
auto net_group_name = params.net_group_name;
// if net_group_name is an empty string - take the name from hef
if (net_group_name.empty()) {
auto net_groups_names = hef.get_network_groups_names();
CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", params.hef_path);
net_group_name = net_groups_names[0];
}
return net_group_name;
}
Expected<std::shared_ptr<FullAsyncNetworkRunner>> FullAsyncNetworkRunner::create_shared(VDevice &vdevice,
NetworkParams params)
{
auto infer_model = vdevice.create_infer_model(params.hef_path);
CHECK_EXPECTED(infer_model);
auto infer_model_ptr = infer_model.release();
auto expected_net_group_name = get_network_group_name(params, infer_model_ptr->hef());
CHECK_EXPECTED(expected_net_group_name);
/* Configure Params */
infer_model_ptr->set_batch_size(params.batch_size);
if (params.batch_size == HAILO_DEFAULT_BATCH_SIZE) {
// Changing batch_size to 1 (after configuring the vdevice) - as we iterate over 'params.batch_size' in latency measurements scenarios
params.batch_size = 1;
}
if (params.measure_hw_latency) {
infer_model_ptr->set_hw_latency_measurement_flags(HAILO_LATENCY_MEASURE);
}
/* Pipeline Params */
for (const auto &input_name : infer_model_ptr->get_input_names()) {
auto input_params_it = std::find_if(params.vstream_params.begin(), params.vstream_params.end(),
[&input_name](const VStreamParams &params) -> bool {
return params.name == input_name;
});
auto input_params = (input_params_it == params.vstream_params.end()) ? VStreamParams() : *input_params_it;
auto input_config = infer_model_ptr->input(input_name);
CHECK_EXPECTED(input_config);
input_config->set_format_order(input_params.params.user_buffer_format.order);
input_config->set_format_type(input_params.params.user_buffer_format.type);
}
for (const auto &output_name : infer_model_ptr->get_output_names()) {
auto output_params_it = std::find_if(params.vstream_params.begin(), params.vstream_params.end(),
[&output_name](const VStreamParams &params) -> bool {
return params.name == output_name;
});
auto output_params = (output_params_it == params.vstream_params.end()) ? VStreamParams() : *output_params_it;
auto output_config = infer_model_ptr->output(output_name);
CHECK_EXPECTED(output_config);
output_config->set_format_order(output_params.params.user_buffer_format.order);
output_config->set_format_type(output_params.params.user_buffer_format.type);
}
auto configured_model = infer_model_ptr->configure();
CHECK_EXPECTED(configured_model);
auto configured_infer_model_ptr = make_shared_nothrow<ConfiguredInferModel>(configured_model.release());
CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_ptr, HAILO_OUT_OF_HOST_MEMORY);
auto res = make_shared_nothrow<FullAsyncNetworkRunner>(params, expected_net_group_name.value(), vdevice,
infer_model_ptr, configured_infer_model_ptr);
CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY);
if (params.measure_overall_latency || params.measure_hw_latency) {
CHECK_AS_EXPECTED((1 == res->get_input_names().size()), HAILO_INVALID_OPERATION,
"Latency measurement over multiple inputs network is not supported");
if (params.measure_overall_latency) {
auto overall_latency_meter = make_shared_nothrow<LatencyMeter>(std::set<std::string>{ "INFERENCE" }, // Since we check 'infer()' with single callback, we only address 1 output
OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH);
CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY);
res->set_overall_latency_meter(overall_latency_meter);
}
// We use a barrier for both hw and overall latency
auto latency_barrier = make_shared_nothrow<Barrier>(1); // Only 1 frame at a time
CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY);
res->set_latency_barrier(latency_barrier);
}
return res;
}
Expected<std::shared_ptr<NetworkRunner>> NetworkRunner::create_shared(VDevice &vdevice, const NetworkParams &params) Expected<std::shared_ptr<NetworkRunner>> NetworkRunner::create_shared(VDevice &vdevice, const NetworkParams &params)
{ {
// The network params passed to the NetworkRunner may be changed by this function, hence we copy them. // The network params passed to the NetworkRunner may be changed by this function, hence we copy them.
auto final_net_params = params; auto final_net_params = params;
auto hef = Hef::create(final_net_params.hef_path);
CHECK_EXPECTED(hef);
// Get NG's name if single
auto net_group_name = final_net_params.net_group_name;
if (net_group_name.empty()) {
auto net_groups_names = hef->get_network_groups_names();
CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", final_net_params.hef_path);
net_group_name = net_groups_names[0];
}
auto cfg_params = vdevice.create_configure_params(hef.value(), net_group_name);
CHECK_EXPECTED(cfg_params);
cfg_params->batch_size = final_net_params.batch_size;
if (final_net_params.batch_size == HAILO_DEFAULT_BATCH_SIZE) {
// Changing batch_size to 1. If HAILO_DEFAULT_BATCH_SIZE is configured, the sched will send one frame per batch
final_net_params.batch_size = 1;
}
if (final_net_params.measure_hw_latency) {
cfg_params->latency |= HAILO_LATENCY_MEASURE;
}
if (final_net_params.is_async()) {
for (auto &stream_name_params_pair : cfg_params->stream_params_by_name) {
stream_name_params_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC;
}
}
auto cfgr_net_groups = vdevice.configure(hef.value(), {{net_group_name, cfg_params.value()}});
CHECK_EXPECTED(cfgr_net_groups);
assert(1 == cfgr_net_groups->size());
auto cfgr_net_group = cfgr_net_groups.value()[0];
if (HAILO_SCHEDULING_ALGORITHM_NONE!= final_net_params.scheduling_algorithm) {
CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(final_net_params.scheduler_threshold));
CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(final_net_params.scheduler_timeout_ms)));
CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(final_net_params.scheduler_priority));
}
std::shared_ptr<NetworkRunner> net_runner_ptr = nullptr; std::shared_ptr<NetworkRunner> net_runner_ptr = nullptr;
switch (final_net_params.mode) if (InferenceMode::FULL_ASYNC == final_net_params.mode) {
{ auto runner_exp = FullAsyncNetworkRunner::create_shared(vdevice, final_net_params);
case InferenceMode::FULL: CHECK_EXPECTED(runner_exp);
{ net_runner_ptr = runner_exp.release();
std::map<std::string, hailo_vstream_params_t> vstreams_params; } else {
for (auto &vstream_params : final_net_params.vstream_params) { auto hef = Hef::create(final_net_params.hef_path);
vstreams_params.emplace(vstream_params.name, vstream_params.params); CHECK_EXPECTED(hef);
auto expected_net_group_name = get_network_group_name(final_net_params, hef.value());
CHECK_EXPECTED(expected_net_group_name);
auto cfg_params = vdevice.create_configure_params(hef.value(), expected_net_group_name.value());
CHECK_EXPECTED(cfg_params);
cfg_params->batch_size = final_net_params.batch_size;
if (final_net_params.batch_size == HAILO_DEFAULT_BATCH_SIZE) {
// Changing batch_size to 1 (after configuring the vdevice) - as we iterate over 'final_net_params.batch_size' in latency measurements scenarios
final_net_params.batch_size = 1;
} }
auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params); if (final_net_params.measure_hw_latency) {
CHECK_EXPECTED(vstreams); cfg_params->latency |= HAILO_LATENCY_MEASURE;
}
if (final_net_params.is_async()) {
for (auto &stream_name_params_pair : cfg_params->stream_params_by_name) {
stream_name_params_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC;
}
}
auto cfgr_net_groups = vdevice.configure(hef.value(), {{expected_net_group_name.value(), cfg_params.value()}});
CHECK_EXPECTED(cfgr_net_groups);
assert(1 == cfgr_net_groups->size());
auto cfgr_net_group = cfgr_net_groups.value()[0];
auto net_runner = make_shared_nothrow<FullNetworkRunner>(final_net_params, net_group_name, vdevice, if (HAILO_SCHEDULING_ALGORITHM_NONE != final_net_params.scheduling_algorithm) {
std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group); CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(final_net_params.scheduler_threshold));
CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(final_net_params.scheduler_timeout_ms)));
net_runner_ptr = std::static_pointer_cast<NetworkRunner>(net_runner); CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(final_net_params.scheduler_priority));
break;
}
case InferenceMode::RAW: // Fallthrough
case InferenceMode::RAW_ASYNC: // Fallthrough
case InferenceMode::RAW_ASYNC_SINGLE_THREAD:
{
auto input_streams = cfgr_net_group->get_input_streams();
CHECK_AS_EXPECTED(input_streams.size() > 0, HAILO_INTERNAL_FAILURE);
auto output_streams = cfgr_net_group->get_output_streams();
CHECK_AS_EXPECTED(output_streams.size() > 0, HAILO_INTERNAL_FAILURE);
auto net_runner = make_shared_nothrow<RawNetworkRunner>(final_net_params, net_group_name, vdevice,
std::move(input_streams), std::move(output_streams), cfgr_net_group);
CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY);
net_runner_ptr = std::static_pointer_cast<NetworkRunner>(net_runner);
break;
}
default:
// Shouldn't get here
return make_unexpected(HAILO_INTERNAL_FAILURE);
}
if (final_net_params.measure_overall_latency || final_net_params.measure_hw_latency) {
auto input_names = net_runner_ptr->get_input_names();
auto output_names = net_runner_ptr->get_output_names();
CHECK_AS_EXPECTED((1 == input_names.size()), HAILO_INVALID_OPERATION,
"Latency measurement over multiple inputs network is not supported");
if (final_net_params.measure_overall_latency) {
auto overall_latency_meter = make_shared_nothrow<LatencyMeter>(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH);
CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY);
net_runner_ptr->set_overall_latency_meter(overall_latency_meter);
} }
// We use a barrier for both hw and overall latency switch (final_net_params.mode)
auto latency_barrier = make_shared_nothrow<Barrier>(input_names.size() + output_names.size()); {
CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY); case InferenceMode::FULL:
net_runner_ptr->set_latency_barrier(latency_barrier); {
std::map<std::string, hailo_vstream_params_t> vstreams_params;
for (auto &vstream_params : final_net_params.vstream_params) {
vstreams_params.emplace(vstream_params.name, vstream_params.params);
}
auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params);
CHECK_EXPECTED(vstreams);
auto net_runner = make_shared_nothrow<FullNetworkRunner>(final_net_params, expected_net_group_name.value(), vdevice,
std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group);
CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY);
net_runner_ptr = std::static_pointer_cast<NetworkRunner>(net_runner);
break;
}
case InferenceMode::RAW: // Fallthrough
case InferenceMode::RAW_ASYNC: // Fallthrough
case InferenceMode::RAW_ASYNC_SINGLE_THREAD:
{
auto input_streams = cfgr_net_group->get_input_streams();
CHECK_AS_EXPECTED(input_streams.size() > 0, HAILO_INTERNAL_FAILURE);
auto output_streams = cfgr_net_group->get_output_streams();
CHECK_AS_EXPECTED(output_streams.size() > 0, HAILO_INTERNAL_FAILURE);
auto net_runner = make_shared_nothrow<RawNetworkRunner>(final_net_params, expected_net_group_name.value(), vdevice,
std::move(input_streams), std::move(output_streams), cfgr_net_group);
CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY);
net_runner_ptr = std::static_pointer_cast<NetworkRunner>(net_runner);
break;
}
default:
// Shouldn't get here
return make_unexpected(HAILO_INTERNAL_FAILURE);
}
if (final_net_params.measure_overall_latency || final_net_params.measure_hw_latency) {
auto input_names = net_runner_ptr->get_input_names();
auto output_names = net_runner_ptr->get_output_names();
CHECK_AS_EXPECTED((1 == input_names.size()), HAILO_INVALID_OPERATION,
"Latency measurement over multiple inputs network is not supported");
if (final_net_params.measure_overall_latency) {
auto overall_latency_meter = make_shared_nothrow<LatencyMeter>(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH);
CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY);
net_runner_ptr->set_overall_latency_meter(overall_latency_meter);
}
// We use a barrier for both hw and overall latency
auto latency_barrier = make_shared_nothrow<Barrier>(input_names.size() + output_names.size());
CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY);
net_runner_ptr->set_latency_barrier(latency_barrier);
}
} }
return net_runner_ptr; return net_runner_ptr;
@@ -222,17 +326,19 @@ hailo_status NetworkRunner::run(EventPtr shutdown_event, LiveStats &live_stats,
{ {
auto ang = std::unique_ptr<ActivatedNetworkGroup>(nullptr); auto ang = std::unique_ptr<ActivatedNetworkGroup>(nullptr);
if (HAILO_SCHEDULING_ALGORITHM_NONE == m_params.scheduling_algorithm) { if (HAILO_SCHEDULING_ALGORITHM_NONE == m_params.scheduling_algorithm) {
auto ang_exp = m_cng->activate(); if (m_cng) {
if (!ang_exp) { auto ang_exp = m_cng->activate();
activation_barrier.terminate(); if (!ang_exp) {
activation_barrier.terminate();
}
CHECK_EXPECTED_AS_STATUS(ang_exp);
ang = ang_exp.release();
} }
CHECK_EXPECTED_AS_STATUS(ang_exp);
ang = ang_exp.release();
} }
// If we measure latency (hw or overall) we send frames one at a time. Hence we don't measure fps. // If we measure latency (hw or overall) we send frames one at a time. Hence we don't measure fps.
const auto measure_fps = !m_params.measure_hw_latency && !m_params.measure_overall_latency; const auto measure_fps = !m_params.measure_hw_latency && !m_params.measure_overall_latency;
auto net_live_track = std::make_shared<NetworkLiveTrack>(m_name, m_cng, m_overall_latency_meter, measure_fps, m_params.hef_path); auto net_live_track = std::make_shared<NetworkLiveTrack>(m_name, m_cng, m_configured_infer_model, m_overall_latency_meter, measure_fps, m_params.hef_path);
live_stats.add(net_live_track, 1); //support progress over multiple outputs live_stats.add(net_live_track, 1); //support progress over multiple outputs
#if defined(_MSC_VER) #if defined(_MSC_VER)
@@ -241,7 +347,7 @@ hailo_status NetworkRunner::run(EventPtr shutdown_event, LiveStats &live_stats,
activation_barrier.arrive_and_wait(); activation_barrier.arrive_and_wait();
if (m_params.mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD) { if ((InferenceMode::RAW_ASYNC_SINGLE_THREAD == m_params.mode) || (InferenceMode::FULL_ASYNC == m_params.mode)) {
return run_single_thread_async_infer(shutdown_event, net_live_track); return run_single_thread_async_infer(shutdown_event, net_live_track);
} else { } else {
auto threads = start_inference_threads(shutdown_event, net_live_track); auto threads = start_inference_threads(shutdown_event, net_live_track);
@@ -278,17 +384,6 @@ double NetworkRunner::get_last_measured_fps()
return m_last_measured_fps; return m_last_measured_fps;
} }
hailo_vstream_params_t update_quantize_flag_in_vstream_param(const hailo_vstream_info_t &vstream_info, const hailo_vstream_params_t &old_vstream_params)
{
hailo_vstream_params_t res = old_vstream_params;
if ((HAILO_FORMAT_TYPE_FLOAT32 == old_vstream_params.user_buffer_format.type) || (HailoRTCommon::is_nms(vstream_info))) {
res.user_buffer_format.flags &= (~HAILO_FORMAT_FLAGS_QUANTIZED);
} else {
res.user_buffer_format.flags |= (HAILO_FORMAT_FLAGS_QUANTIZED);
}
return res;
}
Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> NetworkRunner::create_vstreams( Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> NetworkRunner::create_vstreams(
ConfiguredNetworkGroup &net_group, const std::map<std::string, hailo_vstream_params_t> &params) ConfiguredNetworkGroup &net_group, const std::map<std::string, hailo_vstream_params_t> &params)
{//TODO: support network name {//TODO: support network name
@@ -298,14 +393,11 @@ Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> Netwo
auto input_vstreams_info = net_group.get_input_vstream_infos(); auto input_vstreams_info = net_group.get_input_vstream_infos();
CHECK_EXPECTED(input_vstreams_info); CHECK_EXPECTED(input_vstreams_info);
for (auto &input_vstream_info : input_vstreams_info.value()) { for (auto &input_vstream_info : input_vstreams_info.value()) {
auto elem_it = params.find(input_vstream_info.name); if (params.end() != params.find(input_vstream_info.name)) {
if (elem_it != params.end()) {
auto vstream_param = update_quantize_flag_in_vstream_param(input_vstream_info, elem_it->second);
input_vstreams_params.emplace(input_vstream_info.name, vstream_param);
match_count++; match_count++;
input_vstreams_params.emplace(input_vstream_info.name, params.at(input_vstream_info.name));
} else { } else {
auto vstream_param = update_quantize_flag_in_vstream_param(input_vstream_info, HailoRTDefaults::get_vstreams_params()); input_vstreams_params.emplace(input_vstream_info.name, HailoRTDefaults::get_vstreams_params());
input_vstreams_params.emplace(input_vstream_info.name, vstream_param);
} }
} }
@@ -313,15 +405,11 @@ Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> Netwo
auto output_vstreams_info = net_group.get_output_vstream_infos(); auto output_vstreams_info = net_group.get_output_vstream_infos();
CHECK_EXPECTED(output_vstreams_info); CHECK_EXPECTED(output_vstreams_info);
for (auto &output_vstream_info : output_vstreams_info.value()) { for (auto &output_vstream_info : output_vstreams_info.value()) {
auto elem_it = params.find(output_vstream_info.name); if (params.end() != params.find(output_vstream_info.name)) {
if (elem_it != params.end()) {
auto vstream_param = update_quantize_flag_in_vstream_param(output_vstream_info, elem_it->second);
output_vstreams_params.emplace(output_vstream_info.name, vstream_param);
match_count++; match_count++;
} output_vstreams_params.emplace(output_vstream_info.name, params.at(output_vstream_info.name));
else { } else {
auto vstream_param = update_quantize_flag_in_vstream_param(output_vstream_info, HailoRTDefaults::get_vstreams_params()); output_vstreams_params.emplace(output_vstream_info.name, HailoRTDefaults::get_vstreams_params());
output_vstreams_params.emplace(output_vstream_info.name, vstream_param);
} }
} }
@@ -383,12 +471,7 @@ Expected<std::vector<AsyncThreadPtr<hailo_status>>> FullNetworkRunner::start_inf
void FullNetworkRunner::stop() void FullNetworkRunner::stop()
{ {
for (auto &input_vstream : m_input_vstreams) { (void) m_cng->shutdown();
(void) input_vstream.abort();
}
for (auto &output_vstream : m_output_vstreams) {
(void) output_vstream.abort();
}
} }
std::set<std::string> FullNetworkRunner::get_input_names() std::set<std::string> FullNetworkRunner::get_input_names()
@@ -423,6 +506,158 @@ VStreamParams FullNetworkRunner::get_params(const std::string &name)
return VStreamParams(); return VStreamParams();
} }
FullAsyncNetworkRunner::FullAsyncNetworkRunner(const NetworkParams &params, const std::string &name, VDevice &vdevice,
std::shared_ptr<InferModel> infer_model, std::shared_ptr<ConfiguredInferModel> configured_infer_model) :
NetworkRunner(params, name, vdevice, infer_model, configured_infer_model)
{
}
void FullAsyncNetworkRunner::stop()
{}
std::set<std::string> FullAsyncNetworkRunner::get_input_names()
{
std::set<std::string> results;
for (const auto &name : m_infer_model->get_input_names()) {
results.insert(name);
}
return results;
}
std::set<std::string> FullAsyncNetworkRunner::get_output_names()
{
std::set<std::string> results;
for (const auto &name : m_infer_model->get_output_names()) {
results.insert(name);
}
return results;
}
VStreamParams FullAsyncNetworkRunner::get_params(const std::string &name)
{
for (const auto &params : m_params.vstream_params) {
if (name == params.name) {
return params;
}
}
return VStreamParams();
}
Expected<AsyncInferJob> FullAsyncNetworkRunner::create_infer_job(const ConfiguredInferModel::Bindings &bindings,
std::weak_ptr<NetworkLiveTrack> net_live_track_weak, FramerateThrottle &frame_rate_throttle, hailo_status &inference_status)
{
frame_rate_throttle.throttle();
if (m_overall_latency_meter) {
m_overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch());
}
auto job = m_configured_infer_model->run_async(bindings, [=, &inference_status] (const AsyncInferCompletionInfo &completion_info) {
if (HAILO_SUCCESS != completion_info.status) {
inference_status = completion_info.status;
LOGGER__ERROR("Failed in infer async request");
return;
}
if (m_overall_latency_meter) {
m_overall_latency_meter->add_end_sample("INFERENCE", std::chrono::steady_clock::now().time_since_epoch());
}
if (auto net_live_track = net_live_track_weak.lock()) {
/* Using weak_ptr as net_live_track holds a reference to m_configured_infer_model (for stuff like latency measurement),
so there's a circular dependency */
net_live_track->progress();
}
});
CHECK_EXPECTED(job);
return job.release();
}
hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event,
std::shared_ptr<NetworkLiveTrack> net_live_track)
{
auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event);
std::map<std::string, Buffer> inputs_buffer_pool;
const uint8_t const_byte = 0xAB;
for (const auto &input_name : get_input_names()) {
inputs_buffer_pool[input_name] = {};
auto input_config = m_infer_model->input(input_name);
CHECK_EXPECTED_AS_STATUS(input_config);
auto params = get_params(input_name);
if (params.input_file_path.empty()) {
auto constant_buffer = Buffer::create(input_config->get_frame_size(), const_byte, BufferStorageParams::create_dma());
CHECK_EXPECTED_AS_STATUS(constant_buffer);
inputs_buffer_pool[input_name] = constant_buffer.release();
} else {
auto buffer = read_binary_file(params.input_file_path, BufferStorageParams::create_dma());
CHECK_EXPECTED_AS_STATUS(buffer);
inputs_buffer_pool[input_name] = buffer.release();
}
}
std::map<std::string, Buffer> outputs_buffer_pool;
for (const auto &output_name : get_output_names()) {
outputs_buffer_pool[output_name] = {};
auto output_config = m_infer_model->output(output_name);
CHECK_EXPECTED_AS_STATUS(output_config);
auto constant_buffer = Buffer::create(output_config->get_frame_size(), 0, BufferStorageParams::create_dma());
CHECK_EXPECTED_AS_STATUS(constant_buffer);
outputs_buffer_pool[output_name] = constant_buffer.release();
}
std::unique_ptr<ConfiguredInferModelActivationGuard> guard = nullptr;
if (HAILO_SCHEDULING_ALGORITHM_NONE != m_params.scheduling_algorithm) {
auto status = m_configured_infer_model->set_scheduler_threshold(m_params.scheduler_threshold);
CHECK_SUCCESS(status);
status = m_configured_infer_model->set_scheduler_timeout(std::chrono::milliseconds(m_params.scheduler_timeout_ms));
CHECK_SUCCESS(status);
status = m_configured_infer_model->set_scheduler_priority(m_params.scheduler_priority);
CHECK_SUCCESS(status);
} else {
auto guard_exp = ConfiguredInferModelActivationGuard::create(m_configured_infer_model);
CHECK_EXPECTED_AS_STATUS(guard_exp);
guard = guard_exp.release();
}
auto bindings = m_configured_infer_model->create_bindings();
CHECK_EXPECTED_AS_STATUS(bindings);
for (auto &pair : inputs_buffer_pool) {
auto &name = pair.first;
auto &buffer = pair.second;
bindings->input(name)->set_buffer(hailort::MemoryView(buffer));
}
for (auto &pair : outputs_buffer_pool) {
auto &name = pair.first;
auto &buffer = pair.second;
bindings->output(name)->set_buffer(hailort::MemoryView(buffer));
}
FramerateThrottle frame_rate_throttle(m_params.framerate);
AsyncInferJob last_job;
auto inference_status = HAILO_SUCCESS;
while (HAILO_TIMEOUT == shutdown_event->wait(std::chrono::milliseconds(0)) && (HAILO_SUCCESS == inference_status)) {
for (uint32_t frames_in_cycle = 0; frames_in_cycle < m_params.batch_size; frames_in_cycle++) {
if (HAILO_SUCCESS == m_configured_infer_model->wait_for_async_ready(HAILO_INFINITE_TIMEOUT)) {
auto job_exp = create_infer_job(*bindings, net_live_track, frame_rate_throttle, inference_status);
CHECK_EXPECTED_AS_STATUS(job_exp);
last_job = job_exp.release();
last_job.detach();
}
}
if (m_latency_barrier) {
// When measuring latency we want to send 'batch' frames at a time
last_job.wait(HAILO_INFINITE_TIMEOUT);
}
}
last_job.wait(HAILO_INFINITE_TIMEOUT);
return inference_status;
}
RawNetworkRunner::RawNetworkRunner(const NetworkParams &params, const std::string &name, VDevice &vdevice, RawNetworkRunner::RawNetworkRunner(const NetworkParams &params, const std::string &name, VDevice &vdevice,
InputStreamRefVector &&input_streams, OutputStreamRefVector &&output_streams, InputStreamRefVector &&input_streams, OutputStreamRefVector &&output_streams,
std::shared_ptr<ConfiguredNetworkGroup> cng) : std::shared_ptr<ConfiguredNetworkGroup> cng) :
@@ -570,12 +805,7 @@ hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_e
void RawNetworkRunner::stop() void RawNetworkRunner::stop()
{ {
for (auto &input_stream : m_input_streams) { m_cng->shutdown();
(void) input_stream.get().abort();
}
for (auto &output_stream : m_output_streams) {
(void) output_stream.get().abort();
}
} }
std::set<std::string> RawNetworkRunner::get_input_names() std::set<std::string> RawNetworkRunner::get_input_names()

View File

@@ -24,6 +24,7 @@
#include "hailo/vstream.hpp" #include "hailo/vstream.hpp"
#include "hailo/event.hpp" #include "hailo/event.hpp"
#include "hailo/network_group.hpp" #include "hailo/network_group.hpp"
#include "hailo/infer_model.hpp"
#include "hailo/expected.hpp" #include "hailo/expected.hpp"
#include "hailo/buffer.hpp" #include "hailo/buffer.hpp"
@@ -37,6 +38,7 @@ constexpr std::chrono::milliseconds SYNC_EVENT_TIMEOUT(1000);
enum class InferenceMode { enum class InferenceMode {
FULL, FULL,
FULL_ASYNC,
RAW, RAW,
RAW_ASYNC, RAW_ASYNC,
@@ -74,6 +76,7 @@ struct NetworkParams
std::vector<VStreamParams> vstream_params; std::vector<VStreamParams> vstream_params;
std::vector<StreamParams> stream_params; std::vector<StreamParams> stream_params;
hailo_scheduling_algorithm_t scheduling_algorithm; hailo_scheduling_algorithm_t scheduling_algorithm;
bool multi_process_service;
// Network parameters // Network parameters
uint16_t batch_size; uint16_t batch_size;
@@ -90,7 +93,7 @@ struct NetworkParams
bool is_async() const bool is_async() const
{ {
return (mode == InferenceMode::RAW_ASYNC) || (mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD); return (mode == InferenceMode::RAW_ASYNC) || (mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD) || (mode == InferenceMode::FULL_ASYNC);
} }
}; };
@@ -121,6 +124,8 @@ public:
NetworkRunner(const NetworkParams &params, const std::string &name, NetworkRunner(const NetworkParams &params, const std::string &name,
VDevice &vdevice, std::shared_ptr<ConfiguredNetworkGroup> cng); VDevice &vdevice, std::shared_ptr<ConfiguredNetworkGroup> cng);
NetworkRunner(const NetworkParams &params, const std::string &name,
VDevice &vdevice, std::shared_ptr<InferModel> infer_model, std::shared_ptr<ConfiguredInferModel> configured_infer_model);
virtual ~NetworkRunner() = default; virtual ~NetworkRunner() = default;
hailo_status run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier); hailo_status run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier);
@@ -134,6 +139,7 @@ public:
protected: protected:
static bool inference_succeeded(hailo_status status); static bool inference_succeeded(hailo_status status);
static Expected<std::string> get_network_group_name(const NetworkParams &params, const Hef &hef);
// Use 'inference_succeeded(async_thread->get())' to check for a thread's success // Use 'inference_succeeded(async_thread->get())' to check for a thread's success
virtual Expected<std::vector<AsyncThreadPtr<hailo_status>>> start_inference_threads(EventPtr shutdown_event, virtual Expected<std::vector<AsyncThreadPtr<hailo_status>>> start_inference_threads(EventPtr shutdown_event,
std::shared_ptr<NetworkLiveTrack> net_live_track) = 0; std::shared_ptr<NetworkLiveTrack> net_live_track) = 0;
@@ -304,6 +310,8 @@ protected:
const NetworkParams m_params; const NetworkParams m_params;
std::string m_name; std::string m_name;
std::shared_ptr<ConfiguredNetworkGroup> m_cng; std::shared_ptr<ConfiguredNetworkGroup> m_cng;
std::shared_ptr<InferModel> m_infer_model;
std::shared_ptr<ConfiguredInferModel> m_configured_infer_model;
LatencyMeterPtr m_overall_latency_meter; LatencyMeterPtr m_overall_latency_meter;
BarrierPtr m_latency_barrier; BarrierPtr m_latency_barrier;
double m_last_measured_fps; double m_last_measured_fps;
@@ -339,6 +347,70 @@ private:
std::vector<OutputVStream> m_output_vstreams; std::vector<OutputVStream> m_output_vstreams;
}; };
class FullAsyncNetworkRunner : public NetworkRunner
{
public:
class ConfiguredInferModelActivationGuard final {
public:
static Expected<std::unique_ptr<ConfiguredInferModelActivationGuard>> create(
std::shared_ptr<ConfiguredInferModel> configured_infer_model)
{
auto status = HAILO_UNINITIALIZED;
auto ptr = std::make_unique<ConfiguredInferModelActivationGuard>(ConfiguredInferModelActivationGuard(configured_infer_model, status));
CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
CHECK_SUCCESS_AS_EXPECTED(status);
return ptr;
}
~ConfiguredInferModelActivationGuard()
{
if (HAILO_SUCCESS == m_activation_status) {
(void)m_configured_infer_model->deactivate();
}
}
ConfiguredInferModelActivationGuard(const ConfiguredInferModelActivationGuard &) = delete;
ConfiguredInferModelActivationGuard &operator=(const ConfiguredInferModelActivationGuard &) = delete;
ConfiguredInferModelActivationGuard &operator=(ConfiguredInferModelActivationGuard &&other) = delete;
ConfiguredInferModelActivationGuard(ConfiguredInferModelActivationGuard &&other) :
m_configured_infer_model(other.m_configured_infer_model), m_activation_status(std::exchange(other.m_activation_status, HAILO_UNINITIALIZED))
{};
private:
ConfiguredInferModelActivationGuard(std::shared_ptr<ConfiguredInferModel> configured_infer_model, hailo_status &status) :
m_configured_infer_model(configured_infer_model), m_activation_status(HAILO_UNINITIALIZED)
{
status = m_configured_infer_model->activate();
m_activation_status = status;
}
std::shared_ptr<ConfiguredInferModel> m_configured_infer_model;
hailo_status m_activation_status;
};
static Expected<std::shared_ptr<FullAsyncNetworkRunner>> create_shared(VDevice &vdevice, NetworkParams params);
FullAsyncNetworkRunner(const NetworkParams &params, const std::string &name, VDevice &vdevice, std::shared_ptr<InferModel> infer_model,
std::shared_ptr<ConfiguredInferModel> configured_infer_model);
virtual Expected<std::vector<AsyncThreadPtr<hailo_status>>> start_inference_threads(EventPtr /*shutdown_event*/,
std::shared_ptr<NetworkLiveTrack> /*net_live_track*/) override
{
return make_unexpected(HAILO_NOT_IMPLEMENTED);
};
virtual hailo_status run_single_thread_async_infer(EventPtr, std::shared_ptr<NetworkLiveTrack>) override;
Expected<AsyncInferJob> create_infer_job(const ConfiguredInferModel::Bindings &bindings,
std::weak_ptr<NetworkLiveTrack> net_live_track, FramerateThrottle &frame_rate_throttle, hailo_status &inference_status);
virtual void stop() override;
virtual std::set<std::string> get_input_names() override;
virtual std::set<std::string> get_output_names() override;
VStreamParams get_params(const std::string &name);
};
class RawNetworkRunner : public NetworkRunner class RawNetworkRunner : public NetworkRunner
{ {
public: public:

View File

@@ -208,12 +208,6 @@ VStreamApp::VStreamApp(const std::string &description, const std::string &name,
{ "i420", HAILO_FORMAT_ORDER_I420 } { "i420", HAILO_FORMAT_ORDER_I420 }
})) }))
->default_val("auto"); ->default_val("auto");
auto quantized_option = format_opt_group->add_flag("-q,--quantized,!--no-quantized",
"Whether or not data is quantized. This flag is ignored - Determine if the data requires quantization is decided by the src-data and dst-data types.")
->default_val(true); // default_val() must be after run_callback_for_default()
hailo_deprecate_options(format_opt_group, { std::make_shared<OptionDeprecation>(quantized_option) }, false);
} }
CLI::Option* VStreamApp::add_flag_callback(CLI::App *app, const std::string &name, const std::string &description, CLI::Option* VStreamApp::add_flag_callback(CLI::App *app, const std::string &name, const std::string &description,
@@ -242,16 +236,6 @@ StreamApp::StreamApp(const std::string &description, const std::string &name, CL
add_option("--input-file", m_stream_params.input_file_path, add_option("--input-file", m_stream_params.input_file_path,
"Input file path. If not given, random data will be used. File format should be raw binary data with size that is a factor of the input shape size") "Input file path. If not given, random data will be used. File format should be raw binary data with size that is a factor of the input shape size")
->default_val(""); ->default_val("");
// TODO: async option (HRT-9580)
// TODO: flag callback?
// add_flag_callback(format_opt_group, "-q,--quantized,!--no-quantized", "Whether or not data is quantized",
// [this](bool result){
// m_params.params.user_buffer_format.flags = result ?
// static_cast<hailo_format_flags_t>(m_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) :
// static_cast<hailo_format_flags_t>(m_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));})
// ->run_callback_for_default()
// ->default_val(true); // default_val() must be after run_callback_for_default()
} }
/** NetworkGroupNameValidator */ /** NetworkGroupNameValidator */
@@ -294,9 +278,6 @@ NetworkApp::NetworkApp(const std::string &description, const std::string &name)
auto run_params = add_option_group("Run Parameters"); auto run_params = add_option_group("Run Parameters");
run_params->add_option("--framerate", m_params.framerate, "Input vStreams framerate")->default_val(UNLIMITED_FRAMERATE); run_params->add_option("--framerate", m_params.framerate, "Input vStreams framerate")->default_val(UNLIMITED_FRAMERATE);
// TODO: support multiple scheduling algorithms
m_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN;
auto vstream_subcommand = add_io_app_subcom<VStreamApp>("Set vStream", "set-vstream", hef_path_option, net_group_name_option); auto vstream_subcommand = add_io_app_subcom<VStreamApp>("Set vStream", "set-vstream", hef_path_option, net_group_name_option);
auto stream_subcommand = add_io_app_subcom<StreamApp>("Set Stream", "set-stream", hef_path_option, net_group_name_option); auto stream_subcommand = add_io_app_subcom<StreamApp>("Set Stream", "set-stream", hef_path_option, net_group_name_option);
// TODO: doesn't seam to be working (HRT-9886) // TODO: doesn't seam to be working (HRT-9886)
@@ -334,19 +315,22 @@ public:
InferenceMode get_mode() const; InferenceMode get_mode() const;
const std::string &get_output_json_path(); const std::string &get_output_json_path();
void set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm); void update_network_params();
void set_inference_mode();
void set_measure_latency();
void set_batch_size(uint16_t batch_size); void set_batch_size(uint16_t batch_size);
private: private:
void add_measure_fw_actions_subcom(); void add_measure_fw_actions_subcom();
void add_net_app_subcom(); void add_net_app_subcom();
bool is_ethernet_device() const;
void validate_and_set_scheduling_algorithm();
std::vector<NetworkParams> m_network_params; std::vector<NetworkParams> m_network_params;
uint32_t m_time_to_run; uint32_t m_time_to_run;
InferenceMode m_mode; InferenceMode m_mode;
hailo_scheduling_algorithm_t m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_MAX_ENUM;
std::string m_stats_json_path; std::string m_stats_json_path;
std::vector<std::string> m_device_id; std::vector<std::string> m_device_ids;
uint32_t m_device_count; uint32_t m_device_count;
bool m_multi_process_service; bool m_multi_process_service;
std::string m_group_id; std::string m_group_id;
@@ -373,26 +357,35 @@ Run2::Run2() : CLI::App("Run networks", "run2")
add_option("-m,--mode", m_mode, "Inference mode") add_option("-m,--mode", m_mode, "Inference mode")
->transform(HailoCheckedTransformer<InferenceMode>({ ->transform(HailoCheckedTransformer<InferenceMode>({
{ "full", InferenceMode::FULL }, { "full", InferenceMode::FULL },
{ "full_async", InferenceMode::FULL_ASYNC },
{ "raw", InferenceMode::RAW }, { "raw", InferenceMode::RAW },
{ "raw_async", InferenceMode::RAW_ASYNC }, { "raw_async", InferenceMode::RAW_ASYNC },
{ "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD, OptionVisibility::HIDDEN } { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD, OptionVisibility::HIDDEN }
}))->default_val("full"); }))->default_val("full");
add_option("-j,--json", m_stats_json_path, "If set save statistics as json to the specified path") add_option("-j,--json", m_stats_json_path, "If set save statistics as json to the specified path")
->default_val("") ->default_val("")
->check(FileSuffixValidator(JSON_SUFFIX)); ->check(FileSuffixValidator(JSON_SUFFIX));
add_option("--scheduling-algorithm", m_scheduling_algorithm, "Scheduling algorithm")
->transform(HailoCheckedTransformer<hailo_scheduling_algorithm_t>({
{ "round_robin", HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN },
{ "none", HAILO_SCHEDULING_ALGORITHM_NONE },
}));
auto vdevice_options_group = add_option_group("VDevice Options"); auto vdevice_options_group = add_option_group("VDevice Options");
auto dev_id_opt = vdevice_options_group->add_option("-s,--device-id", m_device_id, auto dev_id_opt = vdevice_options_group->add_option("-s,--device-id", m_device_ids,
"Device id, same as returned from `hailortcli scan` command. For multiple devices, use space as separator."); "Device id, same as returned from `hailortcli scan` command. For multiple devices, use space as separator.");
vdevice_options_group->add_option("--device-count", m_device_count, "VDevice device count") vdevice_options_group->add_option("--device-count", m_device_count, "VDevice device count")
->default_val(HAILO_DEFAULT_DEVICE_COUNT) ->default_val(HAILO_DEFAULT_DEVICE_COUNT)
->check(CLI::PositiveNumber) ->check(CLI::PositiveNumber)
->excludes(dev_id_opt); ->excludes(dev_id_opt);
vdevice_options_group->add_option("--group-id", m_group_id, "VDevice group id") vdevice_options_group->add_option("--group-id", m_group_id, "VDevice group id")
->default_val(HAILO_DEFAULT_VDEVICE_GROUP_ID); ->default_val(HAILO_DEFAULT_VDEVICE_GROUP_ID);
auto multi_process_flag = vdevice_options_group
->add_flag("--multi-process-service", m_multi_process_service,"VDevice multi process service")
->default_val(false);
auto measurement_options_group = add_option_group("Measurement Options"); auto measurement_options_group = add_option_group("Measurement Options");
@@ -411,21 +404,17 @@ Run2::Run2() : CLI::App("Run networks", "run2")
auto measure_temp_opt = measurement_options_group->add_flag("--measure-temp", m_measure_temp, "Measure chip temperature") auto measure_temp_opt = measurement_options_group->add_flag("--measure-temp", m_measure_temp, "Measure chip temperature")
->default_val(false); ->default_val(false);
auto multi_process_flag = vdevice_options_group->add_flag("--multi-process-service", m_multi_process_service, "VDevice multi process service")
->default_val(false);
if (VDevice::service_over_ip_mode()) { if (VDevice::service_over_ip_mode()) {
multi_process_flag multi_process_flag
->excludes(measure_power_opt) ->excludes(measure_power_opt)
->excludes(measure_current_opt) ->excludes(measure_current_opt)
->excludes(measure_temp_opt); ->excludes(measure_temp_opt);
// When working with service over ip - client doesn't have access to physical devices // When working with service over ip - client doesn't have access to physical devices
} else {
(void)measure_power_opt;
(void)measure_current_opt;
(void)measure_temp_opt;
(void)multi_process_flag;
} }
parse_complete_callback([this]() {
validate_and_set_scheduling_algorithm();
});
} }
void Run2::add_measure_fw_actions_subcom() void Run2::add_measure_fw_actions_subcom()
@@ -510,8 +499,8 @@ bool Run2::get_measure_overall_latency()
std::vector<hailo_device_id_t> Run2::get_dev_ids() std::vector<hailo_device_id_t> Run2::get_dev_ids()
{ {
std::vector<hailo_device_id_t> res; std::vector<hailo_device_id_t> res;
res.reserve(m_device_id.size()); res.reserve(m_device_ids.size());
for (auto &id_str : m_device_id) { for (auto &id_str : m_device_ids) {
hailo_device_id_t id = {}; hailo_device_id_t id = {};
std::memset(id.id, 0, sizeof(id.id)); std::memset(id.id, 0, sizeof(id.id));
std::strncpy(id.id, id_str.c_str(), sizeof(id.id) - 1); std::strncpy(id.id, id_str.c_str(), sizeof(id.id) - 1);
@@ -525,25 +514,14 @@ uint32_t Run2::get_device_count()
return m_device_count; return m_device_count;
} }
void Run2::set_inference_mode() void Run2::update_network_params()
{ {
for (auto &params : m_network_params) { for (auto &params : m_network_params) {
params.mode = m_mode; params.mode = m_mode;
} params.multi_process_service = m_multi_process_service;
}
void Run2::set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm)
{
for (auto &params: m_network_params) {
params.scheduling_algorithm = scheduling_algorithm;
}
}
void Run2::set_measure_latency()
{
for (auto &params : m_network_params) {
params.measure_hw_latency = m_measure_hw_latency; params.measure_hw_latency = m_measure_hw_latency;
params.measure_overall_latency = m_measure_overall_latency; params.measure_overall_latency = m_measure_overall_latency;
params.scheduling_algorithm = m_scheduling_algorithm;
} }
} }
@@ -584,6 +562,51 @@ const std::string &Run2::get_output_json_path()
return m_stats_json_path; return m_stats_json_path;
} }
static bool is_valid_ip(const std::string &ip)
{
int a,b,c,d;
return (4 == sscanf(ip.c_str(),"%d.%d.%d.%d", &a, &b, &c, &d)) &&
IS_FIT_IN_UINT8(a) && IS_FIT_IN_UINT8(b) && IS_FIT_IN_UINT8(c) && IS_FIT_IN_UINT8(d);
}
bool Run2::is_ethernet_device() const
{
if (m_device_ids.empty()) {
// By default, if no device ids are given we don't scan for ethernet devices.
return false;
}
return is_valid_ip(m_device_ids[0]);
}
void Run2::validate_and_set_scheduling_algorithm()
{
if (m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE) {
PARSE_CHECK(1 == get_network_params().size(), "When setting --scheduling-algorithm=none only one model is allowed");
}
if (is_ethernet_device()) {
PARSE_CHECK((m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_MAX_ENUM) ||
(m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE),
"On ethernet devices, only --scheduling-algorithm=none is supported");
PARSE_CHECK(1 == get_network_params().size(), "On Ethernet device only one model is allowed");
m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE;
}
if (get_measure_fw_actions()) {
PARSE_CHECK((m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_MAX_ENUM) ||
(m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE),
"When measuring fw actions, only --scheduling-algorithm=none is allowed");
PARSE_CHECK(1 == get_network_params().size(),
"Only one model is allowed when measuring fw actions");
m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE;
}
if (HAILO_SCHEDULING_ALGORITHM_MAX_ENUM == m_scheduling_algorithm) {
// algorithm wasn't passed, using ROUND_ROBIN as default
m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN;
}
}
/** Run2Command */ /** Run2Command */
Run2Command::Run2Command(CLI::App &parent_app) : Command(parent_app.add_subcommand(std::make_shared<Run2>())) Run2Command::Run2Command(CLI::App &parent_app) : Command(parent_app.add_subcommand(std::make_shared<Run2>()))
{ {
@@ -602,18 +625,13 @@ static hailo_status wait_for_threads(std::vector<AsyncThreadPtr<hailo_status>> &
return last_error_status; return last_error_status;
} }
bool is_valid_ip(const std::string &ip)
{
int a,b,c,d;
return (4 == sscanf(ip.c_str(),"%d.%d.%d.%d", &a, &b, &c, &d)) &&
IS_FIT_IN_UINT8(a) && IS_FIT_IN_UINT8(b) && IS_FIT_IN_UINT8(c) && IS_FIT_IN_UINT8(d);
}
std::string get_str_infer_mode(const InferenceMode& infer_mode) std::string get_str_infer_mode(const InferenceMode& infer_mode)
{ {
switch(infer_mode){ switch(infer_mode){
case InferenceMode::FULL: case InferenceMode::FULL:
return "full"; return "full";
case InferenceMode::FULL_ASYNC:
return "full_async";
case InferenceMode::RAW: case InferenceMode::RAW:
return "raw"; return "raw";
case InferenceMode::RAW_ASYNC: case InferenceMode::RAW_ASYNC:
@@ -655,12 +673,6 @@ Expected<std::unique_ptr<VDevice>> Run2::create_vdevice()
if (!dev_ids.empty()) { if (!dev_ids.empty()) {
vdevice_params.device_count = static_cast<uint32_t>(dev_ids.size()); vdevice_params.device_count = static_cast<uint32_t>(dev_ids.size());
vdevice_params.device_ids = dev_ids.data(); vdevice_params.device_ids = dev_ids.data();
// Disable scheduler for eth VDevice
if ((1 == dev_ids.size()) && (is_valid_ip(dev_ids[0].id))) {
vdevice_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE;
CHECK_AS_EXPECTED(1 == get_network_params().size(), HAILO_INVALID_OPERATION, "On Ethernet inference only one model is allowed");
set_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_NONE);
}
} else { } else {
vdevice_params.device_count = get_device_count(); vdevice_params.device_count = get_device_count();
} }
@@ -672,13 +684,12 @@ Expected<std::unique_ptr<VDevice>> Run2::create_vdevice()
CHECK_AS_EXPECTED(!(get_measure_hw_latency() || get_measure_overall_latency()), HAILO_INVALID_OPERATION, "Latency measurement is not allowed when collecting runtime data"); CHECK_AS_EXPECTED(!(get_measure_hw_latency() || get_measure_overall_latency()), HAILO_INVALID_OPERATION, "Latency measurement is not allowed when collecting runtime data");
CHECK_AS_EXPECTED((get_mode() == InferenceMode::RAW) || (get_mode() == InferenceMode::RAW_ASYNC), HAILO_INVALID_OPERATION, CHECK_AS_EXPECTED((get_mode() == InferenceMode::RAW) || (get_mode() == InferenceMode::RAW_ASYNC), HAILO_INVALID_OPERATION,
"'measure-fw-actions' is only supported with '--mode=raw'. Received mode: '{}'", get_str_infer_mode(get_mode())); "'measure-fw-actions' is only supported with '--mode=raw'. Received mode: '{}'", get_str_infer_mode(get_mode()));
vdevice_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE;
set_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_NONE);
} }
vdevice_params.group_id = get_group_id().c_str(); vdevice_params.group_id = get_group_id().c_str();
vdevice_params.multi_process_service = get_multi_process_service(); vdevice_params.multi_process_service = get_multi_process_service();
assert(HAILO_SCHEDULING_ALGORITHM_MAX_ENUM != m_scheduling_algorithm);
vdevice_params.scheduling_algorithm = m_scheduling_algorithm;
return VDevice::create(vdevice_params); return VDevice::create(vdevice_params);
} }
@@ -757,8 +768,7 @@ hailo_status Run2Command::execute()
{ {
Run2 *app = reinterpret_cast<Run2*>(m_app); Run2 *app = reinterpret_cast<Run2*>(m_app);
app->set_inference_mode(); app->update_network_params();
app->set_measure_latency();
CHECK(0 < app->get_network_params().size(), HAILO_INVALID_OPERATION, "Nothing to run"); CHECK(0 < app->get_network_params().size(), HAILO_INVALID_OPERATION, "Nothing to run");
@@ -767,7 +777,7 @@ hailo_status Run2Command::execute()
LOGGER__WARNING("Measuring latency; frames are sent one at a time and FPS will not be measured"); LOGGER__WARNING("Measuring latency; frames are sent one at a time and FPS will not be measured");
} }
if (1 == app->get_network_params().size()) { if (1 == app->get_network_params().size() && (HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN == app->get_network_params().begin()->scheduling_algorithm)) {
LOGGER__WARNING("\"hailortcli run2\" is not optimized for single model usage. It is recommended to use \"hailortcli run\" command for a single model"); LOGGER__WARNING("\"hailortcli run2\" is not optimized for single model usage. It is recommended to use \"hailortcli run\" command for a single model");
} }

View File

@@ -445,28 +445,6 @@ hailo_status recv_loop(const inference_runner_params &params, RecvObject &recv_o
return HAILO_SUCCESS; return HAILO_SUCCESS;
} }
template<typename SendObject, typename RecvObject>
hailo_status abort_streams(std::vector<std::reference_wrapper<SendObject>> &send_objects,
std::vector<std::reference_wrapper<RecvObject>> &recv_objects)
{
auto status = HAILO_SUCCESS; // Best effort
for (auto &output_stream : recv_objects) {
auto abort_status = output_stream.get().abort();
if (HAILO_SUCCESS != abort_status) {
LOGGER__ERROR("Failed to abort output stream {}", output_stream.get().name());
status = abort_status;
}
}
for (auto &input_stream : send_objects) {
auto abort_status = input_stream.get().abort();
if (HAILO_SUCCESS != abort_status) {
LOGGER__ERROR("Failed to abort input stream {}", input_stream.get().name());
status = abort_status;
}
}
return status;
}
Expected<std::map<std::string, std::vector<InputVStream>>> create_input_vstreams(ConfiguredNetworkGroup &configured_net_group, Expected<std::map<std::string, std::vector<InputVStream>>> create_input_vstreams(ConfiguredNetworkGroup &configured_net_group,
const inference_runner_params &params) const inference_runner_params &params)
{ {
@@ -705,7 +683,7 @@ static hailo_status run_streaming_impl(std::shared_ptr<ConfiguredNetworkGroup> c
auto status = wait_for_exit_with_timeout(std::chrono::seconds(params.time_to_run)); auto status = wait_for_exit_with_timeout(std::chrono::seconds(params.time_to_run));
CHECK_SUCCESS(status); CHECK_SUCCESS(status);
status = abort_streams(send_objects, recv_objects); status = configured_net_group->shutdown();
barrier.terminate(); barrier.terminate();
CHECK_SUCCESS(status); CHECK_SUCCESS(status);
} }

View File

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.0.0)
# set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*") # set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*")
set(HAILORT_MAJOR_VERSION 4) set(HAILORT_MAJOR_VERSION 4)
set(HAILORT_MINOR_VERSION 15) set(HAILORT_MINOR_VERSION 16)
set(HAILORT_REVISION_VERSION 0) set(HAILORT_REVISION_VERSION 0)
# Add the cmake folder so the modules there are found # Add the cmake folder so the modules there are found

View File

@@ -8,7 +8,7 @@ if(NOT CMAKE_HOST_UNIX)
message(FATAL_ERROR "Only unix hosts are supported, stopping build") message(FATAL_ERROR "Only unix hosts are supported, stopping build")
endif() endif()
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
# GST_PLUGIN_DEFINE needs PACKAGE to be defined # GST_PLUGIN_DEFINE needs PACKAGE to be defined
set(GST_HAILO_PACKAGE_NAME "hailo") set(GST_HAILO_PACKAGE_NAME "hailo")
@@ -25,6 +25,7 @@ add_library(gsthailo SHARED
gst-hailo/gsthailonet.cpp gst-hailo/gsthailonet.cpp
gst-hailo/gsthailosend.cpp gst-hailo/gsthailosend.cpp
gst-hailo/gsthailorecv.cpp gst-hailo/gsthailorecv.cpp
gst-hailo/gsthailonet2.cpp
gst-hailo/gsthailodevicestats.cpp gst-hailo/gsthailodevicestats.cpp
gst-hailo/common.cpp gst-hailo/common.cpp
gst-hailo/network_group_handle.cpp gst-hailo/network_group_handle.cpp

View File

@@ -25,4 +25,66 @@ HailoElemProperty<gchar*>::~HailoElemProperty()
if (nullptr != m_value) { if (nullptr != m_value) {
g_free(m_value); g_free(m_value);
} }
}
GType gst_scheduling_algorithm_get_type (void)
{
static GType scheduling_algorithm_type = 0;
/* Tightly coupled to hailo_scheduling_algorithm_e */
if (!scheduling_algorithm_type) {
static GEnumValue algorithm_types[] = {
{ HAILO_SCHEDULING_ALGORITHM_NONE, "Scheduler is not active", "HAILO_SCHEDULING_ALGORITHM_NONE" },
{ HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, "Round robin", "HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN" },
{ HAILO_SCHEDULING_ALGORITHM_MAX_ENUM, NULL, NULL },
};
scheduling_algorithm_type =
g_enum_register_static ("GstHailoSchedulingAlgorithms", algorithm_types);
}
return scheduling_algorithm_type;
}
GType gst_hailo_format_type_get_type (void)
{
static GType format_type_enum = 0;
/* Tightly coupled to hailo_format_type_t */
if (!format_type_enum) {
static GEnumValue format_types[] = {
{ HAILO_FORMAT_TYPE_AUTO, "auto", "HAILO_FORMAT_TYPE_AUTO"},
{ HAILO_FORMAT_TYPE_UINT8, "uint8", "HAILO_FORMAT_TYPE_UINT8"},
{ HAILO_FORMAT_TYPE_UINT16, "uint16", "HAILO_FORMAT_TYPE_UINT16"},
{ HAILO_FORMAT_TYPE_FLOAT32, "float32", "HAILO_FORMAT_TYPE_FLOAT32"},
{ HAILO_FORMAT_TYPE_MAX_ENUM, NULL, NULL },
};
format_type_enum = g_enum_register_static ("GstHailoFormatTypes", format_types);
}
return format_type_enum;
}
bool do_versions_match(GstElement *self)
{
hailo_version_t libhailort_version = {};
auto status = hailo_get_library_version(&libhailort_version);
if (HAILO_SUCCESS != status) {
GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Fetching libhailort version has failed! status = %d", status), (NULL));
return false;
}
bool versions_match = ((HAILORT_MAJOR_VERSION == libhailort_version.major) &&
(HAILORT_MINOR_VERSION == libhailort_version.minor) &&
(HAILORT_REVISION_VERSION == libhailort_version.revision));
if (!versions_match) {
GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("libhailort version (%d.%d.%d) does not match gsthailonet version (%d.%d.%d)",
libhailort_version.major, libhailort_version.minor, libhailort_version.revision,
HAILORT_MAJOR_VERSION, HAILORT_MINOR_VERSION, HAILORT_REVISION_VERSION), (NULL));
return false;
}
return true;
} }

View File

@@ -96,6 +96,74 @@ using namespace hailort;
} \ } \
} while(0) } while(0)
#define _CHECK(cond, ret_val, ...) \
do { \
if (!(cond)) { \
g_print(__VA_ARGS__); \
g_print("\n"); \
return (ret_val); \
} \
} while(0)
#define CHECK(cond, ret_val, ...) _CHECK((cond), (ret_val), ##__VA_ARGS__)
#define CHECK_AS_EXPECTED(cond, ret_val, ...) \
_CHECK((cond), (make_unexpected(ret_val)), ##__VA_ARGS__)
#define CHECK_NOT_NULL(arg, status) _CHECK(nullptr != (arg), status, "CHECK_NOT_NULL for %s failed", #arg)
#define _CHECK_SUCCESS(status, ...) \
do { \
const auto &__check_success_status = (status); \
_CHECK( \
HAILO_SUCCESS == __check_success_status, \
__check_success_status, \
"CHECK_SUCCESS failed with status=%d", status \
); \
} while(0)
#define CHECK_SUCCESS(status, ...) _CHECK_SUCCESS(status, "" __VA_ARGS__)
#define _CHECK_SUCCESS_AS_EXPECTED(status, ...) \
do { \
const auto &__check_success_status = (status); \
_CHECK( \
HAILO_SUCCESS == __check_success_status, \
make_unexpected(__check_success_status), \
"CHECK_SUCCESS_AS_EXPECTED failed with status=%d", status \
); \
} while(0)
#define CHECK_SUCCESS_AS_EXPECTED(status, ...) _CHECK_SUCCESS_AS_EXPECTED(status, "" __VA_ARGS__)
#define _CHECK_EXPECTED_AS_STATUS(obj, ...) \
do { \
const auto &__check_expected_obj = (obj); \
_CHECK( \
__check_expected_obj.has_value(), \
__check_expected_obj.status(), \
"CHECK_EXPECTED_AS_STATUS failed with status=%d", __check_expected_obj.status() \
); \
} while(0)
#define CHECK_EXPECTED_AS_STATUS(obj, ...) _CHECK_EXPECTED_AS_STATUS(obj, "" __VA_ARGS__)
#define _CHECK_EXPECTED(obj, ...) \
do { \
const auto &__check_expected_obj = (obj); \
_CHECK( \
__check_expected_obj.has_value(), \
make_unexpected(__check_expected_obj.status()), \
"CHECK_EXPECTED failed with status=%d", __check_expected_obj.status() \
); \
} while(0)
#define CHECK_EXPECTED(obj, ...) _CHECK_EXPECTED(obj, "" __VA_ARGS__)
#define RGB_FEATURES_SIZE (3)
#define RGBA_FEATURES_SIZE (4)
#define GRAY8_FEATURES_SIZE (1)
#define YUY2_FEATURES_SIZE (2)
#define NV12_FEATURES_SIZE (3)
#define NV21_FEATURES_SIZE (3)
#define I420_FEATURES_SIZE (3)
// From https://stackoverflow.com/questions/57092289/do-stdmake-shared-and-stdmake-unique-have-a-nothrow-version // From https://stackoverflow.com/questions/57092289/do-stdmake-shared-and-stdmake-unique-have-a-nothrow-version
template <class T, class... Args> template <class T, class... Args>
static inline std::unique_ptr<T> make_unique_nothrow(Args&&... args) static inline std::unique_ptr<T> make_unique_nothrow(Args&&... args)
@@ -144,4 +212,12 @@ private:
template<> template<>
HailoElemProperty<gchar*>::~HailoElemProperty(); HailoElemProperty<gchar*>::~HailoElemProperty();
#define GST_TYPE_SCHEDULING_ALGORITHM (gst_scheduling_algorithm_get_type ())
GType gst_scheduling_algorithm_get_type (void);
#define GST_TYPE_HAILO_FORMAT_TYPE (gst_hailo_format_type_get_type ())
GType gst_hailo_format_type_get_type (void);
bool do_versions_match(GstElement *self);
#endif /* _GST_HAILO_COMMON_HPP_ */ #endif /* _GST_HAILO_COMMON_HPP_ */

View File

@@ -31,51 +31,6 @@
GST_DEBUG_CATEGORY_STATIC(gst_hailonet_debug_category); GST_DEBUG_CATEGORY_STATIC(gst_hailonet_debug_category);
#define GST_CAT_DEFAULT gst_hailonet_debug_category #define GST_CAT_DEFAULT gst_hailonet_debug_category
#define GST_TYPE_SCHEDULING_ALGORITHM (gst_scheduling_algorithm_get_type ())
static GType
gst_scheduling_algorithm_get_type (void)
{
static GType scheduling_algorithm_type = 0;
/* Tightly coupled to hailo_scheduling_algorithm_e */
if (!scheduling_algorithm_type) {
static GEnumValue algorithm_types[] = {
{ HAILO_SCHEDULING_ALGORITHM_NONE, "Scheduler is not active", "HAILO_SCHEDULING_ALGORITHM_NONE" },
{ HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, "Round robin", "HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN" },
{ HAILO_SCHEDULING_ALGORITHM_MAX_ENUM, NULL, NULL },
};
scheduling_algorithm_type =
g_enum_register_static ("GstHailoSchedulingAlgorithms", algorithm_types);
}
return scheduling_algorithm_type;
}
#define GST_TYPE_HAILO_FORMAT_TYPE (gst_hailo_format_type_get_type ())
static GType
gst_hailo_format_type_get_type (void)
{
static GType format_type_enum = 0;
/* Tightly coupled to hailo_format_type_t */
if (!format_type_enum) {
static GEnumValue format_types[] = {
{ HAILO_FORMAT_TYPE_AUTO, "auto", "HAILO_FORMAT_TYPE_AUTO"},
{ HAILO_FORMAT_TYPE_UINT8, "uint8", "HAILO_FORMAT_TYPE_UINT8"},
{ HAILO_FORMAT_TYPE_UINT16, "uint16", "HAILO_FORMAT_TYPE_UINT16"},
{ HAILO_FORMAT_TYPE_FLOAT32, "float32", "HAILO_FORMAT_TYPE_FLOAT32"},
{ HAILO_FORMAT_TYPE_MAX_ENUM, NULL, NULL },
};
format_type_enum = g_enum_register_static ("GstHailoFormatTypes", format_types);
}
return format_type_enum;
}
constexpr std::chrono::milliseconds WAIT_FOR_FLUSH_TIMEOUT_MS(1000); constexpr std::chrono::milliseconds WAIT_FOR_FLUSH_TIMEOUT_MS(1000);
static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec);
@@ -105,8 +60,6 @@ enum
PROP_SCHEDULER_THRESHOLD, PROP_SCHEDULER_THRESHOLD,
PROP_SCHEDULER_PRIORITY, PROP_SCHEDULER_PRIORITY,
PROP_MULTI_PROCESS_SERVICE, PROP_MULTI_PROCESS_SERVICE,
PROP_INPUT_QUANTIZED,
PROP_OUTPUT_QUANTIZED,
PROP_INPUT_FORMAT_TYPE, PROP_INPUT_FORMAT_TYPE,
PROP_OUTPUT_FORMAT_TYPE, PROP_OUTPUT_FORMAT_TYPE,
PROP_NMS_SCORE_THRESHOLD, PROP_NMS_SCORE_THRESHOLD,
@@ -200,14 +153,6 @@ static void gst_hailonet_class_init(GstHailoNetClass *klass)
g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. "
"To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.",
HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property(gobject_class, PROP_INPUT_QUANTIZED,
g_param_spec_boolean("input-quantized", "Is the input quantized or not", "Deprecated parameter that will be ignored. "
"Determine whether to quantize (scale) the data will be decided by the src-data and dst-data types.",
true, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property(gobject_class, PROP_OUTPUT_QUANTIZED,
g_param_spec_boolean("output-quantized", "Should the output be quantized or de-quantized","Deprecated parameter that will be ignored. "
"Determine whether to de-quantize (rescale) the data will be decided by the src-data and dst-data types.",
true, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE,
g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto."
"Gets values from the enum GstHailoFormatType. ", "Gets values from the enum GstHailoFormatType. ",
@@ -531,22 +476,6 @@ void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue
} }
m_props.m_multi_process_service = g_value_get_boolean(value); m_props.m_multi_process_service = g_value_get_boolean(value);
break; break;
case PROP_INPUT_QUANTIZED:
g_warning("'input-quantized' is a deprecated parameter that will be ignored.");
if (m_was_configured) {
g_warning("The network was already configured so changing the quantized flag will not take place!");
break;
}
m_props.m_input_quantized = g_value_get_boolean(value);
break;
case PROP_OUTPUT_QUANTIZED:
g_warning("'output-quantized' is a deprecated parameter that will be ignored.");
if (m_was_configured) {
g_warning("The network was already configured so changing the quantized flag will not take place!");
break;
}
m_props.m_output_quantized = g_value_get_boolean(value);
break;
case PROP_INPUT_FORMAT_TYPE: case PROP_INPUT_FORMAT_TYPE:
if (m_was_configured) { if (m_was_configured) {
g_warning("The network was already configured so changing the format type will not take place!"); g_warning("The network was already configured so changing the format type will not take place!");
@@ -655,12 +584,6 @@ void HailoNetImpl::get_property(GObject *object, guint property_id, GValue *valu
case PROP_MULTI_PROCESS_SERVICE: case PROP_MULTI_PROCESS_SERVICE:
g_value_set_boolean(value, m_props.m_multi_process_service.get()); g_value_set_boolean(value, m_props.m_multi_process_service.get());
break; break;
case PROP_INPUT_QUANTIZED:
g_value_set_boolean(value, m_props.m_input_quantized.get());
break;
case PROP_OUTPUT_QUANTIZED:
g_value_set_boolean(value, m_props.m_output_quantized.get());
break;
case PROP_INPUT_FORMAT_TYPE: case PROP_INPUT_FORMAT_TYPE:
g_value_set_enum(value, m_props.m_input_format_type.get()); g_value_set_enum(value, m_props.m_input_format_type.get());
break; break;
@@ -770,14 +693,8 @@ hailo_status HailoNetImpl::configure_network_group()
GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status); GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status);
} }
auto input_quantized = (m_props.m_input_quantized.was_changed()) ? static_cast<bool>(m_props.m_input_quantized.get()) :
(m_props.m_input_format_type.get() != HAILO_FORMAT_TYPE_FLOAT32);
auto output_quantized = (m_props.m_output_quantized.was_changed()) ? static_cast<bool>(m_props.m_output_quantized.get()) :
(m_props.m_output_format_type.get() != HAILO_FORMAT_TYPE_FLOAT32);
auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats,
input_quantized, output_quantized, m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); m_props.m_input_format_type.get(), m_props.m_output_format_type.get());
GST_CHECK_EXPECTED_AS_STATUS(vstreams, m_element, RESOURCE, "Creating vstreams failed, status = %d", status); GST_CHECK_EXPECTED_AS_STATUS(vstreams, m_element, RESOURCE, "Creating vstreams failed, status = %d", status);
GST_HAILOSEND(m_hailosend)->impl->set_input_vstreams(std::move(vstreams->first)); GST_HAILOSEND(m_hailosend)->impl->set_input_vstreams(std::move(vstreams->first));
@@ -969,30 +886,9 @@ hailo_status HailoNetImpl::signal_was_flushed_event()
return m_was_flushed_event->signal(); return m_was_flushed_event->signal();
} }
static bool do_versions_match(GstHailoNet *self)
{
hailo_version_t libhailort_version = {};
auto status = hailo_get_library_version(&libhailort_version);
if (HAILO_SUCCESS != status) {
GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Fetching libhailort version has failed! status = %d", status), (NULL));
return false;
}
bool versions_match = ((HAILORT_MAJOR_VERSION == libhailort_version.major) &&
(HAILORT_MINOR_VERSION == libhailort_version.minor) &&
(HAILORT_REVISION_VERSION == libhailort_version.revision));
if (!versions_match) {
GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("libhailort version (%d.%d.%d) does not match gsthailonet version (%d.%d.%d)",
libhailort_version.major, libhailort_version.minor, libhailort_version.revision,
HAILORT_MAJOR_VERSION, HAILORT_MINOR_VERSION, HAILORT_REVISION_VERSION), (NULL));
return false;
}
return true;
}
static void gst_hailonet_init(GstHailoNet *self) static void gst_hailonet_init(GstHailoNet *self)
{ {
if (!do_versions_match(self)) { if (!do_versions_match(GST_ELEMENT(self))) {
return; return;
} }

View File

@@ -54,7 +54,7 @@ public:
HailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), HailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE),
m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN),
m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL),
m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_quantized(true), m_output_quantized(true), m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_format_type(HAILO_FORMAT_TYPE_AUTO),
m_output_format_type(HAILO_FORMAT_TYPE_AUTO), m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0) m_output_format_type(HAILO_FORMAT_TYPE_AUTO), m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0)
{} {}
@@ -71,8 +71,6 @@ public:
HailoElemProperty<guint32> m_scheduler_threshold; HailoElemProperty<guint32> m_scheduler_threshold;
HailoElemProperty<guint8> m_scheduler_priority; HailoElemProperty<guint8> m_scheduler_priority;
HailoElemProperty<gboolean> m_multi_process_service; HailoElemProperty<gboolean> m_multi_process_service;
HailoElemProperty<gboolean> m_input_quantized;
HailoElemProperty<gboolean> m_output_quantized;
HailoElemProperty<hailo_format_type_t> m_input_format_type; HailoElemProperty<hailo_format_type_t> m_input_format_type;
HailoElemProperty<hailo_format_type_t> m_output_format_type; HailoElemProperty<hailo_format_type_t> m_output_format_type;
HailoElemProperty<gfloat> m_nms_score_threshold; HailoElemProperty<gfloat> m_nms_score_threshold;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,170 @@
/*
* Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved.
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef _GST_HAILONET2_HPP_
#define _GST_HAILONET2_HPP_
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#include <gst/gst.h>
#pragma GCC diagnostic pop
#include <gst/base/gstqueuearray.h>
#include <gst/video/gstvideofilter.h>
#include "hailo/infer_model.hpp"
#include "common.hpp"
#include <queue>
#include <condition_variable>
#include <mutex>
#include <thread>
using namespace hailort;
G_BEGIN_DECLS
#define GST_TYPE_HAILO_ALLOCATOR (gst_hailo_allocator_get_type())
#define GST_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocator))
#define GST_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocatorClass))
#define GST_IS_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_HAILO_ALLOCATOR))
#define GST_IS_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_HAILO_ALLOCATOR))
#define MIN_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE)
#define MAX_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE * 4)
// Custom GstAllocator that backs each allocated GstMemory with a hailort::Buffer.
// GObject instance struct: `parent` MUST stay the first member (GObject layout).
struct GstHailoAllocator
{
GstAllocator parent;
// Maps every GstMemory handed out by this allocator to the hailort Buffer that
// owns its storage, so the Buffer can be released when the memory is freed.
std::unordered_map<GstMemory*, Buffer> buffers;
};
// GObject class struct for GstHailoAllocator; carries no extra vfuncs or data
// beyond the parent GstAllocatorClass.
struct GstHailoAllocatorClass
{
GstAllocatorClass parent;
};
GType gst_hailo_allocator_get_type(void);
// Container for all GObject properties of the hailonet2 element.
// Each member wraps one property value together with a was_changed() flag
// (HailoElemProperty), so the element can tell defaults apart from user-set values.
struct HailoNet2Properties final
{
public:
// Initializes every property to its default: string properties to nullptr,
// numeric/enum properties to the HailoRT defaults, pool sizes derived from
// MAX_GSTREAMER_BATCH_SIZE, and all NMS overrides to 0 (= "use HEF value").
HailoNet2Properties() : m_hef_path(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE),
m_device_id(nullptr), m_device_count(0), m_vdevice_group_id(nullptr), m_is_active(false),
m_outputs_min_pool_size(MIN_OUTPUTS_POOL_SIZE), m_outputs_max_pool_size(MAX_OUTPUTS_POOL_SIZE),
m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS),
m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL),
m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO),
m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0), m_input_from_meta(false),
m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE),
m_vdevice_key(DEFAULT_VDEVICE_KEY)
{}
// Releases the g_malloc'd string properties. Only strings that were actually
// set (was_changed()) are freed; the defaults are nullptr and need no free.
// NOTE(review): the pointers are not reset after g_free(), so calling
// free_strings() twice would g_free() dangling pointers — presumably this is
// only called once on element teardown; confirm at the call site.
void free_strings()
{
if (m_hef_path.was_changed()) {
g_free(m_hef_path.get());
}
if (m_device_id.was_changed()) {
g_free(m_device_id.get());
}
if (m_vdevice_group_id.was_changed()) {
g_free(m_vdevice_group_id.get());
}
}
HailoElemProperty<gchar*> m_hef_path;                  // path to the HEF file to configure
HailoElemProperty<guint16> m_batch_size;               // inference batch size
HailoElemProperty<gchar*> m_device_id;                 // specific physical device to use
HailoElemProperty<guint16> m_device_count;             // number of devices in the vdevice
HailoElemProperty<gchar*> m_vdevice_group_id;          // vdevice group id (string form)
HailoElemProperty<gboolean> m_is_active;               // whether this network is currently active
HailoElemProperty<guint> m_outputs_min_pool_size;      // min buffers in each output buffer pool
HailoElemProperty<guint> m_outputs_max_pool_size;      // max buffers in each output buffer pool
HailoElemProperty<hailo_scheduling_algorithm_t> m_scheduling_algorithm;
HailoElemProperty<guint32> m_scheduler_timeout_ms;     // scheduler idle timeout
HailoElemProperty<guint32> m_scheduler_threshold;      // scheduler frame threshold
HailoElemProperty<guint8> m_scheduler_priority;        // scheduler priority for this network
HailoElemProperty<hailo_format_type_t> m_input_format_type;   // user format of input vstreams
HailoElemProperty<hailo_format_type_t> m_output_format_type;  // user format of output vstreams
HailoElemProperty<gfloat> m_nms_score_threshold;       // 0 means keep the HEF's value
HailoElemProperty<gfloat> m_nms_iou_threshold;         // 0 means keep the HEF's value
HailoElemProperty<guint32> m_nms_max_proposals_per_class;  // 0 means keep the HEF's value
HailoElemProperty<gboolean> m_input_from_meta;         // take input tensors from buffer meta instead of video frames
HailoElemProperty<gboolean> m_multi_process_service;   // use the HailoRT multi-process service
// Deprecated
HailoElemProperty<guint32> m_vdevice_key;
};
// Instance struct of the hailonet2 GStreamer element.
// GObject layout: `element` MUST stay the first member. Everything else is the
// element's runtime state: pads, queues, the worker thread, the HailoRT
// inference objects, and the synchronization primitives that tie them together.
typedef struct _GstHailoNet2 {
GstElement element;
GstPad *sinkpad;                       // upstream pad (frames in)
GstPad *srcpad;                        // downstream pad (frames + tensors out)
GstQueueArray *input_queue;            // buffers waiting to be sent to inference
GstQueueArray *thread_queue;           // buffers waiting to be pushed downstream by `thread`
std::atomic_uint32_t buffers_in_thread_queue;  // current occupancy of thread_queue
std::thread thread;                    // worker that drains thread_queue
HailoNet2Properties props;             // all GObject property values
GstCaps *input_caps;                   // negotiated caps of sinkpad
std::atomic_bool is_thread_running;    // stop flag for `thread`
std::atomic_bool has_got_eos;          // set once EOS was received on sinkpad
std::unique_ptr<VDevice> vdevice;      // HailoRT virtual device
std::shared_ptr<InferModel> infer_model;
std::shared_ptr<ConfiguredInferModel> configured_infer_model;
ConfiguredInferModel::Bindings infer_bindings;  // input/output buffer bindings for a run
bool is_configured;                    // infer model was configured on the vdevice
std::mutex infer_mutex;                // serializes access to the configured model
bool has_called_activate;              // activate was already performed
std::atomic_uint32_t ongoing_frames;   // frames currently in flight through inference
std::condition_variable flush_cv;      // signaled when ongoing_frames drains (flush/EOS)
std::mutex flush_mutex;                // pairs with flush_cv
GstVideoInfo input_frame_info;         // parsed video info of input_caps
GstHailoAllocator *allocator;          // allocator backing output GstMemory with hailort Buffers
std::unordered_map<std::string, GstBufferPool*> output_buffer_pools;       // per-output-name pools
std::unordered_map<std::string, hailo_vstream_info_t> output_vstream_infos; // per-output-name vstream info
std::mutex input_queue_mutex;          // guards input_queue
std::mutex thread_queue_mutex;         // guards thread_queue; pairs with thread_cv
std::condition_variable thread_cv;     // wakes `thread` when thread_queue changes
} GstHailoNet2;
// GObject class struct for hailonet2; no vfuncs or class data beyond the parent.
typedef struct _GstHailoNet2Class {
GstElementClass parent_class;
} GstHailoNet2Class;
#define GST_TYPE_HAILONET2 (gst_hailonet2_get_type())
#define GST_HAILONET2(obj) \
(G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET2,GstHailoNet2))
#define GST_HAILONET2_CLASS(klass) \
(G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET2,GstHailoNet2Class))
#define GST_IS_HAILONET2(obj) \
(G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET2))
#define GST_IS_HAILONET2_CLASS(klass) \
(G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET2))
GType gst_hailonet2_get_type (void);
G_END_DECLS
#endif /* _GST_HAILONET2_HPP_ */

View File

@@ -20,6 +20,7 @@
#include "gsthailonet.hpp" #include "gsthailonet.hpp"
#include "gsthailosend.hpp" #include "gsthailosend.hpp"
#include "gsthailorecv.hpp" #include "gsthailorecv.hpp"
#include "gsthailonet2.hpp"
#include "gsthailodevicestats.hpp" #include "gsthailodevicestats.hpp"
#include "metadata/tensor_meta.hpp" #include "metadata/tensor_meta.hpp"
@@ -31,7 +32,8 @@ static gboolean plugin_init(GstPlugin *plugin)
return gst_element_register(plugin, "hailonet", GST_RANK_PRIMARY, GST_TYPE_HAILONET) && return gst_element_register(plugin, "hailonet", GST_RANK_PRIMARY, GST_TYPE_HAILONET) &&
gst_element_register(plugin, "hailodevicestats", GST_RANK_PRIMARY, GST_TYPE_HAILODEVICESTATS) && gst_element_register(plugin, "hailodevicestats", GST_RANK_PRIMARY, GST_TYPE_HAILODEVICESTATS) &&
gst_element_register(nullptr, "hailosend", GST_RANK_PRIMARY, GST_TYPE_HAILOSEND) && gst_element_register(nullptr, "hailosend", GST_RANK_PRIMARY, GST_TYPE_HAILOSEND) &&
gst_element_register(nullptr, "hailorecv", GST_RANK_PRIMARY, GST_TYPE_HAILORECV); gst_element_register(nullptr, "hailorecv", GST_RANK_PRIMARY, GST_TYPE_HAILORECV) &&
gst_element_register(plugin, "hailonet2", GST_RANK_PRIMARY, GST_TYPE_HAILONET2);
} }
GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, hailo, "hailo gstreamer plugin", plugin_init, VERSION, GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, hailo, "hailo gstreamer plugin", plugin_init, VERSION,

View File

@@ -28,13 +28,6 @@
GST_DEBUG_CATEGORY_STATIC(gst_hailosend_debug_category); GST_DEBUG_CATEGORY_STATIC(gst_hailosend_debug_category);
#define GST_CAT_DEFAULT gst_hailosend_debug_category #define GST_CAT_DEFAULT gst_hailosend_debug_category
#define RGB_FEATURES_SIZE (3)
#define RGBA_FEATURES_SIZE (4)
#define GRAY8_FEATURES_SIZE (1)
#define YUY2_FEATURES_SIZE (2)
#define NV12_FEATURES_SIZE (3)
#define NV21_FEATURES_SIZE (3)
#define I420_FEATURES_SIZE (3)
static void gst_hailosend_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); static void gst_hailosend_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec);
static void gst_hailosend_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); static void gst_hailosend_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec);

View File

@@ -53,7 +53,7 @@ inline const void *get_tensor_data(GstStructure *s) {
* @brief This struct represents raw tensor metadata and contains instance of parent GstMeta and fields describing * @brief This struct represents raw tensor metadata and contains instance of parent GstMeta and fields describing
* inference result tensor. This metadata instances is attached to buffer by gvainference elements * inference result tensor. This metadata instances is attached to buffer by gvainference elements
*/ */
struct GstHailoTensorMeta { struct HAILORTAPI GstHailoTensorMeta {
GstMeta meta; /**< parent meta object */ GstMeta meta; /**< parent meta object */
hailo_vstream_info_t info; /**< struct that holds vstream info, e.g. shape, quant_info, layer_name etc... */ hailo_vstream_info_t info; /**< struct that holds vstream info, e.g. shape, quant_info, layer_name etc... */
}; };
@@ -62,14 +62,14 @@ struct GstHailoTensorMeta {
* @brief This function registers, if needed, and returns GstMetaInfo for _GstHailoTensorMeta * @brief This function registers, if needed, and returns GstMetaInfo for _GstHailoTensorMeta
* @return GstMetaInfo* for registered type * @return GstMetaInfo* for registered type
*/ */
const GstMetaInfo *gst_tensor_meta_get_info(void); HAILORTAPI const GstMetaInfo *gst_tensor_meta_get_info(void);
/** /**
* @brief This function registers, if needed, and returns a GType for api "GstHailoTensorMetaAPI" and associate it with * @brief This function registers, if needed, and returns a GType for api "GstHailoTensorMetaAPI" and associate it with
* TENSOR_META_TAG tag * TENSOR_META_TAG tag
* @return GType type * @return GType type
*/ */
GType gst_tensor_meta_api_get_type(void); HAILORTAPI GType gst_tensor_meta_api_get_type(void);
#define GST_TENSOR_META_API_TYPE (gst_tensor_meta_api_get_type()) #define GST_TENSOR_META_API_TYPE (gst_tensor_meta_api_get_type())
/** /**

View File

@@ -185,8 +185,8 @@ hailo_status NetworkGroupHandle::set_scheduler_priority(const char *network_name
} }
Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> NetworkGroupHandle::create_vstreams(const char *network_name, Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> NetworkGroupHandle::create_vstreams(const char *network_name,
hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector<hailo_format_with_name_t> &output_formats, bool input_quantized, hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector<hailo_format_with_name_t> &output_formats,
bool output_quantized, hailo_format_type_t input_format_type, hailo_format_type_t output_format_type) hailo_format_type_t input_format_type, hailo_format_type_t output_format_type)
{ {
GST_CHECK(nullptr != network_name, make_unexpected(HAILO_INVALID_ARGUMENT), m_element, RESOURCE, "Got nullptr in network name!"); GST_CHECK(nullptr != network_name, make_unexpected(HAILO_INVALID_ARGUMENT), m_element, RESOURCE, "Got nullptr in network name!");
@@ -200,7 +200,7 @@ Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> Netwo
auto expected_input_vstream_infos = hef()->get_input_vstream_infos(network_name); auto expected_input_vstream_infos = hef()->get_input_vstream_infos(network_name);
GST_CHECK_EXPECTED(expected_input_vstream_infos, m_element, RESOURCE, "Failed getting input vstream infos, status = %d", GST_CHECK_EXPECTED(expected_input_vstream_infos, m_element, RESOURCE, "Failed getting input vstream infos, status = %d",
expected_input_vstream_infos.status()); expected_input_vstream_infos.status());
auto expected_input_params_map = m_cng->make_input_vstream_params(input_quantized, input_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, auto expected_input_params_map = m_cng->make_input_vstream_params({}, input_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS,
HAILO_DEFAULT_VSTREAM_QUEUE_SIZE, m_network_name); HAILO_DEFAULT_VSTREAM_QUEUE_SIZE, m_network_name);
GST_CHECK_EXPECTED(expected_input_params_map, m_element, RESOURCE, "Failed making input vstream params, status = %d", GST_CHECK_EXPECTED(expected_input_params_map, m_element, RESOURCE, "Failed making input vstream params, status = %d",
expected_input_params_map.status()); expected_input_params_map.status());
@@ -223,7 +223,7 @@ Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> Netwo
GST_CHECK(1 == input_vstreams->size(), make_unexpected(HAILO_INVALID_OPERATION), m_element, RESOURCE, GST_CHECK(1 == input_vstreams->size(), make_unexpected(HAILO_INVALID_OPERATION), m_element, RESOURCE,
"hailosend element supports only HEFs with one input for now!"); "hailosend element supports only HEFs with one input for now!");
auto output_params_map = m_cng->make_output_vstream_params(output_quantized, output_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, auto output_params_map = m_cng->make_output_vstream_params({}, output_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS,
HAILO_DEFAULT_VSTREAM_QUEUE_SIZE, m_network_name); HAILO_DEFAULT_VSTREAM_QUEUE_SIZE, m_network_name);
GST_CHECK_EXPECTED(output_params_map, m_element, RESOURCE, "Failed making output vstream params, status = %d", GST_CHECK_EXPECTED(output_params_map, m_element, RESOURCE, "Failed making output vstream params, status = %d",
output_params_map.status()); output_params_map.status());

View File

@@ -83,8 +83,8 @@ public:
bool multi_process_service, const char *hef_path); bool multi_process_service, const char *hef_path);
hailo_status configure_network_group(const char *net_group_name, hailo_scheduling_algorithm_t scheduling_algorithm, uint16_t batch_size); hailo_status configure_network_group(const char *net_group_name, hailo_scheduling_algorithm_t scheduling_algorithm, uint16_t batch_size);
Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> create_vstreams(const char *network_name, Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> create_vstreams(const char *network_name,
hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector<hailo_format_with_name_t> &output_formats, bool input_quantized, hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector<hailo_format_with_name_t> &output_formats,
bool output_quantized, hailo_format_type_t input_format_type, hailo_format_type_t output_format_type); hailo_format_type_t input_format_type, hailo_format_type_t output_format_type);
hailo_status activate_network_group(); hailo_status activate_network_group();
Expected<bool> remove_network_group(); Expected<bool> remove_network_group();

View File

@@ -889,6 +889,12 @@ class InferVStreams(object):
self._net_group_name) self._net_group_name)
output_tensor_info = output_buffers_info[output_name].output_tensor_info output_tensor_info = output_buffers_info[output_name].output_tensor_info
shape, dtype = output_tensor_info shape, dtype = output_tensor_info
if (output_buffers_info[output_name].output_order == FormatOrder.HAILO_NMS_WITH_BYTE_MASK):
# Note: In python bindings the output data gets converted to py::array with dtype=dtype.
# In `HAILO_NMS_WITH_BYTE_MASK` we would like to get the data as uint8 and convert it by it's format.
# Therefore we need to get it as uint8 instead of float32 and adjust the shape size.
dtype = numpy.uint8
shape[0] = shape[0] * 4
output_buffers[output_name] = numpy.empty([batch_size] + list(shape), dtype=dtype) output_buffers[output_name] = numpy.empty([batch_size] + list(shape), dtype=dtype)
return output_buffers, output_buffers_info return output_buffers, output_buffers_info
@@ -1061,49 +1067,60 @@ class InferVStreams(object):
return False return False
class HailoDetectionBox(object): class HailoDetection(object):
# TODO: HRT-11492 - Add documentation to class and functions """Represents Hailo detection information"""
def __init__(self, bbox, class_id, mask_size, mask): def __init__(self, detection):
self._bbox = bbox self._y_min = detection.box.y_min
self._mask_size = mask_size self._x_min = detection.box.x_min
self._mask = mask self._y_max = detection.box.y_max
self._class_id = class_id self._x_max = detection.box.x_max
self._score = detection.score
@property self._class_id = detection.class_id
def bbox(self): self._mask = detection.mask()
return self._bbox
@property @property
def y_min(self): def y_min(self):
return self._bbox[0] """Get detection's box y_min coordinate"""
return self._y_min
@property @property
def x_min(self): def x_min(self):
return self._bbox[1] """Get detection's box x_min coordinate"""
return self._x_min
@property @property
def y_max(self): def y_max(self):
return self._bbox[2] """Get detection's box y_max coordinate"""
return self._y_max
@property @property
def x_max(self): def x_max(self):
return self._bbox[3] """Get detection's box x_max coordinate"""
return self._x_max
@property @property
def score(self): def score(self):
return self._bbox[4] """Get detection's score"""
return self._score
@property @property
def class_id(self): def class_id(self):
"""Get detection's class_id"""
return self._class_id return self._class_id
@property
def mask_size(self):
return self._mask_size
@property @property
def mask(self): def mask(self):
"""Byte Mask:
The mask is a binary mask that defines a region of interest (ROI) of the image.
Mask pixel values of 1 indicate image pixels that belong to the ROI.
Mask pixel values of 0 indicate image pixels that are part of the background.
The size of the mask is the size of the box, in the original input image's dimensions.
Mask width = ceil((x_max - x_min) * image_width)
Mask height = ceil((y_max - y_min) * image_height)
First pixel represents the pixel (x_min * image_width, y_min * image_height) in the original input image.
"""
return self._mask return self._mask
class HailoRTTransformUtils(object): class HailoRTTransformUtils(object):
@@ -1156,15 +1173,6 @@ class HailoRTTransformUtils(object):
"Please compile again or provide a list of quant_infos.") "Please compile again or provide a list of quant_infos.")
_pyhailort.dequantize_output_buffer_in_place(raw_buffer, src_format_type, dst_format_type, elements_count, quant_info) _pyhailort.dequantize_output_buffer_in_place(raw_buffer, src_format_type, dst_format_type, elements_count, quant_info)
@staticmethod
def is_qp_valid(quant_info):
"""Returns if quant_info is valid.
Args:
quant_info (:class:`~hailo_platform.pyhailort.pyhailort.QuantInfo`): The quantization info.
"""
return _pyhailort.is_qp_valid(quant_info)
@staticmethod @staticmethod
def quantize_input_buffer(src_buffer, dst_buffer, elements_count, quant_info): def quantize_input_buffer(src_buffer, dst_buffer, elements_count, quant_info):
"""Quantize the data in input buffer `src_buffer` and output it to the buffer `dst_buffer` """Quantize the data in input buffer `src_buffer` and output it to the buffer `dst_buffer`
@@ -1233,116 +1241,80 @@ class HailoRTTransformUtils(object):
def _output_raw_buffer_to_nms_with_byte_mask_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width, def _output_raw_buffer_to_nms_with_byte_mask_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width,
max_bboxes_per_class, output_dtype, is_tf_format=False): max_bboxes_per_class, output_dtype, is_tf_format=False):
if is_tf_format: if is_tf_format:
if os.environ.get('HAILO_TF_FORMAT_INTERNAL'): return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format(raw_output_buffer, number_of_classes,
return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width, max_bboxes_per_class, output_dtype)
batch_size, image_height, image_width, max_bboxes_per_class, output_dtype)
else:
raise HailoRTException("TF format is not supported with HAILO_NMS_WITH_BYTE_MASK format order")
else: else:
return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer, number_of_classes) return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer)
@staticmethod @staticmethod
def _output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer, number_of_classes): def _output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer):
converted_output_buffer = [] converted_output_buffer = []
for frame in raw_output_buffer: for frame in raw_output_buffer:
converted_output_buffer.append( converted_output_buffer.append(
HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(frame, number_of_classes)) HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(frame))
return converted_output_buffer return converted_output_buffer
@staticmethod @staticmethod
def _output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(raw_output_buffer, number_of_classes): def _output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(raw_output_buffer):
offset = 0 detections = _pyhailort.convert_nms_with_byte_mask_buffer_to_detections(raw_output_buffer)
converted_output_frame = [] converted_output_frame = []
for class_i in range(number_of_classes): for detection in detections:
class_bboxes_amount = int(raw_output_buffer[offset]) converted_output_frame.append(HailoDetection(detection))
offset += 1
classes_boxes = []
if class_bboxes_amount != 0:
for bbox_i in range(class_bboxes_amount):
bbox = raw_output_buffer[offset : offset + BBOX_PARAMS]
offset += BBOX_PARAMS
bbox_mask_size_in_bytes = raw_output_buffer[offset]
offset += 1
bbox_mask_size = int(bbox_mask_size_in_bytes / 4)
bbox_mask = raw_output_buffer[offset : (offset + bbox_mask_size)]
offset += bbox_mask_size
hailo_bbox = HailoDetectionBox(bbox, class_i, bbox_mask_size_in_bytes, bbox_mask)
classes_boxes.append(hailo_bbox)
converted_output_frame.append(classes_boxes)
return converted_output_frame return converted_output_frame
@staticmethod @staticmethod
def _output_raw_buffer_to_nms_with_byte_mask_tf_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width, def _output_raw_buffer_to_nms_with_byte_mask_tf_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width,
max_bboxes_per_class, output_dtype): max_bboxes_per_class, output_dtype):
offset = 0
# The + 1 is for the extra row containing the bbox coordinates, score and class_id BBOX_WITH_MASK_PARAMS = 6 # 4 coordinates + score + class_idx
output_height = image_height + 1 BBOX_WITH_MASK_AXIS = 2
CLASSES_AXIS = 1
# We create the tf_format buffer with reversed max_bboxes_per_class/features for performance optimization # We create the tf_format buffer with reversed max_bboxes_per_class/features for performance optimization
converted_output_buffer = numpy.empty([batch_size, max_bboxes_per_class, output_height, image_width], dtype=output_dtype) converted_output_buffer = numpy.empty([batch_size, max_bboxes_per_class, (image_height * image_width + BBOX_WITH_MASK_PARAMS)], dtype=output_dtype)
for frame_idx in range(len(raw_output_buffer)): for frame_idx in range(len(raw_output_buffer)):
offset = HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format_single_frame( HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format_single_frame(
raw_output_buffer[frame_idx], converted_output_buffer[frame_idx], number_of_classes, max_bboxes_per_class, raw_output_buffer[frame_idx], converted_output_buffer[frame_idx], number_of_classes, max_bboxes_per_class,
image_height, image_width, offset) image_height, image_width)
converted_output_buffer = numpy.moveaxis(converted_output_buffer, 1, 3) converted_output_buffer = numpy.moveaxis(converted_output_buffer, CLASSES_AXIS, BBOX_WITH_MASK_AXIS)
converted_output_buffer = numpy.expand_dims(converted_output_buffer, 1)
return converted_output_buffer return converted_output_buffer
@staticmethod @staticmethod
def _output_raw_buffer_to_nms_with_byte_mask_tf_format_single_frame(raw_output_buffer, converted_output_frame, number_of_classes, def _output_raw_buffer_to_nms_with_byte_mask_tf_format_single_frame(raw_output_buffer, converted_output_frame, number_of_classes,
max_boxes, image_height, image_width, offset): max_boxes, image_height, image_width):
detections = [] detections = _pyhailort.convert_nms_with_byte_mask_buffer_to_detections(raw_output_buffer)
for class_i in range(number_of_classes): bbox_idx = 0
class_bboxes_amount = int(raw_output_buffer[offset]) for detection in detections:
offset += 1 if (bbox_idx >= max_boxes):
return
bbox = numpy.array([detection.box.y_min, detection.box.x_min, detection.box.y_max, detection.box.x_max,
detection.score, detection.class_id])
bbox_mask = detection.mask()
if class_bboxes_amount != 0: y_min = numpy.ceil(bbox[0] * image_height)
for bbox_i in range(class_bboxes_amount): x_min = numpy.ceil(bbox[1] * image_width)
bbox = raw_output_buffer[offset : offset + BBOX_PARAMS] bbox_width = numpy.ceil((bbox[3] - bbox[1]) * image_width)
offset += BBOX_PARAMS resized_mask = numpy.zeros(image_height*image_width, dtype="uint8")
bbox_mask_size_in_bytes = raw_output_buffer[offset] for i in range(bbox_mask.size):
offset += 1 if (bbox_mask[i] == 1):
bbox_mask_size = int(bbox_mask_size_in_bytes // 4) x = int(x_min + (i % bbox_width))
y = int(y_min + (i // bbox_width))
if (x >= image_width):
x = image_width - 1
if ( y >= image_height):
y = image_height - 1
idx = (image_width * y) + x
resized_mask[idx] = 1
bbox_mask = raw_output_buffer[offset : (offset + bbox_mask_size)] bbox_with_mask = numpy.append(bbox, resized_mask)
offset += bbox_mask_size converted_output_frame[bbox_idx] = bbox_with_mask
bbox_idx += 1
y_min = bbox[0] * image_height
x_min = bbox[1] * image_width
bbox_width = round((bbox[3] - bbox[1]) * image_width)
resized_mask = numpy.empty([image_height, image_width])
for i in range(bbox_mask_size):
if (bbox_mask[i] == 1):
x = int(x_min + (i % bbox_width))
y = int(y_min + (i // bbox_width))
if (x >= image_width):
x = image_width - 1
if ( y >= image_height):
y = image_height - 1
resized_mask[y][x] = 1
padding = image_width - len(bbox)
bbox_padded = numpy.pad(bbox, pad_width=(0, padding), mode='constant')
bbox_padded[len(bbox)] = class_i
converted_detection = numpy.append(resized_mask ,[bbox_padded], axis=0)
detections.append((bbox[4], converted_detection))
detections.sort(key=lambda tup: tup[0], reverse=True)
for detection_idx in range(len(detections)):
if (detection_idx >= max_boxes):
return offset
converted_output_frame[detection_idx] = detections[detection_idx][1]
return offset
@staticmethod @staticmethod
def _get_format_type(dtype): def _get_format_type(dtype):
@@ -1515,7 +1487,7 @@ class HailoFormatFlags(_pyhailort.FormatFlags):
SUPPORTED_PROTOCOL_VERSION = 2 SUPPORTED_PROTOCOL_VERSION = 2
SUPPORTED_FW_MAJOR = 4 SUPPORTED_FW_MAJOR = 4
SUPPORTED_FW_MINOR = 15 SUPPORTED_FW_MINOR = 16
SUPPORTED_FW_REVISION = 0 SUPPORTED_FW_REVISION = 0
MEGA_MULTIPLIER = 1000.0 * 1000.0 MEGA_MULTIPLIER = 1000.0 * 1000.0
@@ -2706,8 +2678,7 @@ class InputVStreamParams(object):
Args: Args:
configured_network (:class:`ConfiguredNetwork`): The configured network group for which configured_network (:class:`ConfiguredNetwork`): The configured network group for which
the params are created. the params are created.
quantized (bool): Deprecated parameter that will be ignored. Determine whether to quantize (scale) quantized: Unused.
the data will be decided by the src-data and dst-data types.
format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The
default format type of the data for all input virtual streams. default format type of the data for all input virtual streams.
The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`,
@@ -2725,16 +2696,13 @@ class InputVStreamParams(object):
""" """
if format_type is None: if format_type is None:
format_type = FormatType.AUTO format_type = FormatType.AUTO
if quantized is None:
quantized = format_type != FormatType.FLOAT32
if timeout_ms is None: if timeout_ms is None:
timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS
if queue_size is None: if queue_size is None:
queue_size = DEFAULT_VSTREAM_QUEUE_SIZE queue_size = DEFAULT_VSTREAM_QUEUE_SIZE
name = network_name if network_name is not None else "" name = network_name if network_name is not None else ""
with ExceptionWrapper(): with ExceptionWrapper():
return configured_network._configured_network.make_input_vstream_params(name, quantized, return configured_network._configured_network.make_input_vstream_params(name, format_type, timeout_ms, queue_size)
format_type, timeout_ms, queue_size)
@staticmethod @staticmethod
def make_from_network_group(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None, network_name=None): def make_from_network_group(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None, network_name=None):
@@ -2744,8 +2712,7 @@ class InputVStreamParams(object):
Args: Args:
configured_network (:class:`ConfiguredNetwork`): The configured network group for which configured_network (:class:`ConfiguredNetwork`): The configured network group for which
the params are created. the params are created.
quantized (bool): Deprecated parameter that will be ignored. Determine whether to quantize (scale) quantized: Unused.
the data will be decided by the src-data and dst-data types.
format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The
default format type of the data for all input virtual streams. default format type of the data for all input virtual streams.
The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`,
@@ -2761,7 +2728,8 @@ class InputVStreamParams(object):
dict: The created virtual streams params. The keys are the vstreams names. The values are the dict: The created virtual streams params. The keys are the vstreams names. The values are the
params. params.
""" """
return InputVStreamParams.make(configured_network, quantized, format_type, timeout_ms, queue_size, network_name) return InputVStreamParams.make(configured_network=configured_network, format_type=format_type, timeout_ms=timeout_ms,
queue_size=queue_size, network_name=network_name)
class OutputVStreamParams(object): class OutputVStreamParams(object):
@@ -2775,8 +2743,7 @@ class OutputVStreamParams(object):
Args: Args:
configured_network (:class:`ConfiguredNetwork`): The configured network group for which configured_network (:class:`ConfiguredNetwork`): The configured network group for which
the params are created. the params are created.
quantized (bool): Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) quantized: Unused.
the data will be decided by the src-data and dst-data types.
format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The
default format type of the data for all output virtual streams. default format type of the data for all output virtual streams.
The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`,
@@ -2794,16 +2761,13 @@ class OutputVStreamParams(object):
""" """
if format_type is None: if format_type is None:
format_type = FormatType.AUTO format_type = FormatType.AUTO
if quantized is None:
quantized = format_type != FormatType.FLOAT32
if timeout_ms is None: if timeout_ms is None:
timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS
if queue_size is None: if queue_size is None:
queue_size = DEFAULT_VSTREAM_QUEUE_SIZE queue_size = DEFAULT_VSTREAM_QUEUE_SIZE
name = network_name if network_name is not None else "" name = network_name if network_name is not None else ""
with ExceptionWrapper(): with ExceptionWrapper():
return configured_network._configured_network.make_output_vstream_params(name, quantized, return configured_network._configured_network.make_output_vstream_params(name, format_type, timeout_ms, queue_size)
format_type, timeout_ms, queue_size)
@staticmethod @staticmethod
def make_from_network_group(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None, network_name=None): def make_from_network_group(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None, network_name=None):
@@ -2813,8 +2777,7 @@ class OutputVStreamParams(object):
Args: Args:
configured_network (:class:`ConfiguredNetwork`): The configured network group for which configured_network (:class:`ConfiguredNetwork`): The configured network group for which
the params are created. the params are created.
quantized (bool): Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) quantized: Unused.
the data will be decided by the src-data and dst-data types.
format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The
default format type of the data for all output virtual streams. default format type of the data for all output virtual streams.
The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`,
@@ -2830,7 +2793,8 @@ class OutputVStreamParams(object):
dict: The created virtual streams params. The keys are the vstreams names. The values are the dict: The created virtual streams params. The keys are the vstreams names. The values are the
params. params.
""" """
return OutputVStreamParams.make(configured_network, quantized, format_type, timeout_ms, queue_size, network_name) return OutputVStreamParams.make(configured_network=configured_network, format_type=format_type, timeout_ms=timeout_ms,
queue_size=queue_size, network_name=network_name)
@staticmethod @staticmethod
def make_groups(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None): def make_groups(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None):
@@ -2840,8 +2804,7 @@ class OutputVStreamParams(object):
Args: Args:
configured_network (:class:`ConfiguredNetwork`): The configured network group for which configured_network (:class:`ConfiguredNetwork`): The configured network group for which
the params are created. the params are created.
quantized (bool): Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) quantized: Unused.
the data will be decided by the src-data and dst-data types.
format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The
default format type of the data for all output virtual streams. default format type of the data for all output virtual streams.
The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`,
@@ -2855,7 +2818,7 @@ class OutputVStreamParams(object):
list of dicts: Each element in the list represent a group of params, where the keys are the vstreams names, and the values are the list of dicts: Each element in the list represent a group of params, where the keys are the vstreams names, and the values are the
params. The params groups are splitted with respect to their underlying streams for multi process usges. params. The params groups are splitted with respect to their underlying streams for multi process usges.
""" """
all_params = OutputVStreamParams.make(configured_network, quantized=quantized, format_type=format_type, timeout_ms=timeout_ms, queue_size=queue_size) all_params = OutputVStreamParams.make(configured_network=configured_network, format_type=format_type, timeout_ms=timeout_ms, queue_size=queue_size)
low_level_streams_names = [stream_info.name for stream_info in configured_network.get_output_stream_infos()] low_level_streams_names = [stream_info.name for stream_info in configured_network.get_output_stream_infos()]
stream_name_to_vstream_names = {stream_name: configured_network.get_vstream_names_from_stream_name(stream_name) for stream_name in low_level_streams_names} stream_name_to_vstream_names = {stream_name: configured_network.get_vstream_names_from_stream_name(stream_name) for stream_name in low_level_streams_names}
results = [] results = []
@@ -2994,7 +2957,7 @@ class OutputLayerUtils(object):
if self._is_nms: if self._is_nms:
self._quantized_empty_bbox = numpy.asarray([0] * BBOX_PARAMS, dtype=self.output_dtype) self._quantized_empty_bbox = numpy.asarray([0] * BBOX_PARAMS, dtype=self.output_dtype)
if not (self._user_buffer_format.flags & _pyhailort.FormatFlags.QUANTIZED): if self.output_dtype == numpy.float32:
HailoRTTransformUtils.dequantize_output_buffer_in_place(self._quantized_empty_bbox, self.output_dtype, HailoRTTransformUtils.dequantize_output_buffer_in_place(self._quantized_empty_bbox, self.output_dtype,
BBOX_PARAMS, self._vstream_info.quant_info) BBOX_PARAMS, self._vstream_info.quant_info)

View File

@@ -62,10 +62,8 @@
"network_group_params = network_group.create_params()\n", "network_group_params = network_group.create_params()\n",
"\n", "\n",
"# Create input and output virtual streams params\n", "# Create input and output virtual streams params\n",
"# Quantized argument signifies whether or not the incoming data is already quantized.\n", "input_vstreams_params = InputVStreamParams.make(network_group, format_type=FormatType.FLOAT32)\n",
"# Data is quantized by HailoRT if and only if quantized == False .\n", "output_vstreams_params = OutputVStreamParams.make(network_group, format_type=FormatType.UINT8)\n",
"input_vstreams_params = InputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32)\n",
"output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)\n",
"\n", "\n",
"# Define dataset params\n", "# Define dataset params\n",
"input_vstream_info = hef.get_input_vstream_infos()[0]\n", "input_vstream_info = hef.get_input_vstream_infos()[0]\n",
@@ -108,6 +106,8 @@
"\n", "\n",
"This section shows how to run streaming inference using multiple processes in Python.\n", "This section shows how to run streaming inference using multiple processes in Python.\n",
"\n", "\n",
"Note: This flow is not supported on Windows.\n",
"\n",
"We will not use infer. Instead we will use a send and receive model.\n", "We will not use infer. Instead we will use a send and receive model.\n",
"The send function and the receive function will run in different processes." "The send function and the receive function will run in different processes."
] ]

View File

@@ -5,7 +5,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"\n", "\n",
"# Python inference tutorial - Multi Process Service and Model Scheduler\n", "# Python Inference Tutorial - Multi Process Service and Model Scheduler\n",
"\n", "\n",
"This tutorial will walk you through the inference process using The Model Scheduler.\n", "This tutorial will walk you through the inference process using The Model Scheduler.\n",
"\n", "\n",
@@ -77,10 +77,8 @@
" network_group = network_groups[0]\n", " network_group = network_groups[0]\n",
"\n", "\n",
" # Create input and output virtual streams params\n", " # Create input and output virtual streams params\n",
" # Quantized argument signifies whether or not the incoming data is already quantized.\n", " input_vstreams_params = InputVStreamParams.make(network_group, format_type=FormatType.FLOAT32)\n",
" # Data is quantized by HailoRT if and only if quantized == False.\n", " output_vstreams_params = OutputVStreamParams.make(network_group, format_type=FormatType.UINT8)\n",
" input_vstreams_params = InputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32)\n",
" output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)\n",
"\n", "\n",
" # Define dataset params\n", " # Define dataset params\n",
" input_vstream_info = hef.get_input_vstream_infos()[0]\n", " input_vstream_info = hef.get_input_vstream_infos()[0]\n",

View File

@@ -69,6 +69,6 @@ if __name__ == "__main__":
"linux_aarch64", "linux_aarch64",
], ],
url="https://hailo.ai/", url="https://hailo.ai/",
version="4.15.0", version="4.16.0",
zip_safe=False, zip_safe=False,
) )

View File

@@ -49,7 +49,7 @@ set_target_properties(_pyhailort PROPERTIES
# VISIBILITY_INLINES_HIDDEN YES # VISIBILITY_INLINES_HIDDEN YES
) )
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort) target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort)
if(WIN32) if(WIN32)

View File

@@ -111,22 +111,6 @@ py::list HefWrapper::get_vstream_names_from_stream_name(const std::string &strea
return py::cast(results.release()); return py::cast(results.release());
} }
py::dict HefWrapper::get_input_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type,
uint32_t timeout_ms, uint32_t queue_size)
{
auto result = hef->make_input_vstream_params(name, quantized, format_type, timeout_ms, queue_size);
VALIDATE_EXPECTED(result);
return py::cast(result.value());
}
py::dict HefWrapper::get_output_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type,
uint32_t timeout_ms, uint32_t queue_size)
{
auto result = hef->make_output_vstream_params(name, quantized, format_type, timeout_ms, queue_size);
VALIDATE_EXPECTED(result);
return py::cast(result.value());
}
py::list HefWrapper::get_input_vstream_infos(const std::string &name) py::list HefWrapper::get_input_vstream_infos(const std::string &name)
{ {
auto result = hef->get_input_vstream_infos(name); auto result = hef->get_input_vstream_infos(name);
@@ -215,8 +199,6 @@ void HefWrapper::initialize_python_module(py::module &m)
.def("get_udp_rates_dict", &HefWrapper::get_udp_rates_dict) .def("get_udp_rates_dict", &HefWrapper::get_udp_rates_dict)
.def("create_configure_params", &HefWrapper::create_configure_params) .def("create_configure_params", &HefWrapper::create_configure_params)
.def("create_configure_params_mipi_input", &HefWrapper::create_configure_params_mipi_input) .def("create_configure_params_mipi_input", &HefWrapper::create_configure_params_mipi_input)
.def("get_input_vstreams_params", &HefWrapper::get_input_vstreams_params)
.def("get_output_vstreams_params", &HefWrapper::get_output_vstreams_params)
.def("get_input_vstream_infos", &HefWrapper::get_input_vstream_infos) .def("get_input_vstream_infos", &HefWrapper::get_input_vstream_infos)
.def("get_output_vstream_infos", &HefWrapper::get_output_vstream_infos) .def("get_output_vstream_infos", &HefWrapper::get_output_vstream_infos)
.def("get_all_vstream_infos", &HefWrapper::get_all_vstream_infos) .def("get_all_vstream_infos", &HefWrapper::get_all_vstream_infos)

View File

@@ -44,10 +44,6 @@ public:
std::string get_vstream_name_from_original_name(const std::string &original_name, const std::string &net_group_name); std::string get_vstream_name_from_original_name(const std::string &original_name, const std::string &net_group_name);
py::list get_stream_names_from_vstream_name(const std::string &vstream_name, const std::string &net_group_name); py::list get_stream_names_from_vstream_name(const std::string &vstream_name, const std::string &net_group_name);
py::list get_vstream_names_from_stream_name(const std::string &stream_name, const std::string &net_group_name); py::list get_vstream_names_from_stream_name(const std::string &stream_name, const std::string &net_group_name);
py::dict get_input_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type,
uint32_t timeout_ms, uint32_t queue_size);
py::dict get_output_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type,
uint32_t timeout_ms, uint32_t queue_size);
py::list get_input_vstream_infos(const std::string &name); py::list get_input_vstream_infos(const std::string &name);
py::list get_output_vstream_infos(const std::string &name); py::list get_output_vstream_infos(const std::string &name);
py::list get_all_vstream_infos(const std::string &name); py::list get_all_vstream_infos(const std::string &name);

View File

@@ -223,18 +223,18 @@ public:
return py::cast(result.release()); return py::cast(result.release());
} }
auto make_input_vstream_params(const std::string &name, bool quantized, hailo_format_type_t format_type, auto make_input_vstream_params(const std::string &name, hailo_format_type_t format_type,
uint32_t timeout_ms, uint32_t queue_size) uint32_t timeout_ms, uint32_t queue_size)
{ {
auto result = get().make_input_vstream_params(quantized, format_type, timeout_ms, queue_size, name); auto result = get().make_input_vstream_params({}, format_type, timeout_ms, queue_size, name);
VALIDATE_EXPECTED(result); VALIDATE_EXPECTED(result);
return py::cast(result.release()); return py::cast(result.release());
} }
auto make_output_vstream_params(const std::string &name, bool quantized, hailo_format_type_t format_type, auto make_output_vstream_params(const std::string &name, hailo_format_type_t format_type,
uint32_t timeout_ms, uint32_t queue_size) uint32_t timeout_ms, uint32_t queue_size)
{ {
auto result = get().make_output_vstream_params(quantized, format_type, timeout_ms, queue_size, name); auto result = get().make_output_vstream_params({}, format_type, timeout_ms, queue_size, name);
VALIDATE_EXPECTED(result); VALIDATE_EXPECTED(result);
return py::cast(result.release()); return py::cast(result.release());
} }

View File

@@ -137,6 +137,22 @@ public:
} }
}; };
std::vector<hailo_detection_with_byte_mask_t> convert_nms_with_byte_mask_buffer_to_detections(py::array src_buffer)
{
std::vector<hailo_detection_with_byte_mask_t> detections;
uint8_t *src_ptr = static_cast<uint8_t*>(src_buffer.mutable_data());
uint16_t detections_count = *(uint16_t*)src_ptr;
detections.reserve(detections_count);
size_t buffer_offset = sizeof(uint16_t);
for (size_t i = 0; i < detections_count; i++) {
hailo_detection_with_byte_mask_t detection = *(hailo_detection_with_byte_mask_t*)(src_ptr + buffer_offset);
buffer_offset += sizeof(hailo_detection_with_byte_mask_t) + detection.mask_size;
detections.emplace_back(std::move(detection));
}
return detections;
}
static void validate_versions_match() static void validate_versions_match()
{ {
hailo_version_t libhailort_version = {}; hailo_version_t libhailort_version = {};
@@ -162,6 +178,7 @@ PYBIND11_MODULE(_pyhailort, m) {
validate_versions_match(); validate_versions_match();
m.def("get_status_message", &get_status_message); m.def("get_status_message", &get_status_message);
m.def("convert_nms_with_byte_mask_buffer_to_detections", &convert_nms_with_byte_mask_buffer_to_detections);
m.def("dequantize_output_buffer_in_place", &QuantizationBindings::dequantize_output_buffer_in_place); m.def("dequantize_output_buffer_in_place", &QuantizationBindings::dequantize_output_buffer_in_place);
m.def("dequantize_output_buffer", &QuantizationBindings::dequantize_output_buffer); m.def("dequantize_output_buffer", &QuantizationBindings::dequantize_output_buffer);
m.def("quantize_input_buffer", &QuantizationBindings::quantize_input_buffer); m.def("quantize_input_buffer", &QuantizationBindings::quantize_input_buffer);
@@ -207,12 +224,31 @@ PYBIND11_MODULE(_pyhailort, m) {
.def(py::pickle(&PowerMeasurementData::get_state, &PowerMeasurementData::set_state)) .def(py::pickle(&PowerMeasurementData::get_state, &PowerMeasurementData::set_state))
; ;
py::class_<hailo_rectangle_t>(m, "HailoRectangle")
.def_readonly("y_min", &hailo_rectangle_t::y_min)
.def_readonly("x_min", &hailo_rectangle_t::x_min)
.def_readonly("y_max", &hailo_rectangle_t::y_max)
.def_readonly("x_max", &hailo_rectangle_t::x_max)
;
py::class_<hailo_detection_with_byte_mask_t>(m, "HailoDetectionWithByteMask")
.def_readonly("box", &hailo_detection_with_byte_mask_t::box)
.def_readonly("mask_size", &hailo_detection_with_byte_mask_t::mask_size)
.def_readonly("score", &hailo_detection_with_byte_mask_t::score)
.def_readonly("class_id", &hailo_detection_with_byte_mask_t::class_id)
.def("mask", [](const hailo_detection_with_byte_mask_t &detection) -> py::array {
auto shape = *py::array::ShapeContainer({detection.mask_size});
return py::array(py::dtype("uint8"), shape, detection.mask);
})
;
py::enum_<hailo_device_architecture_t>(m, "DeviceArchitecture") py::enum_<hailo_device_architecture_t>(m, "DeviceArchitecture")
.value("HAILO8_A0", HAILO_ARCH_HAILO8_A0) .value("HAILO8_A0", HAILO_ARCH_HAILO8_A0)
.value("HAILO8", HAILO_ARCH_HAILO8) .value("HAILO8", HAILO_ARCH_HAILO8)
.value("HAILO8L", HAILO_ARCH_HAILO8L) .value("HAILO8L", HAILO_ARCH_HAILO8L)
.value("HAILO15H", HAILO_ARCH_HAILO15H) .value("HAILO15H", HAILO_ARCH_HAILO15H)
.value("PLUTO", HAILO_ARCH_PLUTO) .value("PLUTO", HAILO_ARCH_PLUTO)
.value("HAILO15M", HAILO_ARCH_HAILO15M)
; ;
/* TODO: SDK-15648 */ /* TODO: SDK-15648 */
@@ -524,7 +560,6 @@ PYBIND11_MODULE(_pyhailort, m) {
py::enum_<hailo_format_flags_t>(m, "FormatFlags", py::arithmetic()) py::enum_<hailo_format_flags_t>(m, "FormatFlags", py::arithmetic())
.value("NONE", HAILO_FORMAT_FLAGS_NONE) .value("NONE", HAILO_FORMAT_FLAGS_NONE)
.value("QUANTIZED", HAILO_FORMAT_FLAGS_QUANTIZED)
.value("TRANSPOSED", HAILO_FORMAT_FLAGS_TRANSPOSED) .value("TRANSPOSED", HAILO_FORMAT_FLAGS_TRANSPOSED)
.value("HOST_ARGMAX", HAILO_FORMAT_FLAGS_HOST_ARGMAX) .value("HOST_ARGMAX", HAILO_FORMAT_FLAGS_HOST_ARGMAX)
; ;

View File

@@ -1,44 +0,0 @@
- name: linux.x86_64
required_packages:
- gcc
- g++
python_versions:
- version: '3.8'
installation: deb
package_name: python3.8-dev
- version: '3.9'
installation: deb
package_name: python3.9-dev
- version: '3.10'
installation: deb
package_name: python3.10-dev
- name: linux.aarch64
required_packages:
- gcc-aarch64-linux-gnu
- g++-aarch64-linux-gnu
python_versions:
- version: '3.8'
installation: manual
package_name: https://launchpad.net/ubuntu/+source/python3.8/3.8.2-1ubuntu1/+build/18834117/+files/libpython3.8-dev_3.8.2-1ubuntu1_arm64.deb
package_dest: /usr/include/aarch64-linux-gnu
- version: '3.9'
installation: manual
package_name: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa/+files/libpython3.9-dev_3.9.18-1+focal1_arm64.deb
package_dest: /usr/include/aarch64-linux-gnu
- version: '3.10'
installation: manual
package_name: https://launchpadlibrarian.net/569418529/libpython3.10-dev_3.10.0-5_arm64.deb
package_dest: /usr/include/aarch64-linux-gnu
- name: linux.armv7l
required_packages:
- gcc-arm-linux-gnueabi
- g++-arm-linux-gnueabi
- name: linux.armv7lhf
required_packages:
- gcc-arm-linux-gnueabihf
- g++-arm-linux-gnueabihf
- name: linux.android28-arm64-v8a
android_ndk:
version_name: "android-ndk-r21d"
file: "https://dl.google.com/android/repository/android-ndk-r21d-linux-x86_64.zip"
- name: windows.x86_64

View File

@@ -54,8 +54,11 @@ The following examples are provided, demonstrating the HailoRT API:
- The main thread will stop the async operations and the threads by deactivating the network group. - The main thread will stop the async operations and the threads by deactivating the network group.
- `multi_process_example` - Demonstrates how to work with HailoRT multi-process service and using the HailoRT Model Scheduler for network groups switching. - `multi_process_example` - Demonstrates how to work with HailoRT multi-process service and using the HailoRT Model Scheduler for network groups switching.
Using the script `multi_process_example.sh` / `multi_process_example.ps1` one can specify the number of processes to run each hef, see `multi_process_example.sh -h` / `multi_process_example.ps1 -h` for more information. Using the script `multi_process_example.sh` / `multi_process_example.ps1` one can specify the number of processes to run each hef, see `multi_process_example.sh -h` / `multi_process_example.ps1 -h` for more information.
- For Windows, in case of restricted execution policy, either change the policy, or run the script with "PowerShell -NoProfile -ExecutionPolicy Bypass -File <FilePath>"
- `notification_callback_example` - Demonstrates how to work with notification callbacks, same as `notification_callback_example` C example. - `notification_callback_example` - Demonstrates how to work with notification callbacks, same as `notification_callback_example` C example.
You can find more details about each example in the HailoRT user guide. You can find more details about each example in the HailoRT user guide.
- `async_infer_example` - Basic asynchronous inference of a shortcut network, uses HailoRT C++ api.
- `async_infer_functionality_example` - More advanced asynchronous inference of a multiple input and output model, uses HailoRT C++ api.
## Compiling with CMake ## Compiling with CMake
Examples are configured and compiled using the following commands: Examples are configured and compiled using the following commands:
```sh ```sh

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C)

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C)

View File

@@ -101,6 +101,7 @@ int main(int argc, char **argv)
hailo_activated_network_group activated_network_group = NULL; hailo_activated_network_group activated_network_group = NULL;
size_t vstreams_infos_size = MAX_EDGE_LAYERS; size_t vstreams_infos_size = MAX_EDGE_LAYERS;
hailo_vstream_info_t vstreams_infos[MAX_EDGE_LAYERS] = {0}; hailo_vstream_info_t vstreams_infos[MAX_EDGE_LAYERS] = {0};
bool unused = {0};
parse_arguments(argc, argv, &interface_name); parse_arguments(argc, argv, &interface_name);
@@ -123,11 +124,11 @@ int main(int argc, char **argv)
REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"Invalid network group size"); "Invalid network group size");
status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
input_vstream_params, &input_vstreams_size); input_vstream_params, &input_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params");
status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
output_vstream_params, &output_vstreams_size); output_vstream_params, &output_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params");

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C)

View File

@@ -143,6 +143,7 @@ int main()
size_t output_vstreams_size = MAX_EDGE_LAYERS; size_t output_vstreams_size = MAX_EDGE_LAYERS;
hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL}; hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL};
hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL}; hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL};
bool unused = {0};
status = hailo_scan_devices(NULL, device_ids, &actual_count); status = hailo_scan_devices(NULL, device_ids, &actual_count);
REQUIRE_SUCCESS(status, l_exit, "Failed to scan devices"); REQUIRE_SUCCESS(status, l_exit, "Failed to scan devices");
@@ -172,11 +173,11 @@ int main()
REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"Invalid network group size"); "Invalid network group size");
status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
input_vstream_params, &input_vstreams_size); input_vstream_params, &input_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params");
status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
output_vstream_params, &output_vstreams_size); output_vstream_params, &output_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params");

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C)

View File

@@ -167,6 +167,7 @@ int main()
hailo_input_vstream input_vstreams[NET_COUNT][MAX_EDGE_LAYERS]; hailo_input_vstream input_vstreams[NET_COUNT][MAX_EDGE_LAYERS];
hailo_output_vstream output_vstreams[NET_COUNT][MAX_EDGE_LAYERS]; hailo_output_vstream output_vstreams[NET_COUNT][MAX_EDGE_LAYERS];
uint16_t batch_size[NET_COUNT] = {FIRST_NET_BATCH_SIZE, SECOND_NET_BATCH_SIZE}; uint16_t batch_size[NET_COUNT] = {FIRST_NET_BATCH_SIZE, SECOND_NET_BATCH_SIZE};
bool unused = {0};
status = hailo_init_vdevice_params(&params); status = hailo_init_vdevice_params(&params);
REQUIRE_SUCCESS(status, l_exit, "Failed init vdevice_params"); REQUIRE_SUCCESS(status, l_exit, "Failed init vdevice_params");
@@ -201,11 +202,11 @@ int main()
/* Build vstream params per network */ /* Build vstream params per network */
for (uint8_t network_index = 0; network_index < NET_COUNT; network_index++) { for (uint8_t network_index = 0; network_index < NET_COUNT; network_index++) {
status = hailo_hef_make_input_vstream_params(hef, network_info[network_index].name, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_hef_make_input_vstream_params(hef, network_info[network_index].name, unused, HAILO_FORMAT_TYPE_AUTO,
input_vstream_params[network_index], &input_vstreams_size[network_index]); input_vstream_params[network_index], &input_vstreams_size[network_index]);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params");
status = hailo_hef_make_output_vstream_params(hef, network_info[network_index].name, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_hef_make_output_vstream_params(hef, network_info[network_index].name, unused, HAILO_FORMAT_TYPE_AUTO,
output_vstream_params[network_index], &output_vstreams_size[network_index]); output_vstream_params[network_index], &output_vstreams_size[network_index]);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params");

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C)

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C)

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C)

View File

@@ -50,7 +50,7 @@ static void output_done_callback(const hailo_stream_read_async_completion_info_t
// Real applications can forward the buffer to post-process/display. Here we just re-launch new async reads. // Real applications can forward the buffer to post-process/display. Here we just re-launch new async reads.
status = hailo_stream_read_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, status = hailo_stream_read_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size,
output_done_callback, stream); output_done_callback, stream);
if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) {
fprintf(stderr, "Failed read async with status=%d\n", status); fprintf(stderr, "Failed read async with status=%d\n", status);
} }
break; break;
@@ -73,7 +73,7 @@ static void input_done_callback(const hailo_stream_write_async_completion_info_t
// new async writes. // new async writes.
status = hailo_stream_write_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, status = hailo_stream_write_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size,
input_done_callback, stream); input_done_callback, stream);
if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) {
fprintf(stderr, "Failed write async with status=%d\n", status); fprintf(stderr, "Failed write async with status=%d\n", status);
} }
break; break;
@@ -90,7 +90,6 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n
size_t ongoing_transfers) size_t ongoing_transfers)
{ {
hailo_status status = HAILO_UNINITIALIZED; hailo_status status = HAILO_UNINITIALIZED;
hailo_activated_network_group activated_network_group = NULL;
size_t i = 0; size_t i = 0;
size_t frame_index = 0; size_t frame_index = 0;
size_t frame_size = 0; size_t frame_size = 0;
@@ -99,9 +98,6 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n
void *buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; void *buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0};
size_t allocated_buffers = 0; size_t allocated_buffers = 0;
status = hailo_activate_network_group(network_group, NULL, &activated_network_group);
REQUIRE_SUCCESS(status, l_exit, "Failed activate network group status=%d", status);
// We launch "ongoing_transfers" async operations for both input and output streams. On each async callback, we launch // We launch "ongoing_transfers" async operations for both input and output streams. On each async callback, we launch
// some new operation with the same buffer. // some new operation with the same buffer.
for (stream_index = 0; stream_index < number_output_streams; stream_index++) { for (stream_index = 0; stream_index < number_output_streams; stream_index++) {
@@ -111,12 +107,12 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n
for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) { for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) {
// Buffers read from async operation must be page aligned. // Buffers read from async operation must be page aligned.
current_buffer = page_aligned_alloc(frame_size); current_buffer = page_aligned_alloc(frame_size);
REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed"); REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed");
buffers[allocated_buffers++] = current_buffer; buffers[allocated_buffers++] = current_buffer;
status = hailo_stream_read_raw_buffer_async(output_streams[stream_index], current_buffer, frame_size, status = hailo_stream_read_raw_buffer_async(output_streams[stream_index], current_buffer, frame_size,
output_done_callback, output_streams[stream_index]); output_done_callback, output_streams[stream_index]);
REQUIRE_SUCCESS(status, l_deactivate, "Failed read async with status=%d", status); REQUIRE_SUCCESS(status, l_shutdown, "Failed read async with status=%d", status);
} }
} }
@@ -127,28 +123,27 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n
for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) { for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) {
// Buffers written to async operation must be page aligned. // Buffers written to async operation must be page aligned.
current_buffer = page_aligned_alloc(frame_size); current_buffer = page_aligned_alloc(frame_size);
REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed"); REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed");
buffers[allocated_buffers++] = current_buffer; buffers[allocated_buffers++] = current_buffer;
status = hailo_stream_write_raw_buffer_async(input_streams[stream_index], current_buffer, frame_size, status = hailo_stream_write_raw_buffer_async(input_streams[stream_index], current_buffer, frame_size,
input_done_callback, input_streams[stream_index]); input_done_callback, input_streams[stream_index]);
REQUIRE_SUCCESS(status, l_deactivate, "Failed write async with status=%d", status); REQUIRE_SUCCESS(status, l_shutdown, "Failed write async with status=%d", status);
} }
} }
// After all async operations are launched, the inference will continue until we deactivate the network. // After all async operations are launched, the inference will continue until we shutdown the network.
hailo_sleep(INFER_TIME_SECONDS); hailo_sleep(INFER_TIME_SECONDS);
status = HAILO_SUCCESS; status = HAILO_SUCCESS;
l_deactivate: l_shutdown:
// Calling hailo_deactivate_network_group will make sure that all async operations are done. All pending async I/O // Calling hailo_shutdown_network_group will ensure that all async operations are done. All pending async I/O
// operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORTED_BY_USER. // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORTED_BY_USER.
(void) hailo_deactivate_network_group(activated_network_group); (void) hailo_shutdown_network_group(network_group);
// There are no async I/O operations ongoing so it is safe to free the buffers now. // There are no async I/O operations ongoing so it is safe to free the buffers now.
for (i = 0; i < allocated_buffers; i++) page_aligned_free(buffers[i], frame_size); for (i = 0; i < allocated_buffers; i++) page_aligned_free(buffers[i], frame_size);
l_exit:
return status; return status;
} }
@@ -200,6 +195,7 @@ int main()
size_t index = 0; size_t index = 0;
size_t queue_size = 0; size_t queue_size = 0;
size_t ongoing_transfers = MAX_ONGOING_TRANSFERS; size_t ongoing_transfers = MAX_ONGOING_TRANSFERS;
hailo_activated_network_group activated_network_group = NULL;
// Create device object. // Create device object.
status = hailo_create_device_by_id(NULL, &device); status = hailo_create_device_by_id(NULL, &device);
@@ -238,14 +234,20 @@ int main()
ongoing_transfers = MIN(queue_size, ongoing_transfers); ongoing_transfers = MIN(queue_size, ongoing_transfers);
} }
// Activate network group
status = hailo_activate_network_group(network_group, NULL, &activated_network_group);
REQUIRE_SUCCESS(status, l_release_device, "Failed activate network group");
// Run infer. // Run infer.
status = infer(network_group, number_input_streams, input_streams, number_output_streams, output_streams, status = infer(network_group, number_input_streams, input_streams, number_output_streams, output_streams,
ongoing_transfers); ongoing_transfers);
REQUIRE_SUCCESS(status, l_release_device, "Failed performing inference"); REQUIRE_SUCCESS(status, l_deactivate, "Failed performing inference");
status = HAILO_SUCCESS; status = HAILO_SUCCESS;
printf("Inference ran successfully\n"); printf("Inference ran successfully\n");
l_deactivate:
(void) hailo_deactivate_network_group(activated_network_group);
l_release_device: l_release_device:
(void) hailo_release_device(device); (void) hailo_release_device(device);
l_exit: l_exit:

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C)

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C)

View File

@@ -102,17 +102,18 @@ hailo_status build_vstreams(hailo_configured_network_group network_group,
hailo_status status = HAILO_UNINITIALIZED; hailo_status status = HAILO_UNINITIALIZED;
hailo_input_vstream_params_by_name_t input_vstream_params[MAX_EDGE_LAYERS]; hailo_input_vstream_params_by_name_t input_vstream_params[MAX_EDGE_LAYERS];
hailo_output_vstream_params_by_name_t output_vstream_params[MAX_EDGE_LAYERS]; hailo_output_vstream_params_by_name_t output_vstream_params[MAX_EDGE_LAYERS];
bool unused = {0};
// Make sure it can hold amount of vstreams for hailo_make_input/output_vstream_params // Make sure it can hold amount of vstreams for hailo_make_input/output_vstream_params
size_t input_vstream_size = MAX_EDGE_LAYERS; size_t input_vstream_size = MAX_EDGE_LAYERS;
size_t output_vstream_size = MAX_EDGE_LAYERS; size_t output_vstream_size = MAX_EDGE_LAYERS;
status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
input_vstream_params, &input_vstream_size); input_vstream_params, &input_vstream_size);
REQUIRE_SUCCESS(status, l_exit, "Failed making input virtual stream params"); REQUIRE_SUCCESS(status, l_exit, "Failed making input virtual stream params");
*num_input_vstreams = input_vstream_size; *num_input_vstreams = input_vstream_size;
status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
output_vstream_params, &output_vstream_size); output_vstream_params, &output_vstream_size);
REQUIRE_SUCCESS(status, l_exit, "Failed making output virtual stream params"); REQUIRE_SUCCESS(status, l_exit, "Failed making output virtual stream params");
*num_output_vstreams = output_vstream_size; *num_output_vstreams = output_vstream_size;

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C)

View File

@@ -176,6 +176,7 @@ int main()
hailo_thread output_vstream_thread = {0}; hailo_thread output_vstream_thread = {0};
input_vstream_thread_args_t input_args = {0}; input_vstream_thread_args_t input_args = {0};
output_vstream_thread_args_t output_args = {0}; output_vstream_thread_args_t output_args = {0};
bool unused = {0};
char HEF_FILES[HEF_COUNT][250] = {"hefs/shortcut_net.hef","hefs/shortcut_net.hef"}; char HEF_FILES[HEF_COUNT][250] = {"hefs/shortcut_net.hef","hefs/shortcut_net.hef"};
@@ -201,13 +202,13 @@ int main()
"Unexpected network group size"); "Unexpected network group size");
// Mae sure each hef is single input single output // Mae sure each hef is single input single output
status = hailo_make_input_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_input_vstream_params(network_groups[hef_index], unused, HAILO_FORMAT_TYPE_AUTO,
&input_vstream_params[hef_index], &input_vstream_size); &input_vstream_params[hef_index], &input_vstream_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params");
REQUIRE_ACTION(input_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, REQUIRE_ACTION(input_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"INVALID HEF - Only hefs with single input vstream are allowed"); "INVALID HEF - Only hefs with single input vstream are allowed");
status = hailo_make_output_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO, status = hailo_make_output_vstream_params(network_groups[hef_index], unused, HAILO_FORMAT_TYPE_AUTO,
&output_vstream_params[hef_index], &output_vstream_size); &output_vstream_params[hef_index], &output_vstream_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params");
REQUIRE_ACTION(output_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, REQUIRE_ACTION(output_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C) SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C)

View File

@@ -137,7 +137,7 @@ int main()
size_t output_vstreams_size = MAX_EDGE_LAYERS; size_t output_vstreams_size = MAX_EDGE_LAYERS;
hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL}; hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL};
hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL}; hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL};
bool quantized = true; bool unused = {0};
status = hailo_create_vdevice(NULL, &vdevice); status = hailo_create_vdevice(NULL, &vdevice);
REQUIRE_SUCCESS(status, l_exit, "Failed to create vdevice"); REQUIRE_SUCCESS(status, l_exit, "Failed to create vdevice");
@@ -154,9 +154,8 @@ int main()
"Invalid network group size"); "Invalid network group size");
// Set input format type to auto, and mark the data as quantized - libhailort will not scale the data before writing to the HW // Set input format type to auto - libhailort will not scale the data before writing to the HW
quantized = true; status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
status = hailo_make_input_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_AUTO,
input_vstream_params, &input_vstreams_size); input_vstream_params, &input_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params");
@@ -166,10 +165,9 @@ int main()
input_vstream_params[i].params.user_buffer_format.order = HAILO_FORMAT_ORDER_NCHW; input_vstream_params[i].params.user_buffer_format.order = HAILO_FORMAT_ORDER_NCHW;
} }
// Set output format type to float32, and mark the data as not quantized - libhailort will de-quantize the data after reading from the HW // Set output format type to float32 - libhailort will de-quantize the data after reading from the HW
// Note: this process might affect the overall performance // Note: this process might affect the overall performance
quantized = false; status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_FLOAT32,
status = hailo_make_output_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_FLOAT32,
output_vstream_params, &output_vstreams_size); output_vstream_params, &output_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params");

View File

@@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.0.0)
add_subdirectory(vstreams_example) add_subdirectory(vstreams_example)
add_subdirectory(infer_pipeline_example) add_subdirectory(infer_pipeline_example)
add_subdirectory(async_infer_example)
add_subdirectory(async_infer_functionality_example)
add_subdirectory(raw_streams_example) add_subdirectory(raw_streams_example)
add_subdirectory(multi_network_vstream_example) add_subdirectory(multi_network_vstream_example)
add_subdirectory(switch_network_groups_example) add_subdirectory(switch_network_groups_example)
@@ -15,6 +17,8 @@ add_subdirectory(notification_callback_example)
set(CPP_EXAMPLE_TARGETS set(CPP_EXAMPLE_TARGETS
cpp_vstreams_example cpp_vstreams_example
cpp_infer_pipeline_example cpp_infer_pipeline_example
cpp_async_infer_example
cpp_async_infer_functionality_example
cpp_raw_streams_example cpp_raw_streams_example
cpp_multi_network_vstream_example cpp_multi_network_vstream_example
cpp_switch_network_groups_example cpp_switch_network_groups_example

View File

@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.0.0)
find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_async_infer_example async_infer_example.cpp)
target_link_libraries(cpp_async_infer_example PRIVATE HailoRT::libhailort)
if(WIN32)
target_compile_options(cpp_async_infer_example PRIVATE
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own)
/wd4201 /wd4251
)
endif()
set_target_properties(cpp_async_infer_example PROPERTIES CXX_STANDARD 14)

View File

@@ -0,0 +1,93 @@
/**
* Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file async_infer_example.cpp
* This example demonstrates the Async Infer API usage and assumes the model has only one input and output.
**/
#include "hailo/hailort.hpp"
#include <iostream>
#if defined(__unix__)
#include <sys/mman.h>
#endif
#define HEF_FILE ("hefs/shortcut_net.hef")
using namespace hailort;
static std::shared_ptr<uint8_t> page_aligned_alloc(size_t size)
{
#if defined(__unix__)
auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (MAP_FAILED == addr) throw std::bad_alloc();
return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [size](void *addr) { munmap(addr, size); });
#elif defined(_MSC_VER)
auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if (!addr) throw std::bad_alloc();
return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); });
#else
#pragma error("Aligned alloc not supported")
#endif
}
int main()
{
auto vdevice = VDevice::create();
if (!vdevice) {
std::cerr << "Failed create vdevice, status = " << vdevice.status() << std::endl;
return vdevice.status();
}
auto infer_model_exp = vdevice.value()->create_infer_model(HEF_FILE);
if (!infer_model_exp) {
std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl;
return infer_model_exp.status();
}
auto infer_model = infer_model_exp.release();
auto configured_infer_model = infer_model->configure();
if (!configured_infer_model) {
std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl;
return configured_infer_model.status();
}
auto bindings = configured_infer_model->create_bindings();
if (!bindings) {
std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl;
return bindings.status();
}
size_t input_frame_size = infer_model->input()->get_frame_size();
auto input_buffer = page_aligned_alloc(input_frame_size);
auto status = bindings->input()->set_buffer(MemoryView(input_buffer.get(), input_frame_size));
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to set infer input buffer, status = " << status << std::endl;
return status;
}
size_t output_frame_size = infer_model->output()->get_frame_size();
auto output_buffer = page_aligned_alloc(output_frame_size);
status = bindings->output()->set_buffer(MemoryView(output_buffer.get(), output_frame_size));
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to set infer input buffer, status = " << status << std::endl;
return status;
}
auto job = configured_infer_model->run_async(bindings.value());
if (!job) {
std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl;
return job.status();
}
status = job->wait(std::chrono::milliseconds(1000));
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl;
return status;
}
return HAILO_SUCCESS;
}

View File

@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 3.0.0)
find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_async_infer_functionality_example async_infer_functionality_example.cpp)
target_link_libraries(cpp_async_infer_functionality_example PRIVATE HailoRT::libhailort)
if(WIN32)
target_compile_options(cpp_async_infer_functionality_example PRIVATE
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own)
/wd4201 /wd4251
)
endif()
set_target_properties(cpp_async_infer_functionality_example PROPERTIES CXX_STANDARD 14)

View File

@@ -0,0 +1,129 @@
/**
* Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file async_infer_functionality_example.cpp
* This example demonstrates the Async Infer API usage with a specific model with multiple inputs and outputs
* and changes configutrations of the streams.
**/
#include "hailo/hailort.hpp"
#include <iostream>
#if defined(__unix__)
#include <sys/mman.h>
#endif
#define FRAMES_COUNT (100)
using namespace hailort;
static std::shared_ptr<uint8_t> page_aligned_alloc(size_t size)
{
#if defined(__unix__)
auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (MAP_FAILED == addr) throw std::bad_alloc();
return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [size](void *addr) { munmap(addr, size); });
#elif defined(_MSC_VER)
auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if (!addr) throw std::bad_alloc();
return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); });
#else
#pragma error("Aligned alloc not supported")
#endif
}
int main()
{
auto vdevice = VDevice::create();
if (!vdevice) {
std::cerr << "Failed create vdevice, status = " << vdevice.status() << std::endl;
return vdevice.status();
}
auto infer_model_exp = vdevice.value()->create_infer_model("hefs/multi_network_shortcut_net.hef");
if (!infer_model_exp) {
std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl;
return infer_model_exp.status();
}
auto infer_model = infer_model_exp.release();
infer_model->input("multi_network_shortcut_net_scope1/input_layer_0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32);
infer_model->output("multi_network_shortcut_net_scope1/shortcut0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32);
infer_model->input("multi_network_shortcut_net_scope2/input_layer_1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32);
infer_model->output("multi_network_shortcut_net_scope2/shortcut1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32);
auto configured_infer_model = infer_model->configure();
if (!configured_infer_model) {
std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl;
return configured_infer_model.status();
}
// We store buffers vector here as a guard for the memory. The buffer will be freed only after
// configured_infer_model will be released.
std::vector<std::shared_ptr<uint8_t>> buffer_guards;
auto bindings = configured_infer_model->create_bindings();
if (!bindings) {
std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl;
return bindings.status();
}
for (const auto &input_name : infer_model->get_input_names()) {
size_t input_frame_size = infer_model->input(input_name)->get_frame_size();
auto input_buffer = page_aligned_alloc(input_frame_size);
auto status = bindings->input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size));
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to set infer input buffer, status = " << status << std::endl;
return status;
}
buffer_guards.push_back(input_buffer);
}
for (const auto &output_name : infer_model->get_output_names()) {
size_t output_frame_size = infer_model->output(output_name)->get_frame_size();
auto output_buffer = page_aligned_alloc(output_frame_size);
auto status = bindings->output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size));
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to set infer output buffer, status = " << status << std::endl;
return status;
}
buffer_guards.push_back(output_buffer);
}
AsyncInferJob last_infer_job;
for (uint32_t i = 0; i < FRAMES_COUNT; i++) {
// Waiting for available requests in the pipeline
auto status = configured_infer_model->wait_for_async_ready(std::chrono::milliseconds(1000));
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to wait for async ready, status = " << status << std::endl;
return status;
}
auto job = configured_infer_model->run_async(bindings.value(), [] (const AsyncInferCompletionInfo &/*completion_info*/) {
// Use completion_info to get the job status and the corresponding bindings
});
if (!job) {
std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl;
return job.status();
}
job->detach();
if (i == FRAMES_COUNT - 1) {
last_infer_job = job.release();
}
}
// Wait for last infer to finish
auto status = last_infer_job.wait(std::chrono::milliseconds(1000));
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl;
return status;
}
return HAILO_SUCCESS;
}

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp) add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp)
target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort) target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort)

View File

@@ -109,13 +109,13 @@ int main(int argc, char **argv)
return network_group.status(); return network_group.status();
} }
auto input_params = network_group.value()->make_input_vstream_params(true, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); auto input_params = network_group.value()->make_input_vstream_params({}, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE);
if (!input_params) { if (!input_params) {
std::cerr << "Failed make_input_vstream_params " << input_params.status() << std::endl; std::cerr << "Failed make_input_vstream_params " << input_params.status() << std::endl;
return input_params.status(); return input_params.status();
} }
auto output_params = network_group.value()->make_output_vstream_params(true, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); auto output_params = network_group.value()->make_output_vstream_params({}, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE);
if (!output_params) { if (!output_params) {
std::cerr << "Failed make_output_vstream_params " << output_params.status() << std::endl; std::cerr << "Failed make_output_vstream_params " << output_params.status() << std::endl;
return output_params.status(); return output_params.status();

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_multi_device_example multi_device_example.cpp) add_executable(cpp_multi_device_example multi_device_example.cpp)
target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads) target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads)

View File

@@ -16,7 +16,6 @@
#define HEF_FILE ("hefs/shortcut_net.hef") #define HEF_FILE ("hefs/shortcut_net.hef")
constexpr size_t BATCH_SIZE = 1; constexpr size_t BATCH_SIZE = 1;
constexpr size_t FRAMES_COUNT = 100; constexpr size_t FRAMES_COUNT = 100;
constexpr bool QUANTIZED = true;
constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO;
constexpr size_t MAX_LAYER_EDGES = 16; constexpr size_t MAX_LAYER_EDGES = 16;
@@ -166,7 +165,7 @@ int main()
return network_group.status(); return network_group.status();
} }
auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), QUANTIZED, FORMAT_TYPE); auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), {}, FORMAT_TYPE);
if (!vstreams) { if (!vstreams) {
std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl; std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl;
return vstreams.status(); return vstreams.status();

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp) add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp)
target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads) target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads)

View File

@@ -15,7 +15,6 @@
#define HEF_FILE ("hefs/multi_network_shortcut_net.hef") #define HEF_FILE ("hefs/multi_network_shortcut_net.hef")
constexpr size_t INFER_FRAME_COUNT = 100; constexpr size_t INFER_FRAME_COUNT = 100;
constexpr bool QUANTIZED = true;
constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO;
constexpr size_t MAX_LAYER_EDGES = 16; constexpr size_t MAX_LAYER_EDGES = 16;
constexpr size_t NET_GROUPS_COUNT = 1; constexpr size_t NET_GROUPS_COUNT = 1;
@@ -81,7 +80,7 @@ Expected<std::map<std::string, InOutVStreams>> create_vstreams_per_network(Confi
// Create vstreams for each network // Create vstreams for each network
std::map<std::string, InOutVStreams> networks_vstreams; std::map<std::string, InOutVStreams> networks_vstreams;
for (auto &network_info : networks_infos) { for (auto &network_info : networks_infos) {
auto vstreams = VStreamsBuilder::create_vstreams(net_group, QUANTIZED, FORMAT_TYPE, network_info.name); auto vstreams = VStreamsBuilder::create_vstreams(net_group, {}, FORMAT_TYPE, network_info.name);
if (!vstreams) { if (!vstreams) {
std::cerr << "Failed to create vstreams for network " << network_info.name << std::endl; std::cerr << "Failed to create vstreams for network " << network_info.name << std::endl;
return make_unexpected(vstreams.status()); return make_unexpected(vstreams.status());

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_multi_process_example multi_process_example.cpp) add_executable(cpp_multi_process_example multi_process_example.cpp)
target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads) target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads)

View File

@@ -17,7 +17,6 @@
constexpr size_t FRAMES_COUNT = 100; constexpr size_t FRAMES_COUNT = 100;
constexpr bool QUANTIZED = true;
constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO;
constexpr size_t MAX_LAYER_EDGES = 16; constexpr size_t MAX_LAYER_EDGES = 16;
constexpr uint32_t DEVICE_COUNT = 1; constexpr uint32_t DEVICE_COUNT = 1;
@@ -156,7 +155,7 @@ int main(int argc, char **argv)
return network_group.status(); return network_group.status();
} }
auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), QUANTIZED, FORMAT_TYPE); auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), {}, FORMAT_TYPE);
if (!vstreams) { if (!vstreams) {
std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl; std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl;
return vstreams.status(); return vstreams.status();

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_notification_callback_example notification_callback_example.cpp) add_executable(cpp_notification_callback_example notification_callback_example.cpp)
target_link_libraries(cpp_notification_callback_example PRIVATE HailoRT::libhailort) target_link_libraries(cpp_notification_callback_example PRIVATE HailoRT::libhailort)

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_power_measurement_example power_measurement_example.cpp) add_executable(cpp_power_measurement_example power_measurement_example.cpp)
target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort) target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort)

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp) add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp)
target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads) target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads)

View File

@@ -86,43 +86,21 @@ static void input_async_callback(const InputStream::CompletionInfo &completion_i
} }
} }
int main() static hailo_status infer(ConfiguredNetworkGroup &network_group)
{ {
auto device = Device::create();
if (!device) {
std::cerr << "Failed create device " << device.status() << std::endl;
return EXIT_FAILURE;
}
static const auto HEF_FILE = "hefs/shortcut_net.hef";
auto network_group = configure_network_group(*device.value(), HEF_FILE);
if (!network_group) {
std::cerr << "Failed to configure network group " << HEF_FILE << std::endl;
return EXIT_FAILURE;
}
// Assume one input and output // Assume one input and output
auto &output = network_group->get()->get_output_streams()[0].get(); auto &output = network_group.get_output_streams()[0].get();
auto &input = network_group->get()->get_input_streams()[0].get(); auto &input = network_group.get_input_streams()[0].get();
// Allocate buffers. The buffers sent to the async API must be page aligned. // Allocate buffers. The buffers sent to the async API must be page aligned.
// For simplicity, in this example, we pass one buffer for each stream (It may be problematic in output since the // For simplicity, in this example, we pass one buffer for each stream (It may be problematic in output since the
// buffer will be overridden on each read). // buffer will be overridden on each read).
// Note - the buffers are allocated before we activate the network group. This will ensure that they won't be freed // Note - the buffers can be freed only after all callbacks are called. The user can either wait for all
// until the network group will become inactive. // callbacks, or as done in this example, call ConfiguredNetworkGroup::shutdown that will make sure all callbacks
// are called.
auto output_buffer = page_aligned_alloc(output.get_frame_size()); auto output_buffer = page_aligned_alloc(output.get_frame_size());
auto input_buffer = page_aligned_alloc(input.get_frame_size()); auto input_buffer = page_aligned_alloc(input.get_frame_size());
// The destructor of activated_network_group will make sure that all async operations are done. All pending
// operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER.
// Be sure to capture variables in the callbacks that will be destructed after the activated_network_group.
// Otherwise, the lambda would have access an uninitialized data.
auto activated_network_group = network_group.value()->activate();
if (!activated_network_group) {
std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl;
return EXIT_FAILURE;
}
std::atomic<hailo_status> output_status(HAILO_UNINITIALIZED); std::atomic<hailo_status> output_status(HAILO_UNINITIALIZED);
std::thread output_thread([&]() { std::thread output_thread([&]() {
while (true) { while (true) {
@@ -148,14 +126,47 @@ int main()
// After all async operations are launched, the inference is running. // After all async operations are launched, the inference is running.
std::this_thread::sleep_for(std::chrono::seconds(5)); std::this_thread::sleep_for(std::chrono::seconds(5));
// Make it stop. We explicitly destruct activated_network_group to stop all async I/O. // Calling shutdown on a network group will ensure that all async operations are done. All pending
activated_network_group->reset(); // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER.
// Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async
// callback lambda.
network_group.shutdown();
// Thread should be stopped with HAILO_STREAM_NOT_ACTIVATED status. // Thread should be stopped with HAILO_STREAM_ABORTED_BY_USER status.
output_thread.join(); output_thread.join();
input_thread.join(); input_thread.join();
if ((HAILO_STREAM_NOT_ACTIVATED != output_status) || (HAILO_STREAM_NOT_ACTIVATED != input_status)) {
if ((HAILO_STREAM_ABORTED_BY_USER != output_status) || (HAILO_STREAM_ABORTED_BY_USER != input_status)) {
std::cerr << "Got unexpected statues from thread: " << output_status << ", " << input_status << std::endl; std::cerr << "Got unexpected statues from thread: " << output_status << ", " << input_status << std::endl;
return HAILO_INTERNAL_FAILURE;
}
return HAILO_SUCCESS;
}
int main()
{
auto device = Device::create();
if (!device) {
std::cerr << "Failed create device " << device.status() << std::endl;
return EXIT_FAILURE;
}
static const auto HEF_FILE = "hefs/shortcut_net.hef";
auto network_group = configure_network_group(*device.value(), HEF_FILE);
if (!network_group) {
std::cerr << "Failed to configure network group " << HEF_FILE << std::endl;
return EXIT_FAILURE;
}
auto activated_network_group = network_group.value()->activate();
if (!activated_network_group) {
std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl;
return EXIT_FAILURE;
}
auto status = infer(*network_group.value());
if (HAILO_SUCCESS != status) {
return EXIT_FAILURE; return EXIT_FAILURE;
} }

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp) add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp)
target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads) target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads)

View File

@@ -36,8 +36,12 @@ static AlignedBuffer page_aligned_alloc(size_t size)
#endif #endif
} }
static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &input, OutputStream &output) static hailo_status infer(ConfiguredNetworkGroup &network_group)
{ {
// Assume one input and output
auto &output = network_group.get_output_streams()[0].get();
auto &input = network_group.get_input_streams()[0].get();
auto input_queue_size = input.get_async_max_queue_size(); auto input_queue_size = input.get_async_max_queue_size();
auto output_queue_size = output.get_async_max_queue_size(); auto output_queue_size = output.get_async_max_queue_size();
if (!input_queue_size || !output_queue_size) { if (!input_queue_size || !output_queue_size) {
@@ -45,8 +49,10 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in
return HAILO_INTERNAL_FAILURE; return HAILO_INTERNAL_FAILURE;
} }
// We store buffers vector here as a guard for the memory. The buffer will be freed only after // Allocate buffers. The buffers sent to the async API must be page aligned.
// activated_network_group will be released. // Note - the buffers can be freed only after all callbacks are called. The user can either wait for all
// callbacks, or as done in this example, call ConfiguredNetworkGroup::shutdown that will make sure all callbacks
// are called.
std::vector<AlignedBuffer> buffer_guards; std::vector<AlignedBuffer> buffer_guards;
OutputStream::TransferDoneCallback read_done = [&output, &read_done](const OutputStream::CompletionInfo &completion_info) { OutputStream::TransferDoneCallback read_done = [&output, &read_done](const OutputStream::CompletionInfo &completion_info) {
@@ -55,7 +61,7 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in
case HAILO_SUCCESS: case HAILO_SUCCESS:
// Real applications can forward the buffer to post-process/display. Here we just re-launch new async read. // Real applications can forward the buffer to post-process/display. Here we just re-launch new async read.
status = output.read_async(completion_info.buffer_addr, completion_info.buffer_size, read_done); status = output.read_async(completion_info.buffer_addr, completion_info.buffer_size, read_done);
if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) {
std::cerr << "Failed read async with status=" << status << std::endl; std::cerr << "Failed read async with status=" << status << std::endl;
} }
break; break;
@@ -74,7 +80,7 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in
// Real applications may free the buffer and replace it with new buffer ready to be sent. Here we just // Real applications may free the buffer and replace it with new buffer ready to be sent. Here we just
// re-launch new async write. // re-launch new async write.
status = input.write_async(completion_info.buffer_addr, completion_info.buffer_size, write_done); status = input.write_async(completion_info.buffer_addr, completion_info.buffer_size, write_done);
if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) {
std::cerr << "Failed read async with status=" << status << std::endl; std::cerr << "Failed read async with status=" << status << std::endl;
} }
break; break;
@@ -86,16 +92,6 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in
} }
}; };
// The destructor of activated_network_group will make sure that all async operations are done. All pending
// operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER.
// Be sure to capture variables in the callbacks that will be destructed after the activated_network_group.
// Otherwise, the lambda would have access an uninitialized data.
auto activated_network_group = network_group.activate();
if (!activated_network_group) {
std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl;
return activated_network_group.status();
}
// We launch "*output_queue_size" async read operation. On each async callback, we launch a new async read operation. // We launch "*output_queue_size" async read operation. On each async callback, we launch a new async read operation.
for (size_t i = 0; i < *output_queue_size; i++) { for (size_t i = 0; i < *output_queue_size; i++) {
// Buffers read from async operation must be page aligned. // Buffers read from async operation must be page aligned.
@@ -122,10 +118,14 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in
buffer_guards.emplace_back(buffer); buffer_guards.emplace_back(buffer);
} }
// After all async operations are launched, the inference will continue until the activated_network_group
// destructor is called.
std::this_thread::sleep_for(std::chrono::seconds(5)); std::this_thread::sleep_for(std::chrono::seconds(5));
// Calling shutdown on a network group will ensure that all async operations are done. All pending
// operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER.
// Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async
// callback lambda.
network_group.shutdown();
return HAILO_SUCCESS; return HAILO_SUCCESS;
} }
@@ -167,27 +167,29 @@ int main()
auto device = Device::create(); auto device = Device::create();
if (!device) { if (!device) {
std::cerr << "Failed to create device " << device.status() << std::endl; std::cerr << "Failed to create device " << device.status() << std::endl;
return device.status(); return EXIT_FAILURE;
} }
static const auto HEF_FILE = "hefs/shortcut_net.hef"; static const auto HEF_FILE = "hefs/shortcut_net.hef";
auto network_group = configure_network_group(*device.value(), HEF_FILE); auto network_group = configure_network_group(*device.value(), HEF_FILE);
if (!network_group) { if (!network_group) {
std::cerr << "Failed to configure network group" << HEF_FILE << std::endl; std::cerr << "Failed to configure network group" << HEF_FILE << std::endl;
return network_group.status(); return EXIT_FAILURE;
} }
// Assume one input and output auto activated_network_group = network_group.value()->activate();
auto output = network_group->get()->get_output_streams()[0]; if (!activated_network_group) {
auto input = network_group->get()->get_input_streams()[0]; std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl;
return EXIT_FAILURE;
}
// Now start the inference // Now start the inference
auto status = infer(*network_group.value(), input.get(), output.get()); auto status = infer(*network_group.value());
if (HAILO_SUCCESS != status) { if (HAILO_SUCCESS != status) {
std::cerr << "Inference failed with " << status << std::endl; std::cerr << "Inference failed with " << status << std::endl;
return status; return EXIT_FAILURE;
} }
std::cout << "Inference finished successfully" << std::endl; std::cout << "Inference finished successfully" << std::endl;
return HAILO_SUCCESS; return EXIT_SUCCESS;
} }

View File

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(HailoRT 4.15.0 EXACT REQUIRED) find_package(HailoRT 4.16.0 EXACT REQUIRED)
add_executable(cpp_raw_streams_example raw_streams_example.cpp) add_executable(cpp_raw_streams_example raw_streams_example.cpp)
target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads) target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads)

Some files were not shown because too many files have changed in this diff Show More