This commit is contained in:
HailoRT-Automation
2025-04-01 15:01:01 +03:00
committed by GitHub
parent 301c3c6c9b
commit 0df636dcb6
596 changed files with 8983 additions and 5537 deletions

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -45,7 +45,7 @@ extern "C" {
#define CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__ENGINE_INDEX_SHIFT (5)
#define CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__SET(dst, engine_index, vdma_channel_index) do { \
(dst) = (vdma_channel_index) | ((engine_index) << CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__ENGINE_INDEX_SHIFT);\
(dst) = (uint8_t)((vdma_channel_index) | ((engine_index) << CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__ENGINE_INDEX_SHIFT));\
} while (0)
#define CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__READ(src, engine_index, vdma_channel_index) do {\
@@ -73,7 +73,6 @@ typedef struct {
uint16_t feature_padding_payload;
uint32_t buffer_padding_payload;
uint16_t buffer_padding;
bool is_periph_calculated_in_hailort;
bool is_core_hw_padding_config_in_dfc;
} CONTEXT_SWITCH_DEFS__stream_reg_info_t;
@@ -382,11 +381,12 @@ typedef struct {
} CONTEXT_SWITCH_DEFS__activate_ddr_buffer_output_data_t;
typedef struct {
CONTEXT_SWITCH_DEFS__stream_reg_info_t stream_reg_info;
CONTROL_PROTOCOL__host_buffer_info_t host_buffer_info;
uint16_t batch_size;
uint8_t packed_vdma_channel_id;
uint8_t stream_index;
uint8_t network_index;
CONTEXT_SWITCH_DEFS__stream_reg_info_t stream_reg_info;
CONTROL_PROTOCOL__host_buffer_info_t host_buffer_info;
} CONTEXT_SWITCH_DEFS__activate_cache_output_data_t;
typedef struct {

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -447,7 +447,6 @@ typedef struct {
uint16_t feature_padding_payload;
uint32_t buffer_padding_payload;
uint16_t buffer_padding;
bool is_periph_calculated_in_hailort;
bool is_core_hw_padding_config_in_dfc;
} CONTROL_PROTOCOL__nn_stream_config_t;
@@ -878,15 +877,21 @@ typedef struct {
bool can_fast_batch_switch;
} CONTROL_PROTOCOL__INFER_FEATURE_LIST_t;
typedef struct {
uint8_t packed_vdma_channel_id;
} CONTROL_PROTOCOL__config_channel_info_t;
typedef struct {
uint16_t dynamic_contexts_count;
CONTROL_PROTOCOL__INFER_FEATURE_LIST_t infer_features;
CONTROL_PROTOCOL__VALIDATION_FEATURE_LIST_t validation_features;
uint8_t networks_count;
uint16_t csm_buffer_size;
uint16_t batch_size[CONTROL_PROTOCOL__MAX_NETWORKS_PER_NETWORK_GROUP];
uint16_t batch_size;
uint32_t external_action_list_address;
uint32_t boundary_channels_bitmap[CONTROL_PROTOCOL__MAX_VDMA_ENGINES_COUNT];
uint8_t config_channels_count;
CONTROL_PROTOCOL__config_channel_info_t config_channel_info[CONTROL_PROTOCOL__MAX_CFG_CHANNELS];
} CONTROL_PROTOCOL__application_header_t;
typedef struct {
@@ -1316,13 +1321,23 @@ typedef struct {
} CONTROL_PROTOCOL__hw_infer_channels_info_t;
typedef enum {
CONTROL_PROTOCOL__HW_INFER_STATE_START,
CONTROL_PROTOCOL__HW_INFER_STATE_START,
CONTROL_PROTOCOL__HW_INFER_STATE_STOP,
/* must be last*/
CONTROL_PROTOCOL__HW_INFER_STATE_COUNT
} CONTROL_PROTOCOL__hw_infer_state_t;
typedef enum {
CONTROL_PROTOCOL__DESC_BOUNDARY_CHANNEL,
CONTROL_PROTOCOL__CCB_BOUNDARY_CHANNEL,
/* must be last*/
CONTROL_PROTOCOL__BOUNDARY_CHANNEL_MODE_COUNT
} CONTROL_PROTOCOL__boundary_channel_mode_t;
#define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (6)
typedef struct {
uint32_t hw_infer_state_length;
uint8_t hw_infer_state;
@@ -1334,6 +1349,8 @@ typedef struct {
uint16_t batch_count;
uint32_t channels_info_length;
CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info;
uint32_t boundary_channel_mode_length;
uint8_t boundary_channel_mode;
} CONTROL_PROTOCOL__change_hw_infer_status_request_t;
typedef union {

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -86,13 +86,11 @@ typedef struct {
/* D2H_EVENT_health_monitor_closed_streams_event_message_t should be the same as hailo_health_monitor_dataflow_shutdown_notification_message_t */
typedef struct {
uint32_t closed_input_streams;
uint32_t closed_output_streams;
float32_t ts0_temperature;
float32_t ts1_temperature;
} D2H_EVENT_health_monitor_closed_streams_event_message_t;
#define D2H_EVENT_HEALTH_MONITOR_CLOSED_STREAMS_EVENT_PARAMETER_COUNT (4)
#define D2H_EVENT_HEALTH_MONITOR_CLOSED_STREAMS_EVENT_PARAMETER_COUNT (2)
/* D2H_EVENT_health_monitor_temperature_alarm_event_message_t should be the same as hailo_health_monitor_temperature_alarm_notification_message_t */
typedef struct {

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -19,7 +19,8 @@ extern "C" {
#define FIRMWARE_HEADER_MAGIC_HAILO8 (0x1DD89DE0)
#define FIRMWARE_HEADER_MAGIC_HAILO15 (0xE905DAAB)
#define FIRMWARE_HEADER_MAGIC_HAILO15L (0xF94739AB)
#define FIRMWARE_HEADER_MAGIC_HAILO15L (0xF94739AB)
#define FIRMWARE_HEADER_MAGIC_MARS (0xF94739AB)
typedef enum {
FIRMWARE_HEADER_VERSION_INITIAL = 0,
@@ -31,7 +32,8 @@ typedef enum {
typedef enum {
FIRMWARE_TYPE_HAILO8 = 0,
FIRMWARE_TYPE_HAILO15,
FIRMWARE_TYPE_HAILO15L
FIRMWARE_TYPE_HAILO15L,
FIRMWARE_TYPE_MARS
} firmware_type_t;
@@ -41,6 +43,8 @@ typedef enum {
#define COMPILED_FIRMWARE_TYPE (FIRMWARE_TYPE_HAILO8)
#elif defined(PLUTO)
#define COMPILED_FIRMWARE_TYPE (FIRMWARE_TYPE_HAILO15L)
#elif defined(MARS)
#define COMPILED_FIRMWARE_TYPE (FIRMWARE_TYPE_MARS)
#endif /* MERCURY */
typedef struct {

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -415,6 +415,7 @@ Updating rules:
FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_BATCH_COUNT_LENGTH)\
FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_CACHE_INFO_LENGTH)\
FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_READ_OFFSET_DELTA_LENGTH)\
FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_BOUNDARY_CHANNELS_MODE_LENGTH)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__POWER_MEASUREMENT)\
FIRMWARE_STATUS__X(HAILO_POWER_MEASUREMENT_STATUS_POWER_INIT_ERROR)\
@@ -771,6 +772,9 @@ Updating rules:
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_READ_OFFSET_SIZE)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_SLEEP_TIME)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_SRAM_MEMORY_FULL)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TIMEOUT_LCU)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TIMEOUT_SEQUENCER)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TIMEOUT_CONFIG_DONE)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__D2H_EVENT_MANAGER)\
FIRMWARE_STATUS__X(HAILO_D2H_EVENT_MANAGER_STATUS_MESSAGE_HIGH_PRIORITY_QUEUE_CREATE_FAILED)\
@@ -1062,6 +1066,7 @@ Updating rules:
FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_INVALID_STREAM_INDEX)\
FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_INVALID_CHANNEL_INDEX)\
FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_FAILED_TO_RESET_QM_CREDITS)\
FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_TRIED_USING_BURST_IN_NOT_ENHANCED)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__NN_CORE_SERVICE)\
FIRMWARE_STATUS__X(NN_CORE_SERVICE_STATUS_INVALID_ARG_PASSED)\
@@ -1127,12 +1132,19 @@ Updating rules:
FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_ALREADY_ACTIVATED)\
FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_STATE_MACHINE_NOT_IN_RESET_STATE_BEFORE_DEACTIVATE)\
FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_INVALID_STATE)\
FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_INVALID_BOUNDARY_CHANNEL_MODE)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__INFINITE_CONTEXT_LOADER)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_EVENT_BITS_NOT_CLEARED_BEFORE_COPY_CALL)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_TIMEOUT_OCCURED_WAITING_FOR_COPY)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_SUPPORTED)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_MODULE_NOT_INITIALIZED)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_SEND_FAIL)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_RECEIVE_FAIL)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_FULL)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_FAILED_INIT)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NULL_POINTER)\
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_INVALID_NUM_CONTEXTS)\
typedef enum {
#define FIRMWARE_MODULE__X(module) module,

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -45,7 +45,7 @@ static HAILO_COMMON_STATUS_t firmware_header_utils__validate_fw_header(uintptr_t
switch (firmware_type) {
case FIRMWARE_TYPE_HAILO8:
firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO8;
firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO8;
break;
case FIRMWARE_TYPE_HAILO15:
firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO15;
@@ -53,6 +53,9 @@ static HAILO_COMMON_STATUS_t firmware_header_utils__validate_fw_header(uintptr_t
case FIRMWARE_TYPE_HAILO15L:
firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO15L;
break;
case FIRMWARE_TYPE_MARS:
firmware_magic = FIRMWARE_HEADER_MAGIC_MARS;
break;
default:
status = HAILO_STATUS__FIRMWARE_HEADER_UTILS__INVALID_FIRMWARE_TYPE;
goto exit;

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -29,8 +29,8 @@ endif()
# Set firmware version
add_definitions( -DFIRMWARE_VERSION_MAJOR=4 )
add_definitions( -DFIRMWARE_VERSION_MINOR=20 )
add_definitions( -DFIRMWARE_VERSION_REVISION=1 )
add_definitions( -DFIRMWARE_VERSION_MINOR=21 )
add_definitions( -DFIRMWARE_VERSION_REVISION=0 )
if(HAILO_BUILD_SERVICE)
add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS )
endif()
@@ -58,8 +58,6 @@ set(RPC_DIR ${PROJECT_SOURCE_DIR}/hailort/rpc)
set(HRPC_DIR ${PROJECT_SOURCE_DIR}/hailort/hrpc)
set(HRPC_PROTOCOL_DIR ${PROJECT_SOURCE_DIR}/hailort/hrpc_protocol)
set(HAILORT_SERVICE_DIR ${PROJECT_SOURCE_DIR}/hailort/hailort_service)
set(HAILORT_SERVER_DIR ${PROJECT_SOURCE_DIR}/hailort/hailort_server)
set(HAILORT_LIBUSB_DIR ${PROJECT_SOURCE_DIR}/hailort/internals/libusb-wrapper/)
if(HAILO_BUILD_SERVICE)
add_subdirectory(rpc)
@@ -91,5 +89,3 @@ endif()
if(CMAKE_SYSTEM_NAME STREQUAL QNX)
add_subdirectory(drivers/qnx)
endif()
add_subdirectory(hailort_server)

View File

@@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2020-2022 Hailo Technologies Ltd.
Copyright (c) 2019-2025 Hailo Technologies Ltd.
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of

View File

@@ -6,14 +6,16 @@
| pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 |
| spdlog | Gabi Melman | MIT | 1.14.1 | Cloned entire package | https://github.com/gabime/spdlog |
| folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly |
| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent |
| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue |
| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter |
| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git |
| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 |
| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git |
| grpc | Google Inc. | Apache License 2.0 | 1.54.0 | Cloned entire package | https://github.com/grpc/grpc |
| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb |
| eigen | | Mozilla Public License 2.0 | 3.4.0 | Cloned entire package | https://gitlab.com/libeigen/eigen |
| libusb | | GNU LESSER GENERAL PUBLIC LICENSE | 1.0.27 | Cloned entire package | https://github.com/libusb/libusb.git |
| xxHash | Yann Collet | 2-Clause BSD | 0.8.2 | Cloned entire package, used as a header-only lib | https://github.com/Cyan4973/xxHash |
| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent |
| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue |
| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter |
| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git |
| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 |
| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git |
| grpc | Google Inc. | Apache License 2.0 | 1.54.0 | Cloned entire package | https://github.com/grpc/grpc |
| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb |
| eigen | | Mozilla Public License 2.0 | 3.4.0 | Cloned entire package | https://gitlab.com/libeigen/eigen |
| cpp-httplib | | MIT License | v0.18.2 | Cloned entire package | https://github.com/yhirose/cpp-httplib.git |
| xxHash | Yann Collet | 2-Clause BSD | 0.8.2 | Cloned entire package, used as a header-only lib | https://github.com/Cyan4973/xxHash |
| tokenizers_cpp | mlc-ai | Apache License 2.0 | disable-sentencepiece | Cloned entire package | https://github.com/mlc-ai/tokenizers-cpp.git |
| libnpy | Leon Merten Lohse | MIT License | 1.0.1 | Cloned entire package, used as a header-only lib | https://github.com/llohse/libnpy.git |

View File

@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.11.0)
include(FetchContent)
FetchContent_Declare(
cpp-httplib
GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git
GIT_TAG 51dee793fec2fa70239f5cf190e165b54803880f # v0.18.2
GIT_SHALLOW TRUE
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/cpp-httplib-src
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/cpp-httplib-subbuild
)
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
FetchContent_GetProperties(cpp-httplib)
if(NOT cpp-httplib_POPULATED)
FetchContent_Populate(cpp-httplib)
if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
add_subdirectory(${cpp-httplib_SOURCE_DIR} ${cpp-httplib_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
endif()

23
hailort/cmake/external/libnpy.cmake vendored Normal file
View File

@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.11.0)
include(FetchContent)
FetchContent_Declare(
libnpy
GIT_REPOSITORY https://github.com/llohse/libnpy.git
GIT_TAG 890ea4fcda302a580e633c624c6a63e2a5d422f6 # v1.0.1
GIT_SHALLOW TRUE
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/libnpy-src
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/libnpy-subbuild
)
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
FetchContent_GetProperties(libnpy)
if(NOT libnpy_POPULATED)
FetchContent_Populate(libnpy)
if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
# Add libnpy as a header-only library
add_library(libnpy INTERFACE)
target_include_directories(libnpy INTERFACE ${libnpy_SOURCE_DIR}/include)
endif()
endif()

View File

@@ -1,256 +0,0 @@
cmake_minimum_required(VERSION 3.11.0)
include(FetchContent)
FetchContent_Declare(
libusb
GIT_REPOSITORY https://github.com/libusb/libusb.git
GIT_TAG d52e355daa09f17ce64819122cb067b8a2ee0d4b # Version 1.0.27
GIT_SHALLOW TRUE
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/libusb-src
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/libusb-subbuild
)
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
# Note this cmakeFile is taken from https://github.com/libusb/libusb-cmake and modified to work with our build system
FetchContent_GetProperties(libusb)
if(NOT libusb_POPULATED)
FetchContent_Populate(libusb)
if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
set(LIBUSB_ROOT ${HAILO_EXTERNAL_DIR}/libusb-src/libusb/)
# Get the version information from version.h ignoring the nano version as it appears in version_nano.h and so we need it?
file(READ "${LIBUSB_ROOT}/version.h" VERSIONHEADERDATA)
string(REGEX MATCH "#define LIBUSB_MAJOR ([0-9]*)" _ ${VERSIONHEADERDATA})
set(LIBUSB_VERSION_MAJOR ${CMAKE_MATCH_1})
string(REGEX MATCH "#define LIBUSB_MINOR ([0-9]*)" _ ${VERSIONHEADERDATA})
set(LIBUSB_VERSION_MINOR ${CMAKE_MATCH_1})
string(REGEX MATCH "#define LIBUSB_MICRO ([0-9]*)" _ ${VERSIONHEADERDATA})
set(LIBUSB_VERSION_MICRO ${CMAKE_MATCH_1})
set(LIBUSB_VERSION "${LIBUSB_VERSION_MAJOR}.${LIBUSB_VERSION_MINOR}.${LIBUSB_VERSION_MICRO}")
project(usb-1.0
DESCRIPTION "A cross-platform library to access USB devices"
VERSION ${LIBUSB_VERSION}
LANGUAGES C
)
if(EMSCRIPTEN)
set(CMAKE_CXX_STANDARD 20)
enable_language(CXX)
endif()
# This function generates all the local variables what end up getting written to config.
# We use a function as any vars set in this context don't mess with the rest of the file.
# e.g. Logging LIBUSB_ENABLE_LOGGING mapps to ENABLE_LOGGING in the config, keeps it clean
function(generate_config_file)
include(CheckIncludeFiles)
include(CheckFunctionExists)
include(CheckSymbolExists)
include(CheckStructHasMember)
include(CheckCCompilerFlag)
check_function_exists(clock_gettime HAVE_CLOCK_GETTIME)
check_function_exists(pthread_condattr_setclock HAVE_PTHREAD_CONDATTR_SETCLOCK)
check_function_exists(pthread_setname_np HAVE_PTHREAD_SETNAME_NP)
check_function_exists(pthread_threadid_np HAVE_PTHREAD_THREADID_NP)
check_function_exists(eventfd HAVE_EVENTFD)
check_function_exists(pipe2 HAVE_PIPE2)
check_function_exists(syslog HAVE_SYSLOG)
check_include_files(asm/types.h HAVE_ASM_TYPES_H)
check_include_files(sys/eventfd.h HAVE_EVENTFD)
check_include_files(string.h HAVE_STRING_H)
check_include_files(sys/time.h HAVE_SYS_TIME_H)
check_symbol_exists(timerfd_create "sys/timerfd.h" HAVE_TIMERFD)
check_symbol_exists(nfds_t "poll.h" HAVE_NFDS_T)
check_struct_has_member("struct timespec" tv_sec time.h HAVE_STRUCT_TIMESPEC)
if(HAVE_VISIBILITY)
set(DEFAULT_VISIBILITY "__attribute__((visibility(\"default\")))")
else()
set(DEFAULT_VISIBILITY "" )
endif()
# Set vars that will be written into the config file.
if(WIN32)
set(PLATFORM_WINDOWS 1)
else()
set(PLATFORM_POSIX 1)
endif()
if(LIBUSB_ENABLE_LOGGING)
set(ENABLE_LOGGING ${LIBUSB_ENABLE_LOGGING})
endif()
if(LIBUSB_ENABLE_DEBUG_LOGGING)
set(ENABLE_DEBUG_LOGGING ${LIBUSB_ENABLE_DEBUG_LOGGING})
endif()
if(CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "GNU")
check_c_compiler_flag("-fvisibility=hidden" HAVE_VISIBILITY)
endif()
file(MAKE_DIRECTORY "${LIBUSB_GEN_INCLUDES}")
if(NOT MSVC)
set(_GNU_SOURCE TRUE)
endif()
configure_file("${HAILORT_LIBUSB_DIR}/config.h.in" "${LIBUSB_GEN_INCLUDES}/config.h" @ONLY)
endfunction()
if(BUILD_SHARED_LIBS)
set(LIBUSB_BUILD_SHARED_LIBS_DEFAULT ON)
else()
set(LIBUSB_BUILD_SHARED_LIBS_DEFAULT OFF)
endif()
option(LIBUSB_BUILD_SHARED_LIBS "Build Shared Libraries for libusb" ${LIBUSB_BUILD_SHARED_LIBS_DEFAULT})
option(LIBUSB_BUILD_TESTING "Build Tests" OFF)
if(LIBUSB_BUILD_TESTING)
enable_testing()
endif()
option(LIBUSB_BUILD_EXAMPLES "Build Example Applications" OFF)
option(LIBUSB_INSTALL_TARGETS "Install libusb targets" ON)
option(LIBUSB_TARGETS_INCLUDE_USING_SYSTEM "Make targets include paths System" ON)
option(LIBUSB_ENABLE_LOGGING "Enable Logging" ON)
option(LIBUSB_ENABLE_DEBUG_LOGGING "Enable Debug Logging" OFF)
# Dont use libudev on linux currently
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
option(LIBUSB_ENABLE_UDEV "Enable udev backend for device enumeration" OFF)
endif()
set(LIBUSB_GEN_INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/gen_include")
generate_config_file()
if(LIBUSB_BUILD_SHARED_LIBS)
add_library(usb-1.0 SHARED)
else()
add_library(usb-1.0 STATIC)
endif()
set_target_properties(usb-1.0 PROPERTIES
PREFIX lib # to be consistent with mainline libusb build system(s)
)
# common sources
target_sources(usb-1.0 PRIVATE
"${LIBUSB_GEN_INCLUDES}/config.h"
"${LIBUSB_ROOT}/core.c"
"${LIBUSB_ROOT}/descriptor.c"
"${LIBUSB_ROOT}/hotplug.c"
"${LIBUSB_ROOT}/io.c"
"${LIBUSB_ROOT}/libusb.h"
"${LIBUSB_ROOT}/libusbi.h"
"${LIBUSB_ROOT}/strerror.c"
"${LIBUSB_ROOT}/sync.c"
"${LIBUSB_ROOT}/version.h"
"${LIBUSB_ROOT}/version_nano.h"
)
target_include_directories(usb-1.0
PRIVATE
"${LIBUSB_GEN_INCLUDES}"
"${LIBUSB_ROOT}/os"
)
if (LIBUSB_TARGETS_INCLUDE_USING_SYSTEM)
target_include_directories(usb-1.0 SYSTEM PUBLIC "${LIBUSB_ROOT}")
else()
target_include_directories(usb-1.0 PUBLIC "${LIBUSB_ROOT}")
endif()
if(WIN32)
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/libusb-1.0.def"
"${LIBUSB_ROOT}/os/events_windows.c"
"${LIBUSB_ROOT}/os/events_windows.h"
"${LIBUSB_ROOT}/os/threads_windows.c"
"${LIBUSB_ROOT}/os/threads_windows.h"
"${LIBUSB_ROOT}/os/windows_common.c"
"${LIBUSB_ROOT}/os/windows_common.h"
"${LIBUSB_ROOT}/os/windows_usbdk.c"
"${LIBUSB_ROOT}/os/windows_usbdk.h"
"${LIBUSB_ROOT}/os/windows_winusb.c"
"${LIBUSB_ROOT}/os/windows_winusb.h"
$<$<C_COMPILER_ID:MSVC>:${LIBUSB_ROOT}/libusb-1.0.rc>
)
target_compile_definitions(usb-1.0 PRIVATE $<$<C_COMPILER_ID:MSVC>:_CRT_SECURE_NO_WARNINGS=1>)
target_link_libraries(usb-1.0 PRIVATE windowsapp)
else()
# common POSIX/non-Windows sources
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/events_posix.c"
"${LIBUSB_ROOT}/os/events_posix.h"
"${LIBUSB_ROOT}/os/threads_posix.c"
"${LIBUSB_ROOT}/os/threads_posix.h"
)
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/linux_usbfs.c"
"${LIBUSB_ROOT}/os/linux_usbfs.h"
)
if(LIBUSB_ENABLE_UDEV)
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/linux_udev.c"
)
target_link_libraries(usb-1.0 PRIVATE udev)
target_compile_definitions(usb-1.0 PRIVATE HAVE_LIBUDEV=1)
else()
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/linux_netlink.c"
)
endif()
find_package(Threads REQUIRED)
target_link_libraries(usb-1.0 PRIVATE Threads::Threads)
elseif(ANDROID)
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/linux_netlink.c"
"${LIBUSB_ROOT}/os/linux_usbfs.c"
"${LIBUSB_ROOT}/os/linux_usbfs.h"
)
target_link_libraries(usb-1.0 PRIVATE android log)
elseif(APPLE)
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/darwin_usb.c"
"${LIBUSB_ROOT}/os/darwin_usb.h"
)
target_link_libraries(usb-1.0 PRIVATE
"-framework Foundation"
"-framework IOKit"
"-framework Security"
)
elseif(CMAKE_SYSTEM_NAME STREQUAL "NetBSD")
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/netbsd_usb.c"
)
elseif(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD")
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/openbsd_usb.c"
)
elseif(EMSCRIPTEN)
target_sources(usb-1.0 PRIVATE
"${LIBUSB_ROOT}/os/emscripten_webusb.cpp"
)
target_compile_options(usb-1.0 PRIVATE -pthread)
else()
message(FATAL_ERROR "Unsupported target platform: ${CMAKE_SYSTEM_NAME}")
endif()
endif()
if(LIBUSB_BUILD_TESTING)
add_subdirectory(tests)
endif()
if(LIBUSB_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()
if(LIBUSB_INSTALL_TARGETS)
install(TARGETS usb-1.0)
install(FILES "${LIBUSB_ROOT}/libusb.h" DESTINATION "include/libusb-1.0")
endif()
endif()
endif()

72
hailort/cmake/external/tokenizers.cmake vendored Normal file
View File

@@ -0,0 +1,72 @@
cmake_minimum_required(VERSION 3.14)
include(FetchContent)
FetchContent_Declare(
tokenizers
GIT_REPOSITORY https://github.com/mlc-ai/tokenizers-cpp.git
GIT_TAG 125d072f52290fa6d2944b3d72ccc937786ec631 # disable-sentencepiece
# GIT_SHALLOW TRUE
SOURCE_DIR ${HAILO_EXTERNAL_DIR}/tokenizers-src
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/tokenizers-subbuild
)
# https://stackoverflow.com/questions/61499646/cmake-set-variable-readonly-protect-from-override
macro(set_readonly VAR)
# Set the variable itself
set("${VAR}" "${ARGN}")
# Store the variable's value for restore it upon modifications.
set("_${VAR}_readonly_val" "${ARGN}")
# Register a watcher for a variable
variable_watch("${VAR}" readonly_guard)
endmacro()
# Watcher for a variable which emulates readonly property.
macro(readonly_guard VAR access value current_list_file stack)
if ("${access}" STREQUAL "MODIFIED_ACCESS")
message(WARNING "Attempt to change readonly variable '${VAR}'!")
# Restore a value of the variable to the initial one.
set(${VAR} "${_${VAR}_readonly_val}")
endif()
endmacro()
# On kirkstone-builds we have an issue with compiling tokenizers_cpp, so we support getting .a path
option(TOKENIZERS_LIB_PATH "Path to tokenizers_cpp library" "")
option(TOKENIZERS_RUST_LIB_PATH "Path to tokenizers_cpp rust library" "")
option(TOKENIZERS_INCLUDE_DIR "Path to include dir of tokenizers_cpp" "")
if (TOKENIZERS_LIB_PATH AND TOKENIZERS_RUST_LIB_PATH AND TOKENIZERS_INCLUDE_DIR)
message(STATUS "Will link against given tokenizers: ${TOKENIZERS_LIB_PATH}")
message(STATUS "Will link against given tokenizers rust: ${TOKENIZERS_RUST_LIB_PATH}")
message(STATUS "Will include given include dir: ${TOKENIZERS_INCLUDE_DIR}")
# Create an imported target for the static library
add_library(tokenizers_cpp STATIC IMPORTED)
# Set the properties of the imported library
set_target_properties(tokenizers_cpp PROPERTIES
IMPORTED_LOCATION ${TOKENIZERS_LIB_PATH}
INTERFACE_INCLUDE_DIRECTORIES ${TOKENIZERS_INCLUDE_DIR}
)
target_link_libraries(tokenizers_cpp INTERFACE ${TOKENIZERS_RUST_LIB_PATH} dl)
else()
# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
FetchContent_GetProperties(tokenizers)
if(NOT tokenizers_POPULATED)
FetchContent_Populate(tokenizers)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
set_readonly(TOKENIZERS_CPP_CARGO_TARGET x86_64-unknown-linux-gnu)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set_readonly(TOKENIZERS_CPP_CARGO_TARGET aarch64-unknown-linux-gnu)
endif()
set(MLC_ENABLE_SENTENCEPIECE_TOKENIZER OFF) # Disable sentencepiece for reducing binary size
if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
# This step requires cargo to be installed
find_program(CARGO_EXECUTABLE cargo)
if (NOT CARGO_EXECUTABLE)
message(FATAL_ERROR "Cargo is not installed or not found in PATH.")
endif()
add_subdirectory(${tokenizers_SOURCE_DIR} ${tokenizers_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
endif()
endif()

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file buffer_pool.cpp
* @brief Buffer pool implementation
@@ -21,6 +21,33 @@ BasicBufferPool::BasicBufferPool(size_t buffer_size, std::vector<BufferPtr> &&bu
m_free_buffers_queue(std::move(free_buffers_queue))
{}
Expected<BasicBufferPoolPtr> BasicBufferPool::create_shared(size_t buffer_size, size_t buffer_count,
std::function<Expected<Buffer>(size_t)> allocate_func)
{
TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
TRY(auto free_buffers_queue, SpscQueue<BufferPtr>::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT));
std::vector<BufferPtr> buffers;
buffers.reserve(buffer_count);
for (size_t i = 0; i < buffer_count; i++) {
TRY(auto buffer, allocate_func(buffer_size));
auto buffer_ptr = make_shared_nothrow<Buffer>(std::move(buffer));
CHECK_NOT_NULL(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
auto status = free_buffers_queue.enqueue(buffer_ptr);
CHECK_SUCCESS(status);
buffers.emplace_back(buffer_ptr);
}
auto buffer_pool = make_shared_nothrow<BasicBufferPool>(buffer_size, std::move(buffers), std::move(free_buffers_queue), buffer_count);
CHECK_NOT_NULL(buffer_pool, HAILO_OUT_OF_HOST_MEMORY);
return buffer_pool;
}
Expected<BufferPtr> BasicBufferPool::acquire_buffer()
{
TRY_WITH_ACCEPTABLE_STATUS(HAILO_SHUTDOWN_EVENT_SIGNALED, auto buffer,

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file buffer_pool.hpp
* @brief Buffer pool
@@ -21,6 +21,8 @@
namespace hailort
{
class BasicBufferPool;
using BasicBufferPoolPtr = std::shared_ptr<BasicBufferPool>;
// TODO: HRT-12690 - Make other buffer pools to use this as base class
class BasicBufferPool
@@ -28,6 +30,8 @@ class BasicBufferPool
public:
BasicBufferPool(size_t buffer_size, std::vector<BufferPtr> &&buffers,
SpscQueue<BufferPtr> &&m_free_buffers_queue, size_t buffers_count);
static Expected<BasicBufferPoolPtr> create_shared(size_t buffer_size, size_t buffer_count,
std::function<Expected<Buffer>(size_t)> allocate_func);
BasicBufferPool(BasicBufferPool &&) = delete;
BasicBufferPool(const BasicBufferPool &) = delete;
@@ -48,7 +52,82 @@ private:
SpscQueue<BufferPtr> m_free_buffers_queue;
std::mutex m_mutex;
};
using BasicBufferPoolPtr = std::shared_ptr<BasicBufferPool>;
// Generic fixed-size object pool. All `count` objects are created up-front by
// `create_object_func` and recycled through a queue: acquire() hands one out,
// return_to_pool() puts it back. The pool itself holds a reference to every
// object, so object storage lives as long as the pool does.
template<typename T>
class ObjectPool
{
public:
    // Builds a pool of `count` objects; fails with HAILO_OUT_OF_HOST_MEMORY on
    // allocation failure, or with the status returned by `create_object_func`.
    static Expected<std::shared_ptr<ObjectPool<T>>> create_shared(size_t count, std::function<Expected<T>()> create_object_func)
    {
        // Shutdown event unblocks waiters on the queue during teardown.
        TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
        TRY(auto free_objects_queue, SpscQueue<std::shared_ptr<T>>::create(count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT));
        std::vector<std::shared_ptr<T>> objects;
        objects.reserve(count);
        for (size_t i = 0; i < count; i++) {
            TRY(auto object, create_object_func());
            auto object_ptr = make_shared_nothrow<T>(std::move(object));
            CHECK_NOT_NULL(object_ptr, HAILO_OUT_OF_HOST_MEMORY);
            auto status = free_objects_queue.enqueue(object_ptr);
            CHECK_SUCCESS(status);
            objects.emplace_back(object_ptr);
        }
        auto object_pool = make_shared_nothrow<ObjectPool<T>>(std::move(objects), std::move(free_objects_queue), count);
        CHECK_NOT_NULL(object_pool, HAILO_OUT_OF_HOST_MEMORY);
        return object_pool;
    }

    ObjectPool(std::vector<std::shared_ptr<T>> &&objects, SpscQueue<std::shared_ptr<T>> &&free_objects_queue,
        size_t objects_count) :
        m_objects_count(objects_count),
        m_objects(std::move(objects)),
        m_free_objects_queue(std::move(free_objects_queue))
    {}

    // Non-copyable and non-movable: the queue and the objects it references
    // must stay at a stable address.
    ObjectPool(ObjectPool &&) = delete;
    ObjectPool(const ObjectPool &) = delete;
    ObjectPool &operator=(ObjectPool &&) = delete;
    ObjectPool &operator=(const ObjectPool &) = delete;
    virtual ~ObjectPool() = default;

    // Takes an object out of the pool; blocks up to DEFAULT_TRANSFER_TIMEOUT.
    // Propagates HAILO_SHUTDOWN_EVENT_SIGNALED when the pool is shutting down.
    Expected<std::shared_ptr<T>> acquire()
    {
        TRY_WITH_ACCEPTABLE_STATUS(HAILO_SHUTDOWN_EVENT_SIGNALED, auto object,
            m_free_objects_queue.dequeue(DEFAULT_TRANSFER_TIMEOUT));
        return object;
    }

    // Returns an object to the pool.
    // NOTE(review): the mutex serializes enqueuers, presumably because SpscQueue
    // allows only a single producer — confirm; acquire() is left unlocked.
    hailo_status return_to_pool(std::shared_ptr<T> object)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        auto status = m_free_objects_queue.enqueue(object);
        CHECK_SUCCESS(status);
        return HAILO_SUCCESS;
    }

    // Total number of objects owned by the pool (fixed at creation).
    size_t count() const
    {
        return m_objects_count;
    }

    // Approximate number of currently free (not acquired) objects.
    size_t current_count() const
    {
        return m_free_objects_queue.size_approx();
    }

private:
    const size_t m_objects_count;
    std::vector<std::shared_ptr<T>> m_objects;
    SpscQueue<std::shared_ptr<T>> m_free_objects_queue;
    std::mutex m_mutex;
};
// TODO: HRT-12690 - DMA buffer pool is also used in the service - code duplication
class DmaAbleBufferPool : public BasicBufferPool

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -11,6 +11,8 @@
#include "common/device_measurements.hpp"
#include "common/utils.hpp"
#include <algorithm>
using namespace hailort;
constexpr std::chrono::milliseconds DEFAULT_MEASUREMENTS_INTERVAL(100);
@@ -82,10 +84,10 @@ hailo_status TemperatureMeasurement::start_measurement()
break;
}
float32_t ts_avg = ((temp_info->ts0_temperature + temp_info->ts1_temperature) / 2);
float32_t ts_max = std::max(temp_info->ts0_temperature, temp_info->ts1_temperature);
{
std::unique_lock<std::mutex> lock(m_mutex);
m_acc->add_data_point(ts_avg, temp_info->sample_count);
m_acc->add_data_point(ts_max, temp_info->sample_count);
}
std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL);

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -20,6 +20,7 @@ namespace hailort
#define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR")
#define SCHEDULER_MON_ENV_VAR_VALUE ("1")
#define SCHEDULER_MON_TIME_INTERVAL_IN_MILLISECONDS_ENV_VAR ("HAILO_MONITOR_TIME_INTERVAL")
#define TRACE_ENV_VAR ("HAILO_TRACE")
#define TRACE_ENV_VAR_VALUE ("scheduler")

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file event_internal.cpp
* @brief Internal implementation for events, shared between all os.

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file fork_support.cpp
**/

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file fork_support.hpp
* @brief Utilities/classes uses to support fork in the process.

View File

@@ -0,0 +1,132 @@
/**
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file genai_rpc.hpp
 * @brief HailoRT-GenAI protocol declaration
**/
#ifndef _HAILO_COMMON_GENAI_RPC_HPP_
#define _HAILO_COMMON_GENAI_RPC_HPP_
namespace hailort
{
namespace genai
{
// Maximum length of any string carried inline in the RPC structs below.
// Strings are NOT NUL-terminated on the wire; each carries an explicit length field.
static const uint32_t MAX_STRING_SIZE = 128;

// All wire structs are packed so client and server agree on the exact byte layout.
#pragma pack(push, 1)

// Request to instantiate an LLM on the server.
struct LLM_Create_Request {
    char lora_name[MAX_STRING_SIZE];
    size_t lora_name_length;
    bool is_builtin; // If builtin, the next message is the HEF raw buffers
    char group_id[MAX_STRING_SIZE]; // We need 'hailo_vdevice_params_t', but only group-id is relevant
    size_t group_id_length;
};

struct LLM_Create_Reply {
    hailo_status status;
};

// Empty request; the placeholder only gives the struct a non-zero size.
struct LLM_Get_Generator_Default_Params_Request {
    uint8_t placeholder;
};

// Server's default generation parameters (mirrors LLMGeneratorParams).
struct LLM_Get_Generator_Default_Params_Reply {
    float32_t temperature;
    float32_t top_p;
    uint32_t top_k;
    float32_t frequency_penalty;
    uint32_t max_generated_tokens;
    bool do_sample;
    uint32_t seed;
    hailo_status status;
};
// Parameters for creating an LLM generator.
// Field types use float32_t to match LLM_Get_Generator_Default_Params_Reply and
// the rest of this packed wire-protocol header (same layout; `float` was inconsistent).
struct LLM_Generator_Create_Request {
    float32_t temperature;        // sampling temperature
    float32_t top_p;              // nucleus-sampling probability mass
    uint32_t top_k;
    float32_t frequency_penalty;
    uint32_t max_generated_tokens;
    bool do_sample;
    uint32_t seed;
};
struct LLM_Generator_Create_Reply {
    hailo_status status;
};

struct LLM_Generator_Write_Request {
    // Indicates that the next message to the server is the input prompt
    uint8_t placeholder;
};

struct LLM_Generator_Write_Reply {
    hailo_status status;
};

struct LLM_Generator_Generate_Request {
    // Indicates that the server should start generating text
    uint8_t placeholder;
};

struct LLM_Generator_Generate_Reply {
    hailo_status status;
};

struct LLM_Generator_Read_Request {
    // Indicates that the server should write back the next generated token
    uint8_t placeholder;
};

// One generated token per reply. `generation_status` carries an
// LLMGeneratorCompletion::Status value cast to uint32_t.
struct LLM_Generator_Read_Reply {
    hailo_status status;
    char output_token[MAX_STRING_SIZE]; // not NUL-terminated; see output_token_length
    size_t output_token_length;
    uint32_t generation_status;
};

// Discriminator tag for the GenAIRequest / GenAIReply unions below.
enum class HailoGenAIActionID {
    LLM__CREATE = 0,
    LLM__GET_DEFAULT_GENERATOR_PARAMS,
    LLM__GENERATOR_CREATE,
    LLM__GENERATOR_WRITE,
    LLM__GENERATOR_GENERATE,
    LLM__GENERATOR_READ,

    MAX_VALUE = HAILO_MAX_ENUM,
};
// Tagged union for all client->server messages; `type` selects the active member.
struct GenAIRequest {
    HailoGenAIActionID type;
    union {
        LLM_Create_Request llm_create;
        LLM_Get_Generator_Default_Params_Request llm_get_default_generator_params;
        LLM_Generator_Create_Request llm_generator_create;
        LLM_Generator_Write_Request llm_generator_write;
        LLM_Generator_Generate_Request llm_generator_generate;
        LLM_Generator_Read_Request llm_generator_read;
    } data;
};

// Tagged union for all server->client messages; `type` selects the active member.
struct GenAIReply {
    HailoGenAIActionID type;
    union {
        LLM_Create_Reply llm_create;
        LLM_Get_Generator_Default_Params_Reply llm_get_default_generator_params;
        LLM_Generator_Create_Reply llm_generator_create;
        LLM_Generator_Write_Reply llm_generator_write;
        LLM_Generator_Generate_Reply llm_generator_generate;
        LLM_Generator_Read_Reply llm_generator_read;
    } data;
};
#pragma pack(pop)
} // namespace genai
} // namespace hailort
#endif /* _HAILO_COMMON_GENAI_RPC_HPP_ */

View File

@@ -0,0 +1,295 @@
/**
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file serializer.cpp
* @brief HailoRT-GenAI protocol serialization implementation
**/
#include "hailo/genai/common.hpp"
#include "hailo/genai/llm/llm.hpp"
#include "serializer.hpp"
#include "hailo/buffer.hpp"
#include "hailo/hailort.h"
#include "hailo/hailort_common.hpp"
#include "common/utils.hpp"
namespace hailort
{
namespace genai
{
// Serializes an LLM__CREATE request into a packed GenAIRequest buffer.
// Fix: the lora-name and group-id are copied into fixed MAX_STRING_SIZE fields;
// the original copied unconditionally, overflowing the packed struct for long
// strings. Oversized inputs are now rejected with HAILO_INVALID_ARGUMENT.
Expected<Buffer> LLMCreateSerializer::serialize_request(const hailo_vdevice_params_t &vdevice_params, const LLMParams &llm_params)
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma()));
    GenAIRequest *request = buffer.as_pointer<GenAIRequest>();
    request->type = HailoGenAIActionID::LLM__CREATE;

    auto lora = llm_params.lora();
    CHECK(lora.size() <= MAX_STRING_SIZE, HAILO_INVALID_ARGUMENT,
        "LoRA name is too long ({} > {})", lora.size(), MAX_STRING_SIZE);
    std::copy(lora.begin(), lora.end(), request->data.llm_create.lora_name);
    request->data.llm_create.lora_name_length = lora.size();
    request->data.llm_create.is_builtin = (llm_params.hef() == BUILTIN);

    // Only the group-id part of the vdevice params travels over the wire.
    std::string group_id = (nullptr == vdevice_params.group_id) ? "" :
        std::string(vdevice_params.group_id);
    CHECK(group_id.size() <= MAX_STRING_SIZE, HAILO_INVALID_ARGUMENT,
        "group_id is too long ({} > {})", group_id.size(), MAX_STRING_SIZE);
    std::copy(group_id.begin(), group_id.end(), request->data.llm_create.group_id);
    request->data.llm_create.group_id_length = group_id.size();

    return buffer;
}

// Deserializes an LLM__CREATE request.
// Returns (lora_name, model_is_builtin, group_id).
// The peer-supplied length fields are validated before being used to build
// strings, so a malformed message cannot trigger an out-of-bounds read.
Expected<std::tuple<std::string, bool, std::string>> LLMCreateSerializer::deserialize_request(const MemoryView &serialized_request)
{
    const GenAIRequest *request = serialized_request.as_pointer<GenAIRequest>();
    CHECK(request->type == HailoGenAIActionID::LLM__CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__CREATE), static_cast<int>(request->type));
    CHECK(request->data.llm_create.lora_name_length <= MAX_STRING_SIZE, HAILO_INTERNAL_FAILURE,
        "Invalid lora_name length ({})", request->data.llm_create.lora_name_length);
    CHECK(request->data.llm_create.group_id_length <= MAX_STRING_SIZE, HAILO_INTERNAL_FAILURE,
        "Invalid group_id length ({})", request->data.llm_create.group_id_length);

    std::string group_id = (0 == request->data.llm_create.group_id_length) ? "" :
        std::string(request->data.llm_create.group_id, request->data.llm_create.group_id_length);
    return std::tuple<std::string, bool, std::string>(std::string(request->data.llm_create.lora_name, request->data.llm_create.lora_name_length),
        request->data.llm_create.is_builtin, group_id);
}

// Serializes the LLM__CREATE reply (status only).
Expected<Buffer> LLMCreateSerializer::serialize_reply(hailo_status status)
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma()));
    GenAIReply *reply = buffer.as_pointer<GenAIReply>();
    reply->type = HailoGenAIActionID::LLM__CREATE;
    reply->data.llm_create.status = status;
    return buffer;
}

// Deserializes the LLM__CREATE reply, returning the server's status.
hailo_status LLMCreateSerializer::deserialize_reply(const MemoryView &serialized_reply)
{
    const GenAIReply *reply = serialized_reply.as_pointer<GenAIReply>();
    CHECK(reply->type == HailoGenAIActionID::LLM__CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__CREATE), static_cast<int>(reply->type));
    return reply->data.llm_create.status;
}
// Serializes an LLM__GET_DEFAULT_GENERATOR_PARAMS request (header only, no payload).
Expected<Buffer> LLMGetDefaultGeneratorParamsSerializer::serialize_request()
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma()));
    GenAIRequest *request = buffer.as_pointer<GenAIRequest>();
    request->type = HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS;
    return buffer;
}

// Validates that the incoming request carries the expected action id.
hailo_status LLMGetDefaultGeneratorParamsSerializer::deserialize_request(const MemoryView &serialized_request)
{
    const GenAIRequest *request = serialized_request.as_pointer<GenAIRequest>();
    CHECK(request->type == HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS), static_cast<int>(request->type));
    return HAILO_SUCCESS;
}

// Serializes the reply: status plus a field-by-field copy of the default params.
Expected<Buffer> LLMGetDefaultGeneratorParamsSerializer::serialize_reply(const LLMGeneratorParams &default_generator_params, hailo_status status)
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma()));
    GenAIReply *reply = buffer.as_pointer<GenAIReply>();
    reply->type = HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS;
    reply->data.llm_get_default_generator_params.status = status;
    reply->data.llm_get_default_generator_params.temperature = default_generator_params.temperature();
    reply->data.llm_get_default_generator_params.top_p = default_generator_params.top_p();
    reply->data.llm_get_default_generator_params.top_k = default_generator_params.top_k();
    reply->data.llm_get_default_generator_params.frequency_penalty = default_generator_params.frequency_penalty();
    reply->data.llm_get_default_generator_params.max_generated_tokens = default_generator_params.max_generated_tokens();
    reply->data.llm_get_default_generator_params.do_sample = default_generator_params.do_sample();
    reply->data.llm_get_default_generator_params.seed = default_generator_params.seed();
    return buffer;
}

// Deserializes the reply into an LLMGeneratorParams.
// Fails (via CHECK_SUCCESS) if the server reported an error status.
Expected<LLMGeneratorParams> LLMGetDefaultGeneratorParamsSerializer::deserialize_reply(const MemoryView &serialized_reply)
{
    const GenAIReply *reply = serialized_reply.as_pointer<GenAIReply>();
    CHECK(reply->type == HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS), static_cast<int>(reply->type));
    CHECK_SUCCESS(reply->data.llm_get_default_generator_params.status, "Failed to get default generator params");
    LLMGeneratorParams res(reply->data.llm_get_default_generator_params.temperature, reply->data.llm_get_default_generator_params.top_p,
        reply->data.llm_get_default_generator_params.top_k, reply->data.llm_get_default_generator_params.frequency_penalty,
        reply->data.llm_get_default_generator_params.max_generated_tokens, reply->data.llm_get_default_generator_params.do_sample,
        reply->data.llm_get_default_generator_params.seed);
    return res;
}
// Serializes an LLM__GENERATOR_CREATE request carrying the generator params.
Expected<Buffer> LLMGeneratorCreateSerializer::serialize_request(const LLMGeneratorParams &params)
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma()));
    GenAIRequest *request = buffer.as_pointer<GenAIRequest>();
    request->type = HailoGenAIActionID::LLM__GENERATOR_CREATE;
    request->data.llm_generator_create.temperature = params.temperature();
    request->data.llm_generator_create.top_p = params.top_p();
    request->data.llm_generator_create.top_k = params.top_k();
    request->data.llm_generator_create.frequency_penalty = params.frequency_penalty();
    request->data.llm_generator_create.max_generated_tokens = params.max_generated_tokens();
    request->data.llm_generator_create.do_sample = params.do_sample();
    request->data.llm_generator_create.seed = params.seed();
    return buffer;
}

// Deserializes an LLM__GENERATOR_CREATE request back into LLMGeneratorParams.
Expected<LLMGeneratorParams> LLMGeneratorCreateSerializer::deserialize_request(const MemoryView &serialized_request)
{
    const GenAIRequest *request = serialized_request.as_pointer<GenAIRequest>();
    CHECK(request->type == HailoGenAIActionID::LLM__GENERATOR_CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_CREATE), static_cast<int>(request->type));
    LLMGeneratorParams res(request->data.llm_generator_create.temperature, request->data.llm_generator_create.top_p,
        request->data.llm_generator_create.top_k, request->data.llm_generator_create.frequency_penalty,
        request->data.llm_generator_create.max_generated_tokens, request->data.llm_generator_create.do_sample,
        request->data.llm_generator_create.seed);
    return res;
}

// Serializes the LLM__GENERATOR_CREATE reply (status only).
Expected<Buffer> LLMGeneratorCreateSerializer::serialize_reply(hailo_status status)
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma()));
    GenAIReply *reply = buffer.as_pointer<GenAIReply>();
    reply->type = HailoGenAIActionID::LLM__GENERATOR_CREATE;
    reply->data.llm_generator_create.status = status;
    return buffer;
}

// Deserializes the LLM__GENERATOR_CREATE reply, returning the server's status.
hailo_status LLMGeneratorCreateSerializer::deserialize_reply(const MemoryView &serialized_reply)
{
    const GenAIReply *reply = serialized_reply.as_pointer<GenAIReply>();
    CHECK(reply->type == HailoGenAIActionID::LLM__GENERATOR_CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_CREATE), static_cast<int>(reply->type));
    return reply->data.llm_generator_create.status;
}
// Serializes an LLM__GENERATOR_WRITE request. The request itself is only a
// header; the prompt payload is sent as a separate message (see
// LLM_Generator_Write_Request in genai_rpc.hpp).
Expected<Buffer> LLMGeneratorWriteSerializer::serialize_request()
{
    TRY(auto request_buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma()));
    auto *write_request = request_buffer.as_pointer<GenAIRequest>();
    write_request->type = HailoGenAIActionID::LLM__GENERATOR_WRITE;
    return request_buffer;
}

// Validates the action id of an incoming LLM__GENERATOR_WRITE request.
hailo_status LLMGeneratorWriteSerializer::deserialize_request(const MemoryView &serialized_request)
{
    const auto *write_request = serialized_request.as_pointer<GenAIRequest>();
    CHECK(write_request->type == HailoGenAIActionID::LLM__GENERATOR_WRITE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_WRITE), static_cast<int>(write_request->type));
    return HAILO_SUCCESS;
}

// Serializes the LLM__GENERATOR_WRITE reply (status only).
Expected<Buffer> LLMGeneratorWriteSerializer::serialize_reply(hailo_status status)
{
    TRY(auto reply_buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma()));
    auto *write_reply = reply_buffer.as_pointer<GenAIReply>();
    write_reply->data.llm_generator_write.status = status;
    write_reply->type = HailoGenAIActionID::LLM__GENERATOR_WRITE;
    return reply_buffer;
}

// Deserializes the LLM__GENERATOR_WRITE reply, returning the server's status.
hailo_status LLMGeneratorWriteSerializer::deserialize_reply(const MemoryView &serialized_reply)
{
    const auto *write_reply = serialized_reply.as_pointer<GenAIReply>();
    CHECK(write_reply->type == HailoGenAIActionID::LLM__GENERATOR_WRITE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_WRITE), static_cast<int>(write_reply->type));
    return write_reply->data.llm_generator_write.status;
}
// Serializes an LLM__GENERATOR_GENERATE request (header only; tells the server
// to start generating).
Expected<Buffer> LLMGeneratorGenerateSerializer::serialize_request()
{
    TRY(auto request_buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma()));
    auto *generate_request = request_buffer.as_pointer<GenAIRequest>();
    generate_request->type = HailoGenAIActionID::LLM__GENERATOR_GENERATE;
    return request_buffer;
}

// Validates the action id of an incoming LLM__GENERATOR_GENERATE request.
hailo_status LLMGeneratorGenerateSerializer::deserialize_request(const MemoryView &serialized_request)
{
    const auto *generate_request = serialized_request.as_pointer<GenAIRequest>();
    CHECK(generate_request->type == HailoGenAIActionID::LLM__GENERATOR_GENERATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_GENERATE), static_cast<int>(generate_request->type));
    return HAILO_SUCCESS;
}

// Serializes the LLM__GENERATOR_GENERATE reply (status only).
Expected<Buffer> LLMGeneratorGenerateSerializer::serialize_reply(hailo_status status)
{
    TRY(auto reply_buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma()));
    auto *generate_reply = reply_buffer.as_pointer<GenAIReply>();
    generate_reply->data.llm_generator_generate.status = status;
    generate_reply->type = HailoGenAIActionID::LLM__GENERATOR_GENERATE;
    return reply_buffer;
}

// Deserializes the LLM__GENERATOR_GENERATE reply, returning the server's status.
hailo_status LLMGeneratorGenerateSerializer::deserialize_reply(const MemoryView &serialized_reply)
{
    const auto *generate_reply = serialized_reply.as_pointer<GenAIReply>();
    CHECK(generate_reply->type == HailoGenAIActionID::LLM__GENERATOR_GENERATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_GENERATE), static_cast<int>(generate_reply->type));
    return generate_reply->data.llm_generator_generate.status;
}
// Serializes an LLM__GENERATOR_READ request (header only; asks the server for
// the next generated token).
Expected<Buffer> LLMGeneratorReadSerializer::serialize_request()
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma()));
    GenAIRequest *request = buffer.as_pointer<GenAIRequest>();
    request->type = HailoGenAIActionID::LLM__GENERATOR_READ;
    return buffer;
}

// Validates the action id of an incoming LLM__GENERATOR_READ request.
hailo_status LLMGeneratorReadSerializer::deserialize_request(const MemoryView &serialized_request)
{
    const GenAIRequest *request = serialized_request.as_pointer<GenAIRequest>();
    CHECK(request->type == HailoGenAIActionID::LLM__GENERATOR_READ, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_READ), static_cast<int>(request->type));
    return HAILO_SUCCESS;
}

// Serializes the LLM__GENERATOR_READ reply: status, one generated token, and
// the generation status.
// Fix: the token travels in a fixed MAX_STRING_SIZE field; the original copied
// unconditionally, overflowing the packed struct for oversized tokens. Such
// tokens are now rejected with HAILO_INVALID_ARGUMENT.
Expected<Buffer> LLMGeneratorReadSerializer::serialize_reply(hailo_status status, const std::string &output, LLMGeneratorCompletion::Status generation_status)
{
    TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma()));
    GenAIReply *reply = buffer.as_pointer<GenAIReply>();
    reply->type = HailoGenAIActionID::LLM__GENERATOR_READ;
    reply->data.llm_generator_read.status = status;
    CHECK(output.size() <= MAX_STRING_SIZE, HAILO_INVALID_ARGUMENT,
        "Generated token is too long ({} > {})", output.size(), MAX_STRING_SIZE);
    reply->data.llm_generator_read.output_token_length = output.size();
    std::copy(output.begin(), output.end(), reply->data.llm_generator_read.output_token);
    reply->data.llm_generator_read.generation_status = static_cast<uint32_t>(generation_status);
    return buffer;
}

// Deserializes the LLM__GENERATOR_READ reply into (token, generation status).
// The peer-supplied token length is validated before constructing the string,
// so a malformed message cannot trigger an out-of-bounds read.
Expected<std::pair<std::string, LLMGeneratorCompletion::Status>> LLMGeneratorReadSerializer::deserialize_reply(const MemoryView &serialized_reply)
{
    const GenAIReply *reply = serialized_reply.as_pointer<GenAIReply>();
    CHECK(reply->type == HailoGenAIActionID::LLM__GENERATOR_READ, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}",
        static_cast<int>(HailoGenAIActionID::LLM__GENERATOR_READ), static_cast<int>(reply->type));
    CHECK_SUCCESS(reply->data.llm_generator_read.status);
    CHECK(reply->data.llm_generator_read.output_token_length <= MAX_STRING_SIZE, HAILO_INTERNAL_FAILURE,
        "Invalid output token length ({})", reply->data.llm_generator_read.output_token_length);
    return std::make_pair(std::string(reply->data.llm_generator_read.output_token, reply->data.llm_generator_read.output_token_length),
        static_cast<LLMGeneratorCompletion::Status>(reply->data.llm_generator_read.generation_status));
}
} /* namespace genai */
} /* namespace hailort */

View File

@@ -0,0 +1,113 @@
/**
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file serializer.hpp
* @brief HailoRT-GenAI protocol serialization
**/
#ifndef _HAILO_SERIALIZER_HPP_
#define _HAILO_SERIALIZER_HPP_
#include "hailo/hailort.h"
#include "hailo/buffer.hpp"
#include "hailo/expected.hpp"
#include "common/utils.hpp"
#include "hailo/genai/llm/llm.hpp"
#include "genai_rpc.hpp"
namespace hailort
{
namespace genai
{
// TODO: HRT-15919 - Text2Image Serialization
// Packed so both sides of the protocol agree on the wire layout.
#pragma pack(push, 1)
typedef struct {
    uint32_t steps_count;
    uint32_t samples_count;
    float32_t guidance_scale;
    uint32_t seed;
} text2image_generator_params_t;

typedef struct {
    bool has_negative_prompt;
    bool has_ip_adapter;
} text2image_generation_info_t;
#pragma pack(pop)

// Each serializer below is a stateless namespace-like struct (ctor deleted):
// serialize_* builds a wire buffer, deserialize_* parses one.

struct LLMCreateSerializer
{
    LLMCreateSerializer() = delete;
    static Expected<Buffer> serialize_request(const hailo_vdevice_params_t &vdevice_params, const LLMParams &llm_params);
    static Expected<std::tuple<std::string, bool, std::string>> deserialize_request(const MemoryView &serialized_request); // tuple is (lora_name, model_is_builtin, group_id)
    static Expected<Buffer> serialize_reply(hailo_status status);
    static hailo_status deserialize_reply(const MemoryView &serialized_reply);
};

struct LLMGetDefaultGeneratorParamsSerializer
{
    LLMGetDefaultGeneratorParamsSerializer() = delete;
    static Expected<Buffer> serialize_request();
    static hailo_status deserialize_request(const MemoryView &serialized_request);
    static Expected<Buffer> serialize_reply(const LLMGeneratorParams &default_generator_params, hailo_status status);
    static Expected<LLMGeneratorParams> deserialize_reply(const MemoryView &serialized_reply);
};

struct LLMGeneratorCreateSerializer
{
    LLMGeneratorCreateSerializer() = delete;
    static Expected<Buffer> serialize_request(const LLMGeneratorParams &params);
    static Expected<LLMGeneratorParams> deserialize_request(const MemoryView &serialized_request);
    static Expected<Buffer> serialize_reply(hailo_status status);
    static hailo_status deserialize_reply(const MemoryView &serialized_reply);
};

struct LLMGeneratorWriteSerializer
{
    LLMGeneratorWriteSerializer() = delete;
    static Expected<Buffer> serialize_request();
    static hailo_status deserialize_request(const MemoryView &serialized_request);
    static Expected<Buffer> serialize_reply(hailo_status status);
    static hailo_status deserialize_reply(const MemoryView &serialized_reply);
};

struct LLMGeneratorGenerateSerializer
{
    LLMGeneratorGenerateSerializer() = delete;
    static Expected<Buffer> serialize_request();
    static hailo_status deserialize_request(const MemoryView &serialized_request);
    static Expected<Buffer> serialize_reply(hailo_status status);
    static hailo_status deserialize_reply(const MemoryView &serialized_reply);
};

struct LLMGeneratorReadSerializer
{
    LLMGeneratorReadSerializer() = delete;
    static Expected<Buffer> serialize_request();
    static hailo_status deserialize_request(const MemoryView &serialized_request);
    static Expected<Buffer> serialize_reply(hailo_status status, const std::string &output = "",
        LLMGeneratorCompletion::Status generation_status = LLMGeneratorCompletion::Status::GENERATING);
    static Expected<std::pair<std::string, LLMGeneratorCompletion::Status>> deserialize_reply(const MemoryView &serialized_reply);
};
} /* namespace genai */
} /* namespace hailort */
#endif /* _HAILO_SERIALIZER_HPP_ */

View File

@@ -0,0 +1,78 @@
/**
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file session_wrapper.hpp
* @brief a wrapper for session
**/
#ifndef _HAILO_COMMON_GENAI_SESSION_WRAPPER_HPP_
#define _HAILO_COMMON_GENAI_SESSION_WRAPPER_HPP_
#include "hailo/hailort.h"
#include "hailo/buffer.hpp"
#include "hailo/hailo_session.hpp"
#include "common/utils.hpp"
#include "common/genai/serializer/serializer.hpp"
namespace hailort
{
namespace genai
{
// Thin wrapper over a Session that adds trivial length-prefixed framing:
// each message is a size_t byte-count followed by the payload.
// Fix: corrected a typo in the short-buffer error message ("smaller then").
// NOTE(review): the size prefix is a raw size_t, so both peers must agree on
// sizeof(size_t) and endianness - confirm client and server builds match.
class SessionWrapper final
{
public:
    SessionWrapper(std::shared_ptr<Session> session) : m_session(session) {}
    ~SessionWrapper() = default;

    // Reads the next framed message into a newly allocated DMA-able buffer.
    Expected<std::shared_ptr<Buffer>> read(std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT)
    {
        TimeoutGuard timeout_guard(timeout);
        size_t size_to_read = 0;
        CHECK_SUCCESS_AS_EXPECTED(m_session->read(reinterpret_cast<uint8_t*>(&size_to_read),
            sizeof(size_to_read), timeout_guard.get_remaining_timeout()));

        TRY(auto buffer, Buffer::create_shared(size_to_read, BufferStorageParams::create_dma()));
        CHECK_SUCCESS(m_session->read(buffer->data(), size_to_read, timeout_guard.get_remaining_timeout()));

        return buffer;
    }

    // Reads the next framed message into a caller-supplied buffer.
    // Returns the payload size; fails with HAILO_INVALID_OPERATION if the
    // incoming message does not fit in `buffer`.
    Expected<size_t> read(MemoryView buffer, std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT)
    {
        TimeoutGuard timeout_guard(timeout);
        size_t size_to_read = 0;
        CHECK_SUCCESS_AS_EXPECTED(m_session->read(reinterpret_cast<uint8_t*>(&size_to_read),
            sizeof(size_to_read), timeout_guard.get_remaining_timeout()));
        CHECK(size_to_read <= buffer.size(), HAILO_INVALID_OPERATION,
            "Read buffer is smaller than necessary. Buffer size = {}, generation size = {}",
            buffer.size(), size_to_read);

        CHECK_SUCCESS(m_session->read(buffer.data(), size_to_read, timeout_guard.get_remaining_timeout()));
        return size_to_read;
    }

    // Writes one framed message: size prefix first, then the payload.
    hailo_status write(MemoryView buffer, std::chrono::milliseconds timeout = Session::DEFAULT_WRITE_TIMEOUT)
    {
        TimeoutGuard timeout_guard(timeout);
        // First we send the buffer's size. Then the buffer itself.
        // TODO: Use hrpc protocol
        size_t size = buffer.size();
        CHECK_SUCCESS(m_session->write(reinterpret_cast<const uint8_t*>(&size), sizeof(size), timeout_guard.get_remaining_timeout()));
        CHECK_SUCCESS(m_session->write(buffer.data(), size, timeout_guard.get_remaining_timeout()));
        return HAILO_SUCCESS;
    }

private:
    std::shared_ptr<Session> m_session;
};
} /* namespace genai */
} /* namespace hailort */
#endif /* _HAILO_COMMON_GENAI_SESSION_WRAPPER_HPP_ */

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -24,9 +24,6 @@ namespace hailort
#define HAILO_SERVICE_SHARED_MEMORY_ENV_VAR ("HAILO_SERVICE_SHARED_MEMORY_OFF")
#define HAILO_SERVICE_SHARED_MEMORY_OFF "1"
/* Defines a custom pcie port for raw-connection */
#define HAILO_CONNECTION_PCIE_PORT_ENV_VAR ("HAILO_CONNECTION_PCIE_PORT")
/* Forces the client to use socket-based communication on a specific address. If not set, socket communication won't be used. */
#define HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR ("HAILO_SOCKET_COM_ADDR_CLIENT")
@@ -36,11 +33,11 @@ namespace hailort
/* Forces Hailo session based on socket to use a specific device. This env var should be set to the iface name (i.e eth0) */
#define HAILO_SOCKET_BIND_TO_INTERFACE_ENV_VAR ("HAILO_SOCKET_BIND_TO_INTERFACE")
/* HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR and HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR can be set to either ip:port ("X.X.X.X:P"),
/* HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR and HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR can be set to either <ip> ("X.X.X.X"),
or to HAILO_SOCKET_COM_ADDR_UNIX_SOCKET which forces working with unix-socket*/
#define HAILO_SOCKET_COM_ADDR_UNIX_SOCKET ("localhost")
/* Overrides hRPC requests timeout. value in seconds */
/* Overrides hRPC/gRPC requests timeout. value in seconds */
#define HAILO_REQUEST_TIMEOUT_SECONDS ("HAILO_REQUEST_TIMEOUT_SECONDS")
/* General */
@@ -104,12 +101,28 @@ namespace hailort
/* Forces using descriptor-lists instead of CCB for inter-context-channels on h1x devices */
#define HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC_ENV_VAR ("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")
/* Determines the size of each mapped buffer into which the ccws section will be split.
Relevant only when the aligned_ccws feature is enabled */
#define HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE_ENV_VAR ("HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE")
#define HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE (2 * 1024 * 1024)
/* Forces copying the hef file content to a mapped buffer before configuring it's network groups.
When working with Hef as a file, we need this copy in order to work with the aligned ccws feature */
#define HAILO_COPY_HEF_CONTENT_TO_A_MAPPED_BUFFER_PRE_CONFIGURE_ENV_VAR ("HAILO_COPY_HEF_CONTENT_TO_A_MAPPED_BUFFER_PRE_CONFIGURE")
/* Disables the aligned ccws feature - in case this env var is set, the aligned_ccws feature won't be used.
Instead - we will allocate aligned config buffers and will copy the CCWs to them */
#define HAILO_DISABLE_ALIGNED_CCWS_ENV_VAR ("HAILO_DISABLE_ALIGNED_CCWS")
/* Forces using descriptor-lists instead of CCB for ddr-channels on h1x devices */
#define HAILO_FORCE_DDR_CHANNEL_OVER_CCB_ENV_VAR ("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")
/* Sets the default power-mode of the ConfiguredNetworkGroups to `HAILO_POWER_MODE_ULTRA_PERFORMANCE` */
#define FORCE_POWER_MODE_ULTRA_PERFORMANCE_ENV_VAR ("FORCE_POWER_MODE_ULTRA_PERFORMANCE")
/* Set HW infer Tool to use CCB for Boundary Channels*/
#define HAILO_HW_INFER_BOUNDARY_CHANNELS_OVER_CCB_ENV_VAR ("HAILO_HW_INFER_BOUNDARY_CHANNELS_OVER_CCB")
} /* namespace hailort */
#endif /* HAILO_INTERNAL_ENV_VARS_HPP_ */

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,3 +1,7 @@
/**
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file event_os_specific.cpp
* @brief Event/semaphore OS specific implementation for linux using eventfd

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file os_utils.cpp
* @brief Utilities for Posix methods

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file event_os_specific.cpp
* @brief Event/semaphore OS specific implementation for qnx using pevents

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file shared_memory_buffer.cpp
* @brief Posix Shared memory implementation
@@ -33,7 +33,7 @@ Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::create(size_t size, const st
CHECK_AS_EXPECTED(res != -1, HAILO_INTERNAL_FAILURE, "Failed to set size of shared memory object, errno = {}", errno);
TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), true);
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(mmapped_buffer), true);
CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
@@ -46,7 +46,7 @@ Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::open(size_t size, const std:
auto shm_fd = FileDescriptor(shm_segment_fd);
TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), false);
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(mmapped_buffer), false);
CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -126,7 +126,15 @@ Expected<Socket> Socket::accept()
hailo_status Socket::connect(const sockaddr *addr, socklen_t len)
{
int ret = ::connect(m_socket_fd, addr, len);
CHECK(0 == ret, HAILO_ETH_FAILURE, "Failed to connect to socket {}", errno);
if (0 != ret) {
switch (errno) {
case ECONNREFUSED:
return HAILO_CONNECTION_REFUSED;
default:
LOGGER__ERROR("Failed to connect to socket {}", errno);
return HAILO_ETH_FAILURE;
}
}
return HAILO_SUCCESS;
}

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,3 +1,7 @@
/**
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
#include "common/ethernet_utils.hpp"

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file event_os_specific.cpp
* @brief Event/semaphore OS specific implementation for windows using event/semaphore HANDLE

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file named_mutex_guard.hpp
* @brief Named mutex guard implementation

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file named_mutex_guard.hpp
* @brief Named mutex guard

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file os_utils.cpp
* @brief Utilities for Windows methods

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file shared_memory_buffer.cpp
* @brief Shared memory implementation in Windows.
@@ -22,11 +22,11 @@ Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::create(size_t size, const st
HANDLE handle_map_file = CreateFileMapping(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, 0,
static_cast<DWORD>(size), static_cast<LPCSTR>(shm_name.c_str()));
CHECK_AS_EXPECTED((handle_map_file != nullptr), HAILO_INTERNAL_FAILURE, "Failed to create shared memory object, error = {}", GetLastError());
auto shm_fd = FileDescriptor(handle_map_file);
TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), true);
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(mmapped_buffer), true);
CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
@@ -40,7 +40,7 @@ Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::open(size_t size, const std:
auto shm_fd = FileDescriptor(handle_map_file);
TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), false);
auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(mmapped_buffer), false);
CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,3 +1,7 @@
/**
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
#include <vector>
#include "common/os/windows/string_conversion.hpp"

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file virtual_alloc_guard.cpp
* @brief Guard object for VirtualAlloc and VirtualFree

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file virtual_alloc_guard.hpp
* @brief Guard object for VirtualAlloc and VirtualFree (only for windows os).

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file os_utils.hpp
* @brief Utilities for OS methods

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file shared_memory_buffer.hpp
* @brief Shared memory buffer
@@ -43,16 +43,14 @@ public:
SharedMemoryBuffer &operator=(const SharedMemoryBuffer &) = delete;
virtual ~SharedMemoryBuffer();
SharedMemoryBuffer(const std::string &shm_name, FileDescriptor &&shm_fd, MmapBuffer<void> &&shm_mmap_buffer, bool memory_owner) :
SharedMemoryBuffer(const std::string &shm_name, MmapBuffer<void> &&shm_mmap_buffer, bool memory_owner) :
m_shm_name(shm_name),
m_shm_fd(std::move(shm_fd)),
m_shm_mmap_buffer(std::move(shm_mmap_buffer)),
m_memory_owner(memory_owner)
{}
SharedMemoryBuffer(SharedMemoryBuffer&& other) noexcept :
m_shm_name(std::exchange(other.m_shm_name, "")),
m_shm_fd(std::move(other.m_shm_fd)),
m_shm_mmap_buffer(std::move(other.m_shm_mmap_buffer)),
m_memory_owner(std::exchange(other.m_memory_owner, false))
{}
@@ -71,7 +69,6 @@ public:
private:
std::string m_shm_name;
FileDescriptor m_shm_fd;
MmapBuffer<void> m_shm_mmap_buffer;
bool m_memory_owner;
};

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -298,6 +298,7 @@ inline hailo_status get_status(const Expected<T> &exp)
// Macros that check status. If status is 'valid_error', return without printing error to the prompt.
#define CHECK_EXPECTED_WITH_ACCEPTABLE_STATUS(valid_error, exp, ...) if (valid_error == (exp).status()) {return make_unexpected(valid_error);} CHECK_SUCCESS(exp, __VA_ARGS__);
#define CHECK_SUCCESS_WITH_ACCEPTABLE_STATUS(valid_error, status, ...) if ((valid_error) == (status)) {return make_unexpected(valid_error);} CHECK_SUCCESS(status, __VA_ARGS__);
#define __HAILO_CONCAT(x, y) x ## y
@@ -397,6 +398,37 @@ static inline bool is_env_variable_on(const char *env_var_name, const std::strin
return ((nullptr != env_var) && (strncmp(env_var, required_value.c_str(), required_value.size()) == 0));
}
/**
 * Reads an environment variable and parses its value as an unsigned decimal size.
 *
 * @param env_var_name Name of the environment variable to read.
 * @return The parsed value on success, HAILO_NOT_FOUND if the variable is not set,
 *         or HAILO_INVALID_ARGUMENT if its content is not a valid unsigned decimal number.
 */
static inline Expected<size_t> get_env_variable_as_size(const char *env_var_name) {
    const char *env_val = std::getenv(env_var_name);
    if (!env_val) {
        return make_unexpected(HAILO_NOT_FOUND);
    }

    static const int DECIMAL_BASE = 10;
    errno = 0;
    char *end = nullptr;
    unsigned long long result = std::strtoull(env_val, &end, DECIMAL_BASE);

    /*
     * The conversion succeeded only if all of the following hold:
     * - errno is still 0 (std::strtoull sets it, e.g. to ERANGE, on overflow);
     * - at least one digit was consumed (end != env_val) - std::strtoull returns 0
     *   without raising an error for an empty or non-numeric string;
     * - the entire string was consumed, i.e. 'end' points at the null terminator.
     */
    if ((0 != errno) || (end == env_val) || ('\0' != *end)) {
        LOGGER__ERROR("Failed to parse environment variable {} as a size", env_var_name);
        return make_unexpected(HAILO_INVALID_ARGUMENT);
    }
    return Expected<size_t>(static_cast<size_t>(result));
}
// Constrains v to the inclusive range [lo, hi].
// (Once the codebase moves to C++17, prefer std::clamp.)
constexpr size_t clamp(size_t v, size_t lo, size_t hi) {
    return (v < lo) ? lo : ((hi < v) ? hi : v);
}
static inline Expected<std::string> get_env_variable(const std::string &env_var_name)
{
const auto env_var = std::getenv(env_var_name.c_str());
@@ -413,6 +445,23 @@ static inline Expected<std::string> get_env_variable(const std::string &env_var_
return Expected<std::string>(result);
}
// Maps a C++ element type to its matching hailo_format_type_t:
// uint8_t -> UINT8, uint16_t -> UINT16, float32_t -> FLOAT32.
// Any other type yields HAILO_NOT_FOUND.
template <typename T>
Expected<hailo_format_type_t> get_hailo_format_type()
{
    const auto &requested_type = typeid(T);
    if (requested_type == typeid(uint8_t)) {
        hailo_format_type_t result = HAILO_FORMAT_TYPE_UINT8;
        return result;
    }
    if (requested_type == typeid(uint16_t)) {
        hailo_format_type_t result = HAILO_FORMAT_TYPE_UINT16;
        return result;
    }
    if (requested_type == typeid(float32_t)) {
        hailo_format_type_t result = HAILO_FORMAT_TYPE_FLOAT32;
        return result;
    }
    return make_unexpected(HAILO_NOT_FOUND);
}
class CRC32 {
public:
CRC32() {
@@ -601,6 +650,25 @@ private:
}
};
// Tracks how much of a fixed timeout budget is left, measured from the moment
// of construction against a monotonic clock.
class TimeoutGuard final
{
public:
    explicit TimeoutGuard(std::chrono::milliseconds total_timeout) :
        m_start_time(std::chrono::steady_clock::now()),
        m_total_timeout(total_timeout)
    {}

    // Returns the unspent portion of the budget. Never negative - once the
    // deadline has passed, this stays at zero.
    std::chrono::milliseconds get_remaining_timeout() const
    {
        const auto now = std::chrono::steady_clock::now();
        const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - m_start_time);
        return (elapsed < m_total_timeout) ? (m_total_timeout - elapsed) : std::chrono::milliseconds(0);
    }

private:
    std::chrono::steady_clock::time_point m_start_time;
    std::chrono::milliseconds m_total_timeout;
};
} /* namespace hailort */
#endif /* HAILO_UTILS_H_ */

View File

@@ -1,14 +1,14 @@
// SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) AND MIT
/**
* Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
**/
#ifndef _HAILO_IOCTL_COMMON_H_
#define _HAILO_IOCTL_COMMON_H_
#define HAILO_DRV_VER_MAJOR 4
#define HAILO_DRV_VER_MINOR 20
#define HAILO_DRV_VER_REVISION 1
#define HAILO_DRV_VER_MINOR 21
#define HAILO_DRV_VER_REVISION 0
#define _STRINGIFY_EXPANDED( x ) #x
#define _STRINGIFY_NUMBER( x ) _STRINGIFY_EXPANDED(x)
@@ -33,21 +33,20 @@
#define INVALID_DRIVER_HANDLE_VALUE ((uintptr_t)-1)
// Used by windows and unix driver to raise the right CPU control handle to the FW. The same as in pcie_service FW
#define FW_ACCESS_CORE_CPU_CONTROL_SHIFT (1)
#define FW_ACCESS_CORE_CPU_CONTROL_MASK (1 << FW_ACCESS_CORE_CPU_CONTROL_SHIFT)
#define FW_ACCESS_CONTROL_INTERRUPT_SHIFT (0)
#define FW_ACCESS_APP_CPU_CONTROL_MASK (1 << FW_ACCESS_CONTROL_INTERRUPT_SHIFT)
#define FW_ACCESS_DRIVER_SHUTDOWN_SHIFT (2)
#define FW_ACCESS_DRIVER_SHUTDOWN_MASK (1 << FW_ACCESS_DRIVER_SHUTDOWN_SHIFT)
// HRT-15790 TODO: separate nnc interrupts and soc interrupts
#define FW_ACCESS_SOFT_RESET_SHIFT (3)
#define FW_ACCESS_SOFT_RESET_MASK (1 << FW_ACCESS_SOFT_RESET_SHIFT)
enum hailo_pcie_nnc_interrupt_masks {
FW_ACCESS_APP_CPU_CONTROL_MASK = (1 << 0),
FW_ACCESS_CORE_CPU_CONTROL_MASK = (1 << 1),
FW_ACCESS_DRIVER_SHUTDOWN_MASK = (1 << 2),
FW_ACCESS_SOFT_RESET_MASK = (1 << 3),
};
#define FW_ACCESS_SOC_CONTROL_SHIFT (3)
#define FW_ACCESS_SOC_CONTROL_MASK (1 << FW_ACCESS_SOC_CONTROL_SHIFT)
enum hailo_pcie_soc_interrupt_masks {
FW_ACCESS_SOC_CONTROL_MASK = (1 << 3),
};
#define INVALID_VDMA_CHANNEL (0xff)
#define HAILO_DMA_DIRECTION_EQUALS(a, b) (a == HAILO_DMA_BIDIRECTIONAL || b == HAILO_DMA_BIDIRECTIONAL || a == b)
#if !defined(__cplusplus) && defined(NTDDI_VERSION)
#include <wdm.h>
@@ -257,16 +256,40 @@ struct hailo_write_action_list_params {
};
/* structure used in ioctl HAILO_DESC_LIST_BIND_VDMA_BUFFER */
/**
* Programs the descriptions list (desc_handle), starting from starting_desc, with the given buffer.
* The buffer is referenced by buffer_handle (the base buffer), size, offset and batch_size.
* The ioctl will start at offset, and will program `size` bytes in chunks of `batch_size` bytes.
*
* For example, if buffer_offset is 0x1000, buffer_size=0x300, batch_size=2, and desc_page_size is 0x200 (desc
* page size is taken from the descriptors list), we program the following pattern:
* desc[starting_desc] = { .address = base_buffer+0x1000, .size= 0x200 }
* desc[starting_desc+1] = { .address = base_buffer+0x1200, .size= 0x100 }
* desc[starting_desc+2] = { .address = base_buffer+0x1400, .size= 0x200 }
* desc[starting_desc+3] = { .address = base_buffer+0x1600, .size= 0x100 }
*
* The stride is the amount of bytes to really program.
* If the stride is 0, the stride is calculated as the desc_page_size.
* Else, the stride is the given stride.
* The stride must be <= desc_page_size.
*
* For example, if stride=108, buffer_size=0x600 and desc_page_size is 0x200 the pattern will be:
* desc[starting_desc] = { .address = base_buffer, .size= 0x108 }
* desc[starting_desc+1] = { .address = base_buffer+0x200, .size= 0x108 }
* desc[starting_desc+2] = { .address = base_buffer+0x400, .size= 0x108 }
*/
struct hailo_desc_list_program_params {
size_t buffer_handle; // in
size_t buffer_size; // in
size_t buffer_offset; // in
uint32_t batch_size; // in
uintptr_t desc_handle; // in
uint8_t channel_index; // in
uint32_t starting_desc; // in
bool should_bind; // in
enum hailo_vdma_interrupts_domain last_interrupts_domain; // in
bool is_debug; // in
uint32_t stride; // in
};
/* structure used in ioctl HAILO_VDMA_ENABLE_CHANNELS */
@@ -284,11 +307,12 @@ struct hailo_vdma_disable_channels_params {
struct hailo_vdma_interrupts_channel_data {
uint8_t engine_index;
uint8_t channel_index;
bool is_active; // If not activate, num_processed is ignored.
uint8_t transfers_completed; // Number of transfers completed.
uint8_t host_error; // Channel errors bits on source side
uint8_t device_error; // Channel errors bits on dest side
bool validation_success; // If the validation of the channel was successful
#define HAILO_VDMA_TRANSFER_DATA_CHANNEL_NOT_ACTIVE (0xff)
#define HAILO_VDMA_TRANSFER_DATA_CHANNEL_WITH_ERROR (0xfe)
// Either amount of transfers done or one of the above defines
uint8_t data;
};
struct hailo_vdma_interrupts_wait_params {
@@ -406,6 +430,7 @@ enum hailo_board_type {
HAILO_BOARD_TYPE_HAILO15L,
HAILO_BOARD_TYPE_HAILO10H,
HAILO_BOARD_TYPE_HAILO10H_LEGACY,
HAILO_BOARD_TYPE_MARS,
HAILO_BOARD_TYPE_COUNT,
/** Max enum value to maintain ABI Integrity */
@@ -486,14 +511,15 @@ struct hailo_free_continuous_buffer_params {
/* structures used in ioctl HAILO_VDMA_LAUNCH_TRANSFER */
struct hailo_vdma_transfer_buffer {
size_t mapped_buffer_handle; // in
uint32_t offset; // in
uint32_t size; // in
enum hailo_dma_buffer_type buffer_type; // in
uintptr_t addr_or_fd; // in
uint32_t size; // in
};
// We allow maximum 2 buffers per transfer since we may have an extra buffer
// to make sure each buffer is aligned to page size.
#define HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER (2)
// The size is a tradeoff between ioctl/stack buffers size and the amount of buffers we
// want to transfer. (If user mode wants to transfer more buffers, it should call the
// ioctl multiple times).
#define HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER (8)
struct hailo_vdma_launch_transfer_params {
uint8_t engine_index; // in
@@ -512,9 +538,6 @@ struct hailo_vdma_launch_transfer_params {
bool is_debug; // in, if set program hw to send
// more info (e.g desc complete status)
uint32_t descs_programed; // out, amount of descriptors programed.
int launch_transfer_status; // out, status of the launch transfer call. (only used in case of error)
};
/* structure used in ioctl HAILO_SOC_CONNECT */
@@ -638,7 +661,7 @@ enum hailo_vdma_ioctl_code {
#define HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC_CODE, struct hailo_allocate_continuous_buffer_params)
#define HAILO_VDMA_CONTINUOUS_BUFFER_FREE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE, struct hailo_free_continuous_buffer_params)
#define HAILO_VDMA_LAUNCH_TRANSFER _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LAUNCH_TRANSFER_CODE, struct hailo_vdma_launch_transfer_params)
#define HAILO_VDMA_LAUNCH_TRANSFER _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LAUNCH_TRANSFER_CODE, struct hailo_vdma_launch_transfer_params)
enum hailo_nnc_ioctl_code {
HAILO_FW_CONTROL_CODE,
@@ -662,14 +685,14 @@ enum hailo_nnc_ioctl_code {
enum hailo_soc_ioctl_code {
HAILO_SOC_IOCTL_CONNECT_CODE,
HAILO_SOC_IOCTL_CLOSE_CODE,
HAILO_SOC_IOCTL_POWER_OFF_CODE,
// Must be last
HAILO_SOC_IOCTL_MAX_NR,
};
#define HAILO_SOC_CONNECT _IOWR_(HAILO_SOC_IOCTL_MAGIC, HAILO_SOC_IOCTL_CONNECT_CODE, struct hailo_soc_connect_params)
#define HAILO_SOC_CLOSE _IOR_(HAILO_SOC_IOCTL_MAGIC, HAILO_SOC_IOCTL_CLOSE_CODE, struct hailo_soc_close_params)
#define HAILO_SOC_POWER_OFF _IO_(HAILO_SOC_IOCTL_MAGIC, HAILO_SOC_IOCTL_POWER_OFF_CODE)
enum hailo_pci_ep_ioctl_code {
HAILO_PCI_EP_ACCEPT_CODE,

View File

@@ -1,38 +0,0 @@
cmake_minimum_required(VERSION 3.5.0)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
set(HAILORT_SERVER_SOURCES
hailort_server.cpp
${HRPC_CPP_SOURCES}
${HRPC_PROTOCOL_CPP_SOURCES}
${HAILORT_SERVICE_DIR}/cng_buffer_pool.cpp
${DRIVER_OS_DIR}/driver_os_specific.cpp
${HAILORT_SRC_DIR}/vdma/pcie_session.cpp
${HAILORT_SRC_DIR}/vdma/memory/descriptor_list.cpp
${HAILORT_SRC_DIR}/vdma/memory/mapped_buffer.cpp
${HAILORT_SRC_DIR}/vdma/memory/dma_able_buffer.cpp
${HAILORT_SRC_DIR}/vdma/driver/hailort_driver.cpp
${HAILORT_SRC_DIR}/vdma/channel/interrupts_dispatcher.cpp
${HAILORT_SRC_DIR}/vdma/channel/transfer_launcher.cpp
${HAILORT_SRC_DIR}/vdma/channel/boundary_channel.cpp
${HAILORT_SRC_DIR}/vdma/channel/channels_group.cpp
${HAILORT_SRC_DIR}/vdma/channel/transfer_common.cpp
)
add_executable(hailort_server ${HAILORT_SERVER_SOURCES})
target_include_directories(hailort_server PRIVATE
${HAILORT_SRC_DIR}
${COMMON_INC_DIR}
${DRIVER_INC_DIR}
)
target_compile_options(hailort_server PRIVATE ${HAILORT_COMPILE_OPTIONS})
set_property(TARGET hailort_server PROPERTY CXX_STANDARD 14)
set_property(TARGET hailort_server PROPERTY INSTALL_RPATH "$ORIGIN" "../lib/") # Link with a relative libhailort
target_link_libraries(hailort_server PRIVATE
libhailort
Threads::Threads
rpc_proto
hailort_common
)

View File

@@ -1,808 +0,0 @@
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file hailo_server.cpp
* @brief Hailo Server
**/
#include "hailort_server.hpp"
#include "hailo/hailort.h"
#include "hrpc/server.hpp"
#include "hailo/vdevice.hpp"
#include "hrpc_protocol/serializer.hpp"
#include "net_flow/ops/nms_post_process.hpp"
#include "hailort_service/service_resource_manager.hpp"
#include "common/thread_safe_queue.hpp"
#include "hrpc/connection_context.hpp"
#include "vdma/pcie_session.hpp"
#include <spdlog/spdlog.h>
#include <spdlog/sinks/stdout_color_sinks.h>
using namespace hailort;
// TODO: These macros should be merged with the grpc macros, also change them to TRY
#define CHECK_EXPECTED_AS_HRPC_STATUS(_exepcted, T) \
do { \
if (!_exepcted) { \
LOGGER__ERROR("CHECK_EXPECTED_AS_HRPC_STATUS failed, status: {}", _exepcted.status()); \
auto reply = T::serialize_reply(_exepcted.status()); \
if (reply) return reply; \
LOGGER__CRITICAL("Failed to create reply with status: {}", reply.status()); \
return make_unexpected(HAILO_INTERNAL_FAILURE); \
} \
} while (0)
#define CHECK_SUCCESS_AS_HRPC_STATUS(_status, T) \
do { \
if (_status != HAILO_SUCCESS) { \
LOGGER__ERROR("CHECK_SUCCESS_AS_HRPC_STATUS failed, status: {}", _status); \
auto reply = T::serialize_reply(_status); \
if (reply) return reply; \
LOGGER__CRITICAL("Failed to create reply with status: {}", reply.status()); \
return make_unexpected(HAILO_INTERNAL_FAILURE); \
} \
} while (0)
#define CHECK_AS_HRPC_STATUS(_cond, _status, T) \
do { \
if (!(_cond)) { \
LOGGER__ERROR("CHECK_AS_HRPC_STATUS failed, status: {}", _status); \
auto reply = T::serialize_reply(_status); \
if (reply) return reply; \
LOGGER__CRITICAL("Failed to create reply with status: {}", reply.status()); \
return make_unexpected(HAILO_INTERNAL_FAILURE); \
} \
} while (0)
#define __HAILO_CONCAT(x, y) x ## y
#define _HAILO_CONCAT(x, y) __HAILO_CONCAT(x, y)
#define _TRY_AS_HRPC_STATUS(expected_var_name, var_decl, expr, ...) \
auto expected_var_name = (expr); \
CHECK_EXPECTED_AS_HRPC_STATUS(expected_var_name, __VA_ARGS__); \
var_decl = expected_var_name.release()
#define TRY_AS_HRPC_STATUS(var_decl, expr, ...) _TRY_AS_HRPC_STATUS(_HAILO_CONCAT(__expected, __COUNTER__), var_decl, expr, __VA_ARGS__)
#ifdef NDEBUG
#define LOGGER_PATTERN ("[%n] [%^%l%$] %v")
#else
#define LOGGER_PATTERN ("[%Y-%m-%d %X.%e] [%P] [%t] [%n] [%^%l%$] [%s:%#] [%!] %v")
#endif
// TODO: Benchmark this factor (HRT-15727)
#define ASYNC_QUEUE_SIZE_FACTOR (2) // double buffer
struct InferModelInfo
{
std::unordered_map<std::string, size_t> input_streams_sizes;
std::unordered_map<std::string, size_t> output_streams_sizes;
std::vector<std::string> inputs_names;
std::vector<std::string> outputs_names;
};
// Installs the process-wide spdlog default logger: a stderr color sink at
// info level, formatted with the build-dependent LOGGER_PATTERN.
void init_logger(const std::string &name)
{
    auto sink = make_shared_nothrow<spdlog::sinks::stderr_color_sink_mt>();
    sink->set_pattern(LOGGER_PATTERN);
    sink->set_level(spdlog::level::info);
    spdlog::set_default_logger(make_shared_nothrow<spdlog::logger>(name, sink));
}
/**
 * Releases the cached HEF buffers kept for the given infer models.
 *
 * @param infer_model_handles Handles of the infer models whose HEF buffers
 *                            should be dropped. Handles without a cached
 *                            buffer are silently ignored.
 */
void HailoRTServer::cleanup_infer_model_hef_buffers(const std::vector<uint32_t> &infer_model_handles)
{
    for (const auto &infer_model_handle : infer_model_handles) {
        // erase(key) on a map is a no-op when the key is absent, so the
        // previous find()-then-erase double lookup is unnecessary.
        m_hef_buffers_per_infer_model.erase(infer_model_handle);
    }
}
// Drops the buffer pools owned by the given configured-infer-model handles.
// m_buffer_pool_mutex is held for the whole sweep.
void HailoRTServer::cleanup_cim_buffer_pools(const std::vector<uint32_t> &cim_handles)
{
    const std::lock_guard<std::mutex> pools_guard(m_buffer_pool_mutex);
    for (const auto handle : cim_handles) {
        m_buffer_pool_per_cim.erase(handle);
    }
}
// Tears down everything a disconnected client left behind, then closes the
// connection. The order is deliberate: configured infer models are released
// before the infer models that created them, and the VDevice is released last.
hailo_status HailoRTServer::cleanup_client_resources(RpcConnection client_connection)
{
    // Single-client server: all resources are registered under SINGLE_CLIENT_PID.
    std::set<uint32_t> pids = {SINGLE_CLIENT_PID};

    // Release configured infer models, then their per-model buffer pools.
    auto cim_handles = ServiceResourceManager<ConfiguredInferModel>::get_instance().resources_handles_by_pids(pids);
    (void)ServiceResourceManager<ConfiguredInferModel>::get_instance().release_by_pid(SINGLE_CLIENT_PID);
    cleanup_cim_buffer_pools(cim_handles);

    // Release infer models (and their info records), then the HEF buffers cached for them.
    auto infer_model_handles = ServiceResourceManager<InferModel>::get_instance().resources_handles_by_pids(pids);
    (void)ServiceResourceManager<InferModelInfo>::get_instance().release_by_pid(SINGLE_CLIENT_PID);
    (void)ServiceResourceManager<InferModel>::get_instance().release_by_pid(SINGLE_CLIENT_PID);
    cleanup_infer_model_hef_buffers(infer_model_handles);
    m_infer_model_to_info_id.clear();

    // The VDevice goes last, after everything that was created on top of it.
    (void)ServiceResourceManager<VDevice>::get_instance().release_by_pid(SINGLE_CLIENT_PID);

    CHECK_SUCCESS(client_connection.close());
    return HAILO_SUCCESS;
}
/**
 * Factory for the server: builds the shared connection context, the shutdown
 * event and the finished-infer-requests queue, then constructs the server.
 *
 * @return Expected holding the server, or HAILO_OUT_OF_HOST_MEMORY /
 *         the failure status of the underlying create calls.
 *
 * Both null checks now use CHECK_NOT_NULL_AS_EXPECTED for consistency - this
 * function returns an Expected, and the original mixed the two macro variants.
 */
Expected<std::unique_ptr<HailoRTServer>> HailoRTServer::create_unique()
{
    TRY(auto connection_context, ConnectionContext::create_server_shared());
    // The shutdown event unblocks the callbacks thread's dequeue on destruction.
    TRY(auto callbacks_queue_shutdown_event, Event::create_shared(Event::State::not_signalled));
    auto callbacks_done_queue = SpscQueue<FinishedInferRequest>::create_shared(PcieSession::MAX_ONGOING_TRANSFERS, callbacks_queue_shutdown_event);
    CHECK_NOT_NULL_AS_EXPECTED(callbacks_done_queue, HAILO_OUT_OF_HOST_MEMORY);
    auto res = make_unique_nothrow<HailoRTServer>(connection_context, callbacks_done_queue, callbacks_queue_shutdown_event);
    CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY);
    return res;
}
// Constructs the server and immediately starts the callbacks thread, which
// drains m_callbacks_done_queue and writes infer results back to the client.
// NOTE: the thread is launched from the constructor body, so callbacks_thread_loop()
// may run before the constructor returns - all members it touches are already
// initialized in the member-init list above it.
HailoRTServer::HailoRTServer(std::shared_ptr<ConnectionContext> connection_context,
    std::shared_ptr<SpscQueue<FinishedInferRequest>> callbacks_done_queue,
    EventPtr callbacks_queue_shutdown_event) : Server(connection_context), m_callbacks_done_queue(callbacks_done_queue),
    m_callbacks_queue_shutdown_event(callbacks_queue_shutdown_event)
{
    m_callbacks_thread = std::thread([this] {
        auto status = callbacks_thread_loop();
        if (HAILO_SUCCESS != status) {
            // The loop only exits with an error on unrecoverable failures; the server is unusable then.
            LOGGER__CRITICAL("Callback thread has failed with status {}. Server should restart!", status);
        }
    });
}
// Worker loop of m_callbacks_thread: dequeues finished infer requests and sends
// each completion (trigger_callback) plus its output buffers back to the client.
// Exits cleanly when the shutdown event signals the queue (HAILO_SHUTDOWN_EVENT_SIGNALED);
// any other dequeue failure aborts the loop with that status.
hailo_status HailoRTServer::callbacks_thread_loop()
{
    while (true) {
        auto request = m_callbacks_done_queue->dequeue(std::chrono::milliseconds(HAILO_INFINITE));
        if (HAILO_SHUTDOWN_EVENT_SIGNALED == request.status()) {
            break;
        }
        CHECK_EXPECTED_AS_STATUS(request);
        auto status = trigger_callback(request->callback_id, request->completion_info.status, request->configured_infer_model_handle,
            request->connection, [this, &request] (RpcConnection connection) -> hailo_status {
            // Output payloads are only sent when the inference itself succeeded.
            if (HAILO_SUCCESS == request->completion_info.status) {
                for (auto output : request->outputs) {
                    auto status = connection.wait_for_write_buffer_async_ready(output->size(), SERVER_TIMEOUT);
                    CHECK_SUCCESS(status);
                    status = connection.write_buffer_async(MemoryView(*output), [output] (hailo_status status) {
                        (void)output; // capturing output so it won't be freed before the callback is called
                        if (HAILO_SUCCESS != status) {
                            LOGGER__ERROR("Failed to write buffer, status = {}", status);
                        }
                    });
                    CHECK_SUCCESS(status);
                }
                // Return output buffers to the CIM's pool (pool may already be gone if the CIM was destroyed).
                // NOTE(review): buffers are returned while the async writes above may still be in flight -
                // presumably safe because the write callback keeps its own BufferPtr reference; confirm
                // that a recycled buffer cannot be overwritten by a new infer before the write completes.
                std::lock_guard<std::mutex> lock(m_buffer_pool_mutex);
                for (uint32_t i = 0; i < request->outputs.size(); i++) {
                    if (m_buffer_pool_per_cim.contains(request->configured_infer_model_handle)) {
                        auto status = m_buffer_pool_per_cim.at(request->configured_infer_model_handle)->return_to_pool(request->outputs_names[i], request->outputs[i]);
                        CHECK_SUCCESS(status);
                    }
                }
            }
            return HAILO_SUCCESS;
        });
        // HAILO_COMMUNICATION_CLOSED means the client disconnected. Server doesn't need to restart in this case.
        if (status != HAILO_COMMUNICATION_CLOSED) {
            CHECK_SUCCESS(status);
        }
    }
    return HAILO_SUCCESS;
}
/**
 * Stops the callbacks thread before members are destroyed: signaling the
 * shutdown event makes the queue's dequeue return HAILO_SHUTDOWN_EVENT_SIGNALED,
 * which ends callbacks_thread_loop(); joining then waits for it to finish.
 */
HailoRTServer::~HailoRTServer()
{
    const auto signal_status = m_callbacks_queue_shutdown_event->signal();
    if (signal_status != HAILO_SUCCESS) {
        // Can't unblock the thread; the join below may hang, so shout loudly.
        LOGGER__CRITICAL("Failed to signal shutdown event, status = {}", signal_status);
    }
    if (m_callbacks_thread.joinable()) {
        m_callbacks_thread.join();
    }
}
int main()
{
init_logger("HailoRT-Server");
TRY(auto server, HailoRTServer::create_unique());
Dispatcher dispatcher;
// TODO: add a server implementation class, with resources heiracrhy and more
auto &infer_model_to_info_id = server->infer_model_to_info_id();
auto &buffer_pool_per_cim = server->buffer_pool_per_cim();
// Because the infer model is created with a hef buffer, we need to keep the buffer until the configure stage.
// Here I keep it until the infer model is destroyed
auto &hef_buffers = server->hef_buffers();
dispatcher.register_action(HailoRpcActionID::VDEVICE__CREATE,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
TRY_AS_HRPC_STATUS(auto vdevice_params, CreateVDeviceSerializer::deserialize_request(request), CreateVDeviceSerializer);
TRY_AS_HRPC_STATUS(auto vdevice, VDevice::create(vdevice_params.get()), CreateVDeviceSerializer);
auto &manager = ServiceResourceManager<VDevice>::get_instance();
auto id = manager.register_resource(SINGLE_CLIENT_PID, std::move(vdevice));
auto reply = CreateVDeviceSerializer::serialize_reply(HAILO_SUCCESS, id);
return reply;
});
dispatcher.register_action(HailoRpcActionID::VDEVICE__DESTROY,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &manager = ServiceResourceManager<VDevice>::get_instance();
TRY_AS_HRPC_STATUS(auto vdevice_handle, DestroyVDeviceSerializer::deserialize_request(request), DestroyVDeviceSerializer);
(void)manager.release_resource(vdevice_handle, SINGLE_CLIENT_PID);
TRY_AS_HRPC_STATUS(auto reply, DestroyVDeviceSerializer::serialize_reply(HAILO_SUCCESS), DestroyVDeviceSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::VDEVICE__CREATE_INFER_MODEL,
[&hef_buffers] (const MemoryView &request, ServerContextPtr server_context) -> Expected<Buffer> {
TRY_AS_HRPC_STATUS(auto tuple, CreateInferModelSerializer::deserialize_request(request), CreateInferModelSerializer);
auto vdevice_handle = std::get<0>(tuple);
uint64_t hef_size = std::get<1>(tuple);
auto name = std::get<2>(tuple);
assert(hef_size <= SIZE_MAX);
TRY_AS_HRPC_STATUS(auto hef_buffer, Buffer::create(static_cast<size_t>(hef_size), BufferStorageParams::create_dma()),
CreateInferModelSerializer);
auto status = server_context->connection().read_buffer(MemoryView(hef_buffer));
CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateInferModelSerializer);
auto &vdevice_manager = ServiceResourceManager<VDevice>::get_instance();
auto lambda = [view = MemoryView(hef_buffer), &name] (std::shared_ptr<VDevice> vdevice) {
return vdevice->create_infer_model(view, name);
};
auto infer_model = vdevice_manager.execute<Expected<std::shared_ptr<InferModel>>>(vdevice_handle, lambda);
CHECK_EXPECTED_AS_HRPC_STATUS(infer_model, CreateInferModelSerializer);
auto &infer_model_manager = ServiceResourceManager<InferModel>::get_instance();
auto infer_model_id = infer_model_manager.register_resource(SINGLE_CLIENT_PID, std::move(infer_model.release()));
hef_buffers.emplace(infer_model_id, std::move(hef_buffer));
TRY_AS_HRPC_STATUS(auto reply, CreateInferModelSerializer::serialize_reply(HAILO_SUCCESS, infer_model_id), CreateInferModelSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::INFER_MODEL__DESTROY,
[&hef_buffers] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &manager = ServiceResourceManager<InferModel>::get_instance();
TRY_AS_HRPC_STATUS(auto infer_model_handle, DestroyInferModelSerializer::deserialize_request(request), DestroyInferModelSerializer);
hef_buffers.erase(infer_model_handle);
(void)manager.release_resource(infer_model_handle, SINGLE_CLIENT_PID);
TRY_AS_HRPC_STATUS(auto reply, DestroyInferModelSerializer::serialize_reply(HAILO_SUCCESS), DestroyInferModelSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::INFER_MODEL__CREATE_CONFIGURED_INFER_MODEL,
[&buffer_pool_per_cim, &infer_model_to_info_id]
(const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &infer_model_manager = ServiceResourceManager<InferModel>::get_instance();
TRY_AS_HRPC_STATUS(auto request_params, CreateConfiguredInferModelSerializer::deserialize_request(request), CreateConfiguredInferModelSerializer);
const auto &infer_model_handle = request_params.infer_model_handle;
const auto &vdevice_handle = request_params.vdevice_handle;
auto lambda = [&request_params] (std::shared_ptr<InferModel> infer_model) -> Expected<ConfiguredInferModel> {
const auto &input_streams_formats = request_params.input_streams_params;
const auto &output_streams_formats = request_params.output_streams_params;
for (const auto &input_stream_format : input_streams_formats) {
TRY(auto input, infer_model->input(input_stream_format.first));
input.set_format_order(static_cast<hailo_format_order_t>(input_stream_format.second.format_order));
input.set_format_type(static_cast<hailo_format_type_t>(input_stream_format.second.format_type));
if (INVALID_NMS_CONFIG != input_stream_format.second.nms_score_threshold) {
input.set_nms_score_threshold(input_stream_format.second.nms_score_threshold);
}
if (INVALID_NMS_CONFIG != input_stream_format.second.nms_iou_threshold) {
input.set_nms_iou_threshold(input_stream_format.second.nms_iou_threshold);
}
if (static_cast<uint32_t>(INVALID_NMS_CONFIG) != input_stream_format.second.nms_max_proposals_per_class) {
input.set_nms_max_proposals_per_class(input_stream_format.second.nms_max_proposals_per_class);
}
if (static_cast<uint32_t>(INVALID_NMS_CONFIG) != input_stream_format.second.nms_max_proposals_total) {
input.set_nms_max_proposals_total(input_stream_format.second.nms_max_proposals_total);
}
if (static_cast<uint32_t>(INVALID_NMS_CONFIG) != input_stream_format.second.nms_max_accumulated_mask_size) {
input.set_nms_max_accumulated_mask_size(input_stream_format.second.nms_max_accumulated_mask_size);
}
}
for (const auto &output_stream_format : output_streams_formats) {
TRY(auto output, infer_model->output(output_stream_format.first));
output.set_format_order(static_cast<hailo_format_order_t>(output_stream_format.second.format_order));
output.set_format_type(static_cast<hailo_format_type_t>(output_stream_format.second.format_type));
if (INVALID_NMS_CONFIG != output_stream_format.second.nms_score_threshold) {
output.set_nms_score_threshold(output_stream_format.second.nms_score_threshold);
}
if (INVALID_NMS_CONFIG != output_stream_format.second.nms_iou_threshold) {
output.set_nms_iou_threshold(output_stream_format.second.nms_iou_threshold);
}
if (static_cast<uint32_t>(INVALID_NMS_CONFIG) != output_stream_format.second.nms_max_proposals_per_class) {
output.set_nms_max_proposals_per_class(output_stream_format.second.nms_max_proposals_per_class);
}
if (static_cast<uint32_t>(INVALID_NMS_CONFIG) != output_stream_format.second.nms_max_proposals_total) {
output.set_nms_max_proposals_total(output_stream_format.second.nms_max_proposals_total);
}
if (static_cast<uint32_t>(INVALID_NMS_CONFIG) != output_stream_format.second.nms_max_accumulated_mask_size) {
output.set_nms_max_accumulated_mask_size(output_stream_format.second.nms_max_accumulated_mask_size);
}
}
infer_model->set_batch_size(request_params.batch_size);
infer_model->set_power_mode(request_params.power_mode);
infer_model->set_hw_latency_measurement_flags(request_params.latency_flag);
return infer_model->configure();
};
auto configured_infer_model = infer_model_manager.execute<Expected<ConfiguredInferModel>>(infer_model_handle, lambda);
CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model, CreateConfiguredInferModelSerializer);
TRY_AS_HRPC_STATUS(auto async_queue_size, configured_infer_model->get_async_queue_size(), CreateConfiguredInferModelSerializer);
auto set_model_info_lambda = [] (std::shared_ptr<InferModel> infer_model) -> Expected<std::shared_ptr<InferModelInfo>> {
auto infer_model_info = make_shared_nothrow<InferModelInfo>();
CHECK_NOT_NULL_AS_EXPECTED(infer_model_info, HAILO_OUT_OF_HOST_MEMORY);
for (const auto &input : infer_model->inputs()) {
infer_model_info->input_streams_sizes.emplace(input.name(), input.get_frame_size());
infer_model_info->inputs_names.push_back(input.name());
}
for (const auto &output : infer_model->outputs()) {
infer_model_info->output_streams_sizes.emplace(output.name(), output.get_frame_size());
infer_model_info->outputs_names.push_back(output.name());
}
return infer_model_info;
};
auto model_info = infer_model_manager.execute<Expected<std::shared_ptr<InferModelInfo>>>(infer_model_handle, set_model_info_lambda);
CHECK_EXPECTED_AS_HRPC_STATUS(model_info, CreateConfiguredInferModelSerializer);
auto &infer_model_infos_manager = ServiceResourceManager<InferModelInfo>::get_instance();
auto infer_model_info_id = infer_model_infos_manager.register_resource(SINGLE_CLIENT_PID, std::move(model_info.release()));
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
auto cim_id = cim_manager.register_resource(SINGLE_CLIENT_PID,
std::move(make_shared_nothrow<ConfiguredInferModel>(configured_infer_model.release())));
auto buffer_pool = ServiceNetworkGroupBufferPool::create(vdevice_handle);
CHECK_EXPECTED_AS_HRPC_STATUS(buffer_pool, CreateConfiguredInferModelSerializer);
auto buffer_pool_ptr = buffer_pool.release();
auto get_infer_model_info_lambda = [] (std::shared_ptr<InferModelInfo> infer_model_info) {
return *infer_model_info;
};
auto infer_model_info = infer_model_infos_manager.execute<Expected<InferModelInfo>>(infer_model_info_id, get_infer_model_info_lambda);
CHECK_EXPECTED_AS_HRPC_STATUS(infer_model_info, CreateConfiguredInferModelSerializer);
for (const auto &input_name : infer_model_info->inputs_names) {
auto status = buffer_pool_ptr->allocate_pool(input_name, HAILO_DMA_BUFFER_DIRECTION_D2H,
infer_model_info->input_streams_sizes[input_name], async_queue_size * ASYNC_QUEUE_SIZE_FACTOR);
CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateConfiguredInferModelSerializer);
}
for (const auto &output_name : infer_model_info->outputs_names) {
auto status = buffer_pool_ptr->allocate_pool(output_name, HAILO_DMA_BUFFER_DIRECTION_H2D,
infer_model_info->output_streams_sizes[output_name], async_queue_size * ASYNC_QUEUE_SIZE_FACTOR);
CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateConfiguredInferModelSerializer);
}
buffer_pool_per_cim.emplace(cim_id, buffer_pool_ptr);
infer_model_to_info_id[infer_model_handle] = infer_model_info_id;
TRY_AS_HRPC_STATUS(auto reply,
CreateConfiguredInferModelSerializer::serialize_reply(HAILO_SUCCESS, cim_id, static_cast<uint32_t>(async_queue_size)),
CreateConfiguredInferModelSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__DESTROY,
[&server] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
TRY_AS_HRPC_STATUS(auto configured_infer_model_handle, DestroyConfiguredInferModelSerializer::deserialize_request(request), DestroyInferModelSerializer);
auto shutdown_lambda = [] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
configured_infer_model->shutdown();
return HAILO_SUCCESS;
};
manager.execute<hailo_status>(configured_infer_model_handle, shutdown_lambda);
server->cleanup_cim_buffer_pools({ configured_infer_model_handle });
(void)manager.release_resource(configured_infer_model_handle, SINGLE_CLIENT_PID);
TRY_AS_HRPC_STATUS(auto reply, DestroyConfiguredInferModelSerializer::serialize_reply(HAILO_SUCCESS), DestroyInferModelSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_TIMEOUT,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
TRY_AS_HRPC_STATUS(auto tuple, SetSchedulerTimeoutSerializer::deserialize_request(request), SetSchedulerTimeoutSerializer);
const auto &configured_infer_model_handle = std::get<0>(tuple);
const auto &timeout = std::get<1>(tuple);
auto lambda = [timeout] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->set_scheduler_timeout(timeout);
};
auto status = cim_manager.execute<hailo_status>(configured_infer_model_handle, lambda);
TRY_AS_HRPC_STATUS(auto reply, SetSchedulerTimeoutSerializer::serialize_reply(status), SetSchedulerTimeoutSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_THRESHOLD,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
TRY_AS_HRPC_STATUS(auto tuple, SetSchedulerThresholdSerializer::deserialize_request(request), SetSchedulerThresholdSerializer);
const auto &configured_infer_model_handle = std::get<0>(tuple);
const auto &threshold = std::get<1>(tuple);
auto lambda = [threshold] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->set_scheduler_threshold(threshold);
};
auto status = cim_manager.execute<hailo_status>(configured_infer_model_handle, lambda);
TRY_AS_HRPC_STATUS(auto reply, SetSchedulerThresholdSerializer::serialize_reply(status), SetSchedulerThresholdSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_PRIORITY,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
TRY_AS_HRPC_STATUS(auto tuple, SetSchedulerPrioritySerializer::deserialize_request(request), SetSchedulerPrioritySerializer);
const auto &configured_infer_model_handle = std::get<0>(tuple);
const auto &priority = std::get<1>(tuple);
auto lambda = [priority] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->set_scheduler_priority(static_cast<uint8_t>(priority));
};
auto status = cim_manager.execute<hailo_status>(configured_infer_model_handle, lambda);
TRY_AS_HRPC_STATUS(auto reply, SetSchedulerPrioritySerializer::serialize_reply(status), SetSchedulerPrioritySerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__GET_HW_LATENCY_MEASUREMENT,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
auto configured_infer_model_handle = GetHwLatencyMeasurementSerializer::deserialize_request(request);
CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, GetHwLatencyMeasurementSerializer);
auto lambda = [] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->get_hw_latency_measurement();
};
auto latency_measurement_result = cim_manager.execute<Expected<LatencyMeasurementResult>>(configured_infer_model_handle.value(), lambda);
if (HAILO_NOT_AVAILABLE == latency_measurement_result.status()) {
return GetHwLatencyMeasurementSerializer::serialize_reply(HAILO_NOT_AVAILABLE);
}
CHECK_EXPECTED_AS_HRPC_STATUS(latency_measurement_result, GetHwLatencyMeasurementSerializer);
uint32_t avg_hw_latency = static_cast<uint32_t>(latency_measurement_result.value().avg_hw_latency.count());
TRY_AS_HRPC_STATUS(auto reply, GetHwLatencyMeasurementSerializer::serialize_reply(latency_measurement_result.status(), avg_hw_latency), GetHwLatencyMeasurementSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__ACTIVATE,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
auto configured_infer_model_handle = ActivateSerializer::deserialize_request(request);
CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, ActivateSerializer);
auto lambda = [] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->activate();
};
auto status = cim_manager.execute<hailo_status>(configured_infer_model_handle.value(), lambda);
TRY_AS_HRPC_STATUS(auto reply, ActivateSerializer::serialize_reply(status), ActivateSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__DEACTIVATE,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
auto configured_infer_model_handle = DeactivateSerializer::deserialize_request(request);
CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, DeactivateSerializer);
auto lambda = [] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->deactivate();
};
auto status = cim_manager.execute<hailo_status>(configured_infer_model_handle.value(), lambda);
TRY_AS_HRPC_STATUS(auto reply, DeactivateSerializer::serialize_reply(status), DeactivateSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SHUTDOWN,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
auto configured_infer_model_handle = ShutdownSerializer::deserialize_request(request);
CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, ShutdownSerializer);
auto lambda = [] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->shutdown();
};
auto status = cim_manager.execute<hailo_status>(configured_infer_model_handle.value(), lambda);
TRY_AS_HRPC_STATUS(auto reply, ShutdownSerializer::serialize_reply(status), ShutdownSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__RUN_ASYNC,
[&infer_model_to_info_id, &buffer_pool_per_cim, callbacks_done_queue = server->callbacks_done_queue()]
(const MemoryView &request, ServerContextPtr server_context) -> Expected<Buffer> {
auto &cim_manager = ServiceResourceManager<ConfiguredInferModel>::get_instance();
auto bindings_lambda = [] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->create_bindings();
};
TRY_AS_HRPC_STATUS(auto request_struct, RunAsyncSerializer::deserialize_request(request), RunAsyncSerializer);
auto configured_infer_model_handle = request_struct.configured_infer_model_handle;
auto infer_model_handle = request_struct.infer_model_handle;
auto callback_id = request_struct.callback_handle;
auto bindings = cim_manager.execute<Expected<ConfiguredInferModel::Bindings>>(configured_infer_model_handle, bindings_lambda);
CHECK_EXPECTED_AS_HRPC_STATUS(bindings, RunAsyncSerializer);
auto infer_model_info_lambda = [] (std::shared_ptr<InferModelInfo> infer_model_info) {
return *infer_model_info;
};
auto &infer_model_infos_manager = ServiceResourceManager<InferModelInfo>::get_instance();
auto infer_model_info = infer_model_infos_manager.execute<Expected<InferModelInfo>>(infer_model_to_info_id[infer_model_handle],
infer_model_info_lambda);
CHECK_EXPECTED_AS_HRPC_STATUS(infer_model_info, RunAsyncSerializer);
std::vector<BufferPtr> inputs; // TODO: add infer vector pool
inputs.reserve(infer_model_info->inputs_names.size());
uint32_t buffer_size_index = 0;
for (const auto &input_name : infer_model_info->inputs_names) {
TRY_AS_HRPC_STATUS(auto input, bindings->input(input_name), RunAsyncSerializer);
TRY_AS_HRPC_STATUS(auto buffer_ptr, buffer_pool_per_cim.at(configured_infer_model_handle)->acquire_buffer(input_name),
RunAsyncSerializer);
uint32_t read_size = 0;
while (read_size < buffer_ptr->size()) {
uint32_t current_size = request_struct.input_buffer_sizes[buffer_size_index++];
CHECK_AS_HRPC_STATUS(read_size + current_size <= buffer_ptr->size(), HAILO_INTERNAL_FAILURE,
RunAsyncSerializer);
auto status = server_context->connection().read_buffer(MemoryView(buffer_ptr->data() + read_size, current_size));
CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer);
read_size += current_size;
}
inputs.emplace_back(buffer_ptr);
auto status = input.set_buffer(MemoryView(*buffer_ptr));
CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer);
}
std::vector<BufferPtr> outputs; // TODO: add infer vector pool
outputs.reserve(infer_model_info->outputs_names.size());
for (const auto &output_name : infer_model_info->outputs_names) {
TRY_AS_HRPC_STATUS(auto buffer_ptr, buffer_pool_per_cim.at(configured_infer_model_handle)->acquire_buffer(output_name),
RunAsyncSerializer);
auto output = bindings->output(output_name);
CHECK_EXPECTED_AS_HRPC_STATUS(output, RunAsyncSerializer);
auto status = output->set_buffer(MemoryView(buffer_ptr->data(), buffer_ptr->size()));
CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer);
outputs.emplace_back(buffer_ptr);
}
auto infer_lambda =
[bindings = bindings.release(), callback_id, server_context, inputs, outputs, &buffer_pool_per_cim, configured_infer_model_handle,
infer_model_info, callbacks_done_queue]
(std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
return configured_infer_model->run_async(bindings,
[callback_id, server_context, inputs, outputs, &buffer_pool_per_cim, configured_infer_model_handle, infer_model_info,
callbacks_done_queue]
(const AsyncInferCompletionInfo &completion_info) {
for (uint32_t i = 0; i < inputs.size(); i++) {
auto status = buffer_pool_per_cim.at(configured_infer_model_handle)->return_to_pool(infer_model_info->inputs_names[i], inputs[i]);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to return buffer to pool, status = {}", status);
}
}
FinishedInferRequest request;
request.connection = server_context->connection();
request.completion_info = completion_info;
request.callback_id = callback_id;
request.configured_infer_model_handle = configured_infer_model_handle;
request.outputs = std::move(outputs);
request.outputs_names = infer_model_info->outputs_names;
auto status = callbacks_done_queue->enqueue(std::move(request));
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to enqueue to infer requests queue, status = {}", status);
}
});
};
auto job = cim_manager.execute<Expected<AsyncInferJob>>(configured_infer_model_handle, infer_lambda);
CHECK_EXPECTED_AS_HRPC_STATUS(job, RunAsyncSerializer);
job->detach();
TRY_AS_HRPC_STATUS(auto reply, RunAsyncSerializer::serialize_reply(HAILO_SUCCESS), RunAsyncSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__CREATE,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto status = CreateDeviceSerializer::deserialize_request(request);
CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateDeviceSerializer);
TRY_AS_HRPC_STATUS(auto device, Device::create(), CreateDeviceSerializer);
auto &manager = ServiceResourceManager<Device>::get_instance();
auto id = manager.register_resource(SINGLE_CLIENT_PID, std::move(device));
auto reply = CreateDeviceSerializer::serialize_reply(HAILO_SUCCESS, id);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__DESTROY,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
auto &manager = ServiceResourceManager<Device>::get_instance();
TRY_AS_HRPC_STATUS(auto device_handle, DestroyDeviceSerializer::deserialize_request(request), DestroyDeviceSerializer);
(void)manager.release_resource(device_handle, SINGLE_CLIENT_PID);
TRY_AS_HRPC_STATUS(auto reply, DestroyDeviceSerializer::serialize_reply(HAILO_SUCCESS), DestroyDeviceSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__IDENTIFY,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
TRY_AS_HRPC_STATUS(auto device_handle, IdentifyDeviceSerializer::deserialize_request(request), IdentifyDeviceSerializer);
auto &manager = ServiceResourceManager<Device>::get_instance();
auto device_lambda = [] (std::shared_ptr<Device> device) {
return device->identify();
};
TRY_AS_HRPC_STATUS(auto identity,
manager.execute<Expected<hailo_device_identity_t>>(device_handle, device_lambda), IdentifyDeviceSerializer);
TRY_AS_HRPC_STATUS(auto reply, IdentifyDeviceSerializer::serialize_reply(HAILO_SUCCESS, identity), IdentifyDeviceSerializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__EXTENDED_INFO,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
using Serializer = ExtendedDeviceInfoSerializer;
using ActionReturnType = hailo_extended_device_information_t;
TRY_AS_HRPC_STATUS(auto device_handle, Serializer::deserialize_request(request), Serializer);
auto &manager = ServiceResourceManager<Device>::get_instance();
auto device_lambda = [] (std::shared_ptr<Device> device) {
return device->get_extended_device_information();
};
TRY_AS_HRPC_STATUS(auto extended_info,
manager.execute<Expected<ActionReturnType>>(device_handle, device_lambda), Serializer);
TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, extended_info), Serializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__GET_CHIP_TEMPERATURE,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
using Serializer = GetChipTemperatureSerializer;
using ActionReturnType = hailo_chip_temperature_info_t;
TRY_AS_HRPC_STATUS(auto device_handle, Serializer::deserialize_request(request), Serializer);
auto &manager = ServiceResourceManager<Device>::get_instance();
auto device_lambda = [] (std::shared_ptr<Device> device) {
return device->get_chip_temperature();
};
TRY_AS_HRPC_STATUS(auto info, manager.execute<Expected<ActionReturnType>>(device_handle, device_lambda), Serializer);
TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, info), Serializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__POWER_MEASUREMENT,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
using Serializer = PowerMeasurementSerializer;
using ActionReturnType = float32_t;
TRY_AS_HRPC_STATUS(auto tuple, Serializer::deserialize_request(request), Serializer);
auto device_handle = std::get<0>(tuple);
auto dvm = std::get<1>(tuple);
auto power_measurement_type = std::get<2>(tuple);
auto &manager = ServiceResourceManager<Device>::get_instance();
auto device_lambda = [dvm, power_measurement_type] (std::shared_ptr<Device> device) {
return device->power_measurement(
static_cast<hailo_dvm_options_t>(dvm),
static_cast<hailo_power_measurement_types_t>(power_measurement_type));
};
TRY_AS_HRPC_STATUS(auto info, manager.execute<Expected<ActionReturnType>>(device_handle, device_lambda), Serializer);
TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, info), Serializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__SET_POWER_MEASUREMENT,
[] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected<Buffer> {
using Serializer = SetPowerMeasurementSerializer;
using ActionReturnType = hailo_status;
TRY_AS_HRPC_STATUS(auto tuple, Serializer::deserialize_request(request), Serializer);
auto device_handle = std::get<0>(tuple);
auto dvm = std::get<1>(tuple);
auto power_measurement_type = std::get<2>(tuple);
auto &manager = ServiceResourceManager<Device>::get_instance();
auto device_lambda = [dvm, power_measurement_type] (std::shared_ptr<Device> device) {
constexpr hailo_measurement_buffer_index_t not_used_buffer_index = HAILO_MEASUREMENT_BUFFER_INDEX_MAX_ENUM;
return device->set_power_measurement(
not_used_buffer_index, /* Relevant only for H8. Not used in H10 */
static_cast<hailo_dvm_options_t>(dvm),
static_cast<hailo_power_measurement_types_t>(power_measurement_type));
};
CHECK_SUCCESS_AS_HRPC_STATUS(manager.execute<ActionReturnType>(device_handle, device_lambda), Serializer);
TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS), Serializer);
return reply;
});
dispatcher.register_action(HailoRpcActionID::DEVICE__START_POWER_MEASUREMENT,
    [] (const MemoryView &request, ServerContextPtr) -> Expected<Buffer> {
        // Starts periodic power measurement on the device with the requested
        // averaging factor and sampling period.
        // Fix: the original handler reused SetPowerMeasurementSerializer, whose
        // request encodes (handle, dvm, measurement_type) — not the
        // (handle, averaging_factor, sampling_period) tuple consumed below.
        // Use the dedicated start-measurement serializer, matching the pattern of
        // the GET/STOP handlers which each use their own serializer.
        using Serializer = StartPowerMeasurementSerializer;
        using ActionReturnType = hailo_status;
        TRY_AS_HRPC_STATUS(auto request_params, Serializer::deserialize_request(request), Serializer);
        const auto device_handle = std::get<0>(request_params);
        const auto averaging_factor = std::get<1>(request_params);
        const auto sampling_period = std::get<2>(request_params);
        auto &manager = ServiceResourceManager<Device>::get_instance();
        auto start_measurement_op = [sampling_period, averaging_factor] (std::shared_ptr<Device> device) {
            return device->start_power_measurement(
                static_cast<hailo_averaging_factor_t>(averaging_factor),
                static_cast<hailo_sampling_period_t>(sampling_period));
        };
        CHECK_SUCCESS_AS_HRPC_STATUS(manager.execute<ActionReturnType>(device_handle, start_measurement_op), Serializer);
        TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS), Serializer);
        return reply;
    });
dispatcher.register_action(HailoRpcActionID::DEVICE__GET_POWER_MEASUREMENT,
    [] (const MemoryView &request, ServerContextPtr) -> Expected<Buffer> {
        // Fetches the accumulated power-measurement data from the device,
        // optionally clearing the accumulated values afterwards.
        using Serializer = GetPowerMeasurementSerializer;
        using ActionReturnType = hailo_power_measurement_data_t;
        TRY_AS_HRPC_STATUS(auto request_params, Serializer::deserialize_request(request), Serializer);
        const auto device_handle = std::get<0>(request_params);
        const auto should_clear = std::get<1>(request_params);
        auto read_measurement_op = [should_clear] (std::shared_ptr<Device> device) {
            // Buffer index is relevant only for H8. Not used in H10.
            constexpr hailo_measurement_buffer_index_t unused_buffer_index = HAILO_MEASUREMENT_BUFFER_INDEX_MAX_ENUM;
            return device->get_power_measurement(unused_buffer_index, should_clear);
        };
        auto &manager = ServiceResourceManager<Device>::get_instance();
        TRY_AS_HRPC_STATUS(auto measurement_data, manager.execute<Expected<ActionReturnType>>(device_handle, read_measurement_op), Serializer);
        TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, measurement_data), Serializer);
        return reply;
    });
dispatcher.register_action(HailoRpcActionID::DEVICE__STOP_POWER_MEASUREMENT,
    [] (const MemoryView &request, ServerContextPtr) -> Expected<Buffer> {
        // Stops the ongoing periodic power measurement on the device.
        using Serializer = StopPowerMeasurementSerializer;
        using ActionReturnType = hailo_status;
        TRY_AS_HRPC_STATUS(auto device_handle, Serializer::deserialize_request(request), Serializer);
        auto stop_measurement_op = [] (std::shared_ptr<Device> device) {
            return device->stop_power_measurement();
        };
        auto &manager = ServiceResourceManager<Device>::get_instance();
        CHECK_SUCCESS_AS_HRPC_STATUS(manager.execute<ActionReturnType>(device_handle, stop_measurement_op), Serializer);
        TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS), Serializer);
        return reply;
    });
// All RPC actions are registered; install the dispatcher and start serving.
server->set_dispatcher(dispatcher);
// serve() blocks for the lifetime of the server; it only returns on error/shutdown.
auto status = server->serve();
if (status != HAILO_SUCCESS) {
LOGGER__ERROR("Error in serve, status = {}", status);
// Propagate the hailo_status value as the process exit code.
return status;
}
return 0;
}

View File

@@ -1,68 +0,0 @@
#ifndef HAILORT_SERVER_HPP_
/**
* Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
* @file hailort_server.hpp
* @brief RPC Hailort Server Header
**/
#define HAILORT_SERVER_HPP_
#include "hrpc/server.hpp"
#include "hailort_service/cng_buffer_pool.hpp"
#include "hailo/infer_model.hpp"
#include "utils/thread_safe_map.hpp"
namespace hailort
{
using infer_model_handle_t = uint32_t;
struct FinishedInferRequest
{
public:
FinishedInferRequest() : completion_info(HAILO_UNINITIALIZED) {}
RpcConnection connection;
hailort::AsyncInferCompletionInfo completion_info;
uint32_t callback_id;
uint32_t configured_infer_model_handle;
std::vector<BufferPtr> outputs;
std::vector<std::string> outputs_names;
};
class Server;
class HailoRTServer : public Server {
public:
static Expected<std::unique_ptr<HailoRTServer>> create_unique();
explicit HailoRTServer(std::shared_ptr<ConnectionContext> connection_context,
std::shared_ptr<SpscQueue<FinishedInferRequest>> callbacks_done_queue,
EventPtr callbacks_queue_shutdown_event);
virtual ~HailoRTServer();
std::unordered_map<uint32_t, uint32_t> &infer_model_to_info_id() { return m_infer_model_to_info_id; };
ThreadSafeMap<uint32_t, std::shared_ptr<ServiceNetworkGroupBufferPool>> &buffer_pool_per_cim() { return m_buffer_pool_per_cim; };
std::unordered_map<infer_model_handle_t, Buffer> &hef_buffers() { return m_hef_buffers_per_infer_model; };
std::shared_ptr<SpscQueue<FinishedInferRequest>> &callbacks_done_queue() { return m_callbacks_done_queue; };
void cleanup_cim_buffer_pools(const std::vector<uint32_t> &cim_handles);
private:
virtual hailo_status cleanup_client_resources(RpcConnection client_connection) override;
void cleanup_infer_model_hef_buffers(const std::vector<uint32_t> &infer_model_handles);
hailo_status callbacks_thread_loop();
std::unordered_map<uint32_t, uint32_t> m_infer_model_to_info_id;
ThreadSafeMap<uint32_t, std::shared_ptr<ServiceNetworkGroupBufferPool>> m_buffer_pool_per_cim;
std::mutex m_buffer_pool_mutex;
std::unordered_map<infer_model_handle_t, Buffer> m_hef_buffers_per_infer_model;
std::shared_ptr<SpscQueue<FinishedInferRequest>> m_callbacks_done_queue;
EventPtr m_callbacks_queue_shutdown_event;
std::thread m_callbacks_thread;
};
} // namespace hailort
#endif // HAILORT_SERVER_HPP_

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file cng_buffer_pool.cpp
* @brief Network group buffer pool implementation
@@ -14,13 +14,12 @@
namespace hailort
{
Expected<BasicBufferPoolPtr> ServiceNetworkGroupBufferPool::create_stream_buffer_pool(size_t buffer_size,
Expected<BasicBufferPoolPtr> BaseNetworkGroupBufferPool::create_stream_buffer_pool(size_t buffer_size,
size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event)
{
auto map_buffer_lambda = [direction](std::shared_ptr<VDevice> vdevice, BufferPtr buffer) {
return DmaMappedBuffer::create(*vdevice, buffer->data(), buffer->size(), direction);
};
auto &vdevice_manager = ServiceResourceManager<VDevice>::get_instance();
TRY(auto free_buffers_queue,
SpscQueue<BufferPtr>::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT));
@@ -29,10 +28,7 @@ Expected<BasicBufferPoolPtr> ServiceNetworkGroupBufferPool::create_stream_buffer
buffers.reserve(buffer_count);
for (size_t i = 0; i < buffer_count; i++) {
TRY(auto buffer, Buffer::create_shared(buffer_size, BufferStorageParams::create_dma()));
TRY(auto mapped_buffer,
vdevice_manager.execute<Expected<DmaMappedBuffer>>(m_vdevice_handle, map_buffer_lambda, buffer));
TRY(auto mapped_buffer, m_map_buffer_func(m_vdevice_handle, map_buffer_lambda, buffer));
auto status = free_buffers_queue.enqueue(buffer);
CHECK_SUCCESS(status);
@@ -47,21 +43,13 @@ Expected<BasicBufferPoolPtr> ServiceNetworkGroupBufferPool::create_stream_buffer
return buffer_pool_ptr;
}
Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> ServiceNetworkGroupBufferPool::create(uint32_t vdevice_handle)
{
TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
auto cng_buffer_pool_ptr = make_shared_nothrow<ServiceNetworkGroupBufferPool>(shutdown_event, vdevice_handle);
CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
return cng_buffer_pool_ptr;
}
ServiceNetworkGroupBufferPool::ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle) :
m_stream_name_to_buffer_pool(), m_mapped_buffers(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle), m_is_shutdown(false)
BaseNetworkGroupBufferPool::BaseNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle,
map_buffer_on_handle_func_t map_buffer_func)
: m_stream_name_to_buffer_pool(), m_mapped_buffers(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle),
m_map_buffer_func(map_buffer_func), m_is_shutdown(false)
{}
hailo_status ServiceNetworkGroupBufferPool::allocate_pool(const std::string &name,
hailo_status BaseNetworkGroupBufferPool::allocate_pool(const std::string &name,
hailo_dma_buffer_direction_t direction, size_t frame_size, size_t pool_size)
{
TRY(auto buffer_pool, create_stream_buffer_pool(frame_size, pool_size, direction, m_shutdown_event));
@@ -72,7 +60,7 @@ hailo_status ServiceNetworkGroupBufferPool::allocate_pool(const std::string &nam
return HAILO_SUCCESS;
}
hailo_status ServiceNetworkGroupBufferPool::reallocate_pool(const std::string &name,
hailo_status BaseNetworkGroupBufferPool::reallocate_pool(const std::string &name,
hailo_dma_buffer_direction_t direction, size_t frame_size)
{
std::unique_lock<std::mutex> lock(m_mutex);
@@ -86,7 +74,7 @@ hailo_status ServiceNetworkGroupBufferPool::reallocate_pool(const std::string &n
return HAILO_SUCCESS;
}
Expected<BufferPtr> ServiceNetworkGroupBufferPool::acquire_buffer(const std::string &stream_name)
Expected<BufferPtr> BaseNetworkGroupBufferPool::acquire_buffer(const std::string &stream_name)
{
CHECK_AS_EXPECTED(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
"acquire_buffer() for stream {} failed, stream name does not exist in buffer pool", stream_name);
@@ -104,7 +92,7 @@ Expected<BufferPtr> ServiceNetworkGroupBufferPool::acquire_buffer(const std::str
return buffer;
}
hailo_status ServiceNetworkGroupBufferPool::return_to_pool(const std::string &stream_name, BufferPtr buffer)
hailo_status BaseNetworkGroupBufferPool::return_to_pool(const std::string &stream_name, BufferPtr buffer)
{
CHECK(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
"acquire_buffer() for stream {} failed, stream name does not exist in buffer pool", stream_name);
@@ -119,7 +107,7 @@ hailo_status ServiceNetworkGroupBufferPool::return_to_pool(const std::string &st
return HAILO_SUCCESS;
}
hailo_status ServiceNetworkGroupBufferPool::shutdown()
hailo_status BaseNetworkGroupBufferPool::shutdown()
{
{
std::unique_lock<std::mutex> lock(m_mutex);
@@ -129,4 +117,36 @@ hailo_status ServiceNetworkGroupBufferPool::shutdown()
return m_shutdown_event->signal();
}
Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> ServiceNetworkGroupBufferPool::create(uint32_t vdevice_handle)
{
TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
auto map_buffer_func = [](uint32_t handle, execute_map_on_vdevice_func_t execute_map_buffer_func, BufferPtr buffer) -> Expected<DmaMappedBuffer> {
auto &vdevice_manager = ServiceResourceManager<VDevice>::get_instance();
TRY(auto mapped_buffer,
vdevice_manager.execute<Expected<DmaMappedBuffer>>(handle, execute_map_buffer_func, buffer));
return mapped_buffer;
};
auto cng_buffer_pool_ptr = make_shared_nothrow<ServiceNetworkGroupBufferPool>(shutdown_event, vdevice_handle, map_buffer_func);
CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
return cng_buffer_pool_ptr;
}
Expected<std::shared_ptr<ServerNetworkGroupBufferPool>> ServerNetworkGroupBufferPool::create(uint32_t vdevice_handle)
{
TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
auto map_buffer_func = [](uint32_t handle, execute_map_on_vdevice_func_t execute_map_buffer_func, BufferPtr buffer) -> Expected<DmaMappedBuffer> {
auto &vdevice_manager = ServerResourceManager<VDevice>::get_instance();
TRY(auto mapped_buffer,
vdevice_manager.execute<Expected<DmaMappedBuffer>>(handle, execute_map_buffer_func, buffer));
return mapped_buffer;
};
auto cng_buffer_pool_ptr = make_shared_nothrow<ServerNetworkGroupBufferPool>(shutdown_event, vdevice_handle, map_buffer_func);
CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
return cng_buffer_pool_ptr;
}
} /* namespace hailort */

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file cng_buffer_pool.hpp
* @brief This model represents the buffer pools for the streams of each network group. Used in async API
@@ -22,28 +22,28 @@ namespace hailort
{
using stream_name_t = std::string;
using execute_map_on_vdevice_func_t = std::function<Expected<DmaMappedBuffer>(std::shared_ptr<VDevice>, BufferPtr)>;
using map_buffer_on_handle_func_t = std::function<Expected<DmaMappedBuffer>(uint32_t, execute_map_on_vdevice_func_t, BufferPtr)>;
// This object holds a buffer pool for each stream of the network group.
// It is used to pre-allocate all the buffers necessary for the reads from the device.
// The buffers are reuseable, which also prevents allocation during inference.
// The buffers are mapped to the device during their creation, which prevent lazy mapping each frame inference.
// Currently only used in async API.
class ServiceNetworkGroupBufferPool
class BaseNetworkGroupBufferPool
{
public:
static Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> create(uint32_t vdevice_handle);
hailo_status allocate_pool(const std::string &name, hailo_dma_buffer_direction_t direction, size_t frame_size, size_t pool_size);
// Used in order to reallocate the pool buffers with different frame_size
hailo_status reallocate_pool(const std::string &name, hailo_dma_buffer_direction_t direction, size_t frame_size);
ServiceNetworkGroupBufferPool(ServiceNetworkGroupBufferPool &&) = delete;
ServiceNetworkGroupBufferPool(const ServiceNetworkGroupBufferPool &) = delete;
ServiceNetworkGroupBufferPool &operator=(ServiceNetworkGroupBufferPool &&) = delete;
ServiceNetworkGroupBufferPool &operator=(const ServiceNetworkGroupBufferPool &) = delete;
virtual ~ServiceNetworkGroupBufferPool() = default;
BaseNetworkGroupBufferPool(BaseNetworkGroupBufferPool &&) = delete;
BaseNetworkGroupBufferPool(const BaseNetworkGroupBufferPool &) = delete;
BaseNetworkGroupBufferPool &operator=(BaseNetworkGroupBufferPool &&) = delete;
BaseNetworkGroupBufferPool &operator=(const BaseNetworkGroupBufferPool &) = delete;
virtual ~BaseNetworkGroupBufferPool() = default;
ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle);
BaseNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle, map_buffer_on_handle_func_t map_buffer_func);
Expected<BufferPtr> acquire_buffer(const std::string &stream_name);
hailo_status return_to_pool(const std::string &stream_name, BufferPtr buffer);
hailo_status shutdown();
@@ -57,11 +57,40 @@ private:
std::vector<DmaMappedBuffer> m_mapped_buffers;
EventPtr m_shutdown_event;
uint32_t m_vdevice_handle;
map_buffer_on_handle_func_t m_map_buffer_func;
std::mutex m_mutex;
std::condition_variable m_cv;
bool m_is_shutdown;
};
class ServiceNetworkGroupBufferPool : public BaseNetworkGroupBufferPool
{
public:
static Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> create(uint32_t vdevice_handle);
ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle, map_buffer_on_handle_func_t map_buffer_func)
: BaseNetworkGroupBufferPool(shutdown_event, vdevice_handle, map_buffer_func) {}
ServiceNetworkGroupBufferPool(ServiceNetworkGroupBufferPool &&) = delete;
ServiceNetworkGroupBufferPool(const ServiceNetworkGroupBufferPool &) = delete;
ServiceNetworkGroupBufferPool &operator=(ServiceNetworkGroupBufferPool &&) = delete;
ServiceNetworkGroupBufferPool &operator=(const ServiceNetworkGroupBufferPool &) = delete;
virtual ~ServiceNetworkGroupBufferPool() = default;
};
class ServerNetworkGroupBufferPool : public BaseNetworkGroupBufferPool
{
public:
static Expected<std::shared_ptr<ServerNetworkGroupBufferPool>> create(uint32_t vdevice_handle);
ServerNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle, map_buffer_on_handle_func_t map_buffer_func)
: BaseNetworkGroupBufferPool(shutdown_event, vdevice_handle, map_buffer_func) {}
ServerNetworkGroupBufferPool(ServerNetworkGroupBufferPool &&) = delete;
ServerNetworkGroupBufferPool(const ServerNetworkGroupBufferPool &) = delete;
ServerNetworkGroupBufferPool &operator=(ServerNetworkGroupBufferPool &&) = delete;
ServerNetworkGroupBufferPool &operator=(const ServerNetworkGroupBufferPool &) = delete;
virtual ~ServerNetworkGroupBufferPool() = default;
};
} /* namespace hailort */
#endif /* _HAILO_CNG_BUFFER_POOL_HPP_ */

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -82,10 +82,10 @@ hailo_status HailoRtRpcService::abort_output_vstream(uint32_t handle)
}
// TODO: Add a named templated release functions for InputVStream and OutputVStream to call abort before release.
void HailoRtRpcService::abort_vstreams_by_pids(std::set<uint32_t> &pids)
void HailoRtRpcService::abort_vstreams_by_ids(std::set<uint32_t> &pids)
{
auto inputs_handles = ServiceResourceManager<InputVStream>::get_instance().resources_handles_by_pids(pids);
auto outputs_handles = ServiceResourceManager<OutputVStream>::get_instance().resources_handles_by_pids(pids);
auto inputs_handles = ServiceResourceManager<InputVStream>::get_instance().resources_handles_by_ids(pids);
auto outputs_handles = ServiceResourceManager<OutputVStream>::get_instance().resources_handles_by_ids(pids);
for (auto &input_handle : inputs_handles) {
abort_input_vstream(input_handle);
}
@@ -108,9 +108,9 @@ hailo_status HailoRtRpcService::shutdown_configured_network_group(uint32_t vdevi
}
void HailoRtRpcService::shutdown_configured_network_groups_by_pids(std::set<uint32_t> &pids)
void HailoRtRpcService::shutdown_configured_network_groups_by_ids(std::set<uint32_t> &pids)
{
auto cng_handles = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().resources_handles_by_pids(pids);
auto cng_handles = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().resources_handles_by_ids(pids);
for (auto &handle : cng_handles) {
auto status = shutdown_configured_network_group(handle);
if (status != HAILO_SUCCESS) {
@@ -119,9 +119,9 @@ void HailoRtRpcService::shutdown_configured_network_groups_by_pids(std::set<uint
}
}
void HailoRtRpcService::shutdown_buffer_pool_by_pids(std::set<uint32_t> &pids)
void HailoRtRpcService::shutdown_buffer_pool_by_ids(std::set<uint32_t> &pids)
{
auto buffer_pools_handles = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance().resources_handles_by_pids(pids);
auto buffer_pools_handles = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance().resources_handles_by_ids(pids);
for (auto &handle : buffer_pools_handles) {
auto status = shutdown_cng_buffer_pool(handle);
if (status != HAILO_SUCCESS) {
@@ -130,9 +130,9 @@ void HailoRtRpcService::shutdown_buffer_pool_by_pids(std::set<uint32_t> &pids)
}
}
void HailoRtRpcService::shutdown_vdevice_cb_queue_by_pids(std::set<uint32_t> &pids)
void HailoRtRpcService::shutdown_vdevice_cb_queue_by_ids(std::set<uint32_t> &pids)
{
auto vdevice_cb_queue_handles = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance().resources_handles_by_pids(pids);
auto vdevice_cb_queue_handles = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance().resources_handles_by_ids(pids);
for (auto &handle : vdevice_cb_queue_handles) {
auto status = shutdown_vdevice_cb_queue(handle);
if (status != HAILO_SUCCESS) {
@@ -143,7 +143,6 @@ void HailoRtRpcService::shutdown_vdevice_cb_queue_by_pids(std::set<uint32_t> &pi
void HailoRtRpcService::remove_disconnected_clients()
{
std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2);
auto now = std::chrono::high_resolution_clock::now();
std::set<uint32_t> pids_to_remove;
{
@@ -164,19 +163,19 @@ void HailoRtRpcService::remove_disconnected_clients()
// We abort vstreams before releasing them to avoid cases where the vstream is stuck in execute of a
// blocking operation (which will be finished with timeout).
// To release the vstream the ServiceResourceManager is waiting for the resource_mutex which is also locked in execute.
abort_vstreams_by_pids(pids_to_remove);
abort_vstreams_by_ids(pids_to_remove);
// It is important to shutdown the cb Queue before the NG shutdown, as ongoing callbacks might continue to try to enqueue
shutdown_vdevice_cb_queue_by_pids(pids_to_remove);
shutdown_configured_network_groups_by_pids(pids_to_remove);
shutdown_buffer_pool_by_pids(pids_to_remove);
shutdown_vdevice_cb_queue_by_ids(pids_to_remove);
shutdown_configured_network_groups_by_ids(pids_to_remove);
shutdown_buffer_pool_by_ids(pids_to_remove);
for (auto &client_pid : pids_to_remove) {
ServiceResourceManager<OutputVStream>::get_instance().release_by_pid(client_pid);
ServiceResourceManager<InputVStream>::get_instance().release_by_pid(client_pid);
ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().release_by_pid(client_pid);
ServiceResourceManager<VDeviceCallbacksQueue>::get_instance().release_by_pid(client_pid);
ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance().release_by_pid(client_pid);
ServiceResourceManager<VDevice>::get_instance().release_by_pid(client_pid);
ServiceResourceManager<OutputVStream>::get_instance().release_by_id(client_pid);
ServiceResourceManager<InputVStream>::get_instance().release_by_id(client_pid);
ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().release_by_id(client_pid);
ServiceResourceManager<VDeviceCallbacksQueue>::get_instance().release_by_id(client_pid);
ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance().release_by_id(client_pid);
ServiceResourceManager<VDevice>::get_instance().release_by_id(client_pid);
LOGGER__INFO("Client disconnected, pid: {}", client_pid);
HAILORT_OS_LOG_INFO("Client disconnected, pid: {}", client_pid);
@@ -188,6 +187,7 @@ void HailoRtRpcService::remove_disconnected_clients()
void HailoRtRpcService::keep_alive()
{
while (true) {
std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2);
remove_disconnected_clients();
}
}
@@ -435,7 +435,7 @@ hailo_status HailoRtRpcService::allocate_pool_for_raw_streams(uint32_t ng_handle
{
auto &cng_buffer_pool_manager = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance();
// For Async API - The buffer size in the pool will be the stream's hw frame size as used in the infer_model pipeline
TRY(const auto min_buffer_pool_size, get_min_buffer_pool_size(ng_handle));
TRY(const auto min_buffer_pool_size, infer_queue_size(ng_handle));
TRY(const auto streams_infos, get_all_stream_infos(ng_handle));
for (const auto &stream_info : streams_infos) {
@@ -953,12 +953,8 @@ void serialize_vstream_info(const hailo_vstream_info_t &info, ProtoVStreamInfo *
if (HailoRTCommon::is_nms(info.format.order)) {
auto nms_shape_proto = info_proto->mutable_nms_shape();
nms_shape_proto->set_number_of_classes(info.nms_shape.number_of_classes);
if (info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE) {
nms_shape_proto->set_max_bboxes_total(info.nms_shape.max_bboxes_total);
} else {
nms_shape_proto->set_max_bboxes_per_class(info.nms_shape.max_bboxes_per_class);
}
nms_shape_proto->set_max_bboxes_total(info.nms_shape.max_bboxes_total);
nms_shape_proto->set_max_bboxes_per_class(info.nms_shape.max_bboxes_per_class);
nms_shape_proto->set_max_accumulated_mask_size(info.nms_shape.max_accumulated_mask_size);
} else {
auto shape_proto = info_proto->mutable_shape();
@@ -1273,15 +1269,11 @@ void serialize_op_matadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMe
auto nms_config_proto = op_metadata_proto->mutable_nms_post_process_config();
nms_config_proto->set_nms_score_th(nms_config.nms_score_th);
nms_config_proto->set_nms_iou_th(nms_config.nms_iou_th);
if (HAILO_NMS_RESULT_ORDER_BY_SCORE == nms_config.order_type) {
nms_config_proto->set_max_proposals_total(nms_config.max_proposals_total);
} else {
nms_config_proto->set_max_proposals_per_class(nms_config.max_proposals_per_class);
}
nms_config_proto->set_max_proposals_total(nms_config.max_proposals_total);
nms_config_proto->set_max_proposals_per_class(nms_config.max_proposals_per_class);
nms_config_proto->set_number_of_classes(nms_config.number_of_classes);
nms_config_proto->set_background_removal(nms_config.background_removal);
nms_config_proto->set_background_removal_index(nms_config.background_removal_index);
nms_config_proto->set_cross_classes(nms_config.cross_classes);
nms_config_proto->set_bbox_only(nms_config.bbox_only);
}
@@ -1775,7 +1767,6 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc
auto proto_nms_info = proto_stream_info.mutable_nms_info();
proto_nms_info->set_number_of_classes(stream_info.nms_info.number_of_classes);
proto_nms_info->set_max_bboxes_per_class(stream_info.nms_info.max_bboxes_per_class);
proto_nms_info->set_order_type(HAILO_NMS_RESULT_ORDER_HW);
proto_nms_info->set_bbox_size(stream_info.nms_info.bbox_size);
proto_nms_info->set_chunks_per_frame(stream_info.nms_info.chunks_per_frame);
proto_nms_info->set_is_defused(stream_info.nms_info.is_defused);
@@ -1872,25 +1863,25 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(g
return grpc::Status::OK;
}
Expected<size_t> HailoRtRpcService::get_min_buffer_pool_size(uint32_t ng_handle)
Expected<size_t> HailoRtRpcService::infer_queue_size(uint32_t ng_handle)
{
auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng) {
return cng->get_min_buffer_pool_size();
return cng->infer_queue_size();
};
auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
TRY(auto min_buffer_pool_size, manager.execute<Expected<size_t>>(ng_handle, lambda));
TRY(auto queue_size, manager.execute<Expected<size_t>>(ng_handle, lambda));
return min_buffer_pool_size;
return queue_size;
}
grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request,
ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply)
grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_infer_queue_size(grpc::ServerContext*,
const ConfiguredNetworkGroup_infer_queue_size_Request *request,
ConfiguredNetworkGroup_infer_queue_size_Reply *reply)
{
auto min_buffer_pool_size_expected = get_min_buffer_pool_size(request->identifier().network_group_handle());
CHECK_EXPECTED_AS_RPC_STATUS(min_buffer_pool_size_expected, reply);
auto queue_size_expected = infer_queue_size(request->identifier().network_group_handle());
CHECK_EXPECTED_AS_RPC_STATUS(queue_size_expected, reply);
reply->set_min_buffer_pool_size(static_cast<uint32_t>(min_buffer_pool_size_expected.release()));
reply->set_infer_queue_size(static_cast<uint32_t>(queue_size_expected.release()));
reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
return grpc::Status::OK;
}
@@ -1996,21 +1987,6 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_bboxes_total(
return grpc::Status::OK;
}
grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_result_order_type(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_result_order_type_Request *request,
ConfiguredNetworkGroup_set_nms_result_order_type_Reply *reply)
{
auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng, const std::string &edge_name, hailo_nms_result_order_type_t order_type) {
return cng->set_nms_result_order_type(edge_name, order_type);
};
auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
auto status = manager.execute(request->identifier().network_group_handle(), lambda,
request->edge_name(), static_cast<hailo_nms_result_order_type_t>(request->nms_result_order_type()));
CHECK_SUCCESS_AS_RPC_STATUS(status, reply);
reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
return grpc::Status::OK;
}
grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request,
ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply)

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -190,9 +190,9 @@ public:
virtual grpc::Status ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_sorted_output_names_Request *request,
ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request,
ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_infer_queue_size(grpc::ServerContext*,
const ConfiguredNetworkGroup_infer_queue_size_Request *request,
ConfiguredNetworkGroup_infer_queue_size_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_get_layer_info(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_layer_info_Request *request,
ConfiguredNetworkGroup_get_layer_info_Reply *reply) override;
@@ -211,9 +211,6 @@ public:
virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_bboxes_total(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_max_bboxes_total_Request *request,
ConfiguredNetworkGroup_set_nms_max_bboxes_total_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_set_nms_result_order_type(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_result_order_type_Request *request,
ConfiguredNetworkGroup_set_nms_result_order_type_Reply *reply) override;
virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*,
const ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request,
ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply) override;
@@ -232,11 +229,11 @@ private:
hailo_status flush_input_vstream(uint32_t handle);
hailo_status abort_input_vstream(uint32_t handle);
hailo_status abort_output_vstream(uint32_t handle);
void abort_vstreams_by_pids(std::set<uint32_t> &pids);
void release_configured_network_groups_by_pid(uint32_t client_pid);
void abort_vstreams_by_ids(std::set<uint32_t> &pids);
void release_configured_network_groups_by_id(uint32_t client_pid);
void remove_disconnected_clients();
void update_client_id_timestamp(uint32_t pid);
Expected<size_t> get_min_buffer_pool_size(uint32_t ng_handle);
Expected<size_t> infer_queue_size(uint32_t ng_handle);
Expected<std::vector<hailo_stream_info_t>> get_all_stream_infos(uint32_t ng_handle);
Expected<std::vector<hailo_vstream_info_t>> get_all_vstream_infos(uint32_t ng_handle);
Expected<std::string> output_vstream_name(uint32_t vstream_handle);
@@ -255,9 +252,9 @@ private:
Expected<BufferPtr> acquire_buffer_from_cng_pool(uint32_t ng_handle, const std::string &output_name);
Expected<size_t> output_vstream_frame_size(uint32_t vstream_handle);
hailo_status update_buffer_size_in_pool(uint32_t vstream_handle, uint32_t network_group_handle);
void shutdown_configured_network_groups_by_pids(std::set<uint32_t> &pids);
void shutdown_buffer_pool_by_pids(std::set<uint32_t> &pids);
void shutdown_vdevice_cb_queue_by_pids(std::set<uint32_t> &pids);
void shutdown_configured_network_groups_by_ids(std::set<uint32_t> &pids);
void shutdown_buffer_pool_by_ids(std::set<uint32_t> &pids);
void shutdown_vdevice_cb_queue_by_ids(std::set<uint32_t> &pids);
hailo_status shutdown_cng_buffer_pool(uint32_t network_group_handle);
hailo_status shutdown_vdevice_cb_queue(uint32_t vdevice_handle);
hailo_status shutdown_configured_network_group(uint32_t vdevice_handle);

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
@@ -19,33 +19,25 @@
#include <shared_mutex>
#include <unordered_set>
#define SINGLE_CLIENT_PID (0)
namespace hailort
{
template<class T>
struct Resource {
Resource(uint32_t pid, std::shared_ptr<T> resource)
Resource(uint32_t id, std::shared_ptr<T> resource)
: resource(std::move(resource))
{
pids.insert(pid);
ids.insert(id);
}
std::shared_ptr<T> resource;
std::unordered_set<uint32_t> pids;
std::unordered_set<uint32_t> ids;
};
template<class T>
class ServiceResourceManager
class BaseResourceManager
{
public:
static ServiceResourceManager& get_instance()
{
static ServiceResourceManager instance;
return instance;
}
template<class K, class Func, typename... Args>
K execute(uint32_t handle, Func &lambda, Args... args)
{
@@ -55,7 +47,6 @@ public:
std::shared_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
lock.unlock();
auto ret = lambda(resource->resource, args...);
return ret;
}
@@ -68,16 +59,15 @@ public:
std::shared_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
lock.unlock();
auto ret = lambda(resource->resource, args...);
return ret;
}
uint32_t register_resource(uint32_t pid, const std::shared_ptr<T> &resource)
uint32_t register_resource(uint32_t id, const std::shared_ptr<T> &resource)
{
std::unique_lock<std::mutex> lock(m_mutex);
auto index = m_current_handle_index.load();
// Create a new resource and register
m_resources.emplace(m_current_handle_index, std::make_shared<Resource<T>>(pid, std::move(resource)));
m_resources.emplace(m_current_handle_index, std::make_shared<Resource<T>>(id, std::move(resource)));
m_resources_mutexes[m_current_handle_index]; // construct std::shared_timed_mutex
m_current_handle_index++;
return index;
@@ -90,25 +80,25 @@ public:
m_current_handle_index++;
}
Expected<uint32_t> dup_handle(uint32_t handle, uint32_t pid)
Expected<uint32_t> dup_handle(uint32_t handle, uint32_t id)
{
std::unique_lock<std::mutex> lock(m_mutex);
TRY(auto resource, resource_lookup(handle));
assert(contains(m_resources_mutexes, handle));
std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
resource->pids.insert(pid);
resource->ids.insert(id);
return Expected<uint32_t>(handle);
}
std::shared_ptr<T> release_resource(uint32_t handle, uint32_t pid)
std::shared_ptr<T> release_resource(uint32_t handle, uint32_t id)
{
std::shared_ptr<T> res = nullptr;
std::unique_lock<std::mutex> lock(m_mutex);
auto found = m_resources.find(handle);
if (found == m_resources.end()) {
LOGGER__INFO("Failed to release resource with handle {} and PID {}. The resource no longer exists or may have already been released",
handle, pid);
LOGGER__INFO("Failed to release resource with handle {} and ID {}. The resource no longer exists or may have already been released",
handle, id);
return res;
}
@@ -117,8 +107,8 @@ public:
bool release_resource = false;
{
std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
resource->pids.erase(pid);
if ((SINGLE_CLIENT_PID == pid) || all_pids_dead(resource)) {
resource->ids.erase(id);
if (should_resource_be_released(resource)) {
release_resource = true;
res = resource->resource;
m_resources.erase(handle);
@@ -130,19 +120,19 @@ public:
return res;
}
std::vector<std::shared_ptr<T>> release_by_pid(uint32_t pid)
std::vector<std::shared_ptr<T>> release_by_id(uint32_t id)
{
std::vector<std::shared_ptr<T>> res;
std::unique_lock<std::mutex> lock(m_mutex);
for (auto iter = m_resources.begin(); iter != m_resources.end(); ) {
auto handle = iter->first;
bool release_resource = false;
if (contains(iter->second->pids, pid)) {
if (contains(iter->second->ids, id)) {
assert(contains(m_resources_mutexes, handle));
{
std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
iter->second->pids.erase(pid);
if (iter->second->pids.empty()) {
iter->second->ids.erase(id);
if (iter->second->ids.empty()) {
release_resource = true;
res.push_back(iter->second->resource);
iter = m_resources.erase(iter);
@@ -159,13 +149,13 @@ public:
return res;
}
std::vector<uint32_t> resources_handles_by_pids(std::set<uint32_t> &pids)
std::vector<uint32_t> resources_handles_by_ids(std::set<uint32_t> &ids)
{
std::unique_lock<std::mutex> lock(m_mutex);
std::vector<uint32_t> resources_handles;
for (auto &handle_resource_pair : m_resources) {
for (auto &pid : pids) {
if (contains(handle_resource_pair.second->pids, pid)) {
for (auto &id : ids) {
if (contains(handle_resource_pair.second->ids, id)) {
resources_handles.emplace_back(handle_resource_pair.first);
}
}
@@ -173,11 +163,14 @@ public:
return resources_handles;
}
private:
ServiceResourceManager()
protected:
BaseResourceManager()
: m_current_handle_index(0)
{}
virtual bool should_resource_be_released(std::shared_ptr<Resource<T>> resource) = 0;
private:
Expected<std::shared_ptr<Resource<T>>> resource_lookup(uint32_t handle)
{
auto found = m_resources.find(handle);
@@ -186,22 +179,57 @@ private:
return resource;
}
bool all_pids_dead(std::shared_ptr<Resource<T>> resource)
{
for (auto &pid : resource->pids) {
if (OsUtils::is_pid_alive(pid)) {
return false;
}
}
return true;
}
std::mutex m_mutex;
std::atomic<uint32_t> m_current_handle_index;
std::unordered_map<uint32_t, std::shared_ptr<Resource<T>>> m_resources;
std::unordered_map<uint32_t, std::shared_timed_mutex> m_resources_mutexes;
};
template<class T>
class ServiceResourceManager : public BaseResourceManager<T>
{
public:
    // Process-wide singleton accessor (Meyers singleton — thread-safe init since C++11).
    static ServiceResourceManager& get_instance()
    {
        static ServiceResourceManager instance;
        return instance;
    }

protected:
    // A resource may be released only once none of its registered ids
    // (client PIDs in the service use-case) belongs to a live process.
    virtual bool should_resource_be_released(std::shared_ptr<Resource<T>> resource) override
    {
        bool has_live_client = false;
        for (const auto &client_pid : resource->ids) {
            if (OsUtils::is_pid_alive(client_pid)) {
                has_live_client = true;
                break;
            }
        }
        return !has_live_client;
    }

private:
    ServiceResourceManager() = default;
};
template<class T>
class ServerResourceManager : public BaseResourceManager<T>
{
public:
    // Process-wide singleton accessor (Meyers singleton — thread-safe init since C++11).
    static ServerResourceManager& get_instance()
    {
        static ServerResourceManager instance;
        return instance;
    }

protected:
    // Server-side resources are released unconditionally: a release request is
    // always honored regardless of which ids are still registered on the resource.
    virtual bool should_resource_be_released(std::shared_ptr<Resource<T>> /*resource*/) override
    {
        return true;
    }

private:
    ServerResourceManager() = default;
};
}
#endif /* HAILO_SERVICE_RESOURCE_MANAGER_HPP_ */

View File

@@ -1,7 +1,8 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
*
**/
/**
* @file hailort_service.cpp
* @brief main for hailort service
* To run without daemonization run the hailort_service executable with `standalone`.
@@ -32,9 +33,29 @@
using namespace hailort;
// Returns true when the configured server address is the default HailoRT
// service address. Takes the address by const reference — the previous
// by-value parameter forced a needless std::string copy per call
// (clang-tidy: performance-unnecessary-value-param); call sites are unaffected.
bool is_default_service_address(const std::string &server_address)
{
    return HAILORT_SERVICE_DEFAULT_ADDR == server_address;
}
// Returns true when the default service socket file exists but could NOT be
// removed (unlink() failed for a reason other than ENOENT, e.g. EACCES).
// Returns false when the file was removed successfully or did not exist.
// NOTE: the previous comment stated the inverse ("will return false ... for a
// reason other than 'file doesn't exist'"), contradicting the expression below.
bool socket_file_exists_and_unremovable()
{
    if (0 == unlink(HAILO_DEFAULT_SERVICE_ADDR.c_str())) {
        // File existed and was removed — the path is now clear.
        return false;
    }
    // unlink failed: ENOENT means "nothing to remove" (fine); anything else
    // means the file is present and we lack permission (or similar) to remove it.
    return (ENOENT != errno);
}
void RunService()
{
const std::string server_address = HAILORT_SERVICE_ADDRESS;
// If the socket file already exists and cannot be removed due to insufficient permissions,
// we should fail early to prevent grpc::BuildAndStart() from causing a segmentation fault.
if (is_default_service_address(server_address) && socket_file_exists_and_unremovable()) {
LOGGER__CRITICAL("Failed to remove existing socket file {}. This might indicate insufficient permissions for this operation.",
HAILO_DEFAULT_SERVICE_ADDR);
return;
}
HailoRtRpcService service;
grpc::ServerBuilder builder;
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
**/
/**
* @file vdevice_callbacks_queue.hpp
* @brief Queue used for the callbacks in infer async over service.

View File

@@ -1,7 +1,8 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
*
**/
/**
* @file hailort_service.cpp
* @brief main for hailort service
* The service code is based on Microsoft's documenataion: https://learn.microsoft.com/en-us/windows/win32/services/the-complete-service-sample
@@ -24,7 +25,7 @@
*
* 5) Delete service:
* `sc delete hailort_service`
*/
*/
#include "hailort_rpc_service.hpp"
#include "rpc/rpc_definitions.hpp"

View File

@@ -24,6 +24,7 @@ set(HAILORTCLI_CPP_FILES
common.cpp
benchmark_command.cpp
parse_hef_command.cpp
memory_requirements_command.cpp
graph_printer.cpp
mon_command.cpp

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
* Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**

Some files were not shown because too many files have changed in this diff Show More