diff --git a/backends/arm/CMakeLists.txt b/backends/arm/CMakeLists.txt index 095ccb6a49b..56b8d790d61 100644 --- a/backends/arm/CMakeLists.txt +++ b/backends/arm/CMakeLists.txt @@ -184,6 +184,36 @@ if(EXECUTORCH_BUILD_VGF) set(LIBVGF_PATH "${_vgf_site_pure}") endif() + if(EXECUTORCH_BUILD_TESTS) + add_executable( + vgf_neural_statistics_test + ${EXECUTORCH_ROOT}/backends/arm/test/vgf_neural_statistics_test.cpp + ${EXECUTORCH_ROOT}/backends/arm/runtime/VGFNeuralStatistics.cpp + ) + target_include_directories( + vgf_neural_statistics_test + PRIVATE ${_common_include_directories} ${VULKAN_HEADERS_PATH} + ${VOLK_HEADERS_PATH} + ) + target_compile_options( + vgf_neural_statistics_test PRIVATE -DUSE_VULKAN_WRAPPER + -DUSE_VULKAN_VOLK + ) + target_link_libraries(vgf_neural_statistics_test PRIVATE executorch_core) + if(TARGET GTest::gtest_main) + target_link_libraries( + vgf_neural_statistics_test PRIVATE GTest::gtest_main + ) + else() + target_link_libraries( + vgf_neural_statistics_test PRIVATE gtest gtest_main + ) + endif() + add_test(NAME vgf_neural_statistics_test + COMMAND vgf_neural_statistics_test + ) + endif() + set(LIBVGF_STATIC "${LIBVGF_PATH}/lib/libvgf.a") endif() @@ -194,8 +224,9 @@ if(EXECUTORCH_BUILD_VGF) target_include_directories(vgf INTERFACE "${LIBVGF_INCLUDE}") # Add backend delegate for VGF - set(_vgf_backend_sources backends/arm/runtime/VGFBackend.cpp - backends/arm/runtime/VGFSetup.cpp + set(_vgf_backend_sources + backends/arm/runtime/VGFBackend.cpp backends/arm/runtime/VGFSetup.cpp + backends/arm/runtime/VGFNeuralStatistics.cpp ) if(NOT EXECUTORCH_BUILD_VULKAN) list(APPEND _vgf_backend_sources backends/vulkan/third-party/volk/volk.c) diff --git a/backends/arm/runtime/VGFBackend.cpp b/backends/arm/runtime/VGFBackend.cpp index 0f6893d1dec..7f5bd9bdb0a 100644 --- a/backends/arm/runtime/VGFBackend.cpp +++ b/backends/arm/runtime/VGFBackend.cpp @@ -6,8 +6,11 @@ */ #include +#include +#include #include #include +#include using namespace std; @@ -101,6 
+104,17 @@ void vkml_free_basics( // vkDestroyInstance(*instance, nullptr); } +bool vgf_neural_statistics_profiling_enabled() { + const char* value = std::getenv("EXECUTORCH_VGF_ENABLE_NEURAL_STATISTICS"); + if (value == nullptr || value[0] == '\0') { + return false; + } + + return std::strcmp(value, "0") != 0 && std::strcmp(value, "false") != 0 && + std::strcmp(value, "FALSE") != 0 && std::strcmp(value, "off") != 0 && + std::strcmp(value, "OFF") != 0; +} + class VGFBackend final : public ::executorch::runtime::BackendInterface { public: VGFBackend() = default; @@ -365,6 +379,28 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface { #ifdef ET_EVENT_TRACER_ENABLED event_tracer_end_profiling_delegate(event_tracer, dispatch_event); + if (event_tracer != nullptr && vgf_neural_statistics_profiling_enabled()) { + // We attach the neural statistics JSON blob to ETDump as delegate + // metadata, when event tracer is active. + + // This is “synthetic” event, which we use as a carrier for metadata + EventTracerEntry neural_statistics_event = + event_tracer_start_profiling_delegate( + event_tracer, + kVgfNeuralStatisticsDelegateEventName, + /*delegate_debug_id=*/-1); + + // Ask VGF representation for neural accelerator diagnostics + std::string neural_statistics_metadata = + repr->collect_neural_statistics_metadata(); + + event_tracer_end_profiling_delegate( + event_tracer, + neural_statistics_event, + neural_statistics_metadata.data(), + neural_statistics_metadata.size()); + } + EventTracerEntry copy_outputs_event = event_tracer_start_profiling_delegate( event_tracer, "VGF_COPY_OUTPUTS", diff --git a/backends/arm/runtime/VGFNeuralStatistics.cpp b/backends/arm/runtime/VGFNeuralStatistics.cpp new file mode 100644 index 00000000000..414eaa6f0f0 --- /dev/null +++ b/backends/arm/runtime/VGFNeuralStatistics.cpp @@ -0,0 +1,524 @@ +/* + * Copyright 2026 Arm Limited and/or its affiliates. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include + +#include + +// This file checks Vulkan API availability, +// queries debug database/statistics info, +// maps statistics memory if available, +// and serializes everything into JSON. +namespace executorch { +namespace backends { +namespace vgf { +namespace { + +// Converts a C++ string into a valid JSON string literal. +std::string json_escape(const std::string& value) { + std::ostringstream out; + out << '"'; + for (unsigned char c : value) { + switch (c) { + case '"': + out << "\\\""; + break; + case '\\': + out << "\\\\"; + break; + case '\b': + out << "\\b"; + break; + case '\f': + out << "\\f"; + break; + case '\n': + out << "\\n"; + break; + case '\r': + out << "\\r"; + break; + case '\t': + out << "\\t"; + break; + default: + if (c < 0x20) { + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') + << static_cast(c) << std::dec; + } else { + out << c; + } + break; + } + } + out << '"'; + return out.str(); +} + +// Converts raw binary bytes into padded base64 text. +// We need this because JSON cannot contain arbitrary +// raw binary data. +std::string base64_encode(const std::vector& input) { + static constexpr char kAlphabet[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + std::string output; + output.reserve(((input.size() + 2) / 3) * 4); + + unsigned int val = 0; // unsigned: shifts must not overflow + int valb = -6; + for (uint8_t c : input) { + val = (val << 8) + c; + valb += 8; + while (valb >= 0) { + output.push_back(kAlphabet[(val >> valb) & 0x3F]); + valb -= 6; + } + } + + if (valb > -6) { + output.push_back(kAlphabet[((val << 8) >> (valb + 8)) & 0x3F]); + } + + while (output.size() % 4 != 0) { + output.push_back('='); + } + + return output; +} + +void append_bool(std::ostringstream& out, bool value) { + out << (value ?
"true" : "false"); +} + +void append_blob( + std::ostringstream& out, + const char* name, + const VgfNeuralStatisticsBlob& blob) { + out << json_escape(name) << ":{"; + out << "\"available\":"; + append_bool(out, blob.available); + out << ",\"is_text\":"; + append_bool(out, blob.is_text); + out << ",\"vulkan_result\":" << blob.vulkan_result; + out << ",\"size\":" << blob.data.size(); + out << ",\"encoding\":\"base64\""; + out << ",\"reason\":" << json_escape(blob.reason); + out << ",\"data\":" << json_escape(base64_encode(blob.data)); + out << "}"; +} + +bool contains_property( + const std::vector& properties, + VkDataGraphPipelinePropertyARM property) { + return std::find(properties.begin(), properties.end(), property) != + properties.end(); +} + +VgfNeuralStatisticsBlob make_unavailable_blob(const std::string& reason) { + VgfNeuralStatisticsBlob blob; + blob.available = false; + blob.reason = reason; + return blob; +} + +VgfNeuralStatisticsBlob query_pipeline_property( + VkDevice device, + VkPipeline pipeline, + VkDataGraphPipelinePropertyARM property) { + if (device == VK_NULL_HANDLE) { + return make_unavailable_blob("VkDevice is null"); + } + if (pipeline == VK_NULL_HANDLE) { + return make_unavailable_blob("VkPipeline is null"); + } + +#if defined(VK_ARM_data_graph) && \ + defined( \ + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_DEBUG_DATABASE_ARM) && \ + defined( \ + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_STATISTICS_INFO_ARM) + + if (!vkGetDataGraphPipelineAvailablePropertiesARM || + !vkGetDataGraphPipelinePropertiesARM) { + return make_unavailable_blob( + "VK_ARM_data_graph pipeline property query functions are not loaded"); + } + + VkDataGraphPipelineInfoARM pipeline_info{ + .sType = VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_INFO_ARM, + .pNext = nullptr, + .dataGraphPipeline = pipeline, + }; + + uint32_t property_count = 0; + VkResult result = vkGetDataGraphPipelineAvailablePropertiesARM( + device, &pipeline_info, &property_count, nullptr); + 
if (result != VK_SUCCESS) { + VgfNeuralStatisticsBlob blob; + blob.available = false; + blob.vulkan_result = static_cast(result); + blob.reason = + "vkGetDataGraphPipelineAvailablePropertiesARM failed when querying count"; + return blob; + } + + std::vector properties(property_count); + if (property_count > 0) { + result = vkGetDataGraphPipelineAvailablePropertiesARM( + device, &pipeline_info, &property_count, properties.data()); + if (result != VK_SUCCESS) { + VgfNeuralStatisticsBlob blob; + blob.available = false; + blob.vulkan_result = static_cast(result); + blob.reason = + "vkGetDataGraphPipelineAvailablePropertiesARM failed when querying properties"; + return blob; + } + } + + if (!contains_property(properties, property)) { + return make_unavailable_blob( + "Requested VK_ARM_data_graph pipeline property is not available"); + } + + VkDataGraphPipelinePropertyQueryResultARM query{ + .sType = VK_STRUCTURE_TYPE_DATA_GRAPH_PIPELINE_PROPERTY_QUERY_RESULT_ARM, + .pNext = nullptr, + .property = property, + .isText = VK_FALSE, + .dataSize = 0, + .pData = nullptr, + }; + + result = + vkGetDataGraphPipelinePropertiesARM(device, &pipeline_info, 1, &query); + if (result != VK_SUCCESS) { + VgfNeuralStatisticsBlob blob; + blob.available = false; + blob.vulkan_result = static_cast(result); + blob.reason = + "vkGetDataGraphPipelinePropertiesARM failed when querying property size"; + return blob; + } + + std::vector data(query.dataSize); + if (!data.empty()) { + query.pData = data.data(); + result = + vkGetDataGraphPipelinePropertiesARM(device, &pipeline_info, 1, &query); + if (result != VK_SUCCESS) { + VgfNeuralStatisticsBlob blob; + blob.available = false; + blob.vulkan_result = static_cast(result); + blob.reason = + "vkGetDataGraphPipelinePropertiesARM failed when querying property data"; + return blob; + } + + if (query.dataSize < data.size()) { + data.resize(query.dataSize); + } + } + + VgfNeuralStatisticsBlob blob; + blob.available = true; + blob.is_text = query.isText == 
VK_TRUE; + blob.vulkan_result = static_cast(result); + blob.data = std::move(data); + return blob; +#else + (void)device; + (void)pipeline; + (void)property; + return make_unavailable_blob( + "Vulkan headers do not expose VK_ARM_data_graph neural accelerator properties"); +#endif +} + +VgfNeuralStatisticsBlob read_statistics_memory( + VkDevice device, + const VgfNeuralStatisticsSegmentContext& segment) { + if (device == VK_NULL_HANDLE) { + return make_unavailable_blob("VkDevice is null"); + } + if (!segment.statistics_bind_point_available) { + return make_unavailable_blob( + segment.statistics_bind_point_reason.empty() + ? "Neural accelerator statistics bind point is not available" + : segment.statistics_bind_point_reason); + } + if (segment.statistics_memory == VK_NULL_HANDLE || + segment.statistics_memory_size == 0) { + return make_unavailable_blob( + segment.statistics_bind_point_reason.empty() + ? "Neural accelerator statistics memory is not bound" + : segment.statistics_bind_point_reason); + } + if (!segment.statistics_memory_host_visible) { + return make_unavailable_blob( + "Neural accelerator statistics memory is not host visible"); + } + + void* mapped = nullptr; + VkResult result = vkMapMemory( + device, + segment.statistics_memory, + /*offset=*/0, + /*size=*/VK_WHOLE_SIZE, + /*flags=*/0, + &mapped); + if (result != VK_SUCCESS) { + VgfNeuralStatisticsBlob blob; + blob.available = false; + blob.vulkan_result = static_cast(result); + blob.reason = "vkMapMemory failed for neural accelerator statistics memory"; + return blob; + } + + if (!segment.statistics_memory_host_coherent) { + const VkMappedMemoryRange mapped_range{ + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .pNext = nullptr, + .memory = segment.statistics_memory, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + + result = vkInvalidateMappedMemoryRanges(device, 1, &mapped_range); + if (result != VK_SUCCESS) { + vkUnmapMemory(device, segment.statistics_memory); + + VgfNeuralStatisticsBlob blob; + 
blob.available = false; + blob.vulkan_result = static_cast(result); + blob.reason = + "vkInvalidateMappedMemoryRanges failed for non-coherent neural accelerator statistics memory"; + return blob; + } + } + + std::vector data( + static_cast(segment.statistics_memory_size)); + if (!data.empty()) { + std::memcpy(data.data(), mapped, data.size()); + } + + vkUnmapMemory(device, segment.statistics_memory); + + VgfNeuralStatisticsBlob blob; + blob.available = true; + blob.is_text = false; + blob.vulkan_result = static_cast(VK_SUCCESS); + blob.data = std::move(data); + return blob; +} + +VgfNeuralStatisticsCollectorForTest& test_collector_storage() { + static VgfNeuralStatisticsCollectorForTest collector; + return collector; +} + +} // namespace + +bool vgf_neural_statistics_api_available() { +#if defined(VK_ARM_data_graph) && \ + defined( \ + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_DEBUG_DATABASE_ARM) && \ + defined( \ + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_STATISTICS_INFO_ARM) + return vkGetDataGraphPipelineAvailablePropertiesARM != nullptr && + vkGetDataGraphPipelinePropertiesARM != nullptr; +#else + return false; +#endif +} + +VgfNeuralStatisticsCollection collect_vgf_neural_statistics( + VkDevice device, + const std::vector& segments) { + VgfNeuralStatisticsCollection collection; + +#if defined(VK_ARM_data_graph) && \ + defined( \ + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_DEBUG_DATABASE_ARM) && \ + defined( \ + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_STATISTICS_INFO_ARM) + + collection.api_available = vgf_neural_statistics_api_available(); + + if (device == VK_NULL_HANDLE) { + collection.reason = "VkDevice is null"; + return collection; + } + + if (!collection.api_available) { + collection.reason = + "VK_ARM_data_graph neural accelerator property query API is unavailable"; + return collection; + } + + if (segments.empty()) { + collection.reason = "No VGF segments are available for collection"; + return collection; + } + + 
for (const auto& segment : segments) { + VgfCollectedSegmentNeuralStatistics collected; + collected.segment_id = segment.segment_id; + collected.is_data_graph_pipeline = segment.is_data_graph_pipeline; + collected.statistics_bind_point_available = + segment.statistics_bind_point_available; + collected.statistics_memory_host_visible = + segment.statistics_memory_host_visible; + collected.statistics_memory_host_coherent = + segment.statistics_memory_host_coherent; + collected.statistics_bind_point_reason = + segment.statistics_bind_point_reason; + + if (!segment.is_data_graph_pipeline) { + collected.debug_database = + make_unavailable_blob("Segment is not a data graph pipeline"); + collected.statistics_info = + make_unavailable_blob("Segment is not a data graph pipeline"); + collected.statistics_memory = + make_unavailable_blob("Segment is not a data graph pipeline"); + collection.segments.push_back(std::move(collected)); + continue; + } + + collected.debug_database = query_pipeline_property( + device, + segment.pipeline, + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_DEBUG_DATABASE_ARM); + collected.statistics_info = query_pipeline_property( + device, + segment.pipeline, + VK_DATA_GRAPH_PIPELINE_PROPERTY_NEURAL_ACCELERATOR_STATISTICS_INFO_ARM); + + collected.statistics_memory = read_statistics_memory(device, segment); + + if (collected.debug_database.available || + collected.statistics_info.available || + collected.statistics_memory.available) { + collection.data_available = true; + } + + collection.segments.push_back(std::move(collected)); + } + + if (!collection.data_available) { + collection.reason = + "VK_ARM_data_graph neural accelerator statistics data is not available"; + } + + return collection; + +#else + (void)device; + (void)segments; + + collection.api_available = false; + collection.data_available = false; + collection.reason = + "VK_ARM_data_graph neural accelerator property query API is unavailable"; + return collection; +#endif +} + 
+std::string serialize_vgf_neural_statistics_collection( + const VgfNeuralStatisticsCollection& collection) { + std::ostringstream out; + out << "{"; + out << "\"schema\":" << json_escape(kVgfNeuralStatisticsSchema); + out << ",\"schema_version\":" << kVgfNeuralStatisticsSchemaVersion; + out << ",\"backend\":\"VgfBackend\""; + out << ",\"api\":\"VK_ARM_data_graph\""; + out << ",\"event_name\":" + << json_escape(kVgfNeuralStatisticsDelegateEventName); + out << ",\"api_available\":"; + append_bool(out, collection.api_available); + out << ",\"data_available\":"; + append_bool(out, collection.data_available); + + // Also emit "available", which mirrors data_available + out << ",\"available\":"; + append_bool(out, collection.data_available); + + out << ",\"reason\":" << json_escape(collection.reason); + out << ",\"segments\":["; + + for (size_t i = 0; i < collection.segments.size(); ++i) { + const auto& segment = collection.segments[i]; + if (i > 0) { + out << ","; + } + + out << "{"; + out << "\"segment_id\":" << segment.segment_id; + out << ",\"is_data_graph_pipeline\":"; + append_bool(out, segment.is_data_graph_pipeline); + out << ",\"statistics_bind_point_available\":"; + append_bool(out, segment.statistics_bind_point_available); + out << ",\"statistics_memory_host_visible\":"; + append_bool(out, segment.statistics_memory_host_visible); + out << ",\"statistics_memory_host_coherent\":"; + append_bool(out, segment.statistics_memory_host_coherent); + out << ",\"statistics_bind_point_reason\":" + << json_escape(segment.statistics_bind_point_reason); + out << ","; + append_blob(out, "debug_database", segment.debug_database); + out << ","; + append_blob(out, "statistics_info", segment.statistics_info); + out << ","; + append_blob(out, "statistics_memory", segment.statistics_memory); + out << "}"; + } + + out << "]"; + out << "}"; + return out.str(); +} + +std::string make_vgf_neural_statistics_unavailable_metadata( + const std::string& reason) { + VgfNeuralStatisticsCollection collection; +
collection.api_available = false; + collection.data_available = false; + collection.reason = reason; + return serialize_vgf_neural_statistics_collection(collection); +} + +std::string collect_vgf_neural_statistics_metadata( + VkDevice device, + const std::vector& segments) { + const auto& test_collector = test_collector_storage(); + if (test_collector) { + return test_collector(device, segments); + } + + return serialize_vgf_neural_statistics_collection( + collect_vgf_neural_statistics(device, segments)); +} + +void set_vgf_neural_statistics_collector_for_test( + VgfNeuralStatisticsCollectorForTest collector) { + test_collector_storage() = std::move(collector); +} + +void reset_vgf_neural_statistics_collector_for_test() { + test_collector_storage() = nullptr; +} + +} // namespace vgf +} // namespace backends +} // namespace executorch \ No newline at end of file diff --git a/backends/arm/runtime/VGFNeuralStatistics.h b/backends/arm/runtime/VGFNeuralStatistics.h new file mode 100644 index 00000000000..3865f0972ae --- /dev/null +++ b/backends/arm/runtime/VGFNeuralStatistics.h @@ -0,0 +1,134 @@ +/* + * Copyright 2026 Arm Limited and/or its affiliates. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace executorch { +namespace backends { +namespace vgf { + +constexpr const char* kVgfNeuralStatisticsDelegateEventName = + "VGF_NEURAL_STATISTICS"; +constexpr const char* kVgfNeuralStatisticsSchema = + "executorch.vgf.neural_statistics"; +constexpr int kVgfNeuralStatisticsSchemaVersion = 1; + +// One binary payload from neural statistics API +struct VgfNeuralStatisticsBlob { + // Whether we got it successfully + bool available = false; + // Whether it is a text like JSON + bool is_text = false; + // Vulkan result code from query + int32_t vulkan_result = 0; + // Why blob is not available + std::string reason; + // Actual payload, raw bytes + std::vector data; +}; + +// Info needed to collect stats for one VGF segment +struct VgfNeuralStatisticsSegmentContext { + // id of the segment + int segment_id = -1; + // Stats are only for data graph pipeline + // We skip if it is false + bool is_data_graph_pipeline = false; + VkPipeline pipeline = VK_NULL_HANDLE; + VkDataGraphPipelineSessionARM session = VK_NULL_HANDLE; + // We record whether stats bind point was available. + bool statistics_bind_point_available = false; + // We record memory properties + VkDeviceMemory statistics_memory = VK_NULL_HANDLE; + VkDeviceSize statistics_memory_size = 0; + bool statistics_memory_host_visible = false; + bool statistics_memory_host_coherent = false; + // Why it is unavailable + std::string statistics_bind_point_reason; +}; + +// This is the output for one segment after collection. +struct VgfCollectedSegmentNeuralStatistics { + // Status fields + + // id of the segment + int segment_id = -1; + // Stats are only for data graph pipeline + // We skip if it is false + bool is_data_graph_pipeline = false; + // We record whether stats bind point was available. 
+ bool statistics_bind_point_available = false; + // We record memory properties + bool statistics_memory_host_visible = false; + bool statistics_memory_host_coherent = false; + // Why it is unavailable + std::string statistics_bind_point_reason; + + // Debug database blob queried + VgfNeuralStatisticsBlob debug_database; + // API-provided information about the statistics + VgfNeuralStatisticsBlob statistics_info; + // Raw bytes read from the neural statistics memory bind point + VgfNeuralStatisticsBlob statistics_memory; +}; + +// This is the top-level result for the whole VGF execution +struct VgfNeuralStatisticsCollection { + // Whether API is supported at all + bool api_available = false; + // Whether driver provides data + bool data_available = false; + // Top level explanation to the user if something went wrong + std::string reason; + // Per segment results + std::vector segments; +}; + +// Checks whether the neural statistics Vulkan API can be used +bool vgf_neural_statistics_api_available(); + +VgfNeuralStatisticsCollection collect_vgf_neural_statistics( + VkDevice device, + const std::vector& segments); + +// We convert collection into the JSON file that will be +// stored in ETDump delegate metadata. 
+std::string serialize_vgf_neural_statistics_collection( + const VgfNeuralStatisticsCollection& collection); + +// Creates metadata JSON string when collection cannot happen +std::string make_vgf_neural_statistics_unavailable_metadata( + const std::string& reason); + +// High level function used by the backend +std::string collect_vgf_neural_statistics_metadata( + VkDevice device, + const std::vector& segments); + +// Functions for testing: +// Defines a mockable collector function type for testing +using VgfNeuralStatisticsCollectorForTest = std::function&)>; + +// This lets a unit test override the real collector +void set_vgf_neural_statistics_collector_for_test( + VgfNeuralStatisticsCollectorForTest collector); + +void reset_vgf_neural_statistics_collector_for_test(); + +} // namespace vgf +} // namespace backends +} // namespace executorch \ No newline at end of file diff --git a/backends/arm/runtime/VGFSetup.cpp b/backends/arm/runtime/VGFSetup.cpp index dd92af1d15b..6a565656aa5 100644 --- a/backends/arm/runtime/VGFSetup.cpp +++ b/backends/arm/runtime/VGFSetup.cpp @@ -3018,23 +3018,59 @@ bool VgfRepr::process_vgf( return false; } - for (const auto& bind_point_requirement : bind_point_requirements) { - if (bind_point_requirement.bindPointType != - VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_TYPE_MEMORY_ARM) { + for (auto& bind_point_requirement : bind_point_requirements) { + const bool is_transient_bind_point = bind_point_requirement.bindPoint == + VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_TRANSIENT_ARM; + + bool is_neural_statistics_bind_point = false; +#ifdef VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_NEURAL_ACCELERATOR_STATISTICS_ARM + is_neural_statistics_bind_point = bind_point_requirement.bindPoint == + VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_NEURAL_ACCELERATOR_STATISTICS_ARM; +#endif + + if (!is_transient_bind_point && !is_neural_statistics_bind_point) { ET_LOG( Error, - "Expected VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_TYPE_MEMORY_ARM"); + "Unsupported data-graph
session bind point %u", + static_cast(bind_point_requirement.bindPoint)); return false; } - if (bind_point_requirement.bindPoint != - VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_TRANSIENT_ARM) { + + auto mark_neural_statistics_unavailable = + [&](const std::string& message) { + segment.neural_statistics_bind_point_available = false; + segment.neural_statistics_memory = VK_NULL_HANDLE; + segment.neural_statistics_memory_size = 0; + segment.neural_statistics_memory_host_visible = false; + segment.neural_statistics_memory_host_coherent = false; + segment.neural_statistics_status = message; + ET_LOG(Info, "%s", message.c_str()); + }; + + if (bind_point_requirement.bindPointType != + VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_TYPE_MEMORY_ARM) { + const std::string message = + "Neural accelerator statistics bind point was advertised, but is not a memory bind point"; + if (is_neural_statistics_bind_point) { + mark_neural_statistics_unavailable(message); + continue; + } + ET_LOG( Error, - "Expected VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_TRANSIENT_ARM"); + "Expected VK_DATA_GRAPH_PIPELINE_SESSION_BIND_POINT_TYPE_MEMORY_ARM"); return false; } + if (bind_point_requirement.numObjects != 1) { - ET_LOG(Error, "Expected only one object for the bindpoint"); + const std::string message = + "Neural accelerator statistics bind point was advertised, but numObjects is not 1"; + if (is_neural_statistics_bind_point) { + mark_neural_statistics_unavailable(message); + continue; + } + + ET_LOG(Error, "Expected exactly one object for bind point"); return false; } @@ -3047,19 +3083,28 @@ bool VgfRepr::process_vgf( .bindPoint = bind_point_requirement.bindPoint, .objectIndex = 0, }; + VkMemoryRequirements2 memory_requirements = { .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, .pNext = nullptr, }; + vkGetDataGraphPipelineSessionMemoryRequirementsARM( vk_device, &memory_requirements_info, &memory_requirements); VkMemoryPropertyFlags aims = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + uint32_t memory_index = 0; if (!find_memory_index( vk_physical, memory_requirements, aims, &memory_index)) { + if (is_neural_statistics_bind_point) { + mark_neural_statistics_unavailable( + "Neural accelerator statistics bind point was advertised, but no host-visible coherent memory type is available"); + continue; + } + ET_LOG( Error, "Failed to find data-graph session memory type for segment %d", @@ -3074,14 +3119,19 @@ bool VgfRepr::process_vgf( .memoryTypeIndex = memory_index, }; - VkDeviceMemory memory; + VkDeviceMemory memory = VK_NULL_HANDLE; result = vkAllocateMemory( vk_device, &memory_allocate_info, nullptr, &memory); if (result != VK_SUCCESS) { + if (is_neural_statistics_bind_point) { + mark_neural_statistics_unavailable( + "Failed to allocate neural accelerator statistics memory"); + continue; + } + ET_LOG(Error, "Failed to allocate memory for intermediates"); return false; } - intermediates.push_back(memory); VkBindDataGraphPipelineSessionMemoryInfoARM bind_info = { .sType = @@ -3093,12 +3143,42 @@ bool VgfRepr::process_vgf( .memory = memory, .memoryOffset = 0, }; + result = vkBindDataGraphPipelineSessionMemoryARM(vk_device, 1, &bind_info); if (result != VK_SUCCESS) { + vkFreeMemory(vk_device, memory, nullptr); + + if (is_neural_statistics_bind_point) { + mark_neural_statistics_unavailable( + "Failed to bind neural accelerator statistics memory"); + continue; + } + ET_LOG(Error, "Failed to bind intermediates memory"); return false; } + + intermediates.push_back(memory); + + if (is_neural_statistics_bind_point) { + // Only set this true after memory is successfully allocated and + // bound. 
+ segment.neural_statistics_bind_point_available = true; + segment.neural_statistics_memory = memory; + segment.neural_statistics_memory_size = + memory_requirements.memoryRequirements.size; + segment.neural_statistics_memory_host_visible = true; + segment.neural_statistics_memory_host_coherent = true; + segment.neural_statistics_status.clear(); + + ET_LOG( + Info, + "Bound neural accelerator statistics memory for segment %d, size=%llu", + segment.segment_id, + static_cast( + segment.neural_statistics_memory_size)); + } } } else { VkPipelineShaderStageCreateInfo stage_info{ @@ -3576,6 +3656,37 @@ bool VgfRepr::execute_vgf(executorch::runtime::EventTracer* event_tracer) { return true; } +std::vector +VgfRepr::get_neural_statistics_segment_contexts() const { + std::vector contexts; + contexts.reserve(segments.size()); + + for (const auto& segment : segments) { + contexts.push_back(VgfNeuralStatisticsSegmentContext{ + .segment_id = segment.segment_id, + .is_data_graph_pipeline = segment.use_data_graph_pipeline, + .pipeline = segment.vk_pipeline, + .session = segment.vk_session, + .statistics_bind_point_available = + segment.neural_statistics_bind_point_available, + .statistics_memory = segment.neural_statistics_memory, + .statistics_memory_size = segment.neural_statistics_memory_size, + .statistics_memory_host_visible = + segment.neural_statistics_memory_host_visible, + .statistics_memory_host_coherent = + segment.neural_statistics_memory_host_coherent, + .statistics_bind_point_reason = segment.neural_statistics_status, + }); + } + + return contexts; +} + +std::string VgfRepr::collect_neural_statistics_metadata() const { + return collect_vgf_neural_statistics_metadata( + vk_device, get_neural_statistics_segment_contexts()); +} + void VgfRepr::free_vgf() { unmap_persistent_io_memory(); diff --git a/backends/arm/runtime/VGFSetup.h b/backends/arm/runtime/VGFSetup.h index 0d52965025e..7d3f39f2a99 100644 --- a/backends/arm/runtime/VGFSetup.h +++ 
b/backends/arm/runtime/VGFSetup.h @@ -21,6 +21,8 @@ using executorch::runtime::CompileSpec; // We use the platform and runtime environment provided by the Vulkan delegate #include +#include + namespace executorch { namespace backends { namespace vgf { @@ -67,6 +69,14 @@ typedef struct SegmentState { VkDataGraphPipelineSessionARM vk_session = VK_NULL_HANDLE; VkShaderModule vk_shader = VK_NULL_HANDLE; std::array dispatch_shape = {1, 1, 1}; + + // to work with data provide by arm neural statistics api + bool neural_statistics_bind_point_available = false; + VkDeviceMemory neural_statistics_memory = VK_NULL_HANDLE; + VkDeviceSize neural_statistics_memory_size = 0; + bool neural_statistics_memory_host_visible = false; + bool neural_statistics_memory_host_coherent = false; + std::string neural_statistics_status; } SegmentState; typedef struct ResourceAlloc { @@ -149,6 +159,12 @@ class VgfRepr { (void)io; } + // to work with arm neural statistics data + std::vector + get_neural_statistics_segment_contexts() const; + + std::string collect_neural_statistics_metadata() const; + ~VgfRepr() { free_vgf(); } diff --git a/backends/arm/test/vgf_neural_statistics_test.cpp b/backends/arm/test/vgf_neural_statistics_test.cpp new file mode 100644 index 00000000000..f4fded52592 --- /dev/null +++ b/backends/arm/test/vgf_neural_statistics_test.cpp @@ -0,0 +1,98 @@ +/* + * Copyright 2026 Arm Limited and/or its affiliates. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include + +#include + +namespace vgf = executorch::backends::vgf; + +TEST(VgfNeuralStatisticsTest, SerializesUnavailableWrapper) { + const std::string metadata = + vgf::make_vgf_neural_statistics_unavailable_metadata("api missing"); + + EXPECT_NE( + metadata.find("\"schema\":\"executorch.vgf.neural_statistics\""), + std::string::npos); + EXPECT_NE(metadata.find("\"schema_version\":1"), std::string::npos); + EXPECT_NE(metadata.find("\"api_available\":false"), std::string::npos); + EXPECT_NE(metadata.find("\"data_available\":false"), std::string::npos); + EXPECT_NE(metadata.find("\"available\":false"), std::string::npos); + EXPECT_NE(metadata.find("api missing"), std::string::npos); +} + +TEST(VgfNeuralStatisticsTest, SerializesMockedBlobs) { + vgf::VgfNeuralStatisticsCollection collection; + collection.api_available = true; + collection.data_available = true; + + vgf::VgfCollectedSegmentNeuralStatistics segment; + segment.segment_id = 7; + segment.is_data_graph_pipeline = true; + segment.statistics_bind_point_available = true; + segment.statistics_memory_host_visible = true; + segment.statistics_memory_host_coherent = true; + + segment.debug_database.available = true; + segment.debug_database.data = {0x01, 0x02, 0x03}; + + segment.statistics_info.available = true; + segment.statistics_info.is_text = true; + segment.statistics_info.data = {'i', 'n', 'f', 'o'}; + + segment.statistics_memory.available = true; + segment.statistics_memory.data = {0xDE, 0xAD}; + + collection.segments.push_back(segment); + + const std::string metadata = + vgf::serialize_vgf_neural_statistics_collection(collection); + + EXPECT_NE(metadata.find("\"schema_version\":1"), std::string::npos); + EXPECT_NE(metadata.find("\"api_available\":true"), std::string::npos); + EXPECT_NE(metadata.find("\"data_available\":true"), std::string::npos); + EXPECT_NE(metadata.find("\"segment_id\":7"), std::string::npos); + + // Base64("AQID") = {0x01,0x02,0x03}; Base64("3q0=") = 
{0xDE,0xAD}. + EXPECT_NE(metadata.find("\"data\":\"AQID\""), std::string::npos); + EXPECT_NE(metadata.find("\"data\":\"3q0=\""), std::string::npos); +} + +TEST(VgfNeuralStatisticsTest, TestCollectorMocksVulkanApi) { + vgf::set_vgf_neural_statistics_collector_for_test( + [](VkDevice, const std::vector&) + -> std::string { + return "{\"schema\":\"executorch.vgf.neural_statistics\"," + "\"schema_version\":1," + "\"api_available\":true," + "\"data_available\":true," + "\"available\":true," + "\"segments\":[]}"; + }); + + const std::string metadata = + vgf::collect_vgf_neural_statistics_metadata(VK_NULL_HANDLE, {}); + + EXPECT_NE(metadata.find("\"schema_version\":1"), std::string::npos); + EXPECT_NE(metadata.find("\"data_available\":true"), std::string::npos); + + vgf::reset_vgf_neural_statistics_collector_for_test(); +} + +TEST(VgfNeuralStatisticsTest, DefaultCollectorHandlesUnavailableApi) { + vgf::reset_vgf_neural_statistics_collector_for_test(); + + const std::string metadata = + vgf::collect_vgf_neural_statistics_metadata(VK_NULL_HANDLE, {}); + + EXPECT_NE(metadata.find("\"schema_version\":1"), std::string::npos); + EXPECT_NE(metadata.find("\"data_available\":false"), std::string::npos); + EXPECT_NE(metadata.find("\"available\":false"), std::string::npos); +} \ No newline at end of file diff --git a/devtools/inspector/__init__.py b/devtools/inspector/__init__.py index 375123a0a5b..a63a1b57f87 100644 --- a/devtools/inspector/__init__.py +++ b/devtools/inspector/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. +# Copyright 2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -13,6 +14,10 @@ PerfData, ) from executorch.devtools.inspector._inspector_utils import compare_results, TimeScale +from executorch.devtools.inspector.vgf_neural_statistics import ( + parse_vgf_neural_statistics_delegate_metadata, + parse_vgf_neural_statistics_metadata, +) __all__ = [ "Event", @@ -21,4 +26,6 @@ "PerfData", "compare_results", "TimeScale", + "parse_vgf_neural_statistics_delegate_metadata", + "parse_vgf_neural_statistics_metadata", ] diff --git a/devtools/inspector/_inspector.py b/devtools/inspector/_inspector.py index ad914878347..fc94f0ad183 100644 --- a/devtools/inspector/_inspector.py +++ b/devtools/inspector/_inspector.py @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. +# Copyright 2026 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -1686,3 +1687,26 @@ def get_stacktraces_for_row(aot_ops: List[str]) -> Dict[str, Optional[str]]: df["stacktraces"] = df["aot_ops"].apply(get_stacktraces_for_row) return df + + def get_vgf_neural_statistics(self) -> List[Dict[str, Any]]: + """ + Return parsed VGF neural accelerator statistics records embedded as + delegate metadata in ETDump. + + Each returned item is the JSON wrapper emitted by the VGF backend. Blob + fields under each segment contain both the original base64 string and a + decoded `raw_data: bytes` field. 
+ """ + from executorch.devtools.inspector.vgf_neural_statistics import ( + parse_vgf_neural_statistics_delegate_metadata, + ) + + records: List[Dict[str, Any]] = [] + for event_block in self.event_blocks: + for event in event_block.events: + records.extend( + parse_vgf_neural_statistics_delegate_metadata( + event.raw_delegate_debug_metadatas + ) + ) + return records diff --git a/devtools/inspector/vgf_neural_statistics.py b/devtools/inspector/vgf_neural_statistics.py new file mode 100644 index 00000000000..4457abaa3db --- /dev/null +++ b/devtools/inspector/vgf_neural_statistics.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2026 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-unsafe + +import base64 +import binascii +import json +from typing import Any, Dict, List, Sequence, Union + +# Functions here are used to take the raw delegate_debug_metadata bytes stored in ETDump, +# check whether those bytes contain VGF neural statistics JSON, +# validate the schema/version, decode base64-encoded binary blobs, +# and return a normal Python dictionary that tooling can consume. 
+ +SCHEMA = "executorch.vgf.neural_statistics" +SCHEMA_VERSION = 1 + +DelegateMetadataBytes = Union[bytes, bytearray, str] + + +def _to_bytes(metadata: DelegateMetadataBytes) -> bytes: + if isinstance(metadata, bytes): + return metadata + if isinstance(metadata, bytearray): + return bytes(metadata) + if isinstance(metadata, str): + return metadata.encode("utf-8") + raise TypeError(f"Unsupported delegate metadata type: {type(metadata)}") + + +def _decode_blob(blob: Dict[str, Any]) -> Dict[str, Any]: + decoded = dict(blob) + + if not decoded.get("available", False): + decoded.setdefault("raw_data", b"") + return decoded + + if decoded.get("encoding") != "base64": + raise ValueError( + f"Unsupported VGF neural statistics blob encoding: {decoded.get('encoding')}" + ) + + encoded_data = decoded.get("data", "") + try: + decoded["raw_data"] = base64.b64decode(encoded_data, validate=True) + except (binascii.Error, TypeError) as exc: + raise ValueError("Malformed base64 data in VGF neural statistics blob") from exc + + return decoded + + +def parse_vgf_neural_statistics_metadata( + metadata: DelegateMetadataBytes, +) -> Dict[str, Any]: + payload = json.loads(_to_bytes(metadata).decode("utf-8")) + schema = payload.get("schema") if isinstance(payload, dict) else None + if schema != SCHEMA: # also rejects non-object JSON (list, number, ...) + raise ValueError(f"Not VGF neural statistics metadata: {schema}") + + if payload.get("schema_version") != SCHEMA_VERSION: + raise ValueError( + "Unsupported VGF neural statistics metadata schema version: " + f"{payload.get('schema_version')}" + ) + + payload = dict(payload) + decoded_segments = [] + + for segment in payload.get("segments", []): + decoded_segment = dict(segment) + for key in ("debug_database", "statistics_info", "statistics_memory"): + blob = decoded_segment.get(key) + if isinstance(blob, dict): + decoded_segment[key] = _decode_blob(blob) + decoded_segments.append(decoded_segment) + + payload["segments"] = decoded_segments + return payload + + +def parse_vgf_neural_statistics_delegate_metadata( 
delegate_metadata_list: Sequence[DelegateMetadataBytes], +) -> List[Dict[str, Any]]: + parsed: List[Dict[str, Any]] = [] + + for metadata in delegate_metadata_list: + if metadata is None or metadata == b"" or metadata == "": + continue + + try: + metadata_bytes = _to_bytes(metadata) + except TypeError: + # Not a valid delegate metadata representation. + # Treat it as unrelated metadata from another source. + continue + + try: + payload = json.loads(metadata_bytes.decode("utf-8")) + except (UnicodeDecodeError, json.JSONDecodeError) as exc: + # If the blob appears to be VGF neural statistics metadata but is + # malformed, surface the error instead of silently dropping it. + if SCHEMA.encode("utf-8") in metadata_bytes: + raise ValueError( + "Malformed VGF neural statistics delegate metadata" + ) from exc + + # Otherwise this is generic delegate metadata from another backend. + continue + + if not isinstance(payload, dict) or payload.get("schema") != SCHEMA: + # Inspector events can contain delegate metadata from other backends. + # Ignore only records that are clearly not VGF neural statistics. + continue + + # From this point onward the record claims to be VGF neural statistics. + # Do not swallow parse errors: malformed VGF records should be visible. + parsed.append(parse_vgf_neural_statistics_metadata(metadata_bytes)) + + return parsed diff --git a/docs/source/backends/arm-vgf/vgf-neural-statistics.md b/docs/source/backends/arm-vgf/vgf-neural-statistics.md new file mode 100644 index 00000000000..62bd7c07e4d --- /dev/null +++ b/docs/source/backends/arm-vgf/vgf-neural-statistics.md @@ -0,0 +1,111 @@ +# VGF neural accelerator statistics in ETDump + +The VGF backend emits neural accelerator profiling data through ETDump delegate +metadata when runtime profiling is enabled and a supported `VK_ARM_data_graph` +driver is available. + +By default, VGF runtime profiling emits timing events only. 
Neural accelerator +statistics collection is opt-in because the payload can include binary blobs and +may increase ETDump size and profiling overhead, especially across repeated +inference runs. + +To enable VGF neural accelerator statistics collection, set the environment variable: + +```text +EXECUTORCH_VGF_ENABLE_NEURAL_STATISTICS=1 +``` + +When this variable is unset, empty, or set to `0`, `false`, `FALSE`, `off`, or +`OFF`, the backend does not emit the `VGF_NEURAL_STATISTICS` delegate metadata +event, even if general ETDump/runtime profiling is enabled. + +The emitted delegate profiling event name is: + +```text +VGF_NEURAL_STATISTICS +``` + +The delegate metadata payload is a UTF-8 JSON wrapper. The wrapper schema is: + +```json +{ + "schema": "executorch.vgf.neural_statistics", + "schema_version": 1, + "backend": "VgfBackend", + "api": "VK_ARM_data_graph", + "event_name": "VGF_NEURAL_STATISTICS", + "api_available": true, + "data_available": true, + "available": true, + "reason": "", + "segments": [] +} +``` + +Each segment can contain: +```json +{ + "segment_id": 0, + "is_data_graph_pipeline": true, + "statistics_bind_point_available": true, + "statistics_memory_host_visible": true, + "statistics_memory_host_coherent": true, + "statistics_bind_point_reason": "", + "debug_database": { + "available": true, + "is_text": false, + "vulkan_result": 0, + "size": 0, + "encoding": "base64", + "reason": "", + "data": "" + }, + "statistics_info": { + "available": true, + "is_text": true, + "vulkan_result": 0, + "size": 0, + "encoding": "base64", + "reason": "", + "data": "" + }, + "statistics_memory": { + "available": true, + "is_text": false, + "vulkan_result": 0, + "size": 0, + "encoding": "base64", + "reason": "", + "data": "" + } +} +``` + +ExecuTorch does not interpret the neural accelerator blobs. Consumers should treat +`debug_database`, `statistics_info`, and `statistics_memory` as opaque bytes. 
+ +## Reading from Inspector + +```py +from executorch.devtools.inspector import Inspector + +inspector = Inspector(etdump_path="run.etdump") +records = inspector.get_vgf_neural_statistics() + +for record in records: + print(record["schema_version"], record["data_available"]) + for segment in record["segments"]: + stats_bytes = segment["statistics_memory"]["raw_data"] + debug_db_bytes = segment["debug_database"]["raw_data"] +``` + +If the Vulkan API, driver, or hardware support is unavailable, normal execution +continues and the JSON wrapper is still emitted with: +```json +{ + "api_available": false, + "data_available": false, + "available": false, + "reason": "..." +} +``` \ No newline at end of file