// Copyright (c) "2022" Advanced Micro Devices, Inc. All rights reserved.

#include "fake_gpu_encoding.h"

#include <util/timestamp_record.h>
#include <vk_helper/vk_common.h>

// Default-constructs the encoder: every listed member is value-initialised or
// set to null / zero. No Vulkan call is made here; the members are populated
// later by device_init() and the init_*() helpers it calls.
FakeGpuEncoding::FakeGpuEncoding()
    : IEncodeDevice(),
      m_instance{},
      pfnGetMemoryFdKHR{nullptr},
      // UINT32_MAX until device_init() hands this member to the GPU selection helper.
      m_selected_gpu_index(UINT32_MAX),
      m_gpu{},
      m_gpu_props{},
      m_gpu_memory_props{},
      m_queue_family_count{0},
      m_queue_props{nullptr},
      m_graphics_queue{},
      m_device{},
      m_exported_image_memory_requirements{},
      m_exported_image_memory_type_index{},
      m_cmd_pool{},
      m_show_exported_image_rscs{nullptr},
      m_pipeline_cache{},
      m_pipeline_layout{},
      m_desc_layout{},
      m_render_pass{},
      m_pipeline{},
      m_desc_pool{},
      m_desc_set{} {}

void FakeGpuEncoding::device_init(const Option &in_option) {
    m_option = in_option;

    // option.gpu_number specifies the GPU used by rendering side,
    // for fake_gpu_encoding, it will select a GPU different from the rendering
    // to achieve P2P transfer.
    // However, for debugging purpose, `debug_fake_encoding_same_gpu` is used
    // to force fake_gpu_encoding to select the same GPU as the rendering side.
    int blacklist_gpu_no = -1;
    int force_gpu_no = -1;
    if (m_option.debug_fake_encoding_same_gpu) {
        blacklist_gpu_no = -1;
        force_gpu_no = in_option.gpu_number;
    } else {
        blacklist_gpu_no = in_option.gpu_number;
        force_gpu_no = -1;
    }

    vk::QueueFamilyProperties *queue_props_ptr = nullptr;
    init_vulkan_instance_and_phys_device(in_option.validate,
                                         /*in_need_display=*/true,
                                         /*in_force_select_gpu_number*/ force_gpu_no,
                                         /*in_force_exclude_gpu_number*/ blacklist_gpu_no,
                                         &m_selected_gpu_index,
                                         &m_instance,
                                         &m_enabled_device_extension_count,
                                         m_device_extension_names,
                                         &m_enabled_layer_count,
                                         m_layer_names,
                                         &m_gpu,
                                         &m_gpu_props,
                                         &m_queue_family_count,
                                         &queue_props_ptr);
    m_queue_props.reset(queue_props_ptr);

    init_vk_device();
    init_ext_api();
    init_device_memory_properties();

    swapchain_rsc->prepare_images(m_device, m_gpu, in_option.presentMode);
    m_show_exported_image_rscs.reset(
        new ShowExportedImageRscs[swapchain_rsc->get_swapchain_image_count()]);

    init_shared_images();
    init_descriptor_layout();
    init_render_pass();
    init_pipeline();
    init_descriptor_set();
    init_framebuffer();
    init_sync_primitives();
    build_cmds();
}

void FakeGpuEncoding::device_cleanup() {
    m_device.waitIdle();

    // Wait for fences from present operations
    for (uint32_t i = 0; i < get_max_number_of_inflight_commands(); i++) {
        m_device.waitForFences(1, &m_fences[i], VK_TRUE, UINT64_MAX);
        m_device.destroyFence(m_fences[i], nullptr);
        m_device.waitForFences(1, &m_swp_chain_acquire_image_fences[i], VK_TRUE, UINT64_MAX);
        m_device.destroyFence(m_swp_chain_acquire_image_fences[i], nullptr);
        m_device.destroySemaphore(m_swapchain_image_acquired_semaphores[i], nullptr);
        m_device.destroySemaphore(m_show_exported_image_complete_semaphores[i], nullptr);
        m_device.destroySemaphore(m_acquire_exported_image_semaphores[i], nullptr);
    }

    m_device.destroySampler(m_texture_sampler);
    for (uint32_t i = 0; i < swapchain_rsc->get_swapchain_image_count(); i++) {
        m_device.destroyImageView(m_show_exported_image_rscs[i].exported_image_view, nullptr);
        m_device.destroyImage(m_show_exported_image_rscs[i].exported_image, nullptr);
        m_device.freeMemory(m_show_exported_image_rscs[i].exported_image_memory, nullptr);
        m_device.destroyFramebuffer(m_show_exported_image_rscs[i].frame_buffer);
        m_device.freeCommandBuffers(m_cmd_pool, 1, &m_show_exported_image_rscs[i].cmd);
    }

    pfnDestroySamplerYcbcrConversionKHR(m_device, m_ycbcr_conversion, nullptr);
    m_device.destroyPipeline(m_pipeline);
    m_device.destroyPipelineCache(m_pipeline_cache);
    m_device.destroyRenderPass(m_render_pass);
    m_device.destroyPipelineLayout(m_pipeline_layout);
    m_device.destroyDescriptorSetLayout(m_desc_layout);
    m_device.destroyDescriptorPool(m_desc_pool);
    m_device.destroyCommandPool(m_cmd_pool);

    swapchain_rsc->cleanup(m_instance, m_device);

    m_device.destroy(nullptr);
    m_instance.destroy(nullptr);
}

void FakeGpuEncoding::init_vk_device() {
    swapchain_rsc->init_surface_and_select_queue(
        m_instance, m_gpu, m_queue_family_count, m_queue_props, &m_graphics_queue_family_index);
    if (m_graphics_queue_family_index == UINT32_MAX) {
        ERR_EXIT("Could not find both graphics queues\n", "Swapchain Initialization Failure");
    }

    {
        float const priorities[1] = {0.0};

        vk::DeviceQueueCreateInfo queues[1];
        queues[0].setQueueFamilyIndex(m_graphics_queue_family_index);
        queues[0].setQueueCount(1);
        queues[0].setPQueuePriorities(priorities);

        const auto physicalDeviceVk11Features =
            vk::PhysicalDeviceVulkan11Features().setSamplerYcbcrConversion(true);

        auto deviceInfo =
            vk::DeviceCreateInfo()
                .setQueueCreateInfoCount(1)
                .setPQueueCreateInfos(queues)
                .setEnabledLayerCount(m_enabled_layer_count)
                .setPpEnabledLayerNames((const char *const *)m_layer_names)
                .setEnabledExtensionCount(m_enabled_device_extension_count)
                .setPpEnabledExtensionNames((const char *const *)m_device_extension_names)
                .setPEnabledFeatures(nullptr)
                .setPNext(&physicalDeviceVk11Features);

        auto result = m_gpu.createDevice(&deviceInfo, nullptr, &m_device);
        VERIFY(result == vk::Result::eSuccess);
    }

    m_device.getQueue(m_graphics_queue_family_index, 0, &m_graphics_queue);
    swapchain_rsc->set_present_queue(m_device);
}

void FakeGpuEncoding::init_ext_api() {
    pfnGetMemoryFdKHR = reinterpret_cast<decltype(pfnGetMemoryFdKHR)>(
        vkGetInstanceProcAddr(m_instance, "vkGetMemoryFdKHR"));
    VERIFY(pfnGetMemoryFdKHR != nullptr);
    pfnGetSemaphoreFdKHR = reinterpret_cast<decltype(pfnGetSemaphoreFdKHR)>(
        vkGetInstanceProcAddr(m_instance, "vkGetSemaphoreFdKHR"));
    VERIFY(pfnGetSemaphoreFdKHR != nullptr);
    pfnImportSemaphoreFdKHR = reinterpret_cast<decltype(pfnImportSemaphoreFdKHR)>(
        vkGetInstanceProcAddr(m_instance, "vkImportSemaphoreFdKHR"));
    VERIFY(pfnImportSemaphoreFdKHR != nullptr);
    pfnCreateSamplerYcbcrConversionKHR =
        reinterpret_cast<decltype(pfnCreateSamplerYcbcrConversionKHR)>(
            vkGetInstanceProcAddr(m_instance, "vkCreateSamplerYcbcrConversionKHR"));
    VERIFY(pfnCreateSamplerYcbcrConversionKHR != nullptr);
    pfnDestroySamplerYcbcrConversionKHR =
        reinterpret_cast<decltype(pfnDestroySamplerYcbcrConversionKHR)>(
            vkGetInstanceProcAddr(m_instance, "vkDestroySamplerYcbcrConversionKHR"));
    VERIFY(pfnDestroySamplerYcbcrConversionKHR != nullptr);
}

// Caches the memory properties of the selected physical device in
// m_gpu_memory_props; init_shared_images() reads them to pick a memory type.
void FakeGpuEncoding::init_device_memory_properties() {
    m_gpu_memory_props = m_gpu.getMemoryProperties();
}

void FakeGpuEncoding::init_shared_images() {
    vk::ImageCreateFlags imageCreateFlags = {};
    vk::ImageUsageFlags imageUsages = {};
    vk::Format imageFormat = {};
    if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_RGBA) {
        imageUsages = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst;
        imageFormat = vk::Format::eR8G8B8A8Unorm;
    } else if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_NV12) {
        imageUsages = vk::ImageUsageFlagBits::eSampled;
        imageFormat = vk::Format::eG8B8R82Plane420Unorm;
    } else if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_YUV420p) {
        imageUsages = vk::ImageUsageFlagBits::eSampled;
        imageFormat = vk::Format::eG8B8R83Plane420Unorm;
    } else {
        assert(false && "Never called!");
    }

    {
        // Not going to resize the shared images with surface.
        const uint32_t width = m_option.width;
        const uint32_t height = m_option.height;

        const auto externalMemoryImageCreateInfo =
            vk::ExternalMemoryImageCreateInfo().setHandleTypes(
                vk::ExternalMemoryHandleTypeFlagBits::eDmaBufEXT);

        const auto imageCreateInfo = vk::ImageCreateInfo()
                                         .setImageType(vk::ImageType::e2D)
                                         .setFormat(imageFormat)
                                         .setExtent({width, height, 1})
                                         .setArrayLayers(1)
                                         .setMipLevels(1)
                                         .setFlags({})
                                         .setSamples(vk::SampleCountFlagBits::e1)
                                         .setTiling(vk::ImageTiling::eLinear)
                                         .setUsage(imageUsages)
                                         .setSharingMode(vk::SharingMode::eExclusive)
                                         .setQueueFamilyIndexCount(0)
                                         .setPQueueFamilyIndices(nullptr)
                                         .setInitialLayout(vk::ImageLayout::eUndefined)
                                         .setPNext(&externalMemoryImageCreateInfo);

        for (uint32_t i = 0; i < swapchain_rsc->get_swapchain_image_count(); i++) {
            const auto result = m_device.createImage(
                &imageCreateInfo, nullptr, &m_show_exported_image_rscs[i].exported_image);
            VERIFY(result == vk::Result::eSuccess);
        }
    }

    {
        m_device.getImageMemoryRequirements(m_show_exported_image_rscs[0].exported_image,
                                            &m_exported_image_memory_requirements);

        const bool success =
            memory_type_from_properties(m_gpu_memory_props,
                                        m_exported_image_memory_requirements.memoryTypeBits,
                                        vk::MemoryPropertyFlagBits::eDeviceLocal,
                                        &m_exported_image_memory_type_index);
        VERIFY(success == true);
    }

    {
        const auto externalMemAllocInfo = vk::ExportMemoryAllocateInfo().setHandleTypes(
            vk::ExternalMemoryHandleTypeFlagBits::eDmaBufEXT);

        const auto memoryAllocInfo =
            vk::MemoryAllocateInfo()
                .setAllocationSize(m_exported_image_memory_requirements.size)
                .setMemoryTypeIndex(m_exported_image_memory_type_index)
                .setPNext(&externalMemAllocInfo);

        for (uint32_t i = 0; i < swapchain_rsc->get_swapchain_image_count(); i++) {
            vk::Result result = m_device.allocateMemory(
                &memoryAllocInfo, nullptr, &m_show_exported_image_rscs[i].exported_image_memory);
            VERIFY(result == vk::Result::eSuccess);

            m_device.bindImageMemory(m_show_exported_image_rscs[i].exported_image,
                                     m_show_exported_image_rscs[i].exported_image_memory,
                                     0);
        }
    }

    auto samplerYCbCrConversionInfo = vk::SamplerYcbcrConversionInfo();

    if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_NV12 ||
        m_option.encodingMode == EncodingMode::FakeGpuEncoding_YUV420p) {
        const auto samplerYCbCrConversionCreateInfo = VkSamplerYcbcrConversionCreateInfo{
            .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO_KHR,
            .pNext = nullptr,
            .format = static_cast<VkFormat>(imageFormat),
            .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709,
            .ycbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_NARROW,
            .components =
                {
                    VK_COMPONENT_SWIZZLE_IDENTITY,
                    VK_COMPONENT_SWIZZLE_IDENTITY,
                    VK_COMPONENT_SWIZZLE_IDENTITY,
                    VK_COMPONENT_SWIZZLE_IDENTITY,
                },
            .xChromaOffset = VK_CHROMA_LOCATION_MIDPOINT,
            .yChromaOffset = VK_CHROMA_LOCATION_MIDPOINT,
            .chromaFilter = VK_FILTER_LINEAR,
            .forceExplicitReconstruction = VK_FALSE,
        };

        const auto result = pfnCreateSamplerYcbcrConversionKHR(
            m_device, &samplerYCbCrConversionCreateInfo, nullptr, &m_ycbcr_conversion);
        VERIFY(result == VK_SUCCESS);

        samplerYCbCrConversionInfo.setConversion(m_ycbcr_conversion);
    }

    {
        for (uint32_t i = 0; i < swapchain_rsc->get_swapchain_image_count(); i++) {
            auto viewInfo = vk::ImageViewCreateInfo()
                                .setImage(m_show_exported_image_rscs[i].exported_image)
                                .setViewType(vk::ImageViewType::e2D)
                                .setFormat(imageFormat)
                                .setSubresourceRange(vk::ImageSubresourceRange(
                                    vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

            if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_NV12 ||
                m_option.encodingMode == EncodingMode::FakeGpuEncoding_YUV420p) {
                viewInfo.setPNext(&samplerYCbCrConversionInfo);
            }

            const auto result = m_device.createImageView(
                &viewInfo, nullptr, &m_show_exported_image_rscs[i].exported_image_view);
            VERIFY(result == vk::Result::eSuccess);
        }
    }

    {
        auto samplerCreationInfo = vk::SamplerCreateInfo()
                                       .setMagFilter(vk::Filter::eLinear)
                                       .setMinFilter(vk::Filter::eLinear)
                                       .setAddressModeU(vk::SamplerAddressMode::eClampToEdge)
                                       .setAddressModeV(vk::SamplerAddressMode::eClampToEdge)
                                       .setAddressModeW(vk::SamplerAddressMode::eClampToEdge)
                                       .setAnisotropyEnable(VK_FALSE)
                                       .setMaxAnisotropy(1)
                                       .setCompareEnable(VK_TRUE)
                                       .setCompareOp(vk::CompareOp::eNever)
                                       .setMinLod(0.0f)
                                       .setMaxLod(0.0f)
                                       .setBorderColor(vk::BorderColor::eFloatOpaqueWhite)
                                       .setUnnormalizedCoordinates(VK_FALSE);

        if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_NV12 ||
            m_option.encodingMode == EncodingMode::FakeGpuEncoding_YUV420p) {
            samplerCreationInfo.setPNext(&samplerYCbCrConversionInfo);
        }

        const auto result =
            m_device.createSampler(&samplerCreationInfo, nullptr, &m_texture_sampler);
        VERIFY(result == vk::Result::eSuccess);
    }
}

void FakeGpuEncoding::init_descriptor_layout() {
    const auto layout_bindings = vk::DescriptorSetLayoutBinding()
                                     .setBinding(0)
                                     .setDescriptorType(vk::DescriptorType::eCombinedImageSampler)
                                     .setDescriptorCount(1)
                                     .setStageFlags(vk::ShaderStageFlagBits::eFragment)
                                     .setPImmutableSamplers(&m_texture_sampler);

    auto const descriptor_layout =
        vk::DescriptorSetLayoutCreateInfo().setBindingCount(1).setPBindings(&layout_bindings);

    auto result = m_device.createDescriptorSetLayout(&descriptor_layout, nullptr, &m_desc_layout);
    VERIFY(result == vk::Result::eSuccess);

    auto const pPipelineLayoutCreateInfo =
        vk::PipelineLayoutCreateInfo().setSetLayoutCount(1).setPSetLayouts(&m_desc_layout);

    result = m_device.createPipelineLayout(&pPipelineLayoutCreateInfo, nullptr, &m_pipeline_layout);
    VERIFY(result == vk::Result::eSuccess);
}

void FakeGpuEncoding::init_render_pass() {
    // The initial layout for the color and depth attachments will be LAYOUT_UNDEFINED
    // because at the start of the renderpass, we don't care about their contents.
    // At the start of the subpass, the color attachment's layout will be transitioned
    // to LAYOUT_COLOR_ATTACHMENT_OPTIMAL and the depth stencil attachment's layout
    // will be transitioned to LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL.  At the end of
    // the renderpass, the color attachment's layout will be transitioned to
    // LAYOUT_PRESENT_SRC_KHR to be ready to present.  This is all done as part of
    // the renderpass, no barriers are necessary.
    const vk::AttachmentDescription attachments =
        vk::AttachmentDescription()
            .setFormat(swapchain_rsc->get_image_format())
            .setSamples(vk::SampleCountFlagBits::e1)
            .setLoadOp(vk::AttachmentLoadOp::eClear)
            .setStoreOp(vk::AttachmentStoreOp::eStore)
            .setStencilLoadOp(vk::AttachmentLoadOp::eDontCare)
            .setStencilStoreOp(vk::AttachmentStoreOp::eDontCare)
            .setInitialLayout(vk::ImageLayout::eUndefined)
            .setFinalLayout(vk::ImageLayout::ePresentSrcKHR);

    auto const color_reference = vk::AttachmentReference().setAttachment(0).setLayout(
        vk::ImageLayout::eColorAttachmentOptimal);

    auto const depth_reference = vk::AttachmentReference()
                                     .setAttachment(VK_ATTACHMENT_UNUSED)
                                     .setLayout(vk::ImageLayout::eUndefined);

    auto const subpass = vk::SubpassDescription()
                             .setPipelineBindPoint(vk::PipelineBindPoint::eGraphics)
                             .setInputAttachmentCount(0)
                             .setPInputAttachments(nullptr)
                             .setColorAttachmentCount(1)
                             .setPColorAttachments(&color_reference)
                             .setPResolveAttachments(nullptr)
                             .setPDepthStencilAttachment(&depth_reference)
                             .setPreserveAttachmentCount(0)
                             .setPPreserveAttachments(nullptr);

    vk::SubpassDependency const dependencies[1] = {
        vk::SubpassDependency() // Image layout transition
            .setSrcSubpass(VK_SUBPASS_EXTERNAL)
            .setDstSubpass(0)
            .setSrcStageMask(vk::PipelineStageFlagBits::eColorAttachmentOutput)
            .setDstStageMask(vk::PipelineStageFlagBits::eColorAttachmentOutput)
            .setSrcAccessMask(vk::AccessFlagBits())
            .setDstAccessMask(vk::AccessFlagBits::eColorAttachmentWrite |
                              vk::AccessFlagBits::eColorAttachmentRead)
            .setDependencyFlags(vk::DependencyFlags()),
    };

    auto const rp_info = vk::RenderPassCreateInfo()
                             .setAttachmentCount(1)
                             .setPAttachments(&attachments)
                             .setSubpassCount(1)
                             .setPSubpasses(&subpass)
                             .setDependencyCount(1)
                             .setPDependencies(dependencies);

    auto result = m_device.createRenderPass(&rp_info, nullptr, &m_render_pass);
    VERIFY(result == vk::Result::eSuccess);
}

void FakeGpuEncoding::init_framebuffer() {
    for (uint32_t i = 0; i < swapchain_rsc->get_swapchain_image_count(); i++) {
        vk::ImageView attachment = swapchain_rsc->get_image_view(i);
        auto const fb_info = vk::FramebufferCreateInfo()
                                 .setRenderPass(m_render_pass)
                                 .setAttachmentCount(1)
                                 .setPAttachments(&attachment)
                                 .setWidth((uint32_t)width())
                                 .setHeight((uint32_t)height())
                                 .setLayers(1);
        auto const result = m_device.createFramebuffer(
            &fb_info, nullptr, &m_show_exported_image_rscs[i].frame_buffer);
        VERIFY(result == vk::Result::eSuccess);
    }
}

// Builds the vertex shader module from the code words in show_texture.vert.inc.
// The returned module is owned by the caller, who must destroy it.
vk::ShaderModule FakeGpuEncoding::prepare_vs() {
    const uint32_t vertShaderCode[] = {
#include "show_texture.vert.inc"
    };

    vk::ShaderModuleCreateInfo shader_info;
    shader_info.setCodeSize(sizeof(vertShaderCode)); // size is in bytes
    shader_info.setPCode(vertShaderCode);

    vk::ShaderModule shader_module;
    const auto result = m_device.createShaderModule(&shader_info, nullptr, &shader_module);
    VERIFY(result == vk::Result::eSuccess);
    return shader_module;
}

// Builds the fragment shader module from the code words in show_texture.frag.inc.
// The returned module is owned by the caller, who must destroy it.
vk::ShaderModule FakeGpuEncoding::prepare_fs() {
    const uint32_t fragShaderCode[] = {
#include "show_texture.frag.inc"
    };

    vk::ShaderModuleCreateInfo shader_info;
    shader_info.setCodeSize(sizeof(fragShaderCode)); // size is in bytes
    shader_info.setPCode(fragShaderCode);

    vk::ShaderModule shader_module;
    const auto result = m_device.createShaderModule(&shader_info, nullptr, &shader_module);
    VERIFY(result == vk::Result::eSuccess);
    return shader_module;
}

void FakeGpuEncoding::init_pipeline() {
    vk::PipelineCacheCreateInfo const pipelineCacheInfo;
    auto result = m_device.createPipelineCache(&pipelineCacheInfo, nullptr, &m_pipeline_cache);
    VERIFY(result == vk::Result::eSuccess);

    vk::ShaderModule vert_shader_module = prepare_vs();
    vk::ShaderModule frag_shader_module = prepare_fs();
    vk::PipelineShaderStageCreateInfo const shaderStageInfo[2] = {
        vk::PipelineShaderStageCreateInfo()
            .setStage(vk::ShaderStageFlagBits::eVertex)
            .setModule(vert_shader_module)
            .setPName("main"),
        vk::PipelineShaderStageCreateInfo()
            .setStage(vk::ShaderStageFlagBits::eFragment)
            .setModule(frag_shader_module)
            .setPName("main")};

    vk::PipelineVertexInputStateCreateInfo const vertexInputInfo;

    auto const inputAssemblyInfo = vk::PipelineInputAssemblyStateCreateInfo().setTopology(
        vk::PrimitiveTopology::eTriangleList);

    // TODO: Where are pViewports and pScissors set?
    auto const viewportInfo =
        vk::PipelineViewportStateCreateInfo().setViewportCount(1).setScissorCount(1);

    auto const rasterizationInfo = vk::PipelineRasterizationStateCreateInfo()
                                       .setDepthClampEnable(VK_FALSE)
                                       .setRasterizerDiscardEnable(VK_FALSE)
                                       .setPolygonMode(vk::PolygonMode::eFill)
                                       .setCullMode(vk::CullModeFlagBits::eBack)
                                       .setFrontFace(vk::FrontFace::eClockwise)
                                       .setDepthBiasEnable(VK_FALSE)
                                       .setLineWidth(1.0f);

    auto const multisampleInfo = vk::PipelineMultisampleStateCreateInfo();

    auto const stencilOp = vk::StencilOpState()
                               .setFailOp(vk::StencilOp::eKeep)
                               .setPassOp(vk::StencilOp::eKeep)
                               .setCompareOp(vk::CompareOp::eAlways);

    auto const depthStencilInfo = vk::PipelineDepthStencilStateCreateInfo()
                                      .setDepthTestEnable(VK_FALSE)
                                      .setDepthWriteEnable(VK_FALSE)
                                      .setDepthBoundsTestEnable(VK_FALSE)
                                      .setStencilTestEnable(VK_FALSE)
                                      .setFront(stencilOp)
                                      .setBack(stencilOp);

    vk::PipelineColorBlendAttachmentState const colorBlendAttachments[1] = {
        vk::PipelineColorBlendAttachmentState().setColorWriteMask(
            vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
            vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA)};

    auto const colorBlendInfo =
        vk::PipelineColorBlendStateCreateInfo().setAttachmentCount(1).setPAttachments(
            colorBlendAttachments);

    vk::DynamicState const dynamicStates[2] = {vk::DynamicState::eViewport,
                                               vk::DynamicState::eScissor};

    auto const dynamicStateInfo = vk::PipelineDynamicStateCreateInfo()
                                      .setPDynamicStates(dynamicStates)
                                      .setDynamicStateCount(2);

    auto const pipeline = vk::GraphicsPipelineCreateInfo()
                              .setStageCount(2)
                              .setPStages(shaderStageInfo)
                              .setPVertexInputState(&vertexInputInfo)
                              .setPInputAssemblyState(&inputAssemblyInfo)
                              .setPViewportState(&viewportInfo)
                              .setPRasterizationState(&rasterizationInfo)
                              .setPMultisampleState(&multisampleInfo)
                              .setPDepthStencilState(&depthStencilInfo)
                              .setPColorBlendState(&colorBlendInfo)
                              .setPDynamicState(&dynamicStateInfo)
                              .setLayout(m_pipeline_layout)
                              .setRenderPass(m_render_pass);

    result = m_device.createGraphicsPipelines(m_pipeline_cache, 1, &pipeline, nullptr, &m_pipeline);
    VERIFY(result == vk::Result::eSuccess);

    m_device.destroyShaderModule(frag_shader_module, nullptr);
    m_device.destroyShaderModule(vert_shader_module, nullptr);
}

void FakeGpuEncoding::init_descriptor_set() {
    {
        const uint32_t descriptorSetCount = swapchain_rsc->get_swapchain_image_count();
        uint32_t textureCount = 1;
        // TODO: Why multi-planer formats takes more than one descriptors??
        if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_NV12) {
            textureCount = 2;
        } else if (m_option.encodingMode == EncodingMode::FakeGpuEncoding_YUV420p) {
            textureCount = 3;
        }

        vk::DescriptorPoolSize const poolSizes[1] = {
            vk::DescriptorPoolSize()
                .setType(vk::DescriptorType::eCombinedImageSampler)
                .setDescriptorCount(descriptorSetCount * textureCount)};

        auto const descriptorPoolCreationInfo = vk::DescriptorPoolCreateInfo()
                                                    .setMaxSets(descriptorSetCount)
                                                    .setPoolSizeCount(1)
                                                    .setPPoolSizes(poolSizes);

        auto result =
            m_device.createDescriptorPool(&descriptorPoolCreationInfo, nullptr, &m_desc_pool);
        VERIFY(result == vk::Result::eSuccess);
    }

    {
        auto const descSetAllocInfo = vk::DescriptorSetAllocateInfo()
                                          .setDescriptorPool(m_desc_pool)
                                          .setDescriptorSetCount(1)
                                          .setPSetLayouts(&m_desc_layout);

        vk::WriteDescriptorSet writes[1];

        /* If the descriptor refers to a sampler that performs Y′CBCR conversion or samples a
           subsampled image, the sampler must only be used to sample the image in the same
           descriptor. Otherwise, the sampler and image in this type of descriptor can be used
           freely with any other samplers and images.
        */
        writes[0].setDstBinding(0);
        writes[0].setDescriptorCount(1);
        writes[0].setDescriptorType(vk::DescriptorType::eCombinedImageSampler);

        for (unsigned int i = 0; i < swapchain_rsc->get_swapchain_image_count(); i++) {
            auto result = m_device.allocateDescriptorSets(
                &descSetAllocInfo, &m_show_exported_image_rscs[i].descriptor_set);
            VERIFY(result == vk::Result::eSuccess);

            const auto descImageInfo =
                vk::DescriptorImageInfo()
                    .setSampler(m_texture_sampler)
                    .setImageView(m_show_exported_image_rscs[i].exported_image_view)
                    .setImageLayout(vk::ImageLayout::eShaderReadOnlyOptimal);
            writes[0].setPImageInfo(&descImageInfo);
            writes[0].setDstSet(m_show_exported_image_rscs[i].descriptor_set);

            m_device.updateDescriptorSets(1, writes, 0, nullptr);
        }
    }
}

void FakeGpuEncoding::init_sync_primitives() {
    // Create semaphores to synchronize acquiring presentable buffers before
    // rendering and waiting for drawing to be complete before presenting
    auto semaphoreCreateInfo = vk::SemaphoreCreateInfo();

    // Create fences that we can use to throttle if we get too far
    // ahead of the image presents
    auto const fenceCreateInfo = vk::FenceCreateInfo().setFlags(vk::FenceCreateFlagBits::eSignaled);

    m_swapchain_image_acquired_semaphores.reset(
        new vk::Semaphore[get_max_number_of_inflight_commands()]);

    m_fences.reset(new vk::Fence[get_max_number_of_inflight_commands()]);
    m_swp_chain_acquire_image_fences.reset(new vk::Fence[get_max_number_of_inflight_commands()]);

    m_show_exported_image_complete_semaphores.reset(
        new vk::Semaphore[get_max_number_of_inflight_commands()]);
    m_acquire_exported_image_semaphores.reset(
        new vk::Semaphore[get_max_number_of_inflight_commands()]);

    vk::Result result = vk::Result::eSuccess;
    for (uint32_t i = 0; i < get_max_number_of_inflight_commands(); i++) {
        result =
            m_device.createFence(&fenceCreateInfo, nullptr, &m_swp_chain_acquire_image_fences[i]);
        VERIFY(result == vk::Result::eSuccess);

        result = m_device.createFence(&fenceCreateInfo, nullptr, &m_fences[i]);
        VERIFY(result == vk::Result::eSuccess);

        result = m_device.createSemaphore(
            &semaphoreCreateInfo, nullptr, &m_swapchain_image_acquired_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);

        result = m_device.createSemaphore(
            &semaphoreCreateInfo, nullptr, &m_acquire_exported_image_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);
    }

    auto const exportSemaphoreInfo = vk::ExportSemaphoreCreateInfo().setHandleTypes(
        vk::ExternalSemaphoreHandleTypeFlagBits::eSyncFd);
    semaphoreCreateInfo.setPNext(&exportSemaphoreInfo);
    for (uint32_t i = 0; i < get_max_number_of_inflight_commands(); i++) {
        result = m_device.createSemaphore(
            &semaphoreCreateInfo, nullptr, &m_show_exported_image_complete_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);
    }
}

void FakeGpuEncoding::build_cmds() {
    {
        auto const cmdPoolCreationInfo =
            vk::CommandPoolCreateInfo().setQueueFamilyIndex(m_graphics_queue_family_index);
        const auto result = m_device.createCommandPool(&cmdPoolCreationInfo, nullptr, &m_cmd_pool);
        VERIFY(result == vk::Result::eSuccess);
    }

    auto const cmdAllocInfo = vk::CommandBufferAllocateInfo()
                                  .setCommandPool(m_cmd_pool)
                                  .setLevel(vk::CommandBufferLevel::ePrimary)
                                  .setCommandBufferCount(1);

    for (uint32_t i = 0; i < swapchain_rsc->get_swapchain_image_count(); ++i) {
        const auto result =
            m_device.allocateCommandBuffers(&cmdAllocInfo, &m_show_exported_image_rscs[i].cmd);
        VERIFY(result == vk::Result::eSuccess);
    }

    auto const cmdBufBeginInfo =
        vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eSimultaneousUse);
    for (uint32_t i = 0; i < swapchain_rsc->get_swapchain_image_count(); ++i) {
        const auto clearValues = vk::ClearValue(std::array<float, 4>({{0.0f, 0.0f, 0.0f, 0.0f}}));

        auto const passInfo =
            vk::RenderPassBeginInfo()
                .setRenderPass(m_render_pass)
                .setFramebuffer(m_show_exported_image_rscs[i].frame_buffer)
                .setRenderArea(vk::Rect2D(vk::Offset2D(0, 0),
                                          vk::Extent2D((uint32_t)width(), (uint32_t)height())))
                .setClearValueCount(1)
                .setPClearValues(&clearValues);

        const auto &commandBuffer = m_show_exported_image_rscs[i].cmd;
        auto result = commandBuffer.begin(&cmdBufBeginInfo);
        VERIFY(result == vk::Result::eSuccess);

        const auto acquireImageMemoryBarrier =
            vk::ImageMemoryBarrier()
                .setSrcAccessMask(m_option.encodingMode == EncodingMode::FakeGpuEncoding_RGBA
                                      ? vk::AccessFlagBits::eTransferWrite
                                      : vk::AccessFlagBits::eShaderWrite)
                .setDstAccessMask(vk::AccessFlagBits::eShaderRead)
                .setOldLayout(m_option.encodingMode == EncodingMode::FakeGpuEncoding_NV12 ||
                                      m_option.encodingMode == EncodingMode::FakeGpuEncoding_YUV420p
                                  ? vk::ImageLayout::eGeneral
                                  : vk::ImageLayout::eTransferDstOptimal)
                .setNewLayout(vk::ImageLayout::eShaderReadOnlyOptimal)
                .setSrcQueueFamilyIndex(VK_QUEUE_FAMILY_FOREIGN_EXT)
                .setDstQueueFamilyIndex(m_graphics_queue_family_index)
                .setImage(m_show_exported_image_rscs[i].exported_image)
                .setSubresourceRange(
                    vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

        commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eBottomOfPipe,
                                      vk::PipelineStageFlagBits::eFragmentShader,
                                      vk::DependencyFlagBits(),
                                      0,
                                      nullptr,
                                      0,
                                      nullptr,
                                      1,
                                      &acquireImageMemoryBarrier);

        commandBuffer.beginRenderPass(&passInfo, vk::SubpassContents::eInline);

        commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, m_pipeline);
        commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
                                         m_pipeline_layout,
                                         0,
                                         1,
                                         &m_show_exported_image_rscs[i].descriptor_set,
                                         0,
                                         nullptr);
        auto const viewport = vk::Viewport()
                                  .setX(0.0f)
                                  .setY(0.0f)
                                  .setWidth((float)width())
                                  .setHeight((float)height())
                                  .setMinDepth((float)0.0f)
                                  .setMaxDepth((float)1.0f);
        commandBuffer.setViewport(0, 1, &viewport);

        vk::Rect2D const scissor(vk::Offset2D(0, 0), vk::Extent2D(width(), height()));
        commandBuffer.setScissor(0, 1, &scissor);
        commandBuffer.draw(6, 1, 0, 0);

        // Note that ending the renderpass changes the image's layout from
        // COLOR_ATTACHMENT_OPTIMAL to PRESENT_SRC_KHR
        commandBuffer.endRenderPass();

        const auto releaseImageBarrier =
            vk::ImageMemoryBarrier()
                .setSrcAccessMask(vk::AccessFlags()) // No shader writes.
                .setDstAccessMask(vk::AccessFlags()) // Dont care about its content
                .setOldLayout(vk::ImageLayout::eShaderReadOnlyOptimal)
                .setNewLayout(vk::ImageLayout::eGeneral)
                .setSrcQueueFamilyIndex(m_graphics_queue_family_index)
                .setDstQueueFamilyIndex(VK_QUEUE_FAMILY_FOREIGN_EXT)
                .setImage(m_show_exported_image_rscs[i].exported_image)
                .setSubresourceRange(
                    vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

        commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader,
                                      vk::PipelineStageFlagBits::eTopOfPipe,
                                      vk::DependencyFlagBits(),
                                      0,
                                      nullptr,
                                      0,
                                      nullptr,
                                      1,
                                      &releaseImageBarrier);

        commandBuffer.end();
    }
}

// Resize handler. Resizing is not implemented for the fake encoding device: the body
// is a single always-failing assert, so when asserts are compiled out this does nothing.
void FakeGpuEncoding::resize() {
    // TODO: implement resize support for fake encoding mode.
    assert(false && "Resize in fake encoding mode is not supported!");
}

// Exports the device memory backing shared image `index` as a DMA-BUF file descriptor.
//
// index  - entry of m_show_exported_image_rscs whose exported_image_memory is exported.
// width  - expected image width; only asserted against m_option.width.
// height - expected image height; only asserted against m_option.height.
//
// Returns the descriptor written by pfnGetMemoryFdKHR. If the export fails and VERIFY
// does not stop execution, -1 is returned rather than an indeterminate value.
DmaBufFd FakeGpuEncoding::allocate_shared_image_by_index(ResourceIndex index,
                                                         int32_t width,
                                                         int32_t height) {
    assert(width == m_option.width);
    assert(height == m_option.height);
    // Both parameters are referenced only by the asserts above; keep builds with asserts
    // compiled out free of unused-parameter warnings.
    static_cast<void>(width);
    static_cast<void>(height);

    const VkMemoryGetFdInfoKHR getMemFdInfo = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
        .pNext = nullptr,
        .memory = m_show_exported_image_rscs[index].exported_image_memory,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };

    // Start from an invalid descriptor so a failed export can never leak stack garbage
    // to the caller.
    DmaBufFd dmabuf_fd = -1;
    const auto result = pfnGetMemoryFdKHR(m_device, &getMemFdInfo, &dmabuf_fd);
    VERIFY(result == VK_SUCCESS);
    return dmabuf_fd;
}

// Returns the size recorded in m_exported_image_memory_requirements.
// `index` is not consulted: the same stored requirements are reported for every index.
vk::DeviceSize FakeGpuEncoding::get_shared_image_size_by_index(ResourceIndex index) {
    const auto &requirements = m_exported_image_memory_requirements;
    return requirements.size;
}

// Queries where one plane of shared image `index` lives inside its memory.
//
// index        - entry of m_show_exported_image_rscs whose exported_image is queried.
// image_aspect - plane to look up (Plane0 / Plane1 / Plane2).
// offset, size - outputs; receive the offset and size reported by
//                getImageSubresourceLayout for mip level 0, array layer 0 of that plane.
//
// Any other image_aspect value is a programming error: it asserts, and when asserts are
// compiled out it reports an empty range (offset 0, size 0) instead of querying Vulkan
// with an empty aspect mask.
void FakeGpuEncoding::get_shared_image_plane_range_by_index(ResourceIndex index,
                                                            ImagePlaneAspect image_aspect,
                                                            size_t *offset,
                                                            size_t *size) {
    vk::ImageAspectFlags vk_image_aspect;
    switch (image_aspect) {
    case ImagePlaneAspect::Plane0:
        vk_image_aspect = vk::ImageAspectFlagBits::ePlane0;
        break;
    case ImagePlaneAspect::Plane1:
        vk_image_aspect = vk::ImageAspectFlagBits::ePlane1;
        break;
    case ImagePlaneAspect::Plane2:
        vk_image_aspect = vk::ImageAspectFlagBits::ePlane2;
        break;
    default:
        assert(false && "never called!");
        // Do not fall through: an empty aspect mask is not a valid subresource query.
        *offset = 0;
        *size = 0;
        return;
    }

    vk::SubresourceLayout subresourceLayout = {};
    const auto imageSubresource =
        vk::ImageSubresource().setArrayLayer(0).setMipLevel(0).setAspectMask(vk_image_aspect);
    m_device.getImageSubresourceLayout(
        m_show_exported_image_rscs[index].exported_image, &imageSubresource, &subresourceLayout);

    // vk::DeviceSize -> size_t; make the conversion explicit.
    *offset = static_cast<size_t>(subresourceLayout.offset);
    *size = static_cast<size_t>(subresourceLayout.size);
}

// Upper bound on the number of frames in flight. The per-frame fence and semaphore
// arrays in this file are sized and iterated with this value.
FrameIndex FakeGpuEncoding::get_max_number_of_inflight_commands() {
    // Tunable constant.
    FrameIndex max_inflight{3};
    return max_inflight;
}

// Imports a sync-file descriptor as the temporary payload of the per-frame
// "acquire exported image" semaphore.
//
// syncFd - sync file descriptor to import.
// index  - frame slot; selects m_fences[index] and m_acquire_exported_image_semaphores[index].
//
// Blocks until m_fences[index] is signaled, then resets that fence before importing.
void FakeGpuEncoding::import_semaphore_encoding_wait_semaphore(SyncFileFd syncFd,
                                                               FrameIndex index) {
    // The semaphore must not be in use when its payload is replaced, so wait for the
    // fence of this frame slot first. Check both results like every other Vulkan call in
    // this file: a failed wait (e.g. device lost) must not be silently ignored.
    const auto wait_result = m_device.waitForFences(1, &m_fences[index], VK_TRUE, UINT64_MAX);
    VERIFY(wait_result == vk::Result::eSuccess);

    const auto reset_result = m_device.resetFences(1, &m_fences[index]);
    VERIFY(reset_result == vk::Result::eSuccess);

    VkImportSemaphoreFdInfoKHR semaphoreImportInfo = {
        .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
        .pNext = nullptr,
        .semaphore = m_acquire_exported_image_semaphores[index],
        .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
        .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
        .fd = syncFd,
    };
    const auto vk_result = pfnImportSemaphoreFdKHR(m_device, &semaphoreImportInfo);
    VERIFY(vk_result == VK_SUCCESS);
}

// Exports the per-frame "show exported image complete" semaphore as a sync-file
// descriptor.
//
// index - frame slot; selects m_show_exported_image_complete_semaphores[index].
//
// Returns the descriptor written by pfnGetSemaphoreFdKHR. If the export fails and VERIFY
// does not stop execution, -1 is returned rather than an indeterminate value.
SyncFileFd FakeGpuEncoding::export_semaphore_encoding_signal_semaphore(FrameIndex index) {
    const VkSemaphoreGetFdInfoKHR semaphoreGetFdInfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
        .pNext = nullptr,
        .semaphore = m_show_exported_image_complete_semaphores[index],
        .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
    };

    // Start from an invalid descriptor so a failed export can never leak stack garbage
    // to the caller.
    SyncFileFd fd = -1;
    const auto result = pfnGetSemaphoreFdKHR(m_device, &semaphoreGetFdInfo, &fd);
    VERIFY(result == VK_SUCCESS);
    return fd;
}

// Acquires the next swapchain image and returns its index as the shared-resource index.
//
// frame_index - frame slot; selects the acquire semaphore and, in CPU-wait debug mode,
//               the acquire fence.
//
// When m_option.debug_always_cpu_wait is set, the acquire is also given a fence and this
// function blocks on it before returning.
ResourceIndex FakeGpuEncoding::acquire_shared_image(FrameIndex frame_index) {
    if (m_option.debug_always_cpu_wait) {
        // The acquire fences are created in the signaled state and are left signaled by
        // the wait below, while the acquire call needs an unsignaled fence. Reset it here
        // so the wait really waits for this acquire.
        // NOTE(review): acquire_next_image is not visible from here; if it already resets
        // the fence, this extra reset of an idle fence is harmless - confirm.
        const auto reset_result =
            m_device.resetFences(1, &m_swp_chain_acquire_image_fences[frame_index]);
        VERIFY(reset_result == vk::Result::eSuccess);
    }

    // Use the index of swapchain image to index the shared transfer dest images, so that we don't
    // need to rebuild command buffer for every frame.
    const auto resource_idx = ResourceIndex{swapchain_rsc->acquire_next_image(
        m_instance,
        m_device,
        m_option.debug_always_cpu_wait ? m_swp_chain_acquire_image_fences[frame_index]
                                       : vk::Fence{},
        m_swapchain_image_acquired_semaphores[frame_index])};

    if (m_option.debug_always_cpu_wait) {
        // Check the result like every other Vulkan call in this file.
        const auto wait_result = m_device.waitForFences(
            1, &m_swp_chain_acquire_image_fences[frame_index], VK_TRUE, UINT64_MAX);
        VERIFY(wait_result == vk::Result::eSuccess);
    }

    return resource_idx;
}

// Submits the pre-recorded command buffer for `rsc_index` and presents the result.
//
// cur_frame          - frame number passed to the timestamp record.
// p_timestamp_record - receives mark_begin_encoding before the submit; mark_end_encoding
//                      is only recorded when m_option.debug_always_cpu_wait is set.
// frame_index        - frame slot; selects the wait/signal semaphores and the fence.
// rsc_index          - selects the command buffer and the image to present.
void FakeGpuEncoding::encoding_on_memory(uint32_t cur_frame,
                                         TimestampRecord *p_timestamp_record,
                                         FrameIndex frame_index,
                                         ResourceIndex rsc_index) {
    // The submit waits on two semaphores; the stage masks pair up with them by position:
    //  - swapchain image acquired -> color attachment output: the image is not rendered to
    //    until the presentation engine has released ownership to the application;
    //  - exported image acquired  -> fragment shader: the stage that reads the exported
    //    image.
    vk::PipelineStageFlags const pipe_stage_flags[]{
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
        vk::PipelineStageFlagBits::eFragmentShader,
    };
    vk::Semaphore wait_semaphores[] = {
        m_swapchain_image_acquired_semaphores[frame_index],
        m_acquire_exported_image_semaphores[frame_index],
    };

    p_timestamp_record->mark_begin_encoding(cur_frame);
    auto const submit_info =
        vk::SubmitInfo()
            .setPWaitDstStageMask(pipe_stage_flags)
            .setWaitSemaphoreCount(2)
            .setPWaitSemaphores(wait_semaphores)
            .setCommandBufferCount(1)
            .setPCommandBuffers(&m_show_exported_image_rscs[rsc_index].cmd)
            .setSignalSemaphoreCount(1)
            .setPSignalSemaphores(&m_show_exported_image_complete_semaphores[frame_index]);

    // m_fences[frame_index] is signaled when this submission completes.
    const auto result = m_graphics_queue.submit(1, &submit_info, m_fences[frame_index]);
    VERIFY(result == vk::Result::eSuccess);
    if (m_option.debug_always_cpu_wait) {
        // Debug mode: block until the GPU work is done so the end timestamp is meaningful.
        // Check the result like every other Vulkan call in this file.
        const auto wait_result =
            m_device.waitForFences(1, &m_fences[frame_index], VK_TRUE, UINT64_MAX);
        VERIFY(wait_result == vk::Result::eSuccess);
        p_timestamp_record->mark_end_encoding(cur_frame);
    }

    // The completion semaphore signaled by the submit is handed to the present call.
    swapchain_rsc->present_image(m_instance,
                                 m_gpu,
                                 m_device,
                                 m_show_exported_image_complete_semaphores[frame_index],
                                 rsc_index);
}
