// Modifications Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
// Notified per clause 4(b) of the license

/*
 * Copyright (c) 2015-2019 The Khronos Group Inc.
 * Copyright (c) 2015-2019 Valve Corporation
 * Copyright (c) 2015-2019 LunarG, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Authors: Jeremy Hayes <jeremy@lunarg.com>
 */

#include <cassert>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <sstream>

#include "app.h"
#include "cube.h"
#include "options.h"
#include "timestamp_record.h"
#include "vk_common.h"

// File names of the texture images; currently a single PPM file.
// NOTE(review): presumably consumed by the texture-loading code elsewhere in this file — confirm.
static char const *const tex_files[] = {"lunarg.ppm"};

// Uniform data block: an MVP matrix followed by positions and attributes for
// 12 * 3 = 36 vertices, each entry stored as four floats.
// NOTE(review): presumably mirrors the uniform block declared in the vertex shader —
// confirm before changing field order or sizes.
struct vktexcube_vs_uniform {
    // Must start with MVP
    float mvp[4][4];
    float position[12 * 3][4];  // one 4-float entry per vertex
    float attr[12 * 3][4];      // one 4-float entry per vertex
};

//--------------------------------------------------------------------------------------
// Mesh and VertexFormat Data
//--------------------------------------------------------------------------------------
// clang-format off
// Cube vertex positions: 6 faces x 2 triangles x 3 vertices = 36 entries of three floats
// (x, y, z), every coordinate being -1 or +1. Faces are listed in the order
// -X, -Z, -Y, +Y, +X, +Z.
static const float g_vertex_buffer_data[] = {
    -1.0f,-1.0f,-1.0f,  // -X side
    -1.0f,-1.0f, 1.0f,
    -1.0f, 1.0f, 1.0f,
    -1.0f, 1.0f, 1.0f,
    -1.0f, 1.0f,-1.0f,
    -1.0f,-1.0f,-1.0f,

    -1.0f,-1.0f,-1.0f,  // -Z side
     1.0f, 1.0f,-1.0f,
     1.0f,-1.0f,-1.0f,
    -1.0f,-1.0f,-1.0f,
    -1.0f, 1.0f,-1.0f,
     1.0f, 1.0f,-1.0f,

    -1.0f,-1.0f,-1.0f,  // -Y side
     1.0f,-1.0f,-1.0f,
     1.0f,-1.0f, 1.0f,
    -1.0f,-1.0f,-1.0f,
     1.0f,-1.0f, 1.0f,
    -1.0f,-1.0f, 1.0f,

    -1.0f, 1.0f,-1.0f,  // +Y side
    -1.0f, 1.0f, 1.0f,
     1.0f, 1.0f, 1.0f,
    -1.0f, 1.0f,-1.0f,
     1.0f, 1.0f, 1.0f,
     1.0f, 1.0f,-1.0f,

     1.0f, 1.0f,-1.0f,  // +X side
     1.0f, 1.0f, 1.0f,
     1.0f,-1.0f, 1.0f,
     1.0f,-1.0f, 1.0f,
     1.0f,-1.0f,-1.0f,
     1.0f, 1.0f,-1.0f,

    -1.0f, 1.0f, 1.0f,  // +Z side
    -1.0f,-1.0f, 1.0f,
     1.0f, 1.0f, 1.0f,
    -1.0f,-1.0f, 1.0f,
     1.0f,-1.0f, 1.0f,
     1.0f, 1.0f, 1.0f,
};

// Texture coordinates: 36 entries of two floats (u, v), one per vertex, listed in the
// same face order as g_vertex_buffer_data (-X, -Z, -Y, +Y, +X, +Z).
static const float g_uv_buffer_data[] = {
    0.0f, 1.0f,  // -X side
    1.0f, 1.0f,
    1.0f, 0.0f,
    1.0f, 0.0f,
    0.0f, 0.0f,
    0.0f, 1.0f,

    1.0f, 1.0f,  // -Z side
    0.0f, 0.0f,
    0.0f, 1.0f,
    1.0f, 1.0f,
    1.0f, 0.0f,
    0.0f, 0.0f,

    1.0f, 0.0f,  // -Y side
    1.0f, 1.0f,
    0.0f, 1.0f,
    1.0f, 0.0f,
    0.0f, 1.0f,
    0.0f, 0.0f,

    1.0f, 0.0f,  // +Y side
    0.0f, 0.0f,
    0.0f, 1.0f,
    1.0f, 0.0f,
    0.0f, 1.0f,
    1.0f, 1.0f,

    1.0f, 0.0f,  // +X side
    0.0f, 0.0f,
    0.0f, 1.0f,
    0.0f, 1.0f,
    1.0f, 1.0f,
    1.0f, 0.0f,

    0.0f, 0.0f,  // +Z side
    0.0f, 1.0f,
    1.0f, 0.0f,
    0.0f, 1.0f,
    1.0f, 1.0f,
    1.0f, 0.0f,
};
// clang-format on

// Stores the options, resets the listed scalar members to their zero/false/null starting
// values and zero-fills the projection, view and model matrices.
Cube::Cube(const Option &option)
    : option(option),
      encoding_device(nullptr),
      prepared{false},
      graphics_queue_family_index{0},
      frame_index{0},
      spin_angle{0.0f},
      spin_increment{0.0f},
      queue_family_count{0} {
    // The matrices are cleared byte-wise rather than in the initializer list.
    std::memset(projection_matrix, 0, sizeof(projection_matrix));
    std::memset(view_matrix, 0, sizeof(view_matrix));
    std::memset(model_matrix, 0, sizeof(model_matrix));
}

void Cube::cleanup() {
    prepared = false;

    device.waitIdle();

    cleanup_for_color_conversion_and_transfer();

    cleanup_render_target_resources();

    // Wait for fences from present operations
    for (FrameIndex i = 0; i < get_max_frame_number(); i++) {
        device.waitForFences(1, &fences[i], VK_TRUE, UINT64_MAX);
        device.destroyFence(fences[i], nullptr);
        device.destroySemaphore(image_acquire_semaphores[i], nullptr);
        device.destroySemaphore(draw_complete_semaphores[i], nullptr);
    }

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        device.destroyFramebuffer(draw_resources[i].framebuffer, nullptr);
    }
    device.destroyDescriptorPool(desc_pool, nullptr);

    device.destroyPipeline(pipeline, nullptr);
    device.destroyPipelineCache(pipelineCache, nullptr);
    device.destroyRenderPass(render_pass, nullptr);
    device.destroyPipelineLayout(pipeline_layout, nullptr);
    device.destroyDescriptorSetLayout(desc_layout, nullptr);

    for (uint32_t i = 0; i < texture_count; i++) {
        device.destroyImageView(textures[i].view, nullptr);
        device.destroyImage(textures[i].image, nullptr);
        device.freeMemory(textures[i].mem, nullptr);
        device.destroySampler(textures[i].sampler, nullptr);
    }

    device.destroyImageView(depth.view, nullptr);
    device.destroyImage(depth.image, nullptr);
    device.freeMemory(depth.mem, nullptr);

    for (uint32_t i = 0; i < get_image_count(); i++) {
        device.freeCommandBuffers(cmd_pool, {draw_resources[i].cmd});
        device.destroyBuffer(draw_resources[i].uniform_buffer, nullptr);
        device.unmapMemory(draw_resources[i].uniform_memory);
        device.freeMemory(draw_resources[i].uniform_memory, nullptr);
    }

    device.destroyCommandPool(cmd_pool, nullptr);

    device.waitIdle();
    device.destroy(nullptr);
    inst.destroy(nullptr);
}

// Releases the staging resources of a texture object: its memory is freed first, then
// the image and the buffer are destroyed when their handles are non-null.
void Cube::destroy_texture(texture_object *tex_objs) {
    texture_object &staging = *tex_objs;

    device.freeMemory(staging.mem, nullptr);

    if (staging.image) {
        device.destroyImage(staging.image, nullptr);
    }

    if (staging.buffer) {
        device.destroyBuffer(staging.buffer, nullptr);
    }
}

void Cube::prepare_for_color_conversion_and_transfer() {
    transfer_resources.reset(new TransferResources[get_image_count()]);
    color_conversion_resources.reset(new ColorConversionResource[get_image_count()]);

    pfnImportSemaphoreFdKHR = reinterpret_cast<decltype(pfnImportSemaphoreFdKHR)>(
        vkGetInstanceProcAddr(inst, "vkImportSemaphoreFdKHR"));
    VERIFY(pfnImportSemaphoreFdKHR != nullptr);
    pfnGetSemaphoreFdKHR = reinterpret_cast<decltype(pfnGetSemaphoreFdKHR)>(
        vkGetInstanceProcAddr(inst, "vkGetSemaphoreFdKHR"));
    VERIFY(pfnGetSemaphoreFdKHR != nullptr);
    pfnGetMemoryFdPropertiesKHR = reinterpret_cast<decltype(pfnGetMemoryFdPropertiesKHR)>(
        vkGetInstanceProcAddr(inst, "vkGetMemoryFdPropertiesKHR"));
    VERIFY(pfnGetMemoryFdPropertiesKHR != nullptr);

    build_transfer_syncs();

    if (option.need_rgb2yuv_conversion_after_rendering()) {
        assert(option.width == 1280 && option.height == 720 && "TODO push constant image extents.");

        create_rgb2yuv_convert_images();
        create_rgb2yuv_descriptor_layout();
        create_rgb2yuv_pipeline();
        update_rgb2yuv_descriptors();
        build_rgb2yuv_convert_cmds();
    }
    create_transfer_external_image();
    build_transfer_cmds();
}

// Destroys the synchronization objects, buffers, buffer views, command buffers, pools and
// pipeline objects used by the color-conversion and transfer stages.
//
// The compute command pool is only created when RGB -> YUV conversion is enabled (see
// prepare_for_color_conversion_and_transfer), so command buffers are only freed from a
// pool whose handle is non-null: vkFreeCommandBuffers requires a valid pool handle,
// whereas the vkDestroy*/vkFreeMemory calls accept null handles.
void Cube::cleanup_for_color_conversion_and_transfer() {
    for (uint32_t i = 0; i < get_max_frame_number(); i++) {
        device.destroyFence(transfer_cmd_fences[i]);
        device.destroySemaphore(transfer_acquire_semaphores[i]);
        device.destroySemaphore(transfer_complete_semaphores[i]);
        device.destroySemaphore(color_conversion_acquire_semaphores[i]);
        device.destroySemaphore(color_conversion_complete_semaphores[i]);
    }

    for (uint32_t i = 0; i < get_image_count(); i++) {
        device.destroyBuffer(transfer_resources[i].ni_buffer, nullptr);
        device.freeMemory(transfer_resources[i].ni_buffer_memory, nullptr);

        // Never hand a null command pool to vkFreeCommandBuffers.
        if (transfer_cmd_pool) {
            device.freeCommandBuffers(transfer_cmd_pool, 1, &transfer_resources[i].cmd);
        }
        if (compute_cmd_pool) {
            device.freeCommandBuffers(compute_cmd_pool, {color_conversion_resources[i].cmd});
        }

        device.destroyBufferView(color_conversion_resources[i].plane0_texel_buffer_view, nullptr);
        device.destroyBufferView(color_conversion_resources[i].plane1_texel_buffer_view, nullptr);
        device.destroyBufferView(color_conversion_resources[i].plane2_texel_buffer_view, nullptr);
        device.destroyBuffer(color_conversion_resources[i].yuv_buffer, nullptr);
        device.freeMemory(color_conversion_resources[i].yuv_buffer_memory, nullptr);
    }

    device.destroyDescriptorPool(color_conversion_desc_pool, nullptr);
    device.destroyPipelineLayout(color_conversion_pipeline_layout);
    device.destroyDescriptorSetLayout(color_conversion_desc_layout);
    device.destroyPipeline(color_conversion_pipeline);
    device.destroyCommandPool(compute_cmd_pool);
    device.destroyCommandPool(transfer_cmd_pool);
}

void Cube::create_rgb2yuv_convert_images() {
    auto yuvBufferCreateInfo = vk::BufferCreateInfo()
                                   .setUsage(vk::BufferUsageFlagBits::eStorageTexelBuffer |
                                             vk::BufferUsageFlagBits::eTransferSrc)
                                   .setSharingMode(vk::SharingMode::eExclusive);

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        const vk::DeviceSize bufferSize = encoding_device->get_shared_image_size_by_index(i);
        yuvBufferCreateInfo.setSize(bufferSize);

        const auto result = device.createBuffer(
            &yuvBufferCreateInfo, nullptr, &color_conversion_resources[i].yuv_buffer);
        VERIFY(result == vk::Result::eSuccess);
    }

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        vk::MemoryRequirements memoryRequirements;
        uint32_t memoryTypeIndex = 0;
        {
            device.getBufferMemoryRequirements(color_conversion_resources[i].yuv_buffer,
                                               &memoryRequirements);
            const bool success =
                memory_type_from_properties(memory_properties,
                                            memoryRequirements.memoryTypeBits,
                                            vk::MemoryPropertyFlagBits::eDeviceLocal,
                                            &memoryTypeIndex);
            VERIFY(success == true);
        }

        const auto memAllocInfo =
            vk::MemoryAllocateInfo()
                .setMemoryTypeIndex(memoryTypeIndex)
                .setAllocationSize(encoding_device->get_shared_image_size_by_index(i));

        const auto result = device.allocateMemory(
            &memAllocInfo, nullptr, &color_conversion_resources[i].yuv_buffer_memory);
        VERIFY(result == vk::Result::eSuccess);
    }

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        const auto result = device.bindBufferMemory(color_conversion_resources[i].yuv_buffer,
                                                    color_conversion_resources[i].yuv_buffer_memory,
                                                    0);
        VERIFY(result == vk::Result::eSuccess);
    }

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        size_t plane_0_offset, plane_0_size, plane_1_offset, plane_1_size, plane_2_offset,
            plane_2_size;
        encoding_device->get_shared_image_plane_range_by_index(
            i, ImagePlaneAspect::Plane0, &plane_0_offset, &plane_0_size);
        encoding_device->get_shared_image_plane_range_by_index(
            i, ImagePlaneAspect::Plane1, &plane_1_offset, &plane_1_size);

        const auto plane0BufferViewCreationInfo =
            vk::BufferViewCreateInfo()
                .setBuffer(color_conversion_resources[i].yuv_buffer)
                .setFormat(vk::Format::eR8Unorm)
                .setOffset(plane_0_offset)
                .setRange(plane_0_size);

        auto result =
            device.createBufferView(&plane0BufferViewCreationInfo,
                                    nullptr,
                                    &color_conversion_resources[i].plane0_texel_buffer_view);
        VERIFY(result == vk::Result::eSuccess);

        // YUV420p:
        // plane0 Y
        // plane1 U
        // plane2 V

        const auto plane1BufferViewCreationInfo =
            vk::BufferViewCreateInfo()
                .setBuffer(color_conversion_resources[i].yuv_buffer)
                .setFormat(vk::Format::eR8Unorm)
                .setOffset(plane_1_offset)
                .setRange(plane_1_size);

        result = device.createBufferView(&plane1BufferViewCreationInfo,
                                         nullptr,
                                         &color_conversion_resources[i].plane1_texel_buffer_view);
        VERIFY(result == vk::Result::eSuccess);

        encoding_device->get_shared_image_plane_range_by_index(
            i, ImagePlaneAspect::Plane2, &plane_2_offset, &plane_2_size);
        const auto plane2BufferViewCreationInfo =
            vk::BufferViewCreateInfo()
                .setBuffer(color_conversion_resources[i].yuv_buffer)
                .setFormat(vk::Format::eR8Unorm)
                .setOffset(plane_2_offset)
                .setRange(plane_2_size);

        result = device.createBufferView(&plane2BufferViewCreationInfo,
                                         nullptr,
                                         &color_conversion_resources[i].plane2_texel_buffer_view);
        VERIFY(result == vk::Result::eSuccess);
    }
}

void Cube::create_rgb2yuv_descriptor_layout() {
    const vk::DescriptorSetLayoutBinding layoutBindingInfos[] = {
        vk::DescriptorSetLayoutBinding()
            .setBinding(0)
            .setDescriptorType(vk::DescriptorType::eStorageImage)
            .setDescriptorCount(1)
            .setStageFlags(vk::ShaderStageFlagBits::eCompute)
            .setPImmutableSamplers(nullptr),
        vk::DescriptorSetLayoutBinding()
            .setBinding(1)
            .setDescriptorType(vk::DescriptorType::eStorageTexelBuffer)
            .setDescriptorCount(1)
            .setStageFlags(vk::ShaderStageFlagBits::eCompute)
            .setPImmutableSamplers(nullptr),
        vk::DescriptorSetLayoutBinding()
            .setBinding(2)
            .setDescriptorType(vk::DescriptorType::eStorageTexelBuffer)
            .setDescriptorCount(1)
            .setStageFlags(vk::ShaderStageFlagBits::eCompute)
            .setPImmutableSamplers(nullptr),
        vk::DescriptorSetLayoutBinding()
            .setBinding(3)
            .setDescriptorType(vk::DescriptorType::eStorageTexelBuffer)
            .setDescriptorCount(1)
            .setStageFlags(vk::ShaderStageFlagBits::eCompute)
            .setPImmutableSamplers(nullptr)};

    auto const descriptorLayoutCreationInfo =
        vk::DescriptorSetLayoutCreateInfo().setBindingCount(4).setPBindings(layoutBindingInfos);

    auto result = device.createDescriptorSetLayout(
        &descriptorLayoutCreationInfo, nullptr, &color_conversion_desc_layout);
    VERIFY(result == vk::Result::eSuccess);

    auto const pipelineLayoutCreateInfo =
        vk::PipelineLayoutCreateInfo().setSetLayoutCount(1).setPSetLayouts(
            &color_conversion_desc_layout);

    result = device.createPipelineLayout(
        &pipelineLayoutCreateInfo, nullptr, &color_conversion_pipeline_layout);
    VERIFY(result == vk::Result::eSuccess);
}

// Wraps the compute shader embedded via the .inc file in a shader module.
// NOTE(review): the included words are presumably compiled SPIR-V — confirm against the
// build step that produces rgb2yuv420p_netint.comp.inc.
vk::ShaderModule Cube::create_rgb2yuv_cs() {
    const uint32_t shaderWords[] = {
#include "rgb2yuv420p_netint.comp.inc"
    };
    const auto shaderByteSize = sizeof(shaderWords);

    return prepare_shader_module(shaderWords, shaderByteSize);
}

void Cube::create_rgb2yuv_pipeline() {
    vk::ShaderModule comp_shader_module = create_rgb2yuv_cs();
    const auto shaderStageInfo = vk::PipelineShaderStageCreateInfo()
                                     .setStage(vk::ShaderStageFlagBits::eCompute)
                                     .setModule(comp_shader_module)
                                     .setPName("main");

    auto const pipelineCreateInfo = vk::ComputePipelineCreateInfo()
                                        .setStage(shaderStageInfo)
                                        .setLayout(color_conversion_pipeline_layout);

    const auto result = device.createComputePipelines(
        pipelineCache, 1, &pipelineCreateInfo, nullptr, &color_conversion_pipeline);
    VERIFY(result == vk::Result::eSuccess);

    device.destroyShaderModule(comp_shader_module, nullptr);
}

void Cube::update_rgb2yuv_descriptors() {
    constexpr uint32_t storageImageCount = 1;
    constexpr uint32_t yuv420pStorageBufferCount = 3;
    {
        const uint32_t descritproSetsCount = get_image_count();
        vk::DescriptorPoolSize const poolSizes[2] = {
            vk::DescriptorPoolSize()
                .setType(vk::DescriptorType::eStorageImage)
                .setDescriptorCount(descritproSetsCount * storageImageCount),
            vk::DescriptorPoolSize()
                .setType(vk::DescriptorType::eStorageTexelBuffer)
                .setDescriptorCount(descritproSetsCount * yuv420pStorageBufferCount)};

        auto const descriptorPoolCreationInfo = vk::DescriptorPoolCreateInfo()
                                                    .setMaxSets(descritproSetsCount)
                                                    .setPoolSizeCount(2)
                                                    .setPPoolSizes(poolSizes);

        auto result = device.createDescriptorPool(
            &descriptorPoolCreationInfo, nullptr, &color_conversion_desc_pool);
        VERIFY(result == vk::Result::eSuccess);
    }

    {
        auto const descSetAllocInfo = vk::DescriptorSetAllocateInfo()
                                          .setDescriptorPool(color_conversion_desc_pool)
                                          .setDescriptorSetCount(1)
                                          .setPSetLayouts(&color_conversion_desc_layout);

        constexpr uint32_t descriptorWriteCount = storageImageCount + yuv420pStorageBufferCount;
        vk::WriteDescriptorSet writes[descriptorWriteCount];

        /* If the descriptor refers to a sampler that performs Y′CBCR conversion or samples a
           subsampled image, the sampler must only be used to sample the image in the same
           descriptor. Otherwise, the sampler and image in this type of descriptor can be used
           freely with any other samplers and images.
        */
        writes[0].setDstBinding(0);
        writes[0].setDescriptorCount(1);
        writes[0].setDescriptorType(vk::DescriptorType::eStorageImage);

        writes[1].setDstBinding(1);
        writes[1].setDescriptorCount(1);
        writes[1].setDescriptorType(vk::DescriptorType::eStorageTexelBuffer);

        writes[2].setDstBinding(2);
        writes[2].setDescriptorCount(1);
        writes[2].setDescriptorType(vk::DescriptorType::eStorageTexelBuffer);

        writes[3].setDstBinding(3);
        writes[3].setDescriptorCount(1);
        writes[3].setDescriptorType(vk::DescriptorType::eStorageTexelBuffer);

        for (unsigned int i = 0; i < get_image_count(); i++) {
            const auto descReadImageInfo =
                vk::DescriptorImageInfo()
                    .setImageView(draw_resources[i].color_attachment_image_view)
                    .setImageLayout(vk::ImageLayout::eShaderReadOnlyOptimal);

            auto result = device.allocateDescriptorSets(
                &descSetAllocInfo, &color_conversion_resources[i].descriptor_set);
            VERIFY(result == vk::Result::eSuccess);

            writes[0].setPImageInfo(&descReadImageInfo);
            writes[0].setDstSet(color_conversion_resources[i].descriptor_set);

            writes[1].setPTexelBufferView(&color_conversion_resources[i].plane0_texel_buffer_view);
            writes[1].setDstSet(color_conversion_resources[i].descriptor_set);

            writes[2].setPTexelBufferView(&color_conversion_resources[i].plane1_texel_buffer_view);
            writes[2].setDstSet(color_conversion_resources[i].descriptor_set);

            writes[3].setPTexelBufferView(&color_conversion_resources[i].plane2_texel_buffer_view);
            writes[3].setDstSet(color_conversion_resources[i].descriptor_set);

            device.updateDescriptorSets(
                storageImageCount + yuv420pStorageBufferCount, writes, 0, nullptr);
        }
    }
}

void Cube::build_rgb2yuv_convert_cmds() {
    {
        const auto computeCmdPoolCreateInfo =
            vk::CommandPoolCreateInfo()
                .setFlags(vk::CommandPoolCreateFlagBits::eResetCommandBuffer)
                .setQueueFamilyIndex(compute_queue_family_index);
        const auto result =
            device.createCommandPool(&computeCmdPoolCreateInfo, nullptr, &compute_cmd_pool);
        VERIFY(result == vk::Result::eSuccess);
    }

    for (uint32_t i = 0; i < get_image_count(); i++) {
        auto &color_conversion_cmd = color_conversion_resources[i].cmd;

        const auto colorConversionCmdAllocInfo = vk::CommandBufferAllocateInfo()
                                                     .setCommandPool(compute_cmd_pool)
                                                     .setLevel(vk::CommandBufferLevel::ePrimary)
                                                     .setCommandBufferCount(1);
        auto result =
            device.allocateCommandBuffers(&colorConversionCmdAllocInfo, &color_conversion_cmd);
        VERIFY(result == vk::Result::eSuccess);

        const auto cmdBufferBeginInfo =
            vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eSimultaneousUse);

        result = color_conversion_cmd.begin(&cmdBufferBeginInfo);
        VERIFY(result == vk::Result::eSuccess);

        {
            const vk::ImageMemoryBarrier acquireImageMemoryBarriers[] = {
                vk::ImageMemoryBarrier()
                    .setSrcAccessMask(vk::AccessFlagBits::eColorAttachmentWrite)
                    .setDstAccessMask(vk::AccessFlagBits::eShaderRead)
                    .setOldLayout(vk::ImageLayout::eColorAttachmentOptimal)
                    .setNewLayout(vk::ImageLayout::eShaderReadOnlyOptimal)
                    .setSrcQueueFamilyIndex(graphics_queue_family_index)
                    .setDstQueueFamilyIndex(compute_queue_family_index)
                    .setImage(draw_resources[i].color_attachment_image)
                    .setSubresourceRange(
                        vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1))};

            color_conversion_cmd.pipelineBarrier(vk::PipelineStageFlagBits::eBottomOfPipe,
                                                 vk::PipelineStageFlagBits::eComputeShader,
                                                 vk::DependencyFlagBits(),
                                                 0,
                                                 nullptr,
                                                 0,
                                                 nullptr,
                                                 1,
                                                 acquireImageMemoryBarriers);
        }

        color_conversion_cmd.bindPipeline(vk::PipelineBindPoint::eCompute,
                                          color_conversion_pipeline);
        color_conversion_cmd.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
                                                color_conversion_pipeline_layout,
                                                0,
                                                1,
                                                &color_conversion_resources[i].descriptor_set,
                                                0,
                                                nullptr);
        color_conversion_cmd.dispatch((option.width + 7) / 8 / 2, (option.height + 7) / 8 / 2, 1);

        {
            const vk::ImageMemoryBarrier releaseImageBarriers[] = {
                vk::ImageMemoryBarrier()
                    .setSrcAccessMask(vk::AccessFlags()) // read only in cs
                    .setDstAccessMask(vk::AccessFlags())
                    .setOldLayout(vk::ImageLayout::eShaderReadOnlyOptimal)
                    .setNewLayout(vk::ImageLayout::eColorAttachmentOptimal)
                    .setSrcQueueFamilyIndex(compute_queue_family_index)
                    .setDstQueueFamilyIndex(graphics_queue_family_index)
                    .setImage(draw_resources[i].color_attachment_image)
                    .setSubresourceRange(
                        vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1))};

            color_conversion_cmd.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                                                 vk::PipelineStageFlagBits::eTopOfPipe, // TODO
                                                 vk::DependencyFlagBits(),
                                                 0,
                                                 nullptr,
                                                 0,
                                                 nullptr,
                                                 1,
                                                 releaseImageBarriers);
        }

        result = color_conversion_cmd.end();
        VERIFY(result == vk::Result::eSuccess);
    }
}

void Cube::create_transfer_external_image() {
    const auto externalMemoryBufferCreateInfo = vk::ExternalMemoryBufferCreateInfo().setHandleTypes(
        vk::ExternalMemoryHandleTypeFlagBits::eDmaBufEXT);

    auto create_info = vk::BufferCreateInfo()
                           .setUsage(vk::BufferUsageFlagBits::eTransferDst)
                           .setSharingMode(vk::SharingMode::eExclusive)
                           .setPNext(&externalMemoryBufferCreateInfo);

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        const vk::DeviceSize bufferSize = encoding_device->get_shared_image_size_by_index(i);
        create_info.setSize(bufferSize);

        const auto result =
            device.createBuffer(&create_info, nullptr, &transfer_resources[i].ni_buffer);
        VERIFY(result == vk::Result::eSuccess);
    }

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        const DmaBufFd dmabuf_fd =
            encoding_device->allocate_shared_image_by_index(i, width(), height());
        assert(dmabuf_fd > 0);

        uint32_t memoryTypeIndex = 0;
        {
            VkMemoryFdPropertiesKHR dmaBufProperties = {
                .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR};
            const auto result =
                pfnGetMemoryFdPropertiesKHR(device,
                                            VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
                                            dmabuf_fd,
                                            &dmaBufProperties);
            VERIFY(result == VK_SUCCESS);

            const bool success =
                memory_type_from_properties(memory_properties,
                                            dmaBufProperties.memoryTypeBits,
                                            vk::MemoryPropertyFlagBits::eDeviceLocal,
                                            &memoryTypeIndex);
            VERIFY(success == true);
        }

        const auto importBufferFdInfo =
            vk::ImportMemoryFdInfoKHR()
                .setHandleType(vk::ExternalMemoryHandleTypeFlagBits::eDmaBufEXT)
                .setFd(dmabuf_fd);

        const auto dedicateMemAllocInfo = vk::MemoryDedicatedAllocateInfo()
                                              .setBuffer(transfer_resources[i].ni_buffer)
                                              .setPNext(&importBufferFdInfo);

        const auto memAllocInfo =
            vk::MemoryAllocateInfo()
                .setMemoryTypeIndex(memoryTypeIndex)
                .setAllocationSize(encoding_device->get_shared_image_size_by_index(i))
                .setPNext(&dedicateMemAllocInfo);

        const auto result =
            device.allocateMemory(&memAllocInfo, nullptr, &transfer_resources[i].ni_buffer_memory);
        VERIFY(result == vk::Result::eSuccess);
    }

    for (ResourceIndex i = 0; i < get_image_count(); i++) {
        const auto result = device.bindBufferMemory(
            transfer_resources[i].ni_buffer, transfer_resources[i].ni_buffer_memory, 0);
        VERIFY(result == vk::Result::eSuccess);
    }
}

// Creates one offscreen color target per image: a 2D optimal-tiling image of
// option.width x option.height usable as color attachment, storage image and transfer
// source, plus its device-local memory and a 2D color image view.
void Cube::create_offscreen_render_targets() {
    // Step 1: the images, all created from the same description.
    {
        const uint32_t targetWidth = option.width;
        const uint32_t targetHeight = option.height;

        auto imageInfo = vk::ImageCreateInfo();
        imageInfo.setImageType(vk::ImageType::e2D);
        imageInfo.setFormat(get_render_target_format());
        imageInfo.setExtent({targetWidth, targetHeight, 1});
        imageInfo.setArrayLayers(1);
        imageInfo.setMipLevels(1);
        imageInfo.setSamples(vk::SampleCountFlagBits::e1);
        imageInfo.setTiling(vk::ImageTiling::eOptimal);
        imageInfo.setUsage(vk::ImageUsageFlagBits::eColorAttachment |
                           vk::ImageUsageFlagBits::eStorage | vk::ImageUsageFlagBits::eTransferSrc);
        imageInfo.setSharingMode(vk::SharingMode::eExclusive);
        imageInfo.setQueueFamilyIndexCount(0);
        imageInfo.setPQueueFamilyIndices(nullptr);
        imageInfo.setInitialLayout(vk::ImageLayout::eUndefined);

        for (uint32_t i = 0; i < get_image_count(); i++) {
            const auto result =
                device.createImage(&imageInfo, nullptr, &draw_resources[i].color_attachment_image);
            VERIFY(result == vk::Result::eSuccess);
        }
    }

    // Step 2: memory requirements are queried on image 0 only and reused for all images,
    // which were created from one and the same create info.
    vk::MemoryRequirements memoryRequirements;
    device.getImageMemoryRequirements(draw_resources[0].color_attachment_image,
                                      &memoryRequirements);

    uint32_t memoryTypeIndex;
    {
        const bool success = memory_type_from_properties(memory_properties,
                                                         memoryRequirements.memoryTypeBits,
                                                         vk::MemoryPropertyFlagBits::eDeviceLocal,
                                                         &memoryTypeIndex);
        VERIFY(success == true);
    }

    // Step 3: one allocation per image.
    {
        auto allocInfo = vk::MemoryAllocateInfo();
        allocInfo.setMemoryTypeIndex(memoryTypeIndex);
        allocInfo.setAllocationSize(memoryRequirements.size);

        for (uint32_t i = 0; i < get_image_count(); i++) {
            const auto result = device.allocateMemory(
                &allocInfo, nullptr, &draw_resources[i].color_attachment_image_memory);
            VERIFY(result == vk::Result::eSuccess);
        }
    }

    // Step 4: bind each image to its memory at offset 0.
    for (uint32_t i = 0; i < get_image_count(); i++) {
        auto &target = draw_resources[i];
        const auto result =
            device.bindImageMemory(target.color_attachment_image,
                                   target.color_attachment_image_memory,
                                   0);
        VERIFY(result == vk::Result::eSuccess);
    }

    // Step 5: color views over the single mip level and array layer.
    for (uint32_t i = 0; i < get_image_count(); i++) {
        auto &target = draw_resources[i];

        auto viewInfo = vk::ImageViewCreateInfo();
        viewInfo.setImage(target.color_attachment_image);
        viewInfo.setViewType(vk::ImageViewType::e2D);
        viewInfo.setFormat(get_render_target_format());
        viewInfo.setSubresourceRange(
            vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

        const auto result =
            device.createImageView(&viewInfo, nullptr, &target.color_attachment_image_view);
        VERIFY(result == vk::Result::eSuccess);
    }
}

// Destroys every offscreen color target: the view first, then the image, then its memory.
void Cube::cleanup_offscreen_render_targets() {
    for (uint32_t index = 0; index < get_image_count(); index++) {
        auto &target = draw_resources[index];

        device.destroyImageView(target.color_attachment_image_view, nullptr);
        device.destroyImage(target.color_attachment_image, nullptr);
        device.freeMemory(target.color_attachment_image_memory, nullptr);
    }
}

// Creates the transfer command pool and pre-records one transfer command
// buffer per image (transfer_resources[i].cmd).
//
// In RGBA encoding mode each command buffer copies the rendered color
// attachment image into transfer_resources[i].ni_buffer, bracketed by two
// image barriers that change the image layout and name the graphics and
// transfer queue families as source/destination owners. In every other mode
// it copies color_conversion_resources[i].yuv_buffer into the same ni_buffer.
void Cube::build_transfer_cmds() {
    {
        // Pool on the transfer queue family; individual buffers may be reset.
        const auto transferCmdPoolCreateInfo =
            vk::CommandPoolCreateInfo()
                .setFlags(vk::CommandPoolCreateFlagBits::eResetCommandBuffer)
                .setQueueFamilyIndex(transfer_queue_family_index);
        const auto result =
            device.createCommandPool(&transferCmdPoolCreateInfo, nullptr, &transfer_cmd_pool);
        VERIFY(result == vk::Result::eSuccess);
    }

    for (uint32_t i = 0; i < get_image_count(); i++) {
        {
            // One primary command buffer per image.
            const auto transferCmdAllocInfo = vk::CommandBufferAllocateInfo()
                                                  .setCommandPool(transfer_cmd_pool)
                                                  .setLevel(vk::CommandBufferLevel::ePrimary)
                                                  .setCommandBufferCount(1);
            const auto result =
                device.allocateCommandBuffers(&transferCmdAllocInfo, &transfer_resources[i].cmd);
            VERIFY(result == vk::Result::eSuccess);
        }
        // Default begin info: no usage flags are set.
        const auto cmdBufferBeginInfo = vk::CommandBufferBeginInfo();

        const auto &transfer_cmd = transfer_resources[i].cmd;

        auto result = transfer_cmd.begin(&cmdBufferBeginInfo);
        VERIFY(result == vk::Result::eSuccess);
        if (option.encodingMode == EncodingMode::RGBA) {
            // Acquire side: COLOR_ATTACHMENT_OPTIMAL -> TRANSFER_SRC_OPTIMAL,
            // graphics queue family -> transfer queue family.
            const vk::ImageMemoryBarrier acquireImageMemoryBarriers[] = {
                vk::ImageMemoryBarrier()
                    .setSrcAccessMask(vk::AccessFlagBits::eColorAttachmentWrite)
                    .setDstAccessMask(vk::AccessFlagBits::eTransferRead)
                    .setOldLayout(vk::ImageLayout::eColorAttachmentOptimal)
                    .setNewLayout(vk::ImageLayout::eTransferSrcOptimal)
                    .setSrcQueueFamilyIndex(graphics_queue_family_index)
                    .setDstQueueFamilyIndex(transfer_queue_family_index)
                    .setImage(draw_resources[i].color_attachment_image)
                    .setSubresourceRange(
                        vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1))};

            transfer_cmd.pipelineBarrier(vk::PipelineStageFlagBits::eBottomOfPipe,
                                         vk::PipelineStageFlagBits::eTransfer,
                                         vk::DependencyFlagBits(),
                                         0,
                                         nullptr,
                                         0,
                                         nullptr,
                                         1,
                                         acquireImageMemoryBarriers);
            // Copy the whole width() x height() color aspect (mip level 0,
            // array layer 0) into the buffer; the buffer-side fields of the
            // region are left at their default-constructed values.
            const auto region = vk::BufferImageCopy()
                                    .setImageSubresource(vk::ImageSubresourceLayers(
                                        vk::ImageAspectFlagBits::eColor, 0, 0, 1))
                                    .setImageExtent(vk::Extent3D(width(), height(), 1));
            transfer_cmd.copyImageToBuffer(draw_resources[i].color_attachment_image,
                                           vk::ImageLayout::eTransferSrcOptimal,
                                           transfer_resources[i].ni_buffer,
                                           1,
                                           &region);
            // Release side: TRANSFER_SRC_OPTIMAL -> COLOR_ATTACHMENT_OPTIMAL,
            // transfer queue family -> graphics queue family.
            // NOTE(review): the barrier recorded at the start of
            // draw_build_cmd() uses an UNDEFINED old layout and the compute
            // family as source, so it does not mirror this release - confirm
            // this is intentional.
            const vk::ImageMemoryBarrier releaseImageBarriers[] = {
                vk::ImageMemoryBarrier()
                    .setSrcAccessMask(vk::AccessFlags()) // read only in transfer
                    .setDstAccessMask(vk::AccessFlags())
                    .setOldLayout(vk::ImageLayout::eTransferSrcOptimal)
                    .setNewLayout(vk::ImageLayout::eColorAttachmentOptimal)
                    .setSrcQueueFamilyIndex(transfer_queue_family_index)
                    .setDstQueueFamilyIndex(graphics_queue_family_index)
                    .setImage(draw_resources[i].color_attachment_image)
                    .setSubresourceRange(
                        vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1))};

            transfer_cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                                         vk::PipelineStageFlagBits::eTopOfPipe,
                                         vk::DependencyFlagBits(),
                                         0,
                                         nullptr,
                                         0,
                                         nullptr,
                                         1,
                                         releaseImageBarriers);
        } else {
            // Non-RGBA modes: plain buffer-to-buffer copy from offset 0 to
            // offset 0; the byte count comes from the encoding device.
            const auto region = vk::BufferCopy()
                                    .setSize(encoding_device->get_shared_image_size_by_index(i))
                                    .setSrcOffset(0)
                                    .setDstOffset(0);
            transfer_cmd.copyBuffer(color_conversion_resources[i].yuv_buffer,
                                    transfer_resources[i].ni_buffer,
                                    1,
                                    &region);
        }
        result = transfer_cmd.end();
        VERIFY(result == vk::Result::eSuccess);
    }
}

// Allocates and creates the per-frame synchronization objects: one
// (unsignaled) transfer fence and six semaphores for each of the
// get_max_frame_number() frames in flight. Every creation result is verified.
void Cube::build_transfer_syncs() {
    transfer_cmd_fences.reset(new vk::Fence[get_max_frame_number()]);
    image_acquire_semaphores.reset(new vk::Semaphore[get_max_frame_number()]);
    draw_complete_semaphores.reset(new vk::Semaphore[get_max_frame_number()]);
    transfer_acquire_semaphores.reset(new vk::Semaphore[get_max_frame_number()]);
    transfer_complete_semaphores.reset(new vk::Semaphore[get_max_frame_number()]);
    color_conversion_acquire_semaphores.reset(new vk::Semaphore[get_max_frame_number()]);
    color_conversion_complete_semaphores.reset(new vk::Semaphore[get_max_frame_number()]);
    for (uint32_t i = 0; i < get_max_frame_number(); i++) {
        // Default create info: the fence starts unsignaled.
        const auto fenceCreateInfo = vk::FenceCreateInfo();
        const auto result = device.createFence(&fenceCreateInfo, nullptr, &transfer_cmd_fences[i]);
        VERIFY(result == vk::Result::eSuccess);
    }

    auto semaphoreCreationInfo = vk::SemaphoreCreateInfo();

    for (uint32_t i = 0; i < get_max_frame_number(); i++) {
        vk::Result result;
        result =
            device.createSemaphore(&semaphoreCreationInfo, nullptr, &image_acquire_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);
        result =
            device.createSemaphore(&semaphoreCreationInfo, nullptr, &draw_complete_semaphores[i]);
        // This check was previously missing, so a failure here was silently
        // overwritten by the result of the next call.
        VERIFY(result == vk::Result::eSuccess);
        result = device.createSemaphore(
            &semaphoreCreationInfo, nullptr, &transfer_acquire_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);
        result = device.createSemaphore(
            &semaphoreCreationInfo, nullptr, &transfer_complete_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);
        result = device.createSemaphore(
            &semaphoreCreationInfo, nullptr, &color_conversion_acquire_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);
        result = device.createSemaphore(
            &semaphoreCreationInfo, nullptr, &color_conversion_complete_semaphores[i]);
        VERIFY(result == vk::Result::eSuccess);
    }
}

// Submits the pre-recorded transfer command buffer for `transfer_rsc_index`
// on the transfer queue, blocks on the CPU until it has finished, and then
// hands the frame to the encoding device.
//
// cur_frame           frame number forwarded to the timestamp record and
//                     to the encoding device.
// p_timestamp_record  receives the "end color conversion and DMA" mark once
//                     the transfer fence has been waited on.
// transfer_rsc_index  selects which transfer_resources[] command buffer is
//                     submitted.
//
// In RGBA mode the submission waits on draw_complete_semaphores[frame_index]
// and signals image_acquire_semaphores[frame_index]; otherwise it waits on
// transfer_acquire_semaphores[frame_index] and signals
// transfer_complete_semaphores[frame_index].
void Cube::transfer_framebuffer_to_encoding(const uint32_t cur_frame,
                                            TimestampRecord *p_timestamp_record,
                                            ResourceIndex transfer_rsc_index) {
    const vk::PipelineStageFlags dstWaitStage[] = {
        vk::PipelineStageFlagBits::eTransfer,
    };
    // Index 0 is the non-RGBA pair, index 1 the RGBA pair; exactly one
    // element of each array is used per submission.
    vk::Semaphore waitSemaphores[] = {
        transfer_acquire_semaphores[frame_index],
        draw_complete_semaphores[frame_index],
    };
    vk::Semaphore signalSemaphores[] = {
        transfer_complete_semaphores[frame_index],
        image_acquire_semaphores[frame_index],
    };
    auto submit_info = vk::SubmitInfo()
                           .setPWaitDstStageMask(dstWaitStage)
                           .setWaitSemaphoreCount(1)
                           .setSignalSemaphoreCount(1)
                           .setCommandBufferCount(1)
                           .setPCommandBuffers(&transfer_resources[transfer_rsc_index].cmd);
    if (option.encodingMode == EncodingMode::RGBA) {
        submit_info.setPWaitSemaphores(waitSemaphores + 1)
            .setPSignalSemaphores(signalSemaphores + 1);
    } else {
        submit_info.setPWaitSemaphores(waitSemaphores).setPSignalSemaphores(signalSemaphores);
    }
    const auto result = transfer_queue.submit(1, &submit_info, transfer_cmd_fences[frame_index]);
    VERIFY(result == vk::Result::eSuccess);

    // The encoder consumes the transfer destination right after this wait,
    // so a failed wait (e.g. device loss) must not be ignored.
    const auto wait_result =
        device.waitForFences(1, &transfer_cmd_fences[frame_index], VK_TRUE, UINT64_MAX);
    VERIFY(wait_result == vk::Result::eSuccess);
    device.resetFences({transfer_cmd_fences[frame_index]});
    p_timestamp_record->mark_end_color_conversion_and_dma(cur_frame);

    encoding_device->encoding_on_memory(
        cur_frame, p_timestamp_record, frame_index, transfer_rsc_index);
}

void Cube::color_conversion_and_transfer_to_encoding(const uint32_t cur_frame,
                                                     TimestampRecord *p_timestamp_record,
                                                     ResourceIndex color_conversion_rsc_index) {
    const vk::PipelineStageFlags dstWaitStage[] = {
        vk::PipelineStageFlagBits::eComputeShader,
        vk::PipelineStageFlagBits::eComputeShader,
    };
    vk::Semaphore waitSemaphores[] = {
        draw_complete_semaphores[frame_index],
        transfer_complete_semaphores[frame_index],
    };
    vk::Semaphore signalSemaphores[] = {
        image_acquire_semaphores[frame_index],
        transfer_acquire_semaphores[frame_index],
    };
    auto submit_info =
        vk::SubmitInfo()
            .setCommandBufferCount(1)
            .setPCommandBuffers(&color_conversion_resources[color_conversion_rsc_index].cmd)
            .setSignalSemaphoreCount(2)
            .setPSignalSemaphores(signalSemaphores);
    if (cur_frame > 1) {
        submit_info.setWaitSemaphoreCount(2)
            .setPWaitDstStageMask(dstWaitStage)
            .setPWaitSemaphores(waitSemaphores);
    } else {
        submit_info.setWaitSemaphoreCount(1)
            .setPWaitDstStageMask(dstWaitStage)
            .setPWaitSemaphores(waitSemaphores);
    }

    const auto result = compute_queue.submit(1, &submit_info, nullptr);
    VERIFY(result == vk::Result::eSuccess);
}

// Renders one frame and pushes it through the encode pipeline.
//
// Steps: wait for and reset the frame's throttle fence, acquire a shared
// image index from the encoding device, refresh that image's data buffer,
// submit its pre-recorded draw command buffer on the graphics queue, run the
// color conversion submission when the encoding mode is not RGBA, then run
// the transfer/encode step, and finally advance frame_index modulo
// get_max_frame_number().
//
// cur_frame           frame number; for cur_frame <= 1 the graphics
//                     submission does not wait on any semaphore.
// p_timestamp_record  receives the "begin color conversion and DMA" mark
//                     just before the graphics submission and is forwarded to
//                     the later stages.
void Cube::draw(const uint32_t cur_frame, TimestampRecord *p_timestamp_record) {
    // Ensure no more than get_max_frame_number() renderings are outstanding
    {
        const auto wait_result =
            device.waitForFences(1, &fences[frame_index], VK_TRUE, UINT64_MAX);
        VERIFY(wait_result == vk::Result::eSuccess);
    }
    device.resetFences({fences[frame_index]});

    const vk::PipelineStageFlags dstWaitStage[] = {
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
    };
    vk::Semaphore waitSemaphores[] = {
        image_acquire_semaphores[frame_index],
    };
    vk::Semaphore signalSemaphores[] = {
        draw_complete_semaphores[frame_index],
    };
    ResourceIndex active_image_index(encoding_device->acquire_shared_image(frame_index));
    update_data_buffer(active_image_index);

    auto submit_info = vk::SubmitInfo()
                           .setSignalSemaphoreCount(1)
                           .setPSignalSemaphores(signalSemaphores)
                           .setCommandBufferCount(1)
                           .setPCommandBuffers(&draw_resources[active_image_index].cmd);
    // The wait on image_acquire_semaphores is only attached for
    // cur_frame > 1; earlier frames are submitted without a wait semaphore.
    if (cur_frame > 1) {
        submit_info.setWaitSemaphoreCount(1)
            .setPWaitDstStageMask(dstWaitStage)
            .setPWaitSemaphores(waitSemaphores);
    }

    p_timestamp_record->mark_begin_color_conversion_and_dma(cur_frame);
    const auto result = graphics_queue.submit(1, &submit_info, fences[frame_index]);
    VERIFY(result == vk::Result::eSuccess);
    if (option.debug_always_cpu_wait) {
        // Debug aid: serialize CPU and GPU. The fence is not reset here, so
        // the wait at the top of a later draw() for this slot still succeeds.
        const auto wait_result =
            device.waitForFences(1, &fences[frame_index], VK_TRUE, UINT64_MAX);
        VERIFY(wait_result == vk::Result::eSuccess);
    }

    if (option.encodingMode != EncodingMode::RGBA) {
        color_conversion_and_transfer_to_encoding(
            cur_frame, p_timestamp_record, active_image_index);
    }

    transfer_framebuffer_to_encoding(cur_frame, p_timestamp_record, active_image_index);

    frame_index += 1;
    frame_index %= get_max_frame_number();
}

// Records the complete draw command buffer for one offscreen image.
//
// commandBuffer   command buffer to record into (begun and ended here).
// current_buffer  index into draw_resources[] selecting the framebuffer,
//                 color attachment image and descriptor set to use.
//
// Recorded sequence: an image barrier moving the color attachment to
// COLOR_ATTACHMENT_OPTIMAL, the render pass drawing the 36 cube vertices
// into a centered square viewport, and a second image barrier handing the
// image to the transfer queue family (RGBA mode) or the compute queue family
// (all other modes).
void Cube::draw_build_cmd(vk::CommandBuffer commandBuffer, ResourceIndex current_buffer) {
    auto const commandInfo =
        vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eSimultaneousUse);

    // Clear color (0.2, 0.2, 0.2, 0.2); clear depth 1.0, clear stencil 0.
    vk::ClearValue const clearValues[2] = {
        vk::ClearColorValue(std::array<float, 4>({{0.2f, 0.2f, 0.2f, 0.2f}})),
        vk::ClearDepthStencilValue(1.0f, 0u)};

    auto const passInfo =
        vk::RenderPassBeginInfo()
            .setRenderPass(render_pass)
            .setFramebuffer(draw_resources[current_buffer].framebuffer)
            .setRenderArea(
                vk::Rect2D(vk::Offset2D(0, 0), vk::Extent2D((uint32_t)width(), (uint32_t)height())))
            .setClearValueCount(2)
            .setPClearValues(clearValues);

    auto result = commandBuffer.begin(&commandInfo);
    VERIFY(result == vk::Result::eSuccess);

    // UNDEFINED -> COLOR_ATTACHMENT_OPTIMAL: the previous image contents are
    // not preserved by this transition.
    // NOTE(review): the source queue family is always the compute family,
    // including in RGBA mode where the image is handed back by the transfer
    // queue (see build_transfer_cmds) - confirm compute_queue_family_index is
    // valid in that mode.
    const auto acquireImageMemoryBarrier =
        vk::ImageMemoryBarrier()
            .setSrcAccessMask(vk::AccessFlags())
            .setDstAccessMask(vk::AccessFlagBits::eColorAttachmentWrite)
            .setOldLayout(vk::ImageLayout::eUndefined)
            .setNewLayout(vk::ImageLayout::eColorAttachmentOptimal)
            .setSrcQueueFamilyIndex(compute_queue_family_index)
            .setDstQueueFamilyIndex(graphics_queue_family_index)
            .setImage(draw_resources[current_buffer].color_attachment_image)
            .setSubresourceRange(
                vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

    commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
                                  vk::PipelineStageFlagBits::eColorAttachmentOutput,
                                  vk::DependencyFlagBits(),
                                  0,
                                  nullptr,
                                  0,
                                  nullptr,
                                  1,
                                  &acquireImageMemoryBarrier);

    commandBuffer.beginRenderPass(&passInfo, vk::SubpassContents::eInline);
    commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
    commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
                                     pipeline_layout,
                                     0,
                                     1,
                                     &draw_resources[current_buffer].descriptor_set,
                                     0,
                                     nullptr);
    // Square viewport whose side is the smaller of width() and height(),
    // centered along the longer axis.
    float viewport_dimension;
    float viewport_x = 0.0f;
    float viewport_y = 0.0f;
    if (width() < height()) {
        viewport_dimension = (float)width();
        viewport_y = (height() - width()) / 2.0f;
    } else {
        viewport_dimension = (float)height();
        viewport_x = (width() - height()) / 2.0f;
    }
    auto const viewport = vk::Viewport()
                              .setX(viewport_x)
                              .setY(viewport_y)
                              .setWidth((float)viewport_dimension)
                              .setHeight((float)viewport_dimension)
                              .setMinDepth((float)0.0f)
                              .setMaxDepth((float)1.0f);
    commandBuffer.setViewport(0, 1, &viewport);

    // The scissor covers the full width() x height() area.
    vk::Rect2D const scissor(vk::Offset2D(0, 0), vk::Extent2D(width(), height()));
    commandBuffer.setScissor(0, 1, &scissor);
    // 12 triangles * 3 vertices, one instance.
    commandBuffer.draw(12 * 3, 1, 0, 0);
    // NOTE(review): an earlier comment here said that ending the render pass
    // changes the image's layout from COLOR_ATTACHMENT_OPTIMAL to
    // PRESENT_SRC_KHR. The barrier below declares COLOR_ATTACHMENT_OPTIMAL as
    // the old layout, so the render pass presumably leaves the image in that
    // layout - confirm against prepare_render_pass().
    commandBuffer.endRenderPass();

    // Fields common to both encoding modes; the destination side is filled
    // in below.
    auto releaseImageBarrier =
        vk::ImageMemoryBarrier()
            .setSrcAccessMask(vk::AccessFlagBits::eColorAttachmentWrite) // No shader writes.
            .setOldLayout(vk::ImageLayout::eColorAttachmentOptimal)
            .setSrcQueueFamilyIndex(graphics_queue_family_index)
            .setImage(draw_resources[current_buffer].color_attachment_image)
            .setSubresourceRange(
                vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

    if (option.encodingMode == EncodingMode::RGBA) {
        // RGBA: the transfer queue copies the image out directly.
        releaseImageBarrier.setDstAccessMask(vk::AccessFlagBits::eTransferRead)
            .setNewLayout(vk::ImageLayout::eTransferSrcOptimal)
            .setDstQueueFamilyIndex(transfer_queue_family_index);
    } else {
        // Other modes: the image is next read by shaders on the compute
        // queue family.
        releaseImageBarrier.setDstAccessMask(vk::AccessFlagBits::eShaderRead)
            .setNewLayout(vk::ImageLayout::eShaderReadOnlyOptimal)
            .setDstQueueFamilyIndex(compute_queue_family_index);
    }
    // The destination stage is eTransfer in both modes, including the
    // compute hand-off.
    commandBuffer.pipelineBarrier(
        vk::PipelineStageFlagBits::eColorAttachmentOutput, // TODO: Fragment shader?
        vk::PipelineStageFlagBits::eTransfer,
        vk::DependencyFlagBits(),
        0,
        nullptr,
        0,
        nullptr,
        1,
        &releaseImageBarrier);

    result = commandBuffer.end();
    VERIFY(result == vk::Result::eSuccess);
}

void Cube::flush_init_cmd() {
    // TODO: hmm.
    // This function could get called twice if the texture uses a staging
    // buffer
    // In that case the second call should be ignored
    if (!cmd) {
        return;
    }

    auto result = cmd.end();
    VERIFY(result == vk::Result::eSuccess);

    auto const fenceInfo = vk::FenceCreateInfo();
    vk::Fence fence;
    result = device.createFence(&fenceInfo, nullptr, &fence);
    VERIFY(result == vk::Result::eSuccess);

    vk::CommandBuffer const commandBuffers[] = {cmd};
    auto const submitInfo =
        vk::SubmitInfo().setCommandBufferCount(1).setPCommandBuffers(commandBuffers);

    result = graphics_queue.submit(1, &submitInfo, fence);
    VERIFY(result == vk::Result::eSuccess);

    result = device.waitForFences(1, &fence, VK_TRUE, UINT64_MAX);
    VERIFY(result == vk::Result::eSuccess);

    device.freeCommandBuffers(cmd_pool, 1, commandBuffers);
    device.destroyFence(fence, nullptr);

    cmd = vk::CommandBuffer();
}

// Top-level initialization: sets up the Vulkan instance/physical device,
// selects queue families, creates the logical device, initializes the
// camera matrices and then builds all rendering resources via prepare().
void Cube::init() {
    vec3 eye = {0.0f, 3.0f, 5.0f};
    vec3 origin = {0, 0, 0};
    vec3 up = {0.0f, 1.0f, 0.0};

    init_vk();

    select_queue_family();

    create_device();

    // The matrices must be valid before prepare(): it calls
    // prepare_cube_data_buffers(), which multiplies projection, view and
    // model matrices into the MVP that seeds every uniform buffer.
    // Previously they were only assigned after prepare() had returned.
    mat4x4_perspective(projection_matrix, (float)degreesToRadians(45.0f), 1.0f, 0.1f, 100.0f);
    mat4x4_look_at(view_matrix, eye, origin, up);
    mat4x4_identity(model_matrix);

    projection_matrix[1][1] *= -1; // Flip projection matrix from GL to Vulkan orientation.

    prepare();

    spin_angle = 4.0f;
    spin_increment = 0.2f;
}

void Cube::init_vk() {
    bool need_display;
    int32_t blacklisted_gpu_no;
    blacklisted_gpu_no = -1;
    need_display = false;

    vk::QueueFamilyProperties *queue_props_ptr = nullptr;
    init_vulkan_instance_and_phys_device(
        /*in_need_validate=*/option.validate,
        /*in_need_display=*/need_display,
        /*in_force_select_gpu_number=*/option.gpu_number,
        /*in_force_exclude_gpu_number=*/blacklisted_gpu_no,
        /*out_gpu_number=*/&selected_gpu_number,
        &inst,
        &enabled_extension_count,
        extension_names,
        &enabled_layer_count,
        layer_names,
        &gpu,
        &gpu_props,
        &queue_family_count,
        &queue_props_ptr);

    queue_props.reset(queue_props_ptr);

    // Get Memory information and properties
    gpu.getMemoryProperties(&memory_properties);
}

void Cube::select_queue_family() {
    uint32_t graphicsQueueFamilyIndex = UINT32_MAX;
    uint32_t transferQueueFamilyIndex = UINT32_MAX;
    for (uint32_t i = 0; i < queue_family_count; i++) {
        if ((queue_props[i].queueFlags & vk::QueueFlagBits::eTransfer) &&
            !(queue_props[i].queueFlags & vk::QueueFlagBits::eGraphics) &&
            !(queue_props[i].queueFlags & vk::QueueFlagBits::eCompute)) {
            transferQueueFamilyIndex = i;
            break;
        }
    }
    for (uint32_t i = 0; i < queue_family_count; i++) {
        if (queue_props[i].queueFlags & vk::QueueFlagBits::eGraphics) {
            graphicsQueueFamilyIndex = i;
            break;
        }
    }

    if (transferQueueFamilyIndex == UINT32_MAX) {
        ERR_EXIT("Could not find transfer only queues\n", "Device Initialization Failure");
    }

    transfer_queue_family_index = transferQueueFamilyIndex;

    // Generate error if could not find a graphics queue
    if (graphicsQueueFamilyIndex == UINT32_MAX) {
        ERR_EXIT("Could not find graphics queues\n", "Device Initialization Failure");
    }

    graphics_queue_family_index = graphicsQueueFamilyIndex;

    if (option.need_rgb2yuv_conversion_after_rendering()) {
        uint32_t computeQueueFamilyIndex = UINT32_MAX;
        for (uint32_t i = 0; i < queue_family_count; i++) {
            if (queue_props[i].queueFlags & vk::QueueFlagBits::eCompute &&
                !(queue_props[i].queueFlags & vk::QueueFlagBits::eGraphics)) {
                computeQueueFamilyIndex = i;
                break;
            }
        }

        if (computeQueueFamilyIndex == UINT32_MAX) {
            ERR_EXIT("Could not find compute queues\n", "Device Initialization Failure");
        }

        compute_queue_family_index = computeQueueFamilyIndex;
    }
}

// Creates the logical device with one queue from the graphics family, one
// from the transfer family and - when RGB->YUV conversion after rendering is
// needed - one from the compute family, fetches those queues, and creates
// the per-frame throttle fences (initially signaled). Resets frame_index
// to 0.
void Cube::create_device() {
    // All queues share the same single priority value.
    float const priorities[1] = {0.0f};

    uint32_t queueCount = 0;
    vk::DeviceQueueCreateInfo queues[3];

    queues[queueCount].setQueueFamilyIndex(graphics_queue_family_index);
    queues[queueCount].setQueueCount(1);
    queues[queueCount].setPQueuePriorities(priorities);
    queueCount++;
    if (option.need_rgb2yuv_conversion_after_rendering()) {
        queues[queueCount].setQueueFamilyIndex(compute_queue_family_index);
        queues[queueCount].setQueueCount(1);
        queues[queueCount].setPQueuePriorities(priorities);
        queueCount++;
    }
    queues[queueCount].setQueueFamilyIndex(transfer_queue_family_index);
    queues[queueCount].setQueueCount(1);
    queues[queueCount].setPQueuePriorities(priorities);
    queueCount++;

    auto deviceInfo = vk::DeviceCreateInfo()
                          .setQueueCreateInfoCount(queueCount)
                          .setPQueueCreateInfos(queues)
                          .setEnabledLayerCount(enabled_layer_count)
                          .setPpEnabledLayerNames((const char *const *)layer_names)
                          .setEnabledExtensionCount(enabled_extension_count)
                          .setPpEnabledExtensionNames((const char *const *)extension_names)
                          .setPEnabledFeatures(nullptr);

    auto result = gpu.createDevice(&deviceInfo, nullptr, &device);
    VERIFY(result == vk::Result::eSuccess);

    device.getQueue(graphics_queue_family_index, 0, &graphics_queue);
    if (option.need_rgb2yuv_conversion_after_rendering()) {
        device.getQueue(compute_queue_family_index, 0, &compute_queue);
    }
    device.getQueue(transfer_queue_family_index, 0, &transfer_queue);

    // Create fences that we can use to throttle if we get too far ahead of
    // the GPU. They start signaled so the first wait on each slot in draw()
    // returns immediately.
    auto const fence_ci = vk::FenceCreateInfo().setFlags(vk::FenceCreateFlagBits::eSignaled);
    fences.reset(new vk::Fence[get_max_frame_number()]);
    for (uint32_t i = 0; i < get_max_frame_number(); i++) {
        result = device.createFence(&fence_ci, nullptr, &fences[i]);
        VERIFY(result == vk::Result::eSuccess);
    }
    frame_index = 0;
}

void Cube::prepare() {
    auto const cmd_pool_info =
        vk::CommandPoolCreateInfo().setQueueFamilyIndex(graphics_queue_family_index);
    auto result = device.createCommandPool(&cmd_pool_info, nullptr, &cmd_pool);
    VERIFY(result == vk::Result::eSuccess);

    auto const cmd = vk::CommandBufferAllocateInfo()
                         .setCommandPool(cmd_pool)
                         .setLevel(vk::CommandBufferLevel::ePrimary)
                         .setCommandBufferCount(1);

    result = device.allocateCommandBuffers(&cmd, &this->cmd);
    VERIFY(result == vk::Result::eSuccess);

    auto const cmd_buf_info = vk::CommandBufferBeginInfo().setPInheritanceInfo(nullptr);

    result = this->cmd.begin(&cmd_buf_info);
    VERIFY(result == vk::Result::eSuccess);

    prepare_render_target_resources();

    prepare_depth();
    prepare_textures();
    prepare_cube_data_buffers();

    prepare_descriptor_layout();
    prepare_render_pass();
    prepare_pipeline();

    for (uint32_t i = 0; i < get_image_count(); ++i) {
        result = device.allocateCommandBuffers(&cmd, &draw_resources[i].cmd);
        VERIFY(result == vk::Result::eSuccess);
    }

    prepare_descriptor_pool();
    prepare_descriptor_set();

    prepare_framebuffers();

    for (uint32_t i = 0; i < get_image_count(); ++i) {
        draw_build_cmd(draw_resources[i].cmd, i);
    }

    /*
     * Prepare functions above may generate pipeline commands
     * that need to be flushed before beginning the render loop.
     */
    flush_init_cmd();
    if (staging_texture.buffer) {
        destroy_texture(&staging_texture);
    }

    prepare_for_color_conversion_and_transfer();

    prepared = true;
}

void Cube::prepare_cube_data_buffers() {
    mat4x4 VP;
    mat4x4_mul(VP, projection_matrix, view_matrix);

    mat4x4 MVP;
    mat4x4_mul(MVP, VP, model_matrix);

    vktexcube_vs_uniform data;
    memcpy(data.mvp, MVP, sizeof(MVP));
    //    dumpMatrix("MVP", MVP)

    for (int32_t i = 0; i < 12 * 3; i++) {
        data.position[i][0] = g_vertex_buffer_data[i * 3];
        data.position[i][1] = g_vertex_buffer_data[i * 3 + 1];
        data.position[i][2] = g_vertex_buffer_data[i * 3 + 2];
        data.position[i][3] = 1.0f;
        data.attr[i][0] = g_uv_buffer_data[2 * i];
        data.attr[i][1] = g_uv_buffer_data[2 * i + 1];
        data.attr[i][2] = 0;
        data.attr[i][3] = 0;
    }

    auto const buf_info = vk::BufferCreateInfo()
                              .setSize(sizeof(data))
                              .setUsage(vk::BufferUsageFlagBits::eUniformBuffer);

    for (unsigned int i = 0; i < get_image_count(); i++) {
        auto result = device.createBuffer(&buf_info, nullptr, &draw_resources[i].uniform_buffer);
        VERIFY(result == vk::Result::eSuccess);

        vk::MemoryRequirements mem_reqs;
        device.getBufferMemoryRequirements(draw_resources[i].uniform_buffer, &mem_reqs);

        auto mem_alloc =
            vk::MemoryAllocateInfo().setAllocationSize(mem_reqs.size).setMemoryTypeIndex(0);

        bool const pass = memory_type_from_properties(memory_properties,
                                                      mem_reqs.memoryTypeBits,
                                                      vk::MemoryPropertyFlagBits::eHostVisible |
                                                          vk::MemoryPropertyFlagBits::eHostCoherent,
                                                      &mem_alloc.memoryTypeIndex);
        VERIFY(pass);

        result = device.allocateMemory(&mem_alloc, nullptr, &draw_resources[i].uniform_memory);
        VERIFY(result == vk::Result::eSuccess);

        result = device.mapMemory(draw_resources[i].uniform_memory,
                                  0,
                                  VK_WHOLE_SIZE,
                                  vk::MemoryMapFlags(),
                                  &draw_resources[i].uniform_memory_ptr);
        VERIFY(result == vk::Result::eSuccess);

        memcpy(draw_resources[i].uniform_memory_ptr, &data, sizeof data);

        result = device.bindBufferMemory(
            draw_resources[i].uniform_buffer, draw_resources[i].uniform_memory, 0);
        VERIFY(result == vk::Result::eSuccess);
    }
}

void Cube::prepare_depth() {
    depth.format = vk::Format::eD16Unorm;

    auto const image = vk::ImageCreateInfo()
                           .setImageType(vk::ImageType::e2D)
                           .setFormat(depth.format)
                           .setExtent({(uint32_t)width(), (uint32_t)height(), 1})
                           .setMipLevels(1)
                           .setArrayLayers(1)
                           .setSamples(vk::SampleCountFlagBits::e1)
                           .setTiling(vk::ImageTiling::eOptimal)
                           .setUsage(vk::ImageUsageFlagBits::eDepthStencilAttachment)
                           .setSharingMode(vk::SharingMode::eExclusive)
                           .setQueueFamilyIndexCount(0)
                           .setPQueueFamilyIndices(nullptr)
                           .setInitialLayout(vk::ImageLayout::eUndefined);

    auto result = device.createImage(&image, nullptr, &depth.image);
    VERIFY(result == vk::Result::eSuccess);

    vk::MemoryRequirements mem_reqs;
    device.getImageMemoryRequirements(depth.image, &mem_reqs);

    depth.mem_alloc.setAllocationSize(mem_reqs.size);
    depth.mem_alloc.setMemoryTypeIndex(0);

    auto const pass = memory_type_from_properties(memory_properties,
                                                  mem_reqs.memoryTypeBits,
                                                  vk::MemoryPropertyFlagBits::eDeviceLocal,
                                                  &depth.mem_alloc.memoryTypeIndex);
    VERIFY(pass);

    result = device.allocateMemory(&depth.mem_alloc, nullptr, &depth.mem);
    VERIFY(result == vk::Result::eSuccess);

    result = device.bindImageMemory(depth.image, depth.mem, 0);
    VERIFY(result == vk::Result::eSuccess);

    auto const view = vk::ImageViewCreateInfo()
                          .setImage(depth.image)
                          .setViewType(vk::ImageViewType::e2D)
                          .setFormat(depth.format)
                          .setSubresourceRange(vk::ImageSubresourceRange(
                              vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1));
    result = device.createImageView(&view, nullptr, &depth.view);
    VERIFY(result == vk::Result::eSuccess);
}

void Cube::prepare_descriptor_layout() {
    vk::DescriptorSetLayoutBinding const layout_bindings[2] = {
        vk::DescriptorSetLayoutBinding()
            .setBinding(0)
            .setDescriptorType(vk::DescriptorType::eUniformBuffer)
            .setDescriptorCount(1)
            .setStageFlags(vk::ShaderStageFlagBits::eVertex)
            .setPImmutableSamplers(nullptr),
        vk::DescriptorSetLayoutBinding()
            .setBinding(1)
            .setDescriptorType(vk::DescriptorType::eCombinedImageSampler)
            .setDescriptorCount(texture_count)
            .setStageFlags(vk::ShaderStageFlagBits::eFragment)
            .setPImmutableSamplers(nullptr)};

    auto const descriptor_layout =
        vk::DescriptorSetLayoutCreateInfo().setBindingCount(2).setPBindings(layout_bindings);

    auto result = device.createDescriptorSetLayout(&descriptor_layout, nullptr, &desc_layout);
    VERIFY(result == vk::Result::eSuccess);

    auto const pPipelineLayoutCreateInfo =
        vk::PipelineLayoutCreateInfo().setSetLayoutCount(1).setPSetLayouts(&desc_layout);

    result = device.createPipelineLayout(&pPipelineLayoutCreateInfo, nullptr, &pipeline_layout);
    VERIFY(result == vk::Result::eSuccess);
}

void Cube::prepare_descriptor_pool() {
    const uint32_t activeImageCount = get_image_count();
    vk::DescriptorPoolSize const poolSizes[2] = {
        vk::DescriptorPoolSize()
            .setType(vk::DescriptorType::eUniformBuffer)
            .setDescriptorCount(activeImageCount),
        vk::DescriptorPoolSize()
            .setType(vk::DescriptorType::eCombinedImageSampler)
            .setDescriptorCount(activeImageCount * texture_count)};

    auto const descriptor_pool = vk::DescriptorPoolCreateInfo()
                                     .setMaxSets(activeImageCount)
                                     .setPoolSizeCount(2)
                                     .setPPoolSizes(poolSizes);

    auto result = device.createDescriptorPool(&descriptor_pool, nullptr, &desc_pool);
    VERIFY(result == vk::Result::eSuccess);
}

void Cube::prepare_descriptor_set() {
    auto const alloc_info = vk::DescriptorSetAllocateInfo()
                                .setDescriptorPool(desc_pool)
                                .setDescriptorSetCount(1)
                                .setPSetLayouts(&desc_layout);

    auto buffer_info =
        vk::DescriptorBufferInfo().setOffset(0).setRange(sizeof(struct vktexcube_vs_uniform));

    vk::DescriptorImageInfo tex_descs[texture_count];
    for (uint32_t i = 0; i < texture_count; i++) {
        tex_descs[i].setSampler(textures[i].sampler);
        tex_descs[i].setImageView(textures[i].view);
        tex_descs[i].setImageLayout(vk::ImageLayout::eShaderReadOnlyOptimal);
    }

    vk::WriteDescriptorSet writes[2];

    writes[0].setDstBinding(0);
    writes[0].setDescriptorCount(1);
    writes[0].setDescriptorType(vk::DescriptorType::eUniformBuffer);
    writes[0].setPBufferInfo(&buffer_info);

    writes[1].setDstBinding(1);
    writes[1].setDescriptorCount(texture_count);
    writes[1].setDescriptorType(vk::DescriptorType::eCombinedImageSampler);
    writes[1].setPImageInfo(tex_descs);

    for (unsigned int i = 0; i < get_image_count(); i++) {
        auto result = device.allocateDescriptorSets(&alloc_info, &draw_resources[i].descriptor_set);
        VERIFY(result == vk::Result::eSuccess);

        buffer_info.setBuffer(draw_resources[i].uniform_buffer);
        writes[0].setDstSet(draw_resources[i].descriptor_set);
        writes[1].setDstSet(draw_resources[i].descriptor_set);
        device.updateDescriptorSets(2, writes, 0, nullptr);
    }
}

void Cube::prepare_framebuffers() {
    vk::ImageView attachments[2];
    attachments[1] = depth.view;

    auto const fb_info = vk::FramebufferCreateInfo()
                             .setRenderPass(render_pass)
                             .setAttachmentCount(2)
                             .setPAttachments(attachments)
                             .setWidth((uint32_t)width())
                             .setHeight((uint32_t)height())
                             .setLayers(1);

    for (uint32_t i = 0; i < get_image_count(); i++) {
        attachments[0] = get_render_target_image_view(i);
        auto const result =
            device.createFramebuffer(&fb_info, nullptr, &draw_resources[i].framebuffer);
        VERIFY(result == vk::Result::eSuccess);
    }
}

// Builds the fragment shader module from the 32-bit code words included from
// cube.frag.inc, stores it in frag_shader_module and returns it.
vk::ShaderModule Cube::prepare_fs() {
    const uint32_t frag_code_words[] = {
#include "cube.frag.inc"
    };

    // The size argument is in bytes, hence sizeof on the whole array.
    const size_t frag_code_bytes = sizeof(frag_code_words);
    frag_shader_module = prepare_shader_module(frag_code_words, frag_code_bytes);
    return frag_shader_module;
}

void Cube::prepare_pipeline() {
    vk::PipelineCacheCreateInfo const pipelineCacheInfo;
    auto result = device.createPipelineCache(&pipelineCacheInfo, nullptr, &pipelineCache);
    VERIFY(result == vk::Result::eSuccess);

    vk::PipelineShaderStageCreateInfo const shaderStageInfo[2] = {
        vk::PipelineShaderStageCreateInfo()
            .setStage(vk::ShaderStageFlagBits::eVertex)
            .setModule(prepare_vs())
            .setPName("main"),
        vk::PipelineShaderStageCreateInfo()
            .setStage(vk::ShaderStageFlagBits::eFragment)
            .setModule(prepare_fs())
            .setPName("main")};

    vk::PipelineVertexInputStateCreateInfo const vertexInputInfo;

    auto const inputAssemblyInfo = vk::PipelineInputAssemblyStateCreateInfo().setTopology(
        vk::PrimitiveTopology::eTriangleList);

    // TODO: Where are pViewports and pScissors set?
    auto const viewportInfo =
        vk::PipelineViewportStateCreateInfo().setViewportCount(1).setScissorCount(1);

    auto const rasterizationInfo = vk::PipelineRasterizationStateCreateInfo()
                                       .setDepthClampEnable(VK_FALSE)
                                       .setRasterizerDiscardEnable(VK_FALSE)
                                       .setPolygonMode(vk::PolygonMode::eFill)
                                       .setCullMode(vk::CullModeFlagBits::eBack)
                                       .setFrontFace(vk::FrontFace::eCounterClockwise)
                                       .setDepthBiasEnable(VK_FALSE)
                                       .setLineWidth(1.0f);

    auto const multisampleInfo = vk::PipelineMultisampleStateCreateInfo();

    auto const stencilOp = vk::StencilOpState()
                               .setFailOp(vk::StencilOp::eKeep)
                               .setPassOp(vk::StencilOp::eKeep)
                               .setCompareOp(vk::CompareOp::eAlways);

    auto const depthStencilInfo = vk::PipelineDepthStencilStateCreateInfo()
                                      .setDepthTestEnable(VK_TRUE)
                                      .setDepthWriteEnable(VK_TRUE)
                                      .setDepthCompareOp(vk::CompareOp::eLessOrEqual)
                                      .setDepthBoundsTestEnable(VK_FALSE)
                                      .setStencilTestEnable(VK_FALSE)
                                      .setFront(stencilOp)
                                      .setBack(stencilOp);

    vk::PipelineColorBlendAttachmentState const colorBlendAttachments[1] = {
        vk::PipelineColorBlendAttachmentState().setColorWriteMask(
            vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
            vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA)};

    auto const colorBlendInfo =
        vk::PipelineColorBlendStateCreateInfo().setAttachmentCount(1).setPAttachments(
            colorBlendAttachments);

    vk::DynamicState const dynamicStates[2] = {vk::DynamicState::eViewport,
                                               vk::DynamicState::eScissor};

    auto const dynamicStateInfo = vk::PipelineDynamicStateCreateInfo()
                                      .setPDynamicStates(dynamicStates)
                                      .setDynamicStateCount(2);

    auto const pipeline = vk::GraphicsPipelineCreateInfo()
                              .setStageCount(2)
                              .setPStages(shaderStageInfo)
                              .setPVertexInputState(&vertexInputInfo)
                              .setPInputAssemblyState(&inputAssemblyInfo)
                              .setPViewportState(&viewportInfo)
                              .setPRasterizationState(&rasterizationInfo)
                              .setPMultisampleState(&multisampleInfo)
                              .setPDepthStencilState(&depthStencilInfo)
                              .setPColorBlendState(&colorBlendInfo)
                              .setPDynamicState(&dynamicStateInfo)
                              .setLayout(pipeline_layout)
                              .setRenderPass(render_pass);

    result = device.createGraphicsPipelines(pipelineCache, 1, &pipeline, nullptr, &this->pipeline);
    VERIFY(result == vk::Result::eSuccess);

    device.destroyShaderModule(frag_shader_module, nullptr);
    device.destroyShaderModule(vert_shader_module, nullptr);
}

void Cube::prepare_render_pass() {
    // The initial layout for the color and depth attachments will be LAYOUT_UNDEFINED
    // because at the start of the renderpass, we don't care about their contents.
    // At the start of the subpass, the color attachment's layout will be transitioned
    // to LAYOUT_COLOR_ATTACHMENT_OPTIMAL and the depth stencil attachment's layout
    // will be transitioned to LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL.  At the end of
    // the renderpass, the color attachment's layout will be transitioned to
    // LAYOUT_PRESENT_SRC_KHR to be ready to present.  This is all done as part of
    // the renderpass, no barriers are necessary.
    vk::ImageLayout finalLayout;
    finalLayout = vk::ImageLayout::eColorAttachmentOptimal;
    const vk::AttachmentDescription attachments[2] = {
        vk::AttachmentDescription()
            .setFormat(get_render_target_format())
            .setSamples(vk::SampleCountFlagBits::e1)
            .setLoadOp(vk::AttachmentLoadOp::eClear)
            .setStoreOp(vk::AttachmentStoreOp::eStore)
            .setStencilLoadOp(vk::AttachmentLoadOp::eDontCare)
            .setStencilStoreOp(vk::AttachmentStoreOp::eDontCare)
            .setInitialLayout(vk::ImageLayout::eUndefined)
            .setFinalLayout(finalLayout),
        vk::AttachmentDescription()
            .setFormat(depth.format)
            .setSamples(vk::SampleCountFlagBits::e1)
            .setLoadOp(vk::AttachmentLoadOp::eClear)
            .setStoreOp(vk::AttachmentStoreOp::eDontCare)
            .setStencilLoadOp(vk::AttachmentLoadOp::eDontCare)
            .setStencilStoreOp(vk::AttachmentStoreOp::eDontCare)
            .setInitialLayout(vk::ImageLayout::eUndefined)
            .setFinalLayout(vk::ImageLayout::eDepthStencilAttachmentOptimal)};

    auto const color_reference = vk::AttachmentReference().setAttachment(0).setLayout(
        vk::ImageLayout::eColorAttachmentOptimal);

    auto const depth_reference = vk::AttachmentReference().setAttachment(1).setLayout(
        vk::ImageLayout::eDepthStencilAttachmentOptimal);

    auto const subpass = vk::SubpassDescription()
                             .setPipelineBindPoint(vk::PipelineBindPoint::eGraphics)
                             .setInputAttachmentCount(0)
                             .setPInputAttachments(nullptr)
                             .setColorAttachmentCount(1)
                             .setPColorAttachments(&color_reference)
                             .setPResolveAttachments(nullptr)
                             .setPDepthStencilAttachment(&depth_reference)
                             .setPreserveAttachmentCount(0)
                             .setPPreserveAttachments(nullptr);

    vk::PipelineStageFlags stages = vk::PipelineStageFlagBits::eEarlyFragmentTests |
                                    vk::PipelineStageFlagBits::eLateFragmentTests;
    vk::SubpassDependency const dependencies[2] = {
        vk::SubpassDependency() // Depth buffer is shared between swapchain images
            .setSrcSubpass(VK_SUBPASS_EXTERNAL)
            .setDstSubpass(0)
            .setSrcStageMask(stages)
            .setDstStageMask(stages)
            .setSrcAccessMask(vk::AccessFlagBits::eDepthStencilAttachmentWrite)
            .setDstAccessMask(vk::AccessFlagBits::eDepthStencilAttachmentRead |
                              vk::AccessFlagBits::eDepthStencilAttachmentWrite)
            .setDependencyFlags(vk::DependencyFlags()),
        vk::SubpassDependency() // Image layout transition
            .setSrcSubpass(VK_SUBPASS_EXTERNAL)
            .setDstSubpass(0)
            .setSrcStageMask(vk::PipelineStageFlagBits::eColorAttachmentOutput)
            .setDstStageMask(vk::PipelineStageFlagBits::eColorAttachmentOutput)
            .setSrcAccessMask(vk::AccessFlagBits())
            .setDstAccessMask(vk::AccessFlagBits::eColorAttachmentWrite |
                              vk::AccessFlagBits::eColorAttachmentRead)
            .setDependencyFlags(vk::DependencyFlags()),
    };

    auto const rp_info = vk::RenderPassCreateInfo()
                             .setAttachmentCount(2)
                             .setPAttachments(attachments)
                             .setSubpassCount(1)
                             .setPSubpasses(&subpass)
                             .setDependencyCount(2)
                             .setPDependencies(dependencies);

    auto result = device.createRenderPass(&rp_info, nullptr, &render_pass);
    VERIFY(result == vk::Result::eSuccess);
}

// Creates a shader module from `code`.
//   code - pointer to the shader code as 32-bit words
//   size - passed through as the module's code size; callers in this file
//          pass sizeof() of the word array, i.e. a byte count
// Returns the newly created module handle.
vk::ShaderModule Cube::prepare_shader_module(const uint32_t *code, size_t size) {
    vk::ShaderModuleCreateInfo create_info;
    create_info.setCodeSize(size);
    create_info.setPCode(code);

    vk::ShaderModule shader;
    auto const result = device.createShaderModule(&create_info, nullptr, &shader);
    VERIFY(result == vk::Result::eSuccess);

    return shader;
}

void Cube::prepare_texture_buffer(const char *filename, texture_object *tex_obj) {
    int32_t tex_width;
    int32_t tex_height;

    if (!loadTexture(filename, NULL, NULL, &tex_width, &tex_height)) {
        ERR_EXIT("Failed to load textures", "Load Texture Failure");
    }

    tex_obj->tex_width = tex_width;
    tex_obj->tex_height = tex_height;

    auto const buffer_create_info = vk::BufferCreateInfo()
                                        .setSize(tex_width * tex_height * 4)
                                        .setUsage(vk::BufferUsageFlagBits::eTransferSrc)
                                        .setSharingMode(vk::SharingMode::eExclusive)
                                        .setQueueFamilyIndexCount(0)
                                        .setPQueueFamilyIndices(nullptr);

    auto result = device.createBuffer(&buffer_create_info, nullptr, &tex_obj->buffer);
    VERIFY(result == vk::Result::eSuccess);

    vk::MemoryRequirements mem_reqs;
    device.getBufferMemoryRequirements(tex_obj->buffer, &mem_reqs);

    tex_obj->mem_alloc.setAllocationSize(mem_reqs.size);
    tex_obj->mem_alloc.setMemoryTypeIndex(0);

    vk::MemoryPropertyFlags requirements =
        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
    auto pass = memory_type_from_properties(memory_properties,
                                            mem_reqs.memoryTypeBits,
                                            requirements,
                                            &tex_obj->mem_alloc.memoryTypeIndex);
    VERIFY(pass == true);

    result = device.allocateMemory(&tex_obj->mem_alloc, nullptr, &(tex_obj->mem));
    VERIFY(result == vk::Result::eSuccess);

    result = device.bindBufferMemory(tex_obj->buffer, tex_obj->mem, 0);
    VERIFY(result == vk::Result::eSuccess);

    vk::SubresourceLayout layout;
    layout.rowPitch = tex_width * 4;
    auto data = device.mapMemory(tex_obj->mem, 0, tex_obj->mem_alloc.allocationSize);
    VERIFY(data.result == vk::Result::eSuccess);

    if (!loadTexture(filename, (uint8_t *)data.value, &layout, &tex_width, &tex_height)) {
        fprintf(stderr, "Error loading texture: %s\n", filename);
    }

    device.unmapMemory(tex_obj->mem);
}

void Cube::prepare_texture_image(const char *filename,
                                 texture_object *tex_obj,
                                 vk::ImageTiling tiling,
                                 vk::ImageUsageFlags usage,
                                 vk::MemoryPropertyFlags required_props) {
    int32_t tex_width;
    int32_t tex_height;
    if (!loadTexture(filename, nullptr, nullptr, &tex_width, &tex_height)) {
        ERR_EXIT("Failed to load textures", "Load Texture Failure");
    }

    tex_obj->tex_width = tex_width;
    tex_obj->tex_height = tex_height;

    auto const image_create_info = vk::ImageCreateInfo()
                                       .setImageType(vk::ImageType::e2D)
                                       .setFormat(vk::Format::eR8G8B8A8Unorm)
                                       .setExtent({(uint32_t)tex_width, (uint32_t)tex_height, 1})
                                       .setMipLevels(1)
                                       .setArrayLayers(1)
                                       .setSamples(vk::SampleCountFlagBits::e1)
                                       .setTiling(tiling)
                                       .setUsage(usage)
                                       .setSharingMode(vk::SharingMode::eExclusive)
                                       .setQueueFamilyIndexCount(0)
                                       .setPQueueFamilyIndices(nullptr)
                                       .setInitialLayout(vk::ImageLayout::ePreinitialized);

    auto result = device.createImage(&image_create_info, nullptr, &tex_obj->image);
    VERIFY(result == vk::Result::eSuccess);

    vk::MemoryRequirements mem_reqs;
    device.getImageMemoryRequirements(tex_obj->image, &mem_reqs);

    tex_obj->mem_alloc.setAllocationSize(mem_reqs.size);
    tex_obj->mem_alloc.setMemoryTypeIndex(0);

    auto pass = memory_type_from_properties(memory_properties,
                                            mem_reqs.memoryTypeBits,
                                            required_props,
                                            &tex_obj->mem_alloc.memoryTypeIndex);
    VERIFY(pass == true);

    result = device.allocateMemory(&tex_obj->mem_alloc, nullptr, &(tex_obj->mem));
    VERIFY(result == vk::Result::eSuccess);

    result = device.bindImageMemory(tex_obj->image, tex_obj->mem, 0);
    VERIFY(result == vk::Result::eSuccess);

    if (required_props & vk::MemoryPropertyFlagBits::eHostVisible) {
        auto const subres = vk::ImageSubresource()
                                .setAspectMask(vk::ImageAspectFlagBits::eColor)
                                .setMipLevel(0)
                                .setArrayLayer(0);
        vk::SubresourceLayout layout;
        device.getImageSubresourceLayout(tex_obj->image, &subres, &layout);

        auto data = device.mapMemory(tex_obj->mem, 0, tex_obj->mem_alloc.allocationSize);
        VERIFY(data.result == vk::Result::eSuccess);

        if (!loadTexture(filename, (uint8_t *)data.value, &layout, &tex_width, &tex_height)) {
            fprintf(stderr, "Error loading texture: %s\n", filename);
        }

        device.unmapMemory(tex_obj->mem);
    }

    tex_obj->imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
}

// Creates the image, sampler and image view for every entry of tex_files.
//
// Two upload paths exist, chosen from the format properties of
// R8G8B8A8_UNORM on `gpu`:
//   * linear tiling supports sampling and option.use_staging_buffer is off:
//     the pixels are written directly into a host-visible linear image;
//   * otherwise, if optimal tiling supports sampling: the pixels go into
//     staging_texture's buffer and a buffer-to-image copy is recorded into
//     `cmd`, bracketed by the required layout transitions.
// If neither tiling supports sampling the function exits via ERR_EXIT.
void Cube::prepare_textures() {
    vk::Format const tex_format = vk::Format::eR8G8B8A8Unorm;
    vk::FormatProperties props;
    gpu.getFormatProperties(tex_format, &props);

    for (uint32_t i = 0; i < texture_count; i++) {
        if ((props.linearTilingFeatures & vk::FormatFeatureFlagBits::eSampledImage) &&
            !option.use_staging_buffer) {
            /* Device can texture using linear textures */
            prepare_texture_image(tex_files[i],
                                  &textures[i],
                                  vk::ImageTiling::eLinear,
                                  vk::ImageUsageFlagBits::eSampled,
                                  vk::MemoryPropertyFlagBits::eHostVisible |
                                      vk::MemoryPropertyFlagBits::eHostCoherent);
            // Nothing in the pipeline needs to be complete to start, and don't allow fragment
            // shader to run until layout transition completes
            set_image_layout(textures[i].image,
                             vk::ImageAspectFlagBits::eColor,
                             vk::ImageLayout::ePreinitialized,
                             textures[i].imageLayout,
                             vk::AccessFlagBits(),
                             vk::PipelineStageFlagBits::eTopOfPipe,
                             vk::PipelineStageFlagBits::eFragmentShader);
            // No staging image is used on this path.
            staging_texture.image = vk::Image();
        } else if (props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eSampledImage) {
            /* Must use staging buffer to copy linear texture to optimized */

            prepare_texture_buffer(tex_files[i], &staging_texture);

            prepare_texture_image(tex_files[i],
                                  &textures[i],
                                  vk::ImageTiling::eOptimal,
                                  vk::ImageUsageFlagBits::eTransferDst |
                                      vk::ImageUsageFlagBits::eSampled,
                                  vk::MemoryPropertyFlagBits::eDeviceLocal);

            // Make the image a valid copy destination.
            set_image_layout(textures[i].image,
                             vk::ImageAspectFlagBits::eColor,
                             vk::ImageLayout::ePreinitialized,
                             vk::ImageLayout::eTransferDstOptimal,
                             vk::AccessFlagBits(),
                             vk::PipelineStageFlagBits::eTopOfPipe,
                             vk::PipelineStageFlagBits::eTransfer);

            auto const subresource = vk::ImageSubresourceLayers()
                                         .setAspectMask(vk::ImageAspectFlagBits::eColor)
                                         .setMipLevel(0)
                                         .setBaseArrayLayer(0)
                                         .setLayerCount(1);

            auto const copy_region = vk::BufferImageCopy()
                                         .setBufferOffset(0)
                                         .setBufferRowLength(staging_texture.tex_width)
                                         .setBufferImageHeight(staging_texture.tex_height)
                                         .setImageSubresource(subresource)
                                         .setImageOffset({0, 0, 0})
                                         .setImageExtent({(uint32_t)staging_texture.tex_width,
                                                          (uint32_t)staging_texture.tex_height,
                                                          1});

            cmd.copyBufferToImage(staging_texture.buffer,
                                  textures[i].image,
                                  vk::ImageLayout::eTransferDstOptimal,
                                  1,
                                  &copy_region);

            // The copy's writes must be visible before the fragment shader samples the image.
            set_image_layout(textures[i].image,
                             vk::ImageAspectFlagBits::eColor,
                             vk::ImageLayout::eTransferDstOptimal,
                             textures[i].imageLayout,
                             vk::AccessFlagBits::eTransferWrite,
                             vk::PipelineStageFlagBits::eTransfer,
                             vk::PipelineStageFlagBits::eFragmentShader);
        } else {
            // An assert() here would compile away under NDEBUG and let the
            // code below create a sampler and view for an image that was
            // never created, so fail explicitly in every build type.
            ERR_EXIT("No support for R8G8B8A8_UNORM as texture image format",
                     "Texture Format Failure");
        }

        auto const samplerInfo = vk::SamplerCreateInfo()
                                     .setMagFilter(vk::Filter::eNearest)
                                     .setMinFilter(vk::Filter::eNearest)
                                     .setMipmapMode(vk::SamplerMipmapMode::eNearest)
                                     .setAddressModeU(vk::SamplerAddressMode::eClampToEdge)
                                     .setAddressModeV(vk::SamplerAddressMode::eClampToEdge)
                                     .setAddressModeW(vk::SamplerAddressMode::eClampToEdge)
                                     .setMipLodBias(0.0f)
                                     .setAnisotropyEnable(VK_FALSE)
                                     .setMaxAnisotropy(1)
                                     .setCompareEnable(VK_FALSE)
                                     .setCompareOp(vk::CompareOp::eNever)
                                     .setMinLod(0.0f)
                                     .setMaxLod(0.0f)
                                     .setBorderColor(vk::BorderColor::eFloatOpaqueWhite)
                                     .setUnnormalizedCoordinates(VK_FALSE);

        auto result = device.createSampler(&samplerInfo, nullptr, &textures[i].sampler);
        VERIFY(result == vk::Result::eSuccess);

        // Single mip level, single array layer, color aspect.
        auto const viewInfo = vk::ImageViewCreateInfo()
                                  .setImage(textures[i].image)
                                  .setViewType(vk::ImageViewType::e2D)
                                  .setFormat(tex_format)
                                  .setSubresourceRange(vk::ImageSubresourceRange(
                                      vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));

        result = device.createImageView(&viewInfo, nullptr, &textures[i].view);
        VERIFY(result == vk::Result::eSuccess);
    }
}

// Builds the vertex shader module from the 32-bit code words included from
// cube.vert.inc, stores it in vert_shader_module and returns it.
vk::ShaderModule Cube::prepare_vs() {
    const uint32_t vert_code_words[] = {
#include "cube.vert.inc"
    };

    // The size argument is in bytes, hence sizeof on the whole array.
    const size_t vert_code_bytes = sizeof(vert_code_words);
    vert_shader_module = prepare_shader_module(vert_code_words, vert_code_bytes);
    return vert_shader_module;
}

// Records an image memory barrier into `cmd` that transitions mip level 0 /
// array layer 0 of `image` from `oldLayout` to `newLayout`.
//   aspectMask    - image aspect(s) covered by the barrier
//   srcAccessMask - accesses that must be made available before the transition
//   src_stages    - pipeline stages the barrier waits on
//   dest_stages   - pipeline stages that wait for the barrier
// The destination access mask is derived from `newLayout`; layouts not
// listed below get an empty mask.
void Cube::set_image_layout(vk::Image image,
                            vk::ImageAspectFlags aspectMask,
                            vk::ImageLayout oldLayout,
                            vk::ImageLayout newLayout,
                            vk::AccessFlags srcAccessMask,
                            vk::PipelineStageFlags src_stages,
                            vk::PipelineStageFlags dest_stages) {
    assert(cmd);

    vk::AccessFlags dst_access;
    switch (newLayout) {
    case vk::ImageLayout::eTransferDstOptimal:
        // The image is about to be the target of transfer writes.
        dst_access = vk::AccessFlagBits::eTransferWrite;
        break;
    case vk::ImageLayout::eColorAttachmentOptimal:
        dst_access = vk::AccessFlagBits::eColorAttachmentWrite;
        break;
    case vk::ImageLayout::eDepthStencilAttachmentOptimal:
        dst_access = vk::AccessFlagBits::eDepthStencilAttachmentWrite;
        break;
    case vk::ImageLayout::eShaderReadOnlyOptimal:
        // Make sure any Copy or CPU writes to image are flushed
        dst_access = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eInputAttachmentRead;
        break;
    case vk::ImageLayout::eTransferSrcOptimal:
        dst_access = vk::AccessFlagBits::eTransferRead;
        break;
    case vk::ImageLayout::ePresentSrcKHR:
        dst_access = vk::AccessFlagBits::eMemoryRead;
        break;
    default:
        // Leave the mask empty for any other layout.
        break;
    }

    vk::ImageMemoryBarrier barrier;
    barrier.setSrcAccessMask(srcAccessMask);
    barrier.setDstAccessMask(dst_access);
    barrier.setOldLayout(oldLayout);
    barrier.setNewLayout(newLayout);
    // No queue family ownership transfer.
    barrier.setSrcQueueFamilyIndex(VK_QUEUE_FAMILY_IGNORED);
    barrier.setDstQueueFamilyIndex(VK_QUEUE_FAMILY_IGNORED);
    barrier.setImage(image);
    barrier.setSubresourceRange(vk::ImageSubresourceRange(aspectMask, 0, 1, 0, 1));

    cmd.pipelineBarrier(
        src_stages, dest_stages, vk::DependencyFlagBits(), 0, nullptr, 0, nullptr, 1, &barrier);
}

// Rotates the stored model matrix about the Y axis by spin_angle, then writes
// the resulting model-view-projection matrix to the start of the mapped
// uniform memory of draw_resources[active_index].
void Cube::update_data_buffer(ResourceIndex active_index) {
    // model_matrix is updated in place, so the rotation accumulates across calls.
    mat4x4 previous_model;
    mat4x4_dup(previous_model, model_matrix);
    mat4x4_rotate_Y(model_matrix, previous_model, (float)degreesToRadians(spin_angle));
    mat4x4_orthonormalize(model_matrix, model_matrix);

    mat4x4 view_projection;
    mat4x4_mul(view_projection, projection_matrix, view_matrix);

    mat4x4 mvp;
    mat4x4_mul(mvp, view_projection, model_matrix);

    // Only the MVP matrix is copied; nothing beyond sizeof(mvp) bytes is touched.
    void *const uniform_dst = draw_resources[active_index].uniform_memory_ptr;
    memcpy(uniform_dst, (const void *)&mvp[0][0], sizeof(mvp));
}

/* Convert ppm image data from header file into RGBA texture image */
#include "lunarg.ppm.h"
// Decodes the binary PPM ("P6") image embedded as lunarg_ppm/lunarg_ppm_len.
//
// The expected header is exactly three lines: "P6", "<width> <height>" and
// "255", each terminated by '\n', followed by tightly packed RGB triples.
//
//   filename  - unused; the image data is compiled into the program
//   rgba_data - destination for RGBA pixels, or NULL to query dimensions only
//   layout    - supplies rowPitch (bytes between destination rows); required
//               when rgba_data is non-NULL
//   width     - receives the image width in pixels
//   height    - receives the image height in pixels
//
// Returns true on success. Returns false, without writing any pixels, when
// the header is malformed, the dimensions are not positive, `layout` is
// missing for a pixel load, or the embedded data is shorter than the header
// claims. Every read is bounded by lunarg_ppm_len.
bool Cube::loadTexture(const char *filename,
                       uint8_t *rgba_data,
                       vk::SubresourceLayout *layout,
                       int32_t *width,
                       int32_t *height) {
    (void)filename;

    const char *cursor = (const char *)lunarg_ppm;
    const char *const end = cursor + lunarg_ppm_len;

    // True when the unread data begins with `token`.
    auto starts_with = [&](const char *token) {
        const size_t token_len = strlen(token);
        return (size_t)(end - cursor) >= token_len && strncmp(cursor, token, token_len) == 0;
    };

    // Moves the cursor just past the next '\n'; false if there is none.
    auto skip_line = [&]() {
        while (cursor < end && *cursor != '\n') {
            ++cursor;
        }
        if (cursor == end) {
            return false;
        }
        ++cursor;
        return true;
    };

    if (!starts_with("P6\n") || !skip_line()) {
        return false;
    }

    // Copy the dimensions line into a NUL-terminated scratch buffer so that
    // sscanf cannot read beyond the embedded data.
    const char *const dims_begin = cursor;
    if (!skip_line()) {
        return false;
    }
    const size_t dims_len = (size_t)(cursor - dims_begin) - 1;  // exclude the '\n'
    char dims[32];
    if (dims_len >= sizeof(dims)) {
        return false;
    }
    memcpy(dims, dims_begin, dims_len);
    dims[dims_len] = '\0';

    int parsed_w = 0;
    int parsed_h = 0;
    if (sscanf(dims, "%d %d", &parsed_w, &parsed_h) != 2 || parsed_w <= 0 || parsed_h <= 0) {
        return false;
    }
    *width = parsed_w;
    *height = parsed_h;

    if (rgba_data == NULL) {
        // Dimension query only.
        return true;
    }
    if (layout == NULL) {
        return false;
    }

    // Only 8-bit channels are supported.
    if (!starts_with("255\n") || !skip_line()) {
        return false;
    }

    // Refuse to copy if the payload is shorter than width * height RGB triples.
    const uint64_t needed_bytes = (uint64_t)parsed_w * (uint64_t)parsed_h * 3u;
    if ((uint64_t)(end - cursor) < needed_bytes) {
        return false;
    }

    for (int y = 0; y < parsed_h; y++) {
        uint8_t *rowPtr = rgba_data;
        for (int x = 0; x < parsed_w; x++) {
            memcpy(rowPtr, cursor, 3);
            rowPtr[3] = 255; /* Alpha of 1 */
            rowPtr += 4;
            cursor += 3;
        }
        // Destination rows may be padded; advance by the pitch, not the width.
        rgba_data += layout->rowPitch;
    }
    return true;
}
