#include <sys/mman.h>

#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>
#include <sys/ioctl.h>
#include <gbm.h>

#include <string.h>
#include <stdio.h>

#include <iostream>
#include <string>
#include <vector>
#include <unordered_set>
#include <unordered_map>

#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>

#include <GLES3/gl3.h>
#include <GLES3/gl3ext.h>

#include <ni_p2p_ioctl.h>
#include <ni_device_api.h>
#include <ni_av_codec.h>
#include <ni_util.h>
#include <ni_rsrc_api.h>


#define NI_DO_SCALE_HWDOWNLOAD 1

#if defined(NI_DO_SCALE_HWDOWNLOAD) && NI_DO_SCALE_HWDOWNLOAD == 1
// Lookup table from a libxcoder pixel format (ni_pix_fmt_t) to the matching
// GC620_* format code. It is the inverse of gc620_to_ni_pix_fmt below; the two
// tables must be kept in sync when a format is added or removed.
// NOTE(review): the "fix" in the name looks like a typo for "pix" - confirm
// before renaming, since code outside this chunk may reference it.
static const std::unordered_map<ni_pix_fmt_t, int> ni_fix_format_to_gc620{
    {NI_PIX_FMT_NV12, GC620_NV12},
    {NI_PIX_FMT_YUV420P, GC620_I420},
    {NI_PIX_FMT_P010LE, GC620_P010_MSB},
    {NI_PIX_FMT_YUV420P10LE, GC620_I010},
    {NI_PIX_FMT_YUYV422, GC620_YUYV},
    {NI_PIX_FMT_UYVY422, GC620_UYVY},
    {NI_PIX_FMT_NV16, GC620_NV16},
    {NI_PIX_FMT_RGBA, GC620_RGBA8888},
    {NI_PIX_FMT_BGR0, GC620_BGRX8888},
    {NI_PIX_FMT_BGRA, GC620_BGRA8888},
    {NI_PIX_FMT_ABGR, GC620_ABGR8888},
    {NI_PIX_FMT_ARGB, GC620_ARGB8888},
    {NI_PIX_FMT_BGRP, GC620_RGB888_PLANAR},
};

// Lookup table from a GC620_* format code to the matching libxcoder pixel
// format (ni_pix_fmt_t). It is the inverse of ni_fix_format_to_gc620 above and
// is used by do_2D() to translate scaler_params.output_format.
static const std::unordered_map<int, ni_pix_fmt_t> gc620_to_ni_pix_fmt{
    {GC620_NV12, NI_PIX_FMT_NV12},
    {GC620_I420, NI_PIX_FMT_YUV420P},
    {GC620_P010_MSB, NI_PIX_FMT_P010LE},
    {GC620_I010, NI_PIX_FMT_YUV420P10LE},
    {GC620_YUYV, NI_PIX_FMT_YUYV422},
    {GC620_UYVY, NI_PIX_FMT_UYVY422},
    {GC620_NV16, NI_PIX_FMT_NV16},
    {GC620_RGBA8888, NI_PIX_FMT_RGBA},
    {GC620_BGRX8888, NI_PIX_FMT_BGR0},
    {GC620_BGRA8888, NI_PIX_FMT_BGRA},
    {GC620_ABGR8888, NI_PIX_FMT_ABGR},
    {GC620_ARGB8888, NI_PIX_FMT_ARGB},
    {GC620_RGB888_PLANAR, NI_PIX_FMT_BGRP},
};

/*!****************************************************************************
 *  \brief  Download a hardware frame into a freshly allocated host buffer
 *
 *  \param [in]  p_ctx           session context used for the download
 *         [out] p_session_data  receives the downloaded frame in data.frame
 *         [in]  p_src_frame     hw frame; its surface descriptor is read from
 *                               p_data[3]
 *         [in]  output_format   GC620_* code of the surface; only GC620_I420,
 *                               GC620_RGBA8888 and GC620_RGB888_PLANAR are
 *                               accepted
 *
 *  \return  the value returned by ni_device_session_hwdl() (the host buffer is
 *           released again when that value is <= 0),
 *           NI_RETCODE_INVALID_PARAM for an unsupported format,
 *           NI_RETCODE_ERROR_MEM_ALOC if the host buffer cannot be allocated
 ******************************************************************************/
static int hwdl_frame(ni_session_context_t *p_ctx,
                      ni_session_data_io_t *p_session_data, ni_frame_t *p_src_frame,
                      int output_format)
{
    niFrameSurface1_t *surface = (niFrameSurface1_t *)(p_src_frame->p_data[3]);
    ni_frame_t *dst_frame = &(p_session_data->data.frame);
    int host_format;

    // Translate the GC620 code into the pixel format of the host buffer.
    if (output_format == GC620_I420)
    {
        host_format = NI_PIX_FMT_YUV420P;
    }
    else if (output_format == GC620_RGBA8888)
    {
        host_format = NI_PIX_FMT_RGBA;
    }
    else if (output_format == GC620_RGB888_PLANAR)
    {
        host_format = NI_PIX_FMT_BGRP;
    }
    else
    {
        ni_log(NI_LOG_ERROR, "Pixel format not supported.\n");
        return NI_RETCODE_INVALID_PARAM;
    }

    if (ni_frame_buffer_alloc_dl(dst_frame, surface->ui16width,
                                 surface->ui16height,
                                 host_format) != NI_RETCODE_SUCCESS)
    {
        return NI_RETCODE_ERROR_MEM_ALOC;
    }

    p_ctx->is_auto_dl = false;

    const int dl_result = ni_device_session_hwdl(p_ctx, p_session_data, surface);
    if (dl_result <= 0)
    {
        // Nothing was downloaded: hand the host buffer back.
        ni_frame_buffer_free(dst_frame);
    }
    return dl_result;
}


/*!****************************************************************************
 *  \brief  Fill in the rectangle / colour fields of a scaler parameter block
 *          for one 2D operation.
 *
 *  The caller must already have assigned input_format/width/height and
 *  output_format/width/height in *p_scaler_params; the PAD and OVERLAY
 *  branches read input_width/input_height (and OVERLAY output_width/height).
 *  For opcodes other than CROP, SCALE, PAD and OVERLAY only `op` is written.
 *
 *  \param [in,out] p_scaler_params  parameter block to complete
 *         [in]     op               scaler operation
 *         [in]     in_rec_width     input rectangle width  (used by CROP)
 *         [in]     in_rec_height    input rectangle height (used by CROP)
 *         [in]     in_rec_x         input rectangle x (used by CROP, OVERLAY)
 *         [in]     in_rec_y         input rectangle y (used by CROP, OVERLAY)
 *         [in]     out_rec_x        output rectangle x (used by PAD)
 *         [in]     out_rec_y        output rectangle y (used by PAD)
 ******************************************************************************/
static void init_2D_params(ni_scaler_input_params_t *p_scaler_params,
                           ni_scaler_opcode_t op, int in_rec_width,
                           int in_rec_height, int in_rec_x, int in_rec_y,
                           int out_rec_x, int out_rec_y)
{
    p_scaler_params->op = op;

    switch (op)
    {
    case NI_SCALER_OPCODE_CROP:
        // fixed values
        p_scaler_params->out_rec_width = 0;
        p_scaler_params->out_rec_height = 0;
        p_scaler_params->out_rec_x = 0;
        p_scaler_params->out_rec_y = 0;
        p_scaler_params->rgba_color = 0;

        // crop window supplied by the caller
        p_scaler_params->in_rec_width = in_rec_width;
        p_scaler_params->in_rec_height = in_rec_height;
        p_scaler_params->in_rec_x = in_rec_x;
        p_scaler_params->in_rec_y = in_rec_y;
        break;

    case NI_SCALER_OPCODE_SCALE:
        // fixed values: no rectangles are used for a plain scale
        p_scaler_params->in_rec_width = 0;
        p_scaler_params->in_rec_height = 0;
        p_scaler_params->in_rec_x = 0;
        p_scaler_params->in_rec_y = 0;

        p_scaler_params->out_rec_width = 0;
        p_scaler_params->out_rec_height = 0;
        p_scaler_params->out_rec_x = 0;
        p_scaler_params->out_rec_y = 0;

        p_scaler_params->rgba_color = 0;
        break;

    case NI_SCALER_OPCODE_PAD:
        // fixed values: the whole input picture is placed into the output
        p_scaler_params->in_rec_width = p_scaler_params->input_width;
        p_scaler_params->in_rec_height = p_scaler_params->input_height;
        p_scaler_params->in_rec_x = 0;
        p_scaler_params->in_rec_y = 0;

        p_scaler_params->out_rec_width = p_scaler_params->input_width;
        p_scaler_params->out_rec_height = p_scaler_params->input_height;

        /*
            Scaler uses BGRA color, or ARGB in little-endian
            ui32RgbaColor = (s->rgba_color[3] << 24) | (s->rgba_color[0] << 16) |
                            (s->rgba_color[1] << 8) | s->rgba_color[2];
            here p_scaler_params->rgba_color = ui32RgbaColor;
        */
        // 0xFF000000 (== 4278190080): alpha 0xFF, colour 0 -> black padding
        p_scaler_params->rgba_color = 0xFF000000u;

        // position of the picture inside the padded output, set by the caller
        p_scaler_params->out_rec_x = out_rec_x;
        p_scaler_params->out_rec_y = out_rec_y;
        break;

    case NI_SCALER_OPCODE_OVERLAY:
        // fixed values
        // in_rec w/h is the size of the overlay (upper) frame
        p_scaler_params->in_rec_width = p_scaler_params->input_width;
        p_scaler_params->in_rec_height = p_scaler_params->input_height;

        // out_rec w/h is the size of the main (lower/background) frame
        p_scaler_params->out_rec_width = p_scaler_params->output_width;
        p_scaler_params->out_rec_height = p_scaler_params->output_height;
        p_scaler_params->out_rec_x = 0;
        p_scaler_params->out_rec_y = 0;
        p_scaler_params->rgba_color = 0;

        // overlay position supplied by the caller
        p_scaler_params->in_rec_x = in_rec_x;
        // Fix: this used to be assigned in_rec_x, silently ignoring in_rec_y.
        p_scaler_params->in_rec_y = in_rec_y;
        break;

    default:
        // Other opcodes: rectangle fields are left untouched.
        break;
    }
}

/*!****************************************************************************
 *  \brief  Open a scaler session on a Quadra device
 *
 *  \param [in,out] p_scaler_ctx  caller allocated scaler session context
 *         [in]     iXcoderGUID   hw id of the device to open the session on
 *         [in]     op            scaler operation the session will perform
 *
 *  \return  0 on success, -1 on failure
 ******************************************************************************/
static int scaler_session_open(ni_session_context_t *p_scaler_ctx,
                               int iXcoderGUID, ni_scaler_opcode_t op)
{
    // Reset the handles so the open call starts from a clean context.
    p_scaler_ctx->session_id = NI_INVALID_SESSION_ID;
    p_scaler_ctx->device_handle = NI_INVALID_DEVICE_HANDLE;
    p_scaler_ctx->blk_io_handle = NI_INVALID_DEVICE_HANDLE;

    // Describe the session we want.
    p_scaler_ctx->hw_id = iXcoderGUID;
    p_scaler_ctx->device_type = NI_DEVICE_TYPE_SCALER;
    p_scaler_ctx->scaler_operation = op;
    p_scaler_ctx->keep_alive_timeout = NI_DEFAULT_KEEP_ALIVE_TIMEOUT;

    if (ni_device_session_open(p_scaler_ctx, NI_DEVICE_TYPE_SCALER) !=
        NI_RETCODE_SUCCESS)
    {
        ni_log(NI_LOG_ERROR, "Error: ni_scaler_session_open() failure!\n");
        return -1;
    }

    ni_log(NI_LOG_INFO, "Scaler session open: device_handle %d, session_id %u.\n",
           p_scaler_ctx->device_handle, p_scaler_ctx->session_id);
    return 0;
}

/*!****************************************************************************
 *  \brief  Derive the surface bit-depth and encoding-type codes of a pixel
 *          format.
 *
 *  Bit depth is 1 for 8-bit formats and 2 for 10-bit formats. Encoding type
 *  is 1 for planar or packed formats, 0 for semi-planar formats and
 *  NI_PIXEL_PLANAR_FORMAT_TILED4X4 for the 8-bit tiled format. Unknown
 *  formats are reported on stderr and treated as 8-bit planar/packed.
 *
 *  \param [out] p_bit_depth  receives the bit-depth code
 *         [out] p_enc_type   receives the encoding-type code
 *         [in]  pix_fmt      pixel format to classify
 ******************************************************************************/
static void ni_set_bit_depth_and_encoding_type(int8_t *p_bit_depth,
                                               int8_t *p_enc_type,
                                               ni_pix_fmt_t pix_fmt)
{
    // Start from the most common answer: 8-bit, planar or packed.
    int8_t depth_code = 1;
    int8_t layout_code = 1;

    switch (pix_fmt)
    {
    // 8-bit planar / packed: the defaults already apply
    case NI_PIX_FMT_YUV420P:
    case NI_PIX_FMT_YUYV422:
    case NI_PIX_FMT_UYVY422:
    case NI_PIX_FMT_BGRP:
    case NI_PIX_FMT_RGBA:
    case NI_PIX_FMT_BGRA:
    case NI_PIX_FMT_ABGR:
    case NI_PIX_FMT_ARGB:
    case NI_PIX_FMT_BGR0:
        break;

    // 10-bit planar
    case NI_PIX_FMT_YUV420P10LE:
        depth_code = 2;
        break;

    // 8-bit semi-planar
    case NI_PIX_FMT_NV12:
    case NI_PIX_FMT_NV16:
        layout_code = 0;
        break;

    // 8-bit tiled 4x4
    case NI_PIX_FMT_8_TILED4X4:
        layout_code = NI_PIXEL_PLANAR_FORMAT_TILED4X4;
        break;

    // 10-bit semi-planar
    case NI_PIX_FMT_P010LE:
        depth_code = 2;
        layout_code = 0;
        break;

    default:
        // A format this table does not know yet: warn and keep the defaults.
        fprintf(stderr, "unexpected pixel format\n");
        break;
    }

    *p_bit_depth = depth_code;
    *p_enc_type = layout_code;
}

/*!****************************************************************************
 *  \brief  Run one 2D scaler operation and fetch the resulting hw frame
 *
 *  \param [in]  p_ctx          open scaler session context
 *         [in]  p_frame_in_up  hw frame used as scaler input (the upper frame
 *                              for overlay); its surface is read from p_data[3]
 *         [in]  p_frame_in_bg  hw frame passed to the destination allocation
 *                              (the background frame for overlay)
 *         [out] p_data_out     receives the output hw frame descriptor
 *         [in]  scaler_params  fully initialised scaler parameters;
 *                              output_format must be a GC620_* code present in
 *                              gc620_to_ni_pix_fmt
 *
 *  \return  the (non-negative) result of ni_device_session_read_hwdesc() on
 *           success, -1 on any failure. On failure the output frame buffer
 *           allocated here is released again.
 ******************************************************************************/
static int do_2D(ni_session_context_t *p_ctx,
                 ni_frame_t *p_frame_in_up,
                 ni_frame_t *p_frame_in_bg,
                 ni_session_data_io_t *p_data_out,
                 ni_scaler_input_params_t scaler_params)
{
    // Resolve the output pixel format up front. The previous .at() lookup
    // threw std::out_of_range for an unmapped format, and only after the
    // hardware operation had already been issued.
    const auto fmt_it = gc620_to_ni_pix_fmt.find(scaler_params.output_format);
    if (fmt_it == gc620_to_ni_pix_fmt.end())
    {
        fprintf(stderr, "%s unsupported output format %d\n", __func__,
                static_cast<int>(scaler_params.output_format));
        return -1;
    }

    ni_frame_t *p_out_frame = &p_data_out->data.frame;

    int ret = ni_frame_buffer_alloc_hwenc(p_out_frame,
                                          scaler_params.output_width,
                                          scaler_params.output_height, 0);
    if (ret != 0)
    {
        return -1;
    }

    niFrameSurface1_t *frame_surface_up = (niFrameSurface1_t *)(p_frame_in_up->p_data[3]);
    niFrameSurface1_t *frame_surface_bg = (niFrameSurface1_t *)(p_frame_in_bg->p_data[3]);

    ret = ni_scaler_input_frame_alloc(p_ctx, scaler_params, frame_surface_up);
    if (ret != 0)
    {
        // Do not leak the output buffer allocated above.
        ni_frame_buffer_free(p_out_frame);
        return -1;
    }

    ret = ni_scaler_dest_frame_alloc(p_ctx, scaler_params, frame_surface_bg);
    if (ret != 0)
    {
        ni_frame_buffer_free(p_out_frame);
        return -1;
    }

    ret = ni_device_session_read_hwdesc(p_ctx, p_data_out, NI_DEVICE_TYPE_SCALER);
    if (ret < 0)
    {
        // Same cleanup as before (inputs and output are released), but now we
        // return immediately instead of going on to read the freed output
        // frame.
        ni_frame_buffer_free(p_frame_in_up);
        ni_frame_buffer_free(p_frame_in_bg);
        ni_frame_buffer_free(p_out_frame);
        return -1;
    }

    niFrameSurface1_t *frame_surface_output = (niFrameSurface1_t *)(p_out_frame->p_data[3]);
    if (!frame_surface_output)
    {
        fprintf(stderr, "%s invalid surface\n", __func__);
        return -1;
    }

    // Stamp the output surface with its geometry and pixel layout so that
    // later consumers of the surface can interpret it.
    frame_surface_output->ui16width = p_out_frame->video_width;
    frame_surface_output->ui16height = p_out_frame->video_height;
    ni_set_bit_depth_and_encoding_type(&frame_surface_output->bit_depth,
                                       &frame_surface_output->encoding_type,
                                       fmt_it->second);

    return ret;
}

/*!****************************************************************************
 *  \brief  Classify a pixel format by its plane layout
 *
 *  \param [in] pix_fmt  pixel format to classify
 *
 *  \return  NI_PIXEL_PLANAR_FORMAT_SEMIPLANAR, NI_PIXEL_PLANAR_FORMAT_TILED4X4
 *           or NI_PIXEL_PLANAR_FORMAT_PLANAR for the formats listed below,
 *           NI_PIXEL_PLANAR_MAX for every other format
 ******************************************************************************/
static ni_pixel_planar_format get_pixel_planar(ni_pix_fmt_t pix_fmt)
{
    switch (pix_fmt)
    {
    case NI_PIX_FMT_NV12:
    case NI_PIX_FMT_P010LE:
        return NI_PIXEL_PLANAR_FORMAT_SEMIPLANAR;

    case NI_PIX_FMT_8_TILED4X4:
    case NI_PIX_FMT_10_TILED4X4:
        return NI_PIXEL_PLANAR_FORMAT_TILED4X4;

    case NI_PIX_FMT_YUV420P:
    case NI_PIX_FMT_YUV420P10LE:
    case NI_PIX_FMT_ABGR: /* 32-bit packed RGB variants are reported as planar */
    case NI_PIX_FMT_ARGB:
    case NI_PIX_FMT_RGBA:
    case NI_PIX_FMT_BGRA:
        return NI_PIXEL_PLANAR_FORMAT_PLANAR;

    default:
        return NI_PIXEL_PLANAR_MAX;
    }
}

/*!****************************************************************************
 *  \brief  Reset the per-frame reconfiguration state before new parameters
 *          are attached to a frame.
 *
 *  Clears the encoder context's enc_change_params, copies the stream
 *  start/end flags from the origin frame and resets the metadata lengths of
 *  the frame that will be sent to the encoder.
 *
 *  \param [in,out] p_enc_ctx       encoder session context
 *         [in]     p_origin_frame  frame the stream flags are copied from
 *         [out]    p_to_enc_frame  frame that will be sent to the encoder
 ******************************************************************************/
static void set_parameter_remove_previous(ni_session_context_t *p_enc_ctx, ni_frame_t *p_origin_frame, ni_frame_t *p_to_enc_frame)
{
    // Forget whatever reconfiguration was pending from the previous frame.
    memset(p_enc_ctx->enc_change_params, 0,
           sizeof(ni_encoder_change_params_t));

    // Carry the stream boundary markers over.
    p_to_enc_frame->start_of_stream = p_origin_frame->start_of_stream;
    p_to_enc_frame->end_of_stream = p_origin_frame->end_of_stream;

    // Only the fixed metadata block is present until aux data is added.
    p_to_enc_frame->extra_data_len = NI_APP_ENC_FRAME_META_DATA_SIZE;
    p_to_enc_frame->sei_total_len = 0;
    p_to_enc_frame->reconf_len = 0;
    p_to_enc_frame->roi_len = 0;
    p_to_enc_frame->force_pic_qp = 0;
}

/*!****************************************************************************
 *  \brief  Transfer the aux data collected on tmp_frame into the frame that
 *          is sent to the encoder, and attach the hw descriptors of the
 *          origin frame to it.
 *
 *  \param [in]     p_enc_ctx       encoder session context
 *         [in]     p_origin_frame  hw frame whose descriptors (p_data[0..3])
 *                                  and dimensions are used
 *         [in,out] p_to_enc_frame  frame to send to the encoder; its buffer is
 *                                  (re)allocated here
 *         [in]     tmp_frame       frame carrying the aux data to apply
 *         [in]     api_params      encoder parameters of the session
 *
 *  If the encoder frame buffer cannot be allocated an error is logged and the
 *  function returns without copying anything.
 ******************************************************************************/
static void set_the_aux_data_to_encoder(ni_session_context_t *p_enc_ctx, ni_frame_t *p_origin_frame, ni_frame_t *p_to_enc_frame, ni_frame_t *tmp_frame, ni_xcoder_params_t *api_params)
{
    int should_send_sei_with_frame = ni_should_send_sei_with_frame(
        p_enc_ctx, p_to_enc_frame->ni_pict_type, api_params);

    // data buffer for various SEI: HDR mastering display color volume, HDR
    // content light level, close caption, User data unregistered, HDR10+
    // etc.
    uint8_t mdcv_data[NI_MAX_SEI_DATA];
    uint8_t cll_data[NI_MAX_SEI_DATA];
    uint8_t cc_data[NI_MAX_SEI_DATA];
    uint8_t udu_data[NI_MAX_SEI_DATA];
    uint8_t hdrp_data[NI_MAX_SEI_DATA];

    ni_enc_prep_aux_data(
        p_enc_ctx, p_to_enc_frame, tmp_frame,
        (ni_codec_format_t)p_enc_ctx->codec_format, should_send_sei_with_frame, mdcv_data,
        cll_data, cc_data, udu_data, hdrp_data);

    // Grow the extra-data area by what the prep step decided to send.
    p_to_enc_frame->extra_data_len += p_to_enc_frame->sei_total_len;

    if (p_to_enc_frame->reconf_len || p_to_enc_frame->sei_total_len ||
        (api_params->roi_demo_mode &&
         api_params->cfg_enc_params.roi_enable))
    {
        p_to_enc_frame->extra_data_len += sizeof(ni_encoder_change_params_t);
    }

    // The allocation result used to be ignored; the copies below write into
    // the frame buffer, so bail out if it could not be allocated.
    if (ni_frame_buffer_alloc_hwenc(p_to_enc_frame, p_origin_frame->video_width,
                                    p_origin_frame->video_height,
                                    (int)(p_to_enc_frame->extra_data_len)) !=
        NI_RETCODE_SUCCESS)
    {
        ni_log(NI_LOG_ERROR,
               "Error %s(): could not allocate hw frame buffer\n", __func__);
        return;
    }

    uint8_t *p_src[NI_MAX_NUM_DATA_POINTERS];

    p_src[0] = p_origin_frame->p_data[0];
    p_src[1] = p_origin_frame->p_data[1];
    p_src[2] = p_origin_frame->p_data[2];
    p_src[3] = p_origin_frame->p_data[3];

    int is_semiplanar = get_pixel_planar((ni_pix_fmt_t)(p_enc_ctx->pixel_format)) == NI_PIXEL_PLANAR_FORMAT_SEMIPLANAR;

    ni_enc_copy_aux_data(p_enc_ctx, p_to_enc_frame,
                         tmp_frame,
                         (ni_codec_format_t)p_enc_ctx->codec_format, mdcv_data, cll_data,
                         cc_data, udu_data, hdrp_data, true,
                         is_semiplanar);

    ni_copy_hw_descriptors((uint8_t **)(p_to_enc_frame->p_data), p_src);
}

/*!****************************************************************************
 *  \brief  Attach a bitrate reconfiguration request to the frame that is
 *          about to be sent to the encoder.
 *
 *  The request is first built as aux data on a temporary frame and then
 *  copied onto p_to_enc_frame (see prep_reconf_demo_data in ni_device_test.c
 *  for the pattern this follows).
 *
 *  \param [in,out] p_enc_ctx       encoder session context
 *         [in]     p_origin_frame  hw frame being encoded
 *         [out]    p_to_enc_frame  frame that will be sent to the encoder
 *         [in]     api_params      encoder parameters of the session
 *
 *  \return  0 on success, -1 if the aux data could not be allocated
 ******************************************************************************/
static int set_parameter(ni_session_context_t *p_enc_ctx, ni_frame_t *p_origin_frame, ni_frame_t *p_to_enc_frame, ni_xcoder_params_t *api_params)
{
    // Bitrate requested from the encoder (10000 was used for earlier tests).
    const int32_t requested_bit_rate = 1000000;

    set_parameter_remove_previous(p_enc_ctx, p_origin_frame, p_to_enc_frame);

    // Scratch frame that only carries the aux data.
    ni_session_data_io_t scratch{};
    ni_frame_t *tmp_frame = &scratch.data.frame;

    ni_aux_data_t *bitrate_aux = ni_frame_new_aux_data(
        tmp_frame, NI_FRAME_AUX_DATA_BITRATE, sizeof(int32_t));
    if (bitrate_aux == nullptr)
    {
        ni_log(NI_LOG_ERROR,
               "Error %s(): no mem for reconf BR aux_data\n",
               __func__);
        return -1;
    }
    *((int32_t *)bitrate_aux->data) = requested_bit_rate;

    // The frame rate could be changed here as well, through the API:
    //     ni_framerate_t framerate;
    //     framerate.framerate_num = 60;
    //     framerate.framerate_denom = 1;
    //     ni_reconfig_framerate(p_enc_ctx, &framerate);

    set_the_aux_data_to_encoder(p_enc_ctx, p_origin_frame, p_to_enc_frame, tmp_frame, api_params);

    // The aux data has been copied; release the scratch copy.
    ni_frame_wipe_aux_data(tmp_frame);

    return 0;
}

/*!****************************************************************************
 *  \brief  Placeholder hook for processing a downloaded frame
 *
 *  Touches the data pointer and length of planes 0..2 (Y, U, V in the
 *  original comments) without using them; replace the loop body with real
 *  processing.
 *
 *  \param [in] frame  downloaded frame
 *
 *  \return  always 0
 ******************************************************************************/
static int do_something_with_the_downloaded_frame(ni_frame_t *frame)
{
    // plane 0 = Y, plane 1 = U, plane 2 = V
    for (int plane = 0; plane < 3; ++plane)
    {
        (void)frame->p_data[plane];
        (void)frame->data_len[plane];
    }

    return 0;
}

#endif
/*!****************************************************************************
 *  \brief  Hand a p2p hardware frame back to Quadra
 *
 *  \param [in] p2p_frame - hw frame to recycle
 *
 *  \return  NI_RETCODE_SUCCESS, or the error code reported by
 *           ni_hwframe_p2p_buffer_recycle() (also reported on stderr)
 *******************************************************************************/
int recycle_frame(ni_frame_t *p2p_frame)
{
    const ni_retcode_t status = ni_hwframe_p2p_buffer_recycle(p2p_frame);

    if (status == NI_RETCODE_SUCCESS)
    {
        return status;
    }

    fprintf(stderr, "Recycle failed\n");
    return status;
}

/*!****************************************************************************
 * \brief   Import a dma buf to a Quadra device
 *
 * \param  [in]  p_session  - upload session to the Quadra device; supplies
 *                            the PCI domain/bus/dev/fn and the netint fd
 *         [in]  dma_buf_fd - dma buf file descriptor exported by the GPU
 *         [in]  frame_size - frame size in bytes
 *         [out] dma_addr   - receives the DMA address/length entries reported
 *                            by the driver (only written on success)
 *
 * \return Returns the ioctl() result: 0 on success, non-zero otherwise
 ******************************************************************************/
static int import_dma_buf(
    ni_session_context_t *p_session,
    int dma_buf_fd,
    unsigned long frame_size,
    ni_p2p_sgl_t *dma_addr)
{
    // Zero the request so the kernel never sees uninitialised stack bytes in
    // the fields that are not assigned below.
    struct netint_iocmd_import_dmabuf uimp = {};
    int ret, i;

    uimp.fd = dma_buf_fd;
    uimp.flags = 0; // import
    uimp.domain = p_session->domain;
    uimp.bus = p_session->bus;
    uimp.dev = p_session->dev;
    uimp.fn = p_session->fn;

    // Pass frame size to kernel driver. Only necessary if the kernel
    // driver has been specially compiled for customer A1. Otherwise,
    // this can be skipped.
    uimp.dma_len[0] = frame_size;

    ret = ioctl(p_session->netint_fd, NETINT_IOCTL_IMPORT_DMABUF, &uimp);

    if (ret == 0)
    {
        // NOTE(review): uimp.nents is trusted here; confirm it cannot exceed
        // the capacity of the arrays in ni_p2p_sgl_t.
        for (i = 0; i < uimp.nents; i++)
        {
            dma_addr->ui32DMALen[i] = uimp.dma_len[i];
            dma_addr->ui64DMAAddr[i] = uimp.dma_addr[i];
        }
        dma_addr->ui32NumEntries = uimp.nents;
    }

    return ret;
}

/*!****************************************************************************
 * \brief   Unimport a dma buf from a Quadra device
 *
 * \param  [in] p_session  - upload session to the Quadra device; supplies
 *                           the PCI domain/bus/dev/fn and the netint fd
 *         [in] dma_buf_fd - dma buf file descriptor that was imported
 *
 * \return Returns the ioctl() result: 0 on success, non-zero otherwise
 ******************************************************************************/
static int unimport_dma_buf(
    ni_session_context_t *p_session,
    int dma_buf_fd)
{
    // Zero the request so the kernel never sees uninitialised stack bytes in
    // the fields that are not assigned below.
    struct netint_iocmd_import_dmabuf uimp = {};

    uimp.fd = dma_buf_fd;
    uimp.flags = 1; // unimport
    uimp.domain = p_session->domain;
    uimp.bus = p_session->bus;
    uimp.dev = p_session->dev; // was terminated by ',' (comma operator typo)
    uimp.fn = p_session->fn;

    return ioctl(p_session->netint_fd, NETINT_IOCTL_IMPORT_DMABUF, &uimp);
}

/*!****************************************************************************
 *  \brief  Prepare a p2p frame on the encoding Quadra device
 *
 *  Resets the stream flags of the frame, allocates its hardware frame buffer
 *  and (on non-Windows builds) acquires a hw frame from the upload session.
 *  The frame buffer is released again if any step fails.
 *
 *  \param [in] p_upl_ctx           pointer to caller allocated upload
 *                                  session context
 *         [in] input_video_width   video width
 *         [in] input_video_height  video height
 *         [out] p2p_frame          p2p frame
 *
 *  \return  0  on success
 *          -1  on error
 ******************************************************************************/
int enc_prepare_frame(ni_session_context_t *p_upl_ctx, int input_video_width,
                      int input_video_height, ni_frame_t *p2p_frame)
{
    p2p_frame->extra_data_len = 0;
    p2p_frame->force_key_frame = 0;
    p2p_frame->end_of_stream = 0;
    p2p_frame->start_of_stream = 0;

    // Allocate a hardware ni_frame structure for the encoder
    const bool buffer_ok =
        ni_frame_buffer_alloc_hwenc(p2p_frame, input_video_width,
                                    input_video_height,
                                    (int)p2p_frame->extra_data_len) ==
        NI_RETCODE_SUCCESS;
    if (!buffer_ok)
    {
        fprintf(stderr, "Error: could not allocate hw frame buffer!\n");
        ni_frame_buffer_free(p2p_frame);
        return -1;
    }

#ifndef _WIN32
    if (ni_device_session_acquire_for_read(p_upl_ctx, p2p_frame) != 0)
    {
        fprintf(stderr, "Error: failed ni_device_session_acquire()\n");
        ni_frame_buffer_free(p2p_frame);
        return -1;
    }
#endif

    return 0;
}

/*!****************************************************************************
 *  \brief  Send the Quadra encoder a hardware frame which triggers
 *          Quadra to encode the frame
 *
 *  The first frame ever sent through this function (tracked by a
 *  function-local static) is marked start-of-stream. When need_to_resend is
 *  set on entry the frame flags are left exactly as they were and the frame
 *  is simply written again.
 *
 *  \param  [in] p_enc_ctx              pointer to encoder context
 *          [in] p_in_frame             pointer to hw frame
 *          [in] input_exhausted        flag indicating this is the last frame
 *          [in/out] need_to_resend     flag indicating need to re-send
 *          [in/out] enc_eos_sent       set to 1 once the write succeeded and
 *                                      the context is ready to close; when
 *                                      already 1 nothing is sent
 *
 *  \return  0 on success
 *          -1 on failure
 ******************************************************************************/
int encoder_encode_frame(ni_session_context_t *p_enc_ctx,
                         ni_frame_t *p_in_frame, int input_exhausted,
                         int &need_to_resend, int &enc_eos_sent)
{
    static int started = 0;
    ni_session_data_io_t in_data;

    ni_log2(p_enc_ctx, NI_LOG_DEBUG, "===> encoder_encode_frame <===\n");

    if (enc_eos_sent == 1)
    {
        ni_log2(p_enc_ctx, NI_LOG_DEBUG, "encoder_encode_frame: ALL data (incl. eos) sent "
                                         "already!\n");
        return 0;
    }

    // A re-send keeps the flags decided on the first attempt.
    if (!need_to_resend)
    {
        // Only the very first frame carries start-of-stream.
        if (started)
        {
            p_in_frame->start_of_stream = 0;
        }
        else
        {
            started = 1;
            p_in_frame->start_of_stream = 1;
        }

        // The last frame carries end-of-stream.
        if (input_exhausted)
        {
            p_in_frame->end_of_stream = 1;
        }
        else
        {
            p_in_frame->end_of_stream = 0;
        }
        p_in_frame->force_key_frame = 0;
    }

    in_data.data.frame = *p_in_frame;
    const int sent =
        ni_device_session_write(p_enc_ctx, &in_data, NI_DEVICE_TYPE_ENCODER);

    if (sent < 0)
    {
        fprintf(stderr,
                "Error: failed ni_device_session_write() for encoder\n");
        need_to_resend = 1;
        return -1;
    }

    if (sent == 0 && !p_enc_ctx->ready_to_close)
    {
        // Nothing was accepted: the caller has to call again with this frame.
        need_to_resend = 1;
        ni_log2(p_enc_ctx, NI_LOG_DEBUG, "NEEDED TO RESEND");
        return 0;
    }

    need_to_resend = 0;

    ni_log2(p_enc_ctx, NI_LOG_DEBUG, "encoder_encode_frame: total sent data size=%u\n",
            p_in_frame->data_len[3]);

    ni_log2(p_enc_ctx, NI_LOG_DEBUG, "encoder_encode_frame: success\n");

    if (p_enc_ctx->ready_to_close)
    {
        enc_eos_sent = 1;
    }

    return 0;
}

/*!****************************************************************************
 *  \brief  Receive output packet data from the Quadra encoder and append the
 *          payload (everything after the session's meta_size bytes) to a file
 *
 *  The very first successful read through this function (tracked by a
 *  function-local static) is a stream header read; later calls read encoded
 *  packets.
 *
 *  \param  [in] p_enc_ctx              pointer to encoder session context
 *          [in] p_out_data             pointer to output data session; its
 *                                      packet buffer is (re)allocated here
 *          [in] p_file                 pointer to file to write the packet
 *
 *  \return 0 - success got packet (also returned when the encoder session is
 *              not opened yet)
 *          1 - received eos
 *          2 - got nothing, need retry
 *         -1 - failure
 ******************************************************************************/
int encoder_receive_data(ni_session_context_t *p_enc_ctx,
                         ni_session_data_io_t *p_out_data, FILE *p_file)
{
    int packet_size = NI_MAX_TX_SZ;
    int rc = 0;
    int end_flag = 0;
    int rx_size = 0;
    int meta_size = p_enc_ctx->meta_size;
    ni_packet_t *p_out_pkt = &(p_out_data->data.packet);
    // Shared by all callers of this function: set once the header was read.
    static int received_stream_header = 0;

    ni_log2(p_enc_ctx, NI_LOG_DEBUG, "===> encoder_receive_data <===\n");

    if (NI_INVALID_SESSION_ID == p_enc_ctx->session_id ||
        NI_INVALID_DEVICE_HANDLE == p_enc_ctx->blk_io_handle)
    {
        ni_log2(p_enc_ctx, NI_LOG_DEBUG, "encode session not opened yet, return\n");
        return 0;
    }

    if (p_file == NULL)
    {
        ni_log2(p_enc_ctx, NI_LOG_ERROR, "Bad file pointer, return\n");
        return -1;
    }

    // NOTE(review): the packet buffer is allocated on every call and not
    // released in this function; presumably the caller frees it - confirm.
    rc = ni_packet_buffer_alloc(p_out_pkt, packet_size);
    if (rc != NI_RETCODE_SUCCESS)
    {
        fprintf(stderr, "Error: malloc packet failed, ret = %d!\n", rc);
        return -1;
    }

    /*
     * The first data read from the encoder session context
     * is a stream header read.
     */
    if (!received_stream_header)
    {
        /* Read the encoded stream header */
        rc = ni_encoder_session_read_stream_header(p_enc_ctx, p_out_data);

        if (rc > 0)
        {
            /* Write out the stream header */
            // A failed write is only reported; the header still counts as
            // received.
            if (fwrite((uint8_t *)p_out_pkt->p_data + meta_size,
                       p_out_pkt->data_len - meta_size, 1, p_file) != 1)
            {
                fprintf(stderr, "Error: writing data %u bytes error!\n",
                        p_out_pkt->data_len - meta_size);
                fprintf(stderr, "Error: ferror rc = %d\n", ferror(p_file));
            }
            received_stream_header = 1;
        }
        else if (rc != 0)
        {
            fprintf(stderr, "Error: reading header %d\n", rc);
            return -1;
        }

        /* This shouldn't happen */
        if (p_out_pkt->end_of_stream)
        {
            return 1;
        }
        else if (rc == 0)
        {
            // No header available yet: ask the caller to retry.
            return 2;
        }
        // rc > 0: header written, fall through and read the first packet.
    }

receive_data:
    rc = ni_device_session_read(p_enc_ctx, p_out_data, NI_DEVICE_TYPE_ENCODER);

    end_flag = p_out_pkt->end_of_stream;
    rx_size = rc;

    ni_log2(p_enc_ctx, NI_LOG_DEBUG, "encoder_receive_data: received data size=%d\n", rx_size);

    if (rx_size > meta_size)
    {
        // Payload present: write everything after the metadata block.
        if (fwrite((uint8_t *)p_out_pkt->p_data + meta_size,
                   p_out_pkt->data_len - meta_size, 1, p_file) != 1)
        {
            fprintf(stderr, "Error: writing data %u bytes error!\n",
                    p_out_pkt->data_len - meta_size);
            fprintf(stderr, "Error: ferror rc = %d\n", ferror(p_file));
        }
    }
    else if (rx_size != 0)
    {
        // Non-zero but not larger than the metadata (this includes negative
        // return codes): treated as an error.
        fprintf(stderr, "Error: received %d bytes, <= metadata size %d!\n",
                rx_size, meta_size);
        return -1;
    }
    else if (!end_flag &&
             (((ni_xcoder_params_t *)(p_enc_ctx->p_session_config))
                  ->low_delay_mode))
    {
        // Low delay mode: loop (without sleeping) until a packet or eos
        // arrives.
        ni_log2(p_enc_ctx, NI_LOG_DEBUG, "low delay mode and NO pkt, keep reading...\n");
        goto receive_data;
    }

    if (end_flag)
    {
        printf("Encoder Receiving done\n");
        return 1;
    }
    else if (0 == rx_size)
    {
        return 2;
    }

    ni_log2(p_enc_ctx, NI_LOG_DEBUG, "encoder_receive_data: success\n");

    return 0;
}

/*!****************************************************************************
 *  \brief  Open a hardware-frame encoder session to Quadra
 *
 *  \param  [out] p_enc_ctx         pointer to an encoder session context
 *          [in]  dst_codec_format  AVC or HEVC
 *          [in]  iXcoderGUID       id to identify the Quadra device
 *          [in]  p_enc_params      sets the encoder parameters; hwframes and
 *                                  p_first_frame are overwritten here
 *          [in]  width             width of frames to encode
 *          [in]  height            height of frames to encode
 *          [in]  p_frame           first frame handed to the encoder
 *          [in]  pix_fmt           pixel format stored in the session context
 *
 *  \return the result of ni_device_session_open(): NI_RETCODE_SUCCESS if
 *          successful, an error code otherwise
 ******************************************************************************/
int encoder_open_session(ni_session_context_t *p_enc_ctx, int dst_codec_format,
                         int iXcoderGUID, ni_xcoder_params_t *p_enc_params,
                         int width, int height, ni_frame_t *p_frame, ni_pix_fmt_t pix_fmt)
{
    // Encoder parameters: encode hardware frames, starting with p_frame.
    p_enc_params->hwframes = 1;
    p_enc_params->p_first_frame = p_frame;

    // Session context: clean handles, target device, codec and pixel format.
    p_enc_ctx->session_id = NI_INVALID_SESSION_ID;
    p_enc_ctx->device_handle = NI_INVALID_DEVICE_HANDLE;
    p_enc_ctx->blk_io_handle = NI_INVALID_DEVICE_HANDLE;
    p_enc_ctx->hw_id = iXcoderGUID;
    p_enc_ctx->hw_action = NI_CODEC_HW_ENABLE;
    p_enc_ctx->codec_format = dst_codec_format;
    p_enc_ctx->pixel_format = pix_fmt;
    p_enc_ctx->p_session_config = p_enc_params;

    ni_encoder_set_input_frame_format(p_enc_ctx, p_enc_params, width, height, 8,
                                      NI_FRAME_LITTLE_ENDIAN, 1);

    // Encoder will operate in P2P mode
    const int open_result = ni_device_session_open(p_enc_ctx, NI_DEVICE_TYPE_ENCODER);
    if (open_result == NI_RETCODE_SUCCESS)
    {
        printf("Encoder device %d session open successful\n", iXcoderGUID);
    }
    else
    {
        fprintf(stderr, "Error: encoder open session failure\n");
    }

    return open_result;
}

/*!****************************************************************************
 *  \brief  Open an upload session to Quadra and create its frame pool
 *
 *  \param  [out]    p_upl_ctx   pointer to an upload context of the open
 *                               session
 *          [in/out] iXcoderGUID pointer to Quadra card hw id; updated with the
 *                               hw_id of the context after the session opened
 *          [in]     width       width of the frames
 *          [in]     height      height of the frames
 *          [in]     p2p         p2p session
 *          [in]     pix_fmt     pixel format of the uploaded frames
 *
 *  \return 0 if successful, < 0 otherwise. If the frame pool cannot be
 *          created the session is closed again.
 ******************************************************************************/
int uploader_open_session(ni_session_context_t *p_upl_ctx, int *iXcoderGUID,
                          int width, int height, int p2p, ni_pix_fmt_t pix_fmt)
{
    // Start from clean handles and select the requested device.
    p_upl_ctx->session_id = NI_INVALID_SESSION_ID;
    p_upl_ctx->device_handle = NI_INVALID_DEVICE_HANDLE;
    p_upl_ctx->blk_io_handle = NI_INVALID_DEVICE_HANDLE;
    p_upl_ctx->hw_id = *iXcoderGUID;

    // Set the input frame format of the upload session
    ni_uploader_set_frame_format(p_upl_ctx, width, height, pix_fmt, 1);

    const int open_result = ni_device_session_open(p_upl_ctx, NI_DEVICE_TYPE_UPLOAD);
    if (open_result != NI_RETCODE_SUCCESS)
    {
        fprintf(stderr, "Error: uploader_open_session failure!\n");
        return open_result;
    }

    printf("Uploader device %d session opened successfully\n",
           *iXcoderGUID);
    // Report the device id stored in the context back to the caller.
    *iXcoderGUID = p_upl_ctx->hw_id;

    // Create a P2P frame pool for the uploader session of pool size 1
    const int pool_result = ni_device_session_init_framepool(p_upl_ctx, 1, p2p);
    if (pool_result >= 0)
    {
        printf("Uploader device %d configured successfully\n", *iXcoderGUID);
        return pool_result;
    }

    fprintf(stderr, "Error: Can't create frame pool\n");
    ni_device_session_close(p_upl_ctx, 1, NI_DEVICE_TYPE_UPLOAD);
    return pool_result;
}

/*!****************************************************************************
 *  \brief  Release the GL / EGL objects used for rendering into the dma buf
 *
 *  Each object is only released when it is valid, so the function can also be
 *  called from a partially initialised state (mirrors the guarded clean-up in
 *  release_gbm_resource()).
 *
 *  \param [in] fbo                 framebuffer object name (0 = none)
 *         [in] texture             texture name (0 = none)
 *         [in] egl_display         EGL display; it is terminated here
 *         [in] egl_image           EGL image, or EGL_NO_IMAGE_KHR
 *         [in] egl_context         EGL context, or EGL_NO_CONTEXT
 *         [in] sync                EGL sync object, or EGL_NO_SYNC_KHR
 *         [in] eglDestroyImageKHR  extension entry point, may be null
 *         [in] eglDestroySyncKHR   extension entry point, may be null
 ******************************************************************************/
static void release_egl_resource(GLuint fbo, GLuint texture, 
                                 EGLDisplay egl_display, EGLImageKHR egl_image, EGLContext egl_context, EGLSyncKHR sync,
                                 PFNEGLDESTROYIMAGEKHRPROC eglDestroyImageKHR, PFNEGLDESTROYSYNCKHRPROC eglDestroySyncKHR)
{
    // GL objects first, while the context still exists.
    if (fbo != 0)
    {
        glDeleteFramebuffers(1, &fbo);
    }
    if (texture != 0)
    {
        glDeleteTextures(1, &texture);
    }

    if (egl_display == EGL_NO_DISPLAY)
    {
        // Nothing EGL-side can be released without a display.
        return;
    }

    // Extension entry points may be null if they were never resolved.
    if (eglDestroyImageKHR != nullptr && egl_image != EGL_NO_IMAGE_KHR)
    {
        eglDestroyImageKHR(egl_display, egl_image);
    }
    if (eglDestroySyncKHR != nullptr && sync != EGL_NO_SYNC_KHR)
    {
        eglDestroySyncKHR(egl_display, sync);
    }
    if (egl_context != EGL_NO_CONTEXT)
    {
        eglDestroyContext(egl_display, egl_context);
    }
    eglTerminate(egl_display);
}

/*!****************************************************************************
 *  \brief  Release the GBM objects and file descriptors of the render target
 *
 *  Every argument is optional: negative file descriptors and null pointers
 *  are skipped.
 *
 *  \param [in] drm_fd          DRM device fd, or a negative value if not open
 *         [in] gbm             GBM device, or null
 *         [in] bo              GBM buffer object, or null
 *         [in] dma_buf_fd_gbm  dma buf fd exported from bo, or negative
 ******************************************************************************/
static void release_gbm_resource(int drm_fd, gbm_device *gbm, gbm_bo *bo, int dma_buf_fd_gbm)
{
    // Release in reverse order of creation: exported fd, bo, device, DRM fd.
    if (dma_buf_fd_gbm >= 0)
    {
        close(dma_buf_fd_gbm);
    }
    if (bo)
    {
        gbm_bo_destroy(bo);
    }
    if (gbm)
    {
        gbm_device_destroy(gbm);
    }
    // Fix: the test used to be `if (drm_fd)`, which treated the valid
    // descriptor 0 as "not open" and the failure value -1 as open.
    if (drm_fd >= 0)
    {
        close(drm_fd);
    }
}

// Render a test pattern into the currently bound framebuffer (used only to
// check the pipeline output).
//
// The frame is split into three horizontal bands, each cleared to one colour
// of a three-entry palette. Every UPDATE_DRAW_NUMBER calls the palette is
// rotated by one position so the bands visibly change over time. The function
// returns only after glFinish(), eglWaitGL() and eglWaitNative() have been
// called.
//
// w, h   size in pixels of the render target
static void draw(const int w, const int h)
{
#define UPDATE_DRAW_NUMBER 30
    static int count = 0;
    static std::vector<std::vector<double>> color{
        {1.0, 0.7, 0.2, 1.0},
        {0.2, 1.0, 0.7, 1.0},
        {0.7, 0.2, 1.0, 1.0},
    }; // RGBA

    // Rotate the palette on every UPDATE_DRAW_NUMBER-th call. The counter is
    // wrapped instead of growing forever so it can never overflow.
    if (++count == UPDATE_DRAW_NUMBER)
    {
        count = 0;

        auto back = color[color.size() - 1];
        for (int i = color.size() - 1; i > 0; --i)
        {
            color[i] = color[i - 1];
        }
        color[0] = back;
    }

#if 1
    glEnable(GL_SCISSOR_TEST);

    // the viewport is the same for every band
    glViewport(0, 0, w, h);

    for (int i = 0; i < 3; ++i)
    {
        const int band_y = i * h / 3;
        // The last band takes all remaining rows so that rounding in h / 3
        // never leaves a gap. (This used to be derived from the width, which
        // yields a wrong - possibly negative - height.)
        const int band_h = (i + 1 == 3) ? h - band_y : h / 3;

        glScissor(0, band_y, w, band_h);
        // do render
        glClearColor(color[i][0], color[i][1], color[i][2], color[i][3]);
        glClear(GL_COLOR_BUFFER_BIT);
    }

    glDisable(GL_SCISSOR_TEST);
#else

    glViewport(0, 0, w, h);
    glClearColor(color[0][0], color[0][1], color[0][2], color[0][3]);
    glClear(GL_COLOR_BUFFER_BIT);

#endif

    // glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
    glFinish();

    // wait for all OpenGL commands to complete.
    eglWaitGL();
    eglWaitNative(EGL_CORE_NATIVE_ENGINE);

    return;
}

#if __cplusplus > 201703L
// Debug helper: dump a w x h RGBA buffer to `file` as decimal text.
//
// Each byte is written as "%d " and a newline is emitted before every group
// of 16 bytes (four RGBA pixels), including the very first one.
//
// file   destination stream; must be open for writing
// w, h   image size in pixels; both must be positive
// addr   start of the pixel data, at least w * h * 4 bytes
//
// If any argument is invalid an error is printed to stderr and nothing is
// written. Bytes are read through plain `char`, so values above 127 print as
// negative numbers on platforms where `char` is signed.
[[maybe_unused]] static void write_RGBA(FILE *file, const int w, const int h, void *addr)
{
    if (w <= 0 || h <= 0 || addr == nullptr || file == nullptr)
    {
        fprintf(stderr, "Failed to write_RGBA\n");
        // bail out: continuing would dereference the invalid arguments
        return;
    }

    const char *bytes = static_cast<const char *>(addr);
    // computed in 64 bits so very large frames cannot overflow int
    const long long byte_count = 4LL * w * h;

    for (long long i = 0; i < byte_count; ++i)
    {
        if (i % (4 * 4) == 0)
        {
            fprintf(file, "\n");
        }
        fprintf(file, "%d ", (int)(bytes[i]));
    }
}

// Begin a CPU access window on a DMA-BUF by issuing DMA_BUF_IOCTL_SYNC with
// DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW. Pair with unlock_fd().
//
// dma_buf_fd   DMA-BUF file descriptor
//
// Returns the ioctl() result: negative on failure, in which case a warning
// with the errno text is printed to stderr.
[[maybe_unused]] static int try_to_lock_fd(int dma_buf_fd)
{
    int ret = 0;
    // START opens the access window; the previous code sent END here, which
    // made this function identical to unlock_fd().
    struct dma_buf_sync sync_start = {
        .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW,
    };

    ret = ioctl(dma_buf_fd, DMA_BUF_IOCTL_SYNC, &sync_start);

    if (ret < 0)
    {
        // no trailing newline: perror() appends ": <errno text>\n" itself
        perror("Warning: failed to lock fd. Operate: DMA_BUF_IOCTL_SYNC");
    }

    return ret;
}

// End a CPU access window on a DMA-BUF by issuing DMA_BUF_IOCTL_SYNC with
// DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW.
//
// dma_buf_fd   DMA-BUF file descriptor
//
// Returns the ioctl() result: negative on failure, in which case a warning
// with the errno text is printed to stderr.
[[maybe_unused]] static int unlock_fd(int dma_buf_fd)
{
    int ret = 0;
    struct dma_buf_sync sync_end = {
        .flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW,
    };

    ret = ioctl(dma_buf_fd, DMA_BUF_IOCTL_SYNC, &sync_end);

    if (ret < 0)
    {
        // no trailing newline: perror() appends ": <errno text>\n" itself
        perror("Warning: failed to unlock fd. Operate: DMA_BUF_IOCTL_SYNC");
    }

    return ret;
}
#endif

static void checkEGLError(const char *msg)
{
    EGLint error = eglGetError();
    if (error != EGL_SUCCESS)
    {
        std::cerr << msg << ": EGL error 0x" << std::hex << error << std::endl;
        exit(1);
    }
}

// Demo entry point.
//
// Renders a test pattern with OpenGL ES into a linear GBM buffer object,
// imports that buffer's DMA-BUF into a Quadra uploader session, P2P-reads
// every rendered frame onto the device and feeds it to an encoder session.
// Encoded packets are handed to encoder_receive_data() together with the
// output file.
//
// Command line options (order does not matter):
//   --drm_device=PATH         DRM device node to open (required)
//   --output=PATH             output file (default p2p_read_gbm_egl_test.h265)
//   --alloc_strategy=VALUE    value for the encMemAllocateStrategy encoder
//                             parameter (default "0")
//   --do_scale_and_download   additionally scale every frame to half size,
//                             download it and use the resulting frame as
//                             encoder input
//
// Returns -1 when argument handling, opening the output file or the GBM/EGL
// set-up fails. Once the Quadra part has started, the function always ends
// with exit status 0 after cleanup, whatever happened in between.
int main(int argc, const char *argv[])
{
    const int w = 1920;
    const int h = 1080;

    const int input_video_width = w;
    const int input_video_height = h;

    const int arg_width = w;
    const int arg_height = h;

    const ni_pix_fmt_t pix_fmt = NI_PIX_FMT_RGBA;

    const int dst_codec_format = NI_CODEC_FORMAT_H265;

    int frame_size_just_for_A1 = 0;

    int draw_count = 0;
    const int all_draw_count = 500;

    bool do_scale_and_download = false;

    FILE *p_file = NULL;

    // All arguments except the program name; guarded so that argc == 0 does
    // not produce an invalid iterator range.
    std::unordered_set<std::string> parameters(argv + (argc > 0 ? 1 : 0), argv + argc);

    // Returns the text after `prefix` of the first parameter that starts with
    // `prefix` and has a non-empty value, or `fallback` when there is none.
    const auto option_value = [&parameters](const std::string &prefix, const std::string &fallback)
    {
        for (const std::string &parameter : parameters)
        {
            if (parameter.size() > prefix.size() &&
                parameter.compare(0, prefix.size(), prefix) == 0)
            {
                return parameter.substr(prefix.size());
            }
        }
        return fallback;
    };

    const std::string do_scale_and_download_parameter = "--do_scale_and_download";
    do_scale_and_download = (parameters.find(do_scale_and_download_parameter) != parameters.end());

    if(do_scale_and_download)
    {
        std::cout << "Do scale and download" << std::endl;
    }

    const std::string drm_driver_name = option_value("--drm_device=", "");
    if (drm_driver_name == "")
    {
        std::cerr << "drm device is not specified. "
                     "Please use the option --drm_device=/absolute/path/to/drm/device to set the device path.\n";
        return -1;
    }

    const std::string output_file_name = option_value("--output=", "p2p_read_gbm_egl_test.h265");
    if (output_file_name == "")
    {
        std::cerr << "output file name is empty. "
                     "Please use the option --output=path/to/output to set the output path.\n";
        return -1;
    }

    const std::string enc_mem_allocate_strategy = option_value("--alloc_strategy=", "0");
    if (enc_mem_allocate_strategy == "")
    {
        std::cerr << "alloc strategy is empty. "
                     "Please use the option --alloc_strategy=value to set the encoder memory allocation strategy.\n";
        return -1;
    }

    // Size in bytes of one frame in the shared buffer
    if (pix_fmt == NI_PIX_FMT_NV12)
    {
        frame_size_just_for_A1 = w * h * 3 / 2;
    }
    else
    {
        // RGBA
        frame_size_just_for_A1 = w * h * 4;
    }

    p_file = fopen(output_file_name.c_str(), "w+");
    if (p_file == NULL)
    {
        // fail early instead of encoding with nowhere to put the output
        std::cerr << "Failed to open output file: " << output_file_name << "\n";
        return -1;
    }

    // open DRM device
    int drm_fd = open(drm_driver_name.c_str(), O_RDWR | O_CLOEXEC);
    if (drm_fd < 0)
    {
        std::cerr << "Failed to open DRM device: " << drm_driver_name << "\n";
        return -1;
    }

    // create GBM device
    gbm_device *gbm = gbm_create_device(drm_fd);
    if (!gbm)
    {
        std::cerr << "Failed to create GBM device\n";
        close(drm_fd);
        return -1;
    }

#if 1
    // create GBM buffer object
    gbm_bo *bo = gbm_bo_create(gbm, w, h, GBM_FORMAT_ARGB8888,
                               GBM_BO_USE_RENDERING | GBM_BO_USE_LINEAR);
    if (!bo)
    {
        std::cerr << "Failed to create GBM buffer object\n";
        gbm_device_destroy(gbm);
        close(drm_fd);
        return -1;
    }

    // get DMA-BUF fd
    int dma_buf_fd = gbm_bo_get_fd(bo);
    if (dma_buf_fd < 0)
    {
        std::cerr << "Failed to get DMA-BUF file descriptor\n";
        gbm_bo_destroy(bo);
        gbm_device_destroy(gbm);
        close(drm_fd);
        return -1;
    }
#else
    // todo: find a way to get fixed dma-buf fd
#endif

    std::cout << "DMA-BUF file descriptor: " << dma_buf_fd << std::endl;

    EGLDisplay egl_display = eglGetDisplay((EGLNativeDisplayType)gbm);
    if (egl_display == EGL_NO_DISPLAY)
    {
        std::cerr << "Failed to get EGL display\n";
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    // Note: checkEGLError() exits the process when an EGL error is pending,
    // so the cleanup that follows it only runs when no error code was set.
    if (!eglInitialize(egl_display, nullptr, nullptr))
    {
        std::cerr << "Failed to initialize EGL\n";
        checkEGLError("eglInitialize");
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    // get EGL configure
    EGLint num_configs;
    EGLConfig egl_config;
    EGLint egl_config_attribs[] = {
        EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
        EGL_RED_SIZE, 8,
        EGL_GREEN_SIZE, 8,
        EGL_BLUE_SIZE, 8,
        EGL_ALPHA_SIZE, 8,
        EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT,
        EGL_NONE};

    if (!eglChooseConfig(egl_display, egl_config_attribs, &egl_config, 1, &num_configs) || num_configs == 0)
    {
        std::cerr << "Failed to choose EGL config\n";
        checkEGLError("eglChooseConfig");
        eglTerminate(egl_display);
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    // create EGL context
    EGLint egl_context_attribs[] = {
        EGL_CONTEXT_CLIENT_VERSION, 3,
        EGL_NONE};
    EGLContext egl_context = eglCreateContext(egl_display, egl_config, EGL_NO_CONTEXT, egl_context_attribs);
    if (egl_context == EGL_NO_CONTEXT)
    {
        std::cerr << "Failed to create EGL context\n";
        checkEGLError("eglCreateContext");
        eglTerminate(egl_display);
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    // surfaceless: rendering goes to an FBO backed by the GBM buffer
    if (!(eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, egl_context) == EGL_TRUE))
    {
        std::cerr << "Failed to make the EGLContext current.\n";
        eglTerminate(egl_display);
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    // create EGLImage
    EGLint image_attrs[] = {
        EGL_WIDTH, w,
        EGL_HEIGHT, h,
        EGL_LINUX_DRM_FOURCC_EXT, GBM_FORMAT_ARGB8888,
        EGL_DMA_BUF_PLANE0_FD_EXT, dma_buf_fd,
        EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0,
        EGL_DMA_BUF_PLANE0_PITCH_EXT, (int)gbm_bo_get_stride(bo),
        EGL_NONE};

    PFNEGLCREATEIMAGEKHRPROC eglCreateImageKHR = (PFNEGLCREATEIMAGEKHRPROC)eglGetProcAddress("eglCreateImageKHR");
    PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glEGLImageTargetTexture2DOES = (PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)eglGetProcAddress("glEGLImageTargetTexture2DOES");
    PFNEGLDESTROYIMAGEKHRPROC eglDestroyImageKHR = (PFNEGLDESTROYIMAGEKHRPROC)eglGetProcAddress("eglDestroyImageKHR");

    PFNEGLCREATESYNCKHRPROC eglCreateSyncKHR = (PFNEGLCREATESYNCKHRPROC)eglGetProcAddress("eglCreateSyncKHR");
    PFNEGLDESTROYSYNCKHRPROC eglDestroySyncKHR = (PFNEGLDESTROYSYNCKHRPROC)eglGetProcAddress("eglDestroySyncKHR");
    // PFNEGLCLIENTWAITSYNCKHRPROC eglClientWaitSyncKHR = (PFNEGLCLIENTWAITSYNCKHRPROC)eglGetProcAddress("eglClientWaitSyncKHR");
    PFNEGLWAITSYNCKHRPROC eglWaitSyncKHR = (PFNEGLWAITSYNCKHRPROC)eglGetProcAddress("eglWaitSyncKHR");

    if (!eglCreateImageKHR || !glEGLImageTargetTexture2DOES || !eglDestroyImageKHR ||
        !eglCreateSyncKHR || !eglDestroySyncKHR || !eglWaitSyncKHR)
    {
        std::cerr << "Failed to load egl extension functions\n";
        eglDestroyContext(egl_display, egl_context);
        eglTerminate(egl_display);
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    // create the sync for refreshing the GPU memory
    // NOTE(review): the fence is created once, before any rendering, and then
    // waited on after every draw() - confirm that this provides the intended
    // synchronisation (draw() itself already calls glFinish()).
    EGLSyncKHR sync = eglCreateSyncKHR(egl_display, EGL_SYNC_FENCE_KHR, NULL);
    if (sync == EGL_NO_SYNC_KHR) {
        std::cerr << "Failed to create EGLSyncKHR\n";
        eglDestroyContext(egl_display, egl_context);
        eglTerminate(egl_display);
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    EGLImageKHR egl_image = eglCreateImageKHR(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, image_attrs);
    if (egl_image == EGL_NO_IMAGE_KHR)
    {
        std::cerr << "Failed to create EGLImage\n";
        checkEGLError("eglCreateImageKHR");
        eglDestroyContext(egl_display, egl_context);
        eglTerminate(egl_display);
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    // create an OpenGL texture
    GLuint texture;
    glGenTextures(1, &texture);
    glBindTexture(GL_TEXTURE_2D, texture);
    glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, egl_image);
    checkEGLError("glEGLImageTargetTexture2DOES");

    // create and bind a FBO
    GLuint fbo;
    glGenFramebuffers(1, &fbo);
    glBindFramebuffer(GL_FRAMEBUFFER, fbo);
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0);

    if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
    {
        std::cerr << "Framebuffer is not complete\n";
        checkEGLError("glCheckFramebufferStatus");
        release_egl_resource(fbo, texture, egl_display, egl_image, egl_context, sync, eglDestroyImageKHR, eglDestroySyncKHR);
        release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);
        return -1;
    }

    /*  GBM EGL init ok*/

    /* INIT QUADRA */

    // From here on every failure jumps to `end`. No variable may be declared
    // at function scope between the first `goto end` and the label.
    int iXcoderGUID = 0;
    int input_exhausted = 0;
    int ret = 0;

    ni_xcoder_params_t api_param{};
    ni_session_context_t enc_ctx{.device_handle = NI_INVALID_DEVICE_HANDLE, .blk_io_handle = NI_INVALID_DEVICE_HANDLE};
    ni_session_context_t upl_ctx{.device_handle = NI_INVALID_DEVICE_HANDLE, .blk_io_handle = NI_INVALID_DEVICE_HANDLE};

#if defined(NI_DO_SCALE_HWDOWNLOAD) && NI_DO_SCALE_HWDOWNLOAD == 1
    // NOTE(review): the scaler session and these frames are not released in
    // the cleanup section below - confirm whether that is handled elsewhere.
    ni_session_context_t scale_ctx{.device_handle = NI_INVALID_DEVICE_HANDLE, .blk_io_handle = NI_INVALID_DEVICE_HANDLE};
    ni_scaler_input_params_t scale_params{};
    ni_session_data_io_t scale_out_frame{};
#endif


    ni_frame_t p2p_frame{};
    ni_frame_t do_scale_and_download_enc_frame{};
    ni_session_data_io_t out_packet{};
    ni_p2p_sgl_t dma_addrs{};

    int send_fin_flag = 0;
    int receive_fin_flag = 0;
    int need_to_resend = 0;
    int enc_eos_sent = 0;


#define READ_TO_FLUSH_THE_GPU_DATA 0

#if defined(READ_TO_FLUSH_THE_GPU_DATA) && READ_TO_FLUSH_THE_GPU_DATA == 1
    bool read_gpu_data_to_flush = false;

    std::string read_gpu_data_parameter{"--read_gpu_flush"};
    read_gpu_data_to_flush = (parameters.find(read_gpu_data_parameter) != parameters.end());

    void *mmap_dma_buf_fd_addr = nullptr;
    std::vector<uint8_t> read_buffer(frame_size_just_for_A1);
#endif

    if (ni_device_session_context_init(&enc_ctx) < 0)
    {
        fprintf(stderr, "Error: init encoder context error\n");
        ret = -1;
        goto end;
    }

    if (ni_device_session_context_init(&upl_ctx) < 0)
    {
        fprintf(stderr, "Error: init uploader context error\n");
        ret = -1;
        goto end;
    }

    // Open a P2P upload session to the destination Quadra device that will
    // be doing the video encoding
    if (uploader_open_session(&upl_ctx, &iXcoderGUID, input_video_width, input_video_height, 0, pix_fmt))
    {
        ret = -1;
        goto end;
    }

#if defined(NI_DO_SCALE_HWDOWNLOAD) && NI_DO_SCALE_HWDOWNLOAD == 1
    if(do_scale_and_download)
    {
        // scale the full-size input down to half-size YUV420P
        scale_params.output_format = ni_fix_format_to_gc620.at(NI_PIX_FMT_YUV420P);
        scale_params.output_width = w / 2;
        scale_params.output_height = h / 2;
        scale_params.input_format = ni_fix_format_to_gc620.at(pix_fmt);
        scale_params.input_width = w;
        scale_params.input_height = h;
        init_2D_params(&scale_params, NI_SCALER_OPCODE_SCALE, 0, 0, 0, 0, 0, 0);

        if (scaler_session_open(&scale_ctx, iXcoderGUID, scale_params.op))
        {
            fprintf(stderr, "Error: open scaler session error\n");
            ret = -1;
            goto end;
        }

        if (0 != ni_scaler_frame_pool_alloc(&scale_ctx, scale_params))
        {
            fprintf(stderr, "Error: open scale alloc frame pool context error\n");
            ret = -1;
            goto end;
        }
    }

#endif


    ret = enc_prepare_frame(&upl_ctx, input_video_width, input_video_height,
                            &p2p_frame);

    if (ret < 0)
    {
        goto end;
    }

    // Configure the encoder parameter structure. We'll use some basic
    // defaults: 30 fps, 200000 bps CBR encoding, AVC or HEVC encoding
    if (ni_encoder_init_default_params(&api_param, 30, 1, 200000, arg_width,
                                       arg_height, (ni_codec_format_t)enc_ctx.codec_format) < 0)
    {
        fprintf(stderr, "Error: encoder init default set up error\n");
        ret = -1;
        goto end;
    }

    // For P2P demo, change some of the encoding parameters from
    // the default. Enable low delay encoding.
    if ((ret = ni_encoder_params_set_value(&api_param, "lowDelay", "1")) !=
        NI_RETCODE_SUCCESS)
    {
        fprintf(stderr, "Error: can't set low delay mode %d\n", ret);
        goto end;
    }

    // Use a GOP preset of 9 which represents a GOP pattern of
    // IPPPPPPP....This will be low latency encoding.
    if ((ret = ni_encoder_params_set_value(&api_param, "gopPresetIdx", "9")) !=
        NI_RETCODE_SUCCESS)
    {
        fprintf(stderr, "Error: can't set gop preset %d\n", ret);
        goto end;
    }

    if ((ret = ni_encoder_params_set_value(&api_param, "encMemAllocateStrategy", enc_mem_allocate_strategy.c_str())) !=
        NI_RETCODE_SUCCESS)
    {
        fprintf(stderr, "Error: can't set encMemAllocateStrategy %d\n", ret);
        goto end;
    }

    if (pix_fmt == NI_PIX_FMT_RGBA)
    {
        // Quadra encoder always generates full range YCbCr
        if (ni_encoder_params_set_value(&api_param, "videoFullRangeFlag", "1") !=
            NI_RETCODE_SUCCESS)
        {
            fprintf(stderr, "Error: can't set video full range\n");
            ret = -1;
            goto end;
        }

        // sRGB has the same color primaries as BT.709/IEC-61966-2-1
        if (ni_encoder_params_set_value(&api_param, "colorPri", "1") !=
            NI_RETCODE_SUCCESS)
        {
            fprintf(stderr, "Error: can't set color primaries\n");
            ret = -1;
            goto end;
        }

        // Quadra encoder converts to YUV420 using BT.709 matrix
        if (ni_encoder_params_set_value(&api_param, "colorSpc", "1") !=
            NI_RETCODE_SUCCESS)
        {
            fprintf(stderr, "Error: can't set color space\n");
            ret = -1;
            goto end;
        }

        // sRGB transfer characteristics is IEC-61966-2-1
        if (ni_encoder_params_set_value(&api_param, "colorTrc", "13") !=
            NI_RETCODE_SUCCESS)
        {
            fprintf(stderr, "Error: can't set color transfer characteristics\n");
            ret = -1;
            goto end;
        }
    }

    // Open the encoder session with given parameters
    ret = encoder_open_session(&enc_ctx, dst_codec_format, iXcoderGUID,
                               &api_param, arg_width, arg_height, &p2p_frame, pix_fmt);

    if (ret < 0)
    {
        fprintf(stderr, "Could not open encoder session\n");
        goto end;
    }

    /*
        If the address associated with this dma_buf_fd changes during each rendering process,
        we need to re-execute the unimport and import operations after each rendering is completed.
    */
    ret = import_dma_buf(&upl_ctx, dma_buf_fd, frame_size_just_for_A1, &dma_addrs);
    if (ret < 0)
    {
        fprintf(stderr, "Cannot import dma buffer %d\n", ret);
        ret = -1;
        goto end;
    }

    while (send_fin_flag == 0 || receive_fin_flag == 0)
    {
        // Render the next test pattern into the shared buffer, unless the
        // input is finished or the previous frame still has to be resent
        if (!input_exhausted && need_to_resend == 0)
        {
            draw(w, h);
            eglWaitSyncKHR(egl_display, sync, 0);
        }

#if defined(READ_TO_FLUSH_THE_GPU_DATA) && READ_TO_FLUSH_THE_GPU_DATA == 1
        if(read_gpu_data_to_flush)
        {
            mmap_dma_buf_fd_addr = mmap(NULL, frame_size_just_for_A1, PROT_READ, MAP_SHARED, dma_buf_fd, 0);
            if(mmap_dma_buf_fd_addr == MAP_FAILED)
            {
                std::cerr << "Failed to map buffer\n";
                // nothing is mapped, so cleanup must not munmap it
                mmap_dma_buf_fd_addr = nullptr;
                ret = -1;
                goto end;
            }

            memcpy(read_buffer.data(), mmap_dma_buf_fd_addr, frame_size_just_for_A1);

            munmap(mmap_dma_buf_fd_addr, frame_size_just_for_A1);
            // forget the mapping so cleanup does not unmap it a second time
            mmap_dma_buf_fd_addr = nullptr;
        }
#endif

        // Execute a P2P read into the frame
        if (ni_p2p_recv(&upl_ctx, &dma_addrs, &p2p_frame) != NI_RETCODE_SUCCESS)
        {
            fprintf(stderr, "Error: can't read frame\n");
            ret = 1;
            goto end;
        }

#if defined(NI_DO_SCALE_HWDOWNLOAD) && NI_DO_SCALE_HWDOWNLOAD == 1

        if(do_scale_and_download)
        {
            do_2D(&scale_ctx, &p2p_frame, &p2p_frame, &scale_out_frame, scale_params);

            ni_session_data_io_t hwdl_session_data{};

            ret = hwdl_frame(&scale_ctx, &hwdl_session_data, &scale_out_frame.data.frame,
                             ni_fix_format_to_gc620.at(NI_PIX_FMT_YUV420P));

            if (ret < 0)
            {
                std::cout << "download failed" << std::endl;
                ret = -1;
                goto end;
            }

            // the scaled hardware frame is no longer needed once downloaded
            ni_hwframe_buffer_recycle2((niFrameSurface1_t *)(scale_out_frame.data.frame.p_data[3]));

            ni_frame_t &downloaded_frame = hwdl_session_data.data.frame;

            {

                (void)do_something_with_the_downloaded_frame(&downloaded_frame);
                // get some information
                int need_to_set = true;
                if (need_to_set)
                {
                    set_parameter(&enc_ctx, &p2p_frame, &do_scale_and_download_enc_frame, &api_param);
                }
            }

            ///
            ni_frame_buffer_free(&hwdl_session_data.data.frame);

        }

        if(do_scale_and_download)
        {
            // Encode the frame
            send_fin_flag = encoder_encode_frame(&enc_ctx, &do_scale_and_download_enc_frame,
                                                 input_exhausted, need_to_resend, enc_eos_sent);

        }
        else
#endif
        {
            // Encode the frame
            send_fin_flag = encoder_encode_frame(&enc_ctx, &p2p_frame,
                                                 input_exhausted, need_to_resend, enc_eos_sent);
        }

        // Error, exit
        if (send_fin_flag == 2)
        {
            ret = 1;
            goto end;
        }

        // Receive encoded packet data from the encoder
        receive_fin_flag = encoder_receive_data(
            &enc_ctx, &out_packet, p_file);

        // Error or eos
        if (receive_fin_flag < 0 || out_packet.data.packet.end_of_stream)
        {
            ret = 1;
            goto end;
        }

        ++draw_count;
        std::cout << "frame: " << draw_count << std::endl;
        if (draw_count == all_draw_count)
        {
            input_exhausted = 1;
        }
    }

    ret = 0;

end:
    // passed to the session close calls below; set only when ret is 0
    int eos_sent = (ret == 0);

#if defined(READ_TO_FLUSH_THE_GPU_DATA) && READ_TO_FLUSH_THE_GPU_DATA == 1
    if(mmap_dma_buf_fd_addr)
    {
        munmap(mmap_dma_buf_fd_addr, frame_size_just_for_A1);
    }
#endif

    // Undo the DMA-BUF import if it produced any entries
    if (dma_buf_fd >= 0 && dma_addrs.ui32NumEntries)
    {
        unimport_dma_buf(&upl_ctx, dma_buf_fd);
        memset(&dma_addrs, 0, sizeof(dma_addrs));
    }

    // enc_frame is copied from p2p_frame
    if (p2p_frame.p_data[3])
    {
        recycle_frame(&p2p_frame);
    }

    if (upl_ctx.device_handle != NI_INVALID_DEVICE_HANDLE)
    {
        ni_device_session_close(&upl_ctx, eos_sent, NI_DEVICE_TYPE_UPLOAD);
    }

    // The encoder session must be closed as an encoder (it was previously
    // closed with the uploader device type and compared against the event
    // handle constant).
    if (enc_ctx.device_handle != NI_INVALID_DEVICE_HANDLE)
    {
        ni_device_session_close(&enc_ctx, eos_sent, NI_DEVICE_TYPE_ENCODER);
    }

    ni_device_session_context_clear(&enc_ctx);
    ni_device_session_context_clear(&upl_ctx);

    ni_frame_buffer_free(&p2p_frame);

    ni_packet_buffer_free(&(out_packet.data.packet));

    // Collect the device handles in a set so that a handle shared by several
    // fields is closed only once.
    std::unordered_set<int> handles;
    handles.insert(upl_ctx.device_handle);
    handles.insert(upl_ctx.blk_io_handle);
    handles.insert(enc_ctx.device_handle);
    handles.insert(enc_ctx.blk_io_handle);

    for (int fd : handles)
    {
        ni_device_close(fd);
    }

    if (p_file)
    {
        fclose(p_file);
    }

    release_egl_resource(fbo, texture, egl_display, egl_image, egl_context, sync, eglDestroyImageKHR, eglDestroySyncKHR);
    release_gbm_resource(drm_fd, gbm, bo, dma_buf_fd);

    printf("All done\n");

    // Exit status is 0 here in every case, exactly as when the function ran
    // off its end; `ret` is 1 on the normal end-of-stream path too, so it
    // cannot be used as the exit status as-is.
    return 0;
}