/*
 * Copyright (c) 2010 Nicolas George
 * Copyright (c) 2011 Stefano Sabatini
 * Copyright (c) 2014 Andrey Utkin
 * Copyright (C) 2024 NETINT Technologies
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @file
 * Helper API for AI inference on NETINT devices: create a network,
 * feed input into the network, and read the output back from it.
 * @example netint_network_311.c
 *
 * @added by minglong.zhang@netint.ca
 * use libxcoder api to create netint network, put input into network,
 * get output from network.
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "netint_network_311.h"
#include "ni_log.h"

/**
 * Open the scaler session stored in network_ctx->scale_api_ctx and build
 * its output frame pool on the device.
 *
 * @param network_ctx         context owning the scaler filter state
 * @param scale_format        pixel format passed to the frame pool allocation
 * @param scale_width         pool frame width (aligned up to 2 before use)
 * @param scale_height        pool frame height (aligned up to 2 before use)
 * @param devid               stored as the scaler hw_id
 * @param devfd               stored as the scaler device_handle
 * @param blkfd               stored as the scaler blk_io_handle
 * @param keep_alive_timeout  stored as the scaler keep_alive_timeout
 *
 * @return 0 on success, NIERROR(EIO) if the session cannot be opened or the
 *         pool cannot be built. In the latter case the scaler is closed
 *         here before returning.
 */
static int init_hwframe_scale(NiNetworkContext *network_ctx,
        int scale_format, int scale_width, int scale_height,
        int devid, int devfd, int blkfd,
        int keep_alive_timeout)
{
    ni_quadra_filter_t *scaler = &network_ctx->scale_api_ctx;
    int rc;

    scaler->device_handle      = devfd;
    scaler->blk_io_handle      = blkfd;
    scaler->device_type        = NI_QUADRA_DEVICE_TYPE_SCALER;
    scaler->scaler_operation   = NI_QUADRA_SCALER_OPCODE_SCALE;
    scaler->hw_id              = devid;
    scaler->keep_alive_timeout = keep_alive_timeout;

    rc = ni_quadra_init_filters(scaler);
    if (rc < 0) {
        pr_err("could not open scaler session\n");
        return NIERROR(EIO);
    }

    pr_log("initialize scaler, %dx%d, format %d\n",
            scale_width, scale_height, scale_format);

    /* Create scale frame pool on device */
    rc = ni_quadra_filter_device_alloc_frame(scaler,
                             NIALIGN(scale_width, 2),
                             NIALIGN(scale_height, 2),
                             scale_format,
                             NI_QUADRA_SCALER_FLAG_IO | NI_QUADRA_SCALER_FLAG_PC,
                             0, // rec width
                             0, // rec height
                             0, // rec X pos
                             0, // rec Y pos
                             NUM_NETWORK_FRAME, // rgba color/pool size
                             0, // frame index
                             NI_QUADRA_DEVICE_TYPE_SCALER);
    if (rc < 0) {
        pr_err("could not build frame pool\n");
        /* The session was opened above, so release it before bailing out. */
        ni_quadra_filter_close(scaler);
        return NIERROR(EIO);
    }

    return 0;
}

/**
 * Close the scaler filter held in network_ctx->scale_api_ctx.
 *
 * @param network_ctx  context owning the scaler filter; must not be NULL
 */
static void cleanup_hwframe_scale(NiNetworkContext *network_ctx)
{
    ni_quadra_filter_close(&network_ctx->scale_api_ctx);
}

/**
 * Close the filters owned by a network context and free the context.
 *
 * The NPU filter is always closed; the scaler filter is closed only when
 * hwframe is set. A NULL network_ctx is accepted and ignored.
 *
 * @param network_ctx  context to destroy (may be NULL)
 * @param hwframe      true if the scaler filter should be closed as well
 */
void ni_cleanup_network_context(NiNetworkContext *network_ctx, bool hwframe)
{
    if (!network_ctx) {
        return;
    }

    ni_quadra_filter_close(&network_ctx->npu_api_ctx);

    if (hwframe) {
        cleanup_hwframe_scale(network_ctx);
    }

    free(network_ctx);
}

/**
 * Allocate a network context, open the NPU session, load the network binary
 * and, for hardware frames, set up the scaler.
 *
 * @param p_network_ctx       out: receives the new context on success;
 *                            left untouched on failure
 * @param hwframe             true to enable hardware-frame input (sets
 *                            hw_action on the NPU filter and initialises
 *                            the scaler)
 * @param devid               stored as hw_id of the NPU (and scaler) filter
 * @param keep_alive_timeout  stored as keep_alive_timeout of the filters
 * @param scale_format        pixel format for the scaler frame pool
 * @param scale_width         must equal the network's in_param[0].sizes[0]
 * @param scale_height        must equal the network's in_param[0].sizes[1]
 * @param nbg_file            path of the network binary; must be readable
 * @param device_handle       stored as device_handle of the NPU filter
 * @param blk_io_handle       stored as blk_io_handle of the NPU filter
 *
 * @return 0 on success; NIERROR(EINVAL) for a bad argument or mismatching
 *         input dimensions; NIERROR(ENOMEM) if the context cannot be
 *         allocated; NIERROR(EIO) if the NPU session cannot be opened or
 *         configured; otherwise the error returned by the scaler setup.
 */
int ni_alloc_network_context(NiNetworkContext **p_network_ctx,
        bool hwframe, int devid, int keep_alive_timeout, int scale_format,
        int scale_width, int scale_height, const char *nbg_file, int device_handle, int blk_io_handle)
{
    int retval;
    NiNetworkContext *network_ctx;
    int ret;

    /* The result is written through this pointer on success. */
    if (p_network_ctx == NULL) {
        pr_err("invalid network context pointer\n");
        return NIERROR(EINVAL);
    }

    if ((nbg_file == NULL) || (access(nbg_file, R_OK) != 0)) {
        pr_err("invalid network binary path\n");
        return NIERROR(EINVAL);
    }

    network_ctx = (NiNetworkContext *)calloc(1, sizeof(NiNetworkContext));
    if (!network_ctx) {
        pr_err("failed to allocate network context\n");
        return NIERROR(ENOMEM);
    }

    if (hwframe) {
        network_ctx->npu_api_ctx.hw_action = NI_QUADRA_CODEC_HW_ENABLE;
    }
    network_ctx->npu_api_ctx.device_handle = device_handle;
    network_ctx->npu_api_ctx.blk_io_handle = blk_io_handle;
    network_ctx->npu_api_ctx.hw_id = devid;

    network_ctx->npu_api_ctx.device_type = NI_QUADRA_DEVICE_TYPE_AI;
    network_ctx->npu_api_ctx.keep_alive_timeout = keep_alive_timeout;
    retval = ni_quadra_init_filters(&network_ctx->npu_api_ctx);
    if (retval != 0) {
        pr_err("failed to open npu session. retval %d\n",
               retval);
        /*
         * The session never opened, so the filters are not closed here;
         * only the context allocation has to be released. Returning without
         * this free leaked the context.
         */
        free(network_ctx);
        return NIERROR(EIO);
    }

    retval = ni_quadra_ai_config_network_binary(&network_ctx->npu_api_ctx,
                                         &network_ctx->network_data,
                                         nbg_file);
    if (retval != 0) {
        pr_err("failed to configure npu session. retval %d\n",
               retval);
        ret = NIERROR(EIO);
        goto failed_out;
    }

    /* The caller-supplied scaler output size must match the network input. */
    if (scale_width != network_ctx->network_data.linfo.in_param[0].sizes[0] ||
        scale_height != network_ctx->network_data.linfo.in_param[0].sizes[1]) {
        pr_err("input dimensions not match: expect %dx%d, actual %dx%d\n",
                scale_width, scale_height,
                network_ctx->network_data.linfo.in_param[0].sizes[0],
                network_ctx->network_data.linfo.in_param[0].sizes[1]);
        ret = NIERROR(EINVAL);
        goto failed_out;
    }

    if (hwframe) {
        ret = init_hwframe_scale(network_ctx, scale_format, scale_width,
                scale_height, devid, network_ctx->npu_api_ctx.device_handle,
                network_ctx->npu_api_ctx.blk_io_handle, keep_alive_timeout);
        if (ret != 0) {
            pr_err("failed to initialize hw scale\n");
            goto failed_out;
        }
    }
    *p_network_ctx = network_ctx;
    return 0;

failed_out:
    /*
     * NOTE(review): with hwframe set this also closes the scaler filter,
     * although on every path reaching this label the scaler was either never
     * opened or was already closed by init_hwframe_scale(). Confirm that
     * ni_quadra_filter_close() tolerates that.
     */
    ni_cleanup_network_context(network_ctx, hwframe);
    return ret;
}

/**
 * Download a hardware frame from the scaler session into p_session_data.
 *
 * @param network_ctx     context whose scaler session performs the download
 * @param p_session_data  destination frame; a download buffer sized from
 *                        src_surf is allocated into it
 * @param src_surf        hardware surface to download
 * @param output_format   scaler format code (0x103, 4 or 0x10C)
 *
 * @return the value of ni_quadra_device_session_hwdl(); when that value is
 *         <= 0 the download buffer is freed first. Returns -1 for an
 *         unsupported format or a failed buffer allocation.
 */
static int ni_hwframe_dwl(NiNetworkContext *network_ctx, void *p_session_data,
        niquadraFrameSurface1_t *src_surf, int output_format)
{
    ni_quadra_filter_t *scaler = &network_ctx->scale_api_ctx;
    int pix_fmt;
    int rc;

    /* Map the scaler format code to the pixel format used for the buffer. */
    if (output_format == 0x103) {           /* GC620_I420 */
        pix_fmt = NI_QUADRA_PIX_FMT_YUV420P;
    } else if (output_format == 4) {        /* GC620_BGRA8888 */
        pix_fmt = NI_QUADRA_PIX_FMT_RGBA;
    } else if (output_format == 0x10C) {    /* GC620_RGB888_PLANAR */
        pix_fmt = NI_QUADRA_PIX_FMT_BGRP;
    } else {
        pr_err("Pixel format not supported.\n");
        return -1;
    }

    rc = ni_quadra_filter_frame_buffer_alloc_dl(p_session_data,
            src_surf->ui16width, src_surf->ui16height,
            pix_fmt);
    if (rc != 0) {
        return -1;
    }

    ni_quadra_filter_session_dl_set(scaler->api_ctx, false);

    rc = ni_quadra_device_session_hwdl(scaler->api_ctx, p_session_data, src_surf);
    if (rc <= 0) {
        /* Nothing was downloaded, so drop the buffer allocated above. */
        ni_quadra_frame_buffer_free(p_session_data);
    }

    return rc;
}

/**
 * Write the pixel data of a downloaded frame to a file.
 *
 * Supported format codes are 0x103 (three planes, chroma planes at half
 * width and height), 4 (a single plane of width * height * 4 bytes) and
 * 0x10C (three planes of width * height bytes). Any other code writes
 * nothing. Write and flush failures are logged but not propagated.
 *
 * @param p_file       destination stream; nothing is done if NULL
 * @param width        frame width in pixels
 * @param height       frame height in pixels
 * @param format       scaler format code selecting the layout
 * @param p_out_frame  frame handed to ni_quadra_get_frame_data(); nothing
 *                     is done if NULL
 *
 * @return always 0
 */
int write_rawvideo_data(FILE *p_file, int width, int height, int format,
                        void *p_out_frame)
{
    uint8_t **planes;
    int plane;

    if (!p_file || !p_out_frame) {
        return 0;
    }

    planes = ni_quadra_get_frame_data(p_out_frame);

    switch (format) {
    case 0x103: /* GC620_I420 */
        for (plane = 0; plane < 3; plane++) {
            uint8_t *row = planes[plane];
            int row_bytes = width;  /* bit depth 1: one byte per sample */
            int stride = width;
            int rows = height;
            int r;

            if (plane != 0) {
                // U/V stride size is multiple of 128, following the calculation
                // in ni_decoder_frame_buffer_alloc
                stride = ((width / 2 + 127) / 128) * 128;
                row_bytes = width / 2;
                rows = height / 2;
            }

            /* Emit only the visible bytes of each row, skipping the padding. */
            for (r = 0; r < rows; r++, row += stride) {
                if (fwrite(row, row_bytes, 1, p_file) != 1) {
                    pr_err("Error: writing data plane %d: height %d error! ret = %d\n",
                            plane, rows, ferror(p_file));
                }
            }
        }
        break;

    case 4: /* GC620_BGRA8888 */
        if (fwrite(planes[0], width * height * 4, 1, p_file) != 1) {
            pr_err("Error: ferror rc = %d\n", ferror(p_file));
        }
        break;

    case 0x10C: /* GC620_RGB888_PLANAR */
        for (plane = 0; plane < 3; plane++) {
            if (fwrite(planes[plane], width * height, 1, p_file) != 1) {
                pr_err("Error: ferror rc = %d\n", ferror(p_file));
            }
        }
        break;

    default:
        /* Unknown layout: nothing to write, but the stream is still flushed. */
        break;
    }

    if (fflush(p_file)) {
        pr_err("Error: writing data frame flush failed! errno %d\n",
                errno);
    }

    return 0;
}

/**
 * Download a scaled hardware frame and dump it to
 * "scale/scaled_<n>.dat", where n is a counter that advances on every call.
 *
 * The scaler's dst_frame is used as the download target and is freed before
 * returning, whether or not the download succeeded. A file that cannot be
 * opened is skipped silently.
 *
 * @param network_ctx    context owning the scaler session
 * @param src_frame      hardware surface to download
 * @param width          width passed to the raw writer
 * @param height         height passed to the raw writer
 * @param output_format  scaler format code of the frame
 *
 * @return the value returned by ni_hwframe_dwl()
 */
static int ni_scale_dwl(NiNetworkContext *network_ctx, niquadraFrameSurface1_t *src_frame,
        int width, int height, int output_format)
{
    static int frame_number = 0;
    void *dl_frame = network_ctx->scale_api_ctx.dst_frame;
    int rc;

    rc = ni_hwframe_dwl(network_ctx, dl_frame, src_frame, output_format);
    if (rc > 0) {
        char path[256] = { 0 };
        FILE *out;

        snprintf(path, sizeof(path), "scale/scaled_%d.dat", frame_number);
        out = fopen(path, "wb");
        if (out != NULL) {
            write_rawvideo_data(out, width, height, output_format, dl_frame);
            fclose(out);
        }
    }

    /* The counter advances even when nothing was written. */
    frame_number++;
    ni_quadra_frame_buffer_free(dl_frame);

    return rc;
}

/**
 * Scale (and optionally crop) a hardware frame with the scaler session and
 * read the resulting hardware descriptor into out_frame->api_frame.
 *
 * @param network_ctx  context owning the scaler session
 * @param in_frame     source hardware surface
 * @param roi_box      crop rectangle, or NULL to pass an all-zero rectangle
 * @param pic_width    source picture width (aligned up to 2 before use)
 * @param pic_height   source picture height (aligned up to 2 before use)
 * @param out_frame    supplies scale_width/scale_height/scale_format and
 *                     receives the output descriptor
 *
 * @return 0 on success, NIERROR(ENOMEM) if either device frame request
 *         fails, NIERROR(EIO) if the descriptor cannot be read
 */
static int ni_hwframe_scale(NiNetworkContext *network_ctx,
        niquadraFrameSurface1_t *in_frame, crop_box *roi_box,
        int pic_width, int pic_height, NiNetworkFrame *out_frame)
{
    ni_quadra_filter_t *scaler = &network_ctx->scale_api_ctx;
    int rc;

    /*
     * Allocate device input frame. This call won't actually allocate a frame,
     * but sends the incoming hardware frame index to the scaler manager
     */
    if (!roi_box) {
        rc = ni_quadra_filter_device_alloc_frame(
            scaler, NIALIGN(pic_width, 2), NIALIGN(pic_height, 2),
            0x103, 0, 0, 0, 0, 0,
            in_frame->ui32nodeAddress, in_frame->ui16FrameIdx,
            NI_QUADRA_DEVICE_TYPE_SCALER);
    } else {
        rc = ni_quadra_filter_device_alloc_frame(
            scaler, NIALIGN(pic_width, 2), NIALIGN(pic_height, 2),
            0x103, 0, roi_box->w, roi_box->h, roi_box->x, roi_box->y,
            in_frame->ui32nodeAddress, in_frame->ui16FrameIdx,
            NI_QUADRA_DEVICE_TYPE_SCALER);
    }
    if (rc != 0) {
        pr_err("Can't allocate device input frame %d\n",
               rc);
        return NIERROR(ENOMEM);
    }

    /* Allocate hardware device destination frame. This acquires a frame from
     * the pool */
    rc = ni_quadra_filter_device_alloc_frame(
        scaler,
        NIALIGN(out_frame->scale_width, 2),
        NIALIGN(out_frame->scale_height, 2),
        out_frame->scale_format, NI_QUADRA_SCALER_FLAG_IO,
        0, 0, 0, 0, 0, -1, NI_QUADRA_DEVICE_TYPE_SCALER);
    if (rc != 0) {
        pr_err("Can't allocate device output frame %d\n",
               rc);
        return NIERROR(ENOMEM);
    }

    rc = ni_quadra_device_session_read_hwdesc(
            scaler->api_ctx, out_frame->api_frame, NI_QUADRA_DEVICE_TYPE_SCALER);
    if (rc != 0) {
        pr_err("Cannot read hwdesc\n");
        return NIERROR(EIO);
    }

    /* Debug-only dump of the scaled frame; compiled in but never executed. */
    if (0) {
        niquadraFrameSurface1_t *dump_surface;

        printf("dump scaled output, scale width %d, height %d, format %d\n",
                out_frame->scale_width, out_frame->scale_height, out_frame->scale_format);
        dump_surface = (niquadraFrameSurface1_t *)ni_quadra_filter_get_data3(out_frame->api_frame);
        dump_surface->ui16width = out_frame->scale_width;
        dump_surface->ui16height = out_frame->scale_height;
        dump_surface->bit_depth = 1;
        dump_surface->encoding_type = NI_QUADRA_PIXEL_PLANAR_FORMAT_PLANAR;
        printf("filtered frame: width %d, height %d\n", dump_surface->ui16width,
                dump_surface->ui16height);
        ni_scale_dwl(network_ctx, dump_surface, out_frame->scale_width,
                out_frame->scale_height, out_frame->scale_format);
    }

    return 0;
}

/**
 * Feed one input frame to the network.
 *
 * With hwframe set, the frame is first scaled/cropped by the scaler and the
 * resulting hardware surface is handed to the NPU session. Otherwise
 * in_frame is written directly to the NPU session.
 *
 * @param network_ctx  network context
 * @param hwframe      true if in_frame refers to a hardware frame
 * @param in_frame     input frame
 * @param roi_box      optional crop rectangle (hardware path only)
 * @param pic_width    source picture width (hardware path only)
 * @param pic_height   source picture height (hardware path only)
 * @param out_frame    receives the scaled hardware frame (hardware path only)
 * @param blockable    not used by this function
 *
 * @return 0 on success. Hardware path: the error from ni_hwframe_scale(),
 *         or NIERROR(ENOMEM) if the NPU rejects the frame. Software path:
 *         NIERROR(EIO) if the write returns a negative value,
 *         NIERROR(EAGAIN) if it returns 0.
 */
int ni_set_network_input(NiNetworkContext *network_ctx, bool hwframe,
        void *in_frame, crop_box *roi_box,
        int pic_width, int pic_height, NiNetworkFrame *out_frame,
        bool blockable)
{
    niquadraFrameSurface1_t *scaled_surface;
    int rc;

    if (!hwframe) {
        rc = ni_quadra_device_session_write(network_ctx->npu_api_ctx.api_ctx, in_frame, NI_QUADRA_DEVICE_TYPE_AI);
        if (rc < 0) {
            return NIERROR(EIO);
        }
        if (rc == 0) {
            return NIERROR(EAGAIN);
        }
        return 0;
    }

    rc = ni_hwframe_scale(network_ctx,
            (niquadraFrameSurface1_t *)ni_quadra_filter_get_data3(in_frame),
            roi_box, pic_width, pic_height, out_frame);
    if (rc != 0) {
        pr_err("Error run hwframe scale\n");
        return rc;
    }

    scaled_surface =
            (niquadraFrameSurface1_t *)ni_quadra_filter_get_data3(out_frame->api_frame);

    /* allocate output buffer */
    rc = ni_quadra_filter_device_alloc_frame(&network_ctx->npu_api_ctx,
            0, 0, 0, 0, 0, 0, 0, 0,
            scaled_surface->ui32nodeAddress,
            scaled_surface->ui16FrameIdx,
            NI_QUADRA_DEVICE_TYPE_AI);
    if (rc != 0) {
        pr_err("failed to alloc hw input frame\n");
        return NIERROR(ENOMEM);
    }

    return 0;
}

/**
 * Placeholder for an explicit inference step; performs no work.
 *
 * @param network_ctx  unused
 * @param hwframe      unused
 *
 * @return always 0
 */
int ni_invoke_network_inference(NiNetworkContext *network_ctx, bool hwframe)
{
    (void)network_ctx;
    (void)hwframe;

    return 0;
}

/**
 * Read one inference result from the NPU session and optionally convert
 * each output layer to float.
 *
 * @param network_ctx  network context
 * @param hwframe      true if the input was a hardware frame; the surface in
 *                     out_frame->api_frame is then recycled after the read
 * @param out_frame    api_packet receives the raw result
 * @param blockable    true to keep retrying while the read returns 0;
 *                     the retry loop does not sleep between attempts
 * @param convert      true to convert every output layer into data[]
 * @param data         one destination buffer per output layer; only read
 *                     when convert is true
 *
 * @return 0 on success, NIERROR(EIO) if the read or a layer conversion
 *         fails, NIERROR(EAGAIN) if no result is available and blockable
 *         is false
 */
int ni_get_network_output(NiNetworkContext *network_ctx, bool hwframe,
        NiNetworkFrame *out_frame, bool blockable, bool convert,
        uint8_t **data)
{
    ni_quadra_filter_t *npu_api_ctx = &network_ctx->npu_api_ctx;
    int retval;
    int i;

    /* Poll the session until a result arrives, unless told not to block. */
    for (;;) {
        retval = ni_quadra_device_session_read(npu_api_ctx->api_ctx, out_frame->api_packet, NI_QUADRA_DEVICE_TYPE_AI);
        if (retval < 0) {
            pr_err("read hwdesc retval %d\n", retval);
            return NIERROR(EIO);
        }
        if (retval > 0) {
            break;
        }
        if (!blockable) {
            return NIERROR(EAGAIN);
        }
    }

    if (hwframe) {
        /* A result has been read, so the scaled input surface is recycled. */
        niquadraFrameSurface1_t *filt_frame_surface =
                (niquadraFrameSurface1_t *)ni_quadra_filter_get_data3(out_frame->api_frame);
        ni_quadra_hwframe_buffer_recycle(filt_frame_surface,
                filt_frame_surface->device_handle);
    }

    if (!convert) {
        return 0;
    }

    /*
     * The former function-local static frame counter was incremented here
     * but never read; it has been removed so the function keeps no hidden
     * shared state.
     */
    for (i = 0; i < network_ctx->network_data.output_num; i++) {
        retval = ni_quadra_network_layer_convert_output(out_frame->api_packet, (float *)data[i],
                    ni_quadra_ai_network_layer_dims(&network_ctx->network_data.linfo.out_param[i]) * sizeof(float),
                    &network_ctx->network_data, i);
        if (retval != 0) {
            pr_err("failed to read layer %d output. retval %d\n",
                    i, retval);
            return NIERROR(EIO);
        }
    }

    return 0;
}

