/*
 * Copyright (c) 2001 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @file
 *
 * AV1 tile decoding with multi-threading
 *
 * @added by tom.han@netint.ca
 * @updated by leo.ma@netint.ca
 */

#include <libavcodec/avcodec.h>
#if LIBAVCODEC_VERSION_MAJOR >= 59
#include <libavcodec/bsf.h>
#endif
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/time.h>
#include <libavutil/avassert.h>
#include <signal.h>
#include <unistd.h>
#ifdef __linux__
#include <sys/sysinfo.h>
#include <getopt.h>
#endif
#include <libavutil/ffversion.h>
#include <libavcodec/ni_av1_extradata.h>
//#include <libavcodec/ni_hevc_extradata.h>

#include <ni_rsrc_api.h>

/*
 * Minimal intrusive, circular, doubly-linked list.
 *
 * A list is anchored by a bare struct list_head whose next/prev point back at
 * itself while the list is empty. Elements embed a struct list_head member and
 * the macros below recover the enclosing object from a pointer to that member.
 */

/* Map a pointer to an embedded member back to the object that contains it. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - (size_t)&(((type *)0)->member)))

/* Same as container_of(), spelled in list terminology. */
#define list_entry(ptr, type, member) \
    container_of(ptr, type, member)

/* Object owning the node right after the anchor; only meaningful on a non-empty list. */
#define list_first_entry(ptr, type, field)  list_entry((ptr)->next, type, field)

struct list_head {
    struct list_head *next;
    struct list_head *prev;
};

/* Turn @list into an empty list: both links refer to the anchor itself. */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
    list->prev = list;
    list->next = list;
}

/* Non-zero when no element is linked behind @head. */
static inline int list_empty(const struct list_head *head)
{
    return head == head->next;
}

/* Splice @new between the two adjacent nodes @prev and @next. */
static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next)
{
    new->prev = prev;
    new->next = next;
    prev->next = new;
    next->prev = new;
}

/* Insert @_new right after @head, i.e. at the front of the list. */
static inline void list_add(struct list_head *_new, struct list_head *head)
{
    struct list_head *first = head->next;

    __list_add(_new, head, first);
}

/* Insert @_new right before @head, i.e. at the back of the list. */
static inline void list_add_tail(struct list_head *_new, struct list_head *head)
{
    struct list_head *last = head->prev;

    __list_add(_new, last, head);
}

/* Make the neighbours of @entry point at each other; @entry's own links are left untouched. */
static inline void __list_del(struct list_head *entry)
{
    struct list_head *before = entry->prev;
    struct list_head *after = entry->next;

    after->prev = before;
    before->next = after;
}

/* Unlink @entry and clear its links so a stale node is recognisable. */
static inline void list_del(struct list_head *entry)
{
    __list_del(entry);
    entry->prev = NULL;
    entry->next = NULL;
}

/* Queue element holding one bitstream packet that is waiting to be decoded. */
struct bs_pkt_node {
    /* Packet to decode; decode_routine() treats a NULL packet as a flush request. */
    AVPacket *pkt;
    /* Linkage into codec_instance.bs_pkt_list. */
    struct list_head link;
};

/* Queue element holding one decoded frame that is waiting to be assembled. */
struct dec_frm_node {
    /* Frame allocated by frame_dispatch(); released by reap_frame() after copying. */
    AVFrame *frame;
    /* Linkage into codec_instance.dec_frm_list. */
    struct list_head link;
};

/* Per-tile decoder state: one decoder context plus its input and output queues. */
struct codec_instance {
    /* Identifier printed as "stream %d" in log messages. */
    int id;
    /* Decoder name handed to decoder_open(). */
    const char *codec_name;

    /* Number of nodes currently linked in bs_pkt_list. */
    int pkt_num;
    /* Pending input packets (struct bs_pkt_node), appended by packet_dispatch(). */
    struct list_head bs_pkt_list;

    /* Number of nodes currently linked in dec_frm_list. */
    int frm_num;
    /* Decoded frames (struct dec_frm_node) waiting for reap_frame(). */
    struct list_head dec_frm_list;
    /* Held by frame_dispatch()/frame_fetch() while touching dec_frm_list and frm_num. */
    pthread_mutex_t frm_mtx;

    /* Scratch frame passed to do_decode() as the receive target. */
    AVFrame *decoded_frame;
    /* Decoder context opened by codec_instance_init(). */
    AVCodecContext *dec_ctx;

    /* Count of packets handed to the decoder by do_decode(). */
    int send_packets;
    /* Count of frames received from the decoder by do_decode(). */
    int decoded_frames;
};

/* Work item telling a worker thread which codec instance has a packet queued. */
struct codec_instance_node {
    /* Instance whose bs_pkt_list received a packet. */
    struct codec_instance *inst;
    /* Linkage into tile_thread_context.inst_list. */
    struct list_head link;
};

/* State of one decoding worker thread (see decode_routine()). */
struct tile_thread_context {
    /* Thread identifier of the worker. */
    pthread_t tid;
    /* Number of work items currently linked in inst_list. */
    int nb_instances;
    /* Back pointer to the shared tile codec context. */
    struct tile_codec_context *codec_ctx;
    /* Pending work items (struct codec_instance_node). */
    struct list_head inst_list;
    /* Held while inst_list and nb_instances are modified. */
    pthread_mutex_t inst_mtx;
    /* Signalled by packet_dispatch() after a work item was appended. */
    pthread_cond_t inst_cond;
};

/* State shared between the main thread and all decoding workers. */
struct tile_codec_context {
    /* Worker array; packet_split_bsf() picks workers[id % total_threads]. */
    struct tile_thread_context *workers;
    /* Codec instance array, indexed by tile id. */
    struct codec_instance *instances;
    /* Bitstream filter set up by av1_frame_split_bsf_init(). */
    AVBSFContext *bsf_ctx;
    /* Held while active_workers is read or modified. */
    pthread_mutex_t mutex;
    /* Signalled once active_workers has reached total_threads. */
    pthread_cond_t cond;
    /* Workers that have started and not yet finished flushing. */
    int active_workers;
    /* Number of worker threads. */
    int total_threads;
    /* Incremented once per packet_split_bsf() call that got past the bsf send. */
    int total_frm_num;
    /* Number of tiles; reap_frame() asserts that it equals row * col. */
    int nb_tiles;
    /* Tile grid dimensions filled in from the stream extradata. */
    int row;
    int col;
    /* Set by the SIGINT handler; polled by the assembling loop and the workers. */
    int finish;
    /* Non-zero makes reap_frame() write every assembled frame to out_fp. */
    int save;
};

/* Shared decoding context; allocated in main() and used by the helpers and the SIGINT handler. */
static struct tile_codec_context *p_tile_codec_ctx;
/* Set by -a or -o; makes do_decode() queue decoded frames for assembly instead of dropping them. */
static int assemble_frame = 0;
/* Frame the tiles are copied into by reap_frame(); its buffer is allocated on first use. */
static AVFrame *uni_frame;
/* Output stream that reap_frame() hands to write_save() when saving is enabled. */
static FILE *out_fp;

/*
 * Print the command-line synopsis to stdout.
 *
 * The text mirrors the options parsed in main(): the tool only accepts AV1
 * input, falls back to the libaom-av1 decoder when -c is omitted, and -a
 * enables frame assembly (which -o switches on as well).
 */
static void usage(void)
{
    printf("Usage:\n"
           "-i, --input          input tiled AV1 file\n"
           "-c, --codec          decoder codec name, default is libaom-av1\n"
           "-o, --output         path to output YUV file, set \"null\" not save as file\n"
           "-a, --assemble       assemble decoded tiles into whole frames (implied by -o)\n"
           "-j, --threads        number of threads to use, default value is number of processors.\n"
           "-h, --help           print this help message\n");
}

/*
 * Print a decoding speed line to stderr.
 *
 * Timestamps are microseconds (as produced by av_gettime_relative() in the
 * caller). Intermediate reports are rate-limited to one every 500 ms and end
 * with '\r'; the last report always prints, ends with '\n' and appends the
 * average rate since @timer_start. Progress is taken as the smallest
 * decoded_frames counter over all tiles.
 */
static void print_report(int is_last_report, int64_t timer_start, int64_t curr_time)
{
    static size_t last_frames = 0;
    static int64_t last_time = -1;
    char line[100] = {'\0'};
    size_t min_frames = ULONG_MAX;
    size_t tile_frames;
    double elapsed_ms, fps;
    int tile;

    /* Interval since the previous report, taken before last_time is refreshed. */
    elapsed_ms = (curr_time - last_time) / 1000.0;

    if (!is_last_report) {
        /* The very first call only records the reference time. */
        if (last_time == -1) {
            last_time = curr_time;
            return;
        }
        if ((curr_time - last_time) < 500000) {
            return;
        }
        last_time = curr_time;
    }

    for (tile = 0; tile < p_tile_codec_ctx->nb_tiles; tile++) {
        tile_frames = p_tile_codec_ctx->instances[tile].decoded_frames;
        if (tile_frames < min_frames) {
            min_frames = tile_frames;
        }
    }

    if (elapsed_ms > 1) {
        fps = (min_frames - last_frames) * 1000 / elapsed_ms;
    } else {
        fps = 0;
    }
    last_frames = min_frames;

    /* One decimal is shown only while the rate is below 9.95 fps. */
    snprintf(line, sizeof(line) - 1, "fps=%3.*f ", (fps < 9.95), fps);
    fprintf(stderr, "Tile decoding %s   %c", line, is_last_report ? '\n' : '\r');

    if (is_last_report) {
        elapsed_ms = (curr_time - timer_start) / 1000.0;
        fps = min_frames * 1000 / elapsed_ms;
        fprintf(stderr, "Average fps=%3.f\n", fps);
    }

    fflush(stderr);
}

/*
 * Write @rows rows of @row_bytes bytes from one image plane to @f.
 * Returns 0 on success, -1 as soon as fwrite() reports a short write.
 */
static int write_save_plane(const uint8_t *data, int linesize, int row_bytes, int rows, FILE *f)
{
    int i;

    for (i = 0; i < rows; i++) {
        if (fwrite(data + i * linesize, 1, row_bytes, f) != (size_t) row_bytes) {
            return -1;
        }
    }

    return 0;
}

/*
 * Dump a planar 4:2:0 frame to @f as raw Y, U and V planes without padding.
 *
 * @width and @height are the luma dimensions in pixels; for the 10-bit
 * formats every sample takes two bytes, so the row size is doubled. The
 * chroma planes are written as width/2 x height/2. A failed or short write
 * is logged and aborts the remainder of the frame instead of being ignored.
 */
static void write_save(AVFrame *frame, int width, int height, FILE* f)
{
    if (frame->format == AV_PIX_FMT_YUV420P10LE ||
        frame->format == AV_PIX_FMT_YUV420P10BE ||
        frame->format == AV_PIX_FMT_YUV420P10)
    {
        width *= 2;
    }

    if (write_save_plane(frame->data[0], frame->linesize[0], width, height, f) < 0 ||
        write_save_plane(frame->data[1], frame->linesize[1], width / 2, height / 2, f) < 0 ||
        write_save_plane(frame->data[2], frame->linesize[2], width / 2, height / 2, f) < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to write frame to output file: %s\n",
               av_err2str(AVERROR(errno)));
    }
}

/*
 * Queue a decoded frame on @inst for later assembly.
 *
 * The contents of @frame are moved into a freshly allocated AVFrame, so
 * @frame is left empty and can be reused by the caller. Returns 0 on success
 * or AVERROR(ENOMEM) when either allocation fails (in which case @frame is
 * untouched).
 */
static int frame_dispatch(struct codec_instance *inst, AVFrame *frame)
{
    struct dec_frm_node *node = (struct dec_frm_node *) malloc(sizeof(*node));

    if (node == NULL) {
        return AVERROR(ENOMEM);
    }

    node->frame = av_frame_alloc();
    if (node->frame == NULL) {
        free(node);
        return AVERROR(ENOMEM);
    }
    av_frame_move_ref(node->frame, frame);

    /* Publish the node; list and counter change together under frm_mtx. */
    pthread_mutex_lock(&inst->frm_mtx);
    list_add_tail(&node->link, &inst->dec_frm_list);
    inst->frm_num += 1;
    pthread_mutex_unlock(&inst->frm_mtx);

    return 0;
}

/*
 * Detach and return the oldest queued frame node of @inst, or NULL when the
 * queue is empty. The caller owns the returned node and its frame.
 */
static struct dec_frm_node *frame_fetch(struct codec_instance *inst)
{
    struct dec_frm_node *node = NULL;

    pthread_mutex_lock(&inst->frm_mtx);
    if (!list_empty(&inst->dec_frm_list)) {
        node = list_first_entry(&inst->dec_frm_list, struct dec_frm_node, link);
        list_del(&node->link);
        inst->frm_num -= 1;
    }
    pthread_mutex_unlock(&inst->frm_mtx);

    return node;
}

static int reap_frame(void)
{
    int i, r, c, h, ret;
    struct codec_instance *inst, *instances;
    struct dec_frm_node *dfn;
    AVFrame *frame;
    int x_offset;
    int y_offset;
    int this_width;
    int this_height;

    instances = p_tile_codec_ctx->instances;
    if (!uni_frame->buf[0]) {
        av_assert0(p_tile_codec_ctx->col * p_tile_codec_ctx->row == p_tile_codec_ctx->nb_tiles);

        /* check each tile column and row size */
        for (c = 0; c < p_tile_codec_ctx->col; c++) {
            int first_width = instances[c].dec_ctx->width;
            for (r = 1; r < p_tile_codec_ctx->row; r++) {
                if (first_width != instances[r * p_tile_codec_ctx->col + c].dec_ctx->width) {
                    av_log(NULL, AV_LOG_ERROR, "ERROR: invalid width. first width %d, width %d\n",
                           first_width, instances[r * p_tile_codec_ctx->col + c].dec_ctx->width);
                    return AVERROR(EINVAL);
                }
            }
        }

        for (r = 0; r < p_tile_codec_ctx->row; r++) {
            int first_height = instances[r * p_tile_codec_ctx->col].dec_ctx->height;
            for (c = 1; c < p_tile_codec_ctx->col; c++) {
                if (first_height != instances[r * p_tile_codec_ctx->col + c].dec_ctx->height) {
                    av_log(NULL, AV_LOG_ERROR, "ERROR: invalid height. first height %d, height %d\n",
                           first_height, instances[r * p_tile_codec_ctx->col + c].dec_ctx->height);
                    return AVERROR(EINVAL);
                }
            }
        }

        x_offset = y_offset = 0;
        for (i = 0; i < p_tile_codec_ctx->col; i++) {
            av_log(NULL, AV_LOG_DEBUG, "col %d width %d, x_offset %d\n", i,
                   instances[i].dec_ctx->width, x_offset);
            x_offset += instances[i].dec_ctx->width;
        }

        for (i = 0; i < p_tile_codec_ctx->row; i++) {
            av_log(NULL, AV_LOG_DEBUG, "row %d height %d, y_offset %d\n", i,
                   instances[i * p_tile_codec_ctx->col].dec_ctx->height, y_offset);
            y_offset += instances[i * p_tile_codec_ctx->col].dec_ctx->height;
        }

        av_log(NULL, AV_LOG_DEBUG, "x_offset %d, y_offset %d\n", x_offset, y_offset);

        uni_frame->format = instances[0].dec_ctx->pix_fmt;
        uni_frame->width = x_offset;
        uni_frame->height = y_offset;

        ret = av_frame_get_buffer(uni_frame, 32);
        if (ret < 0) {
            return ret;
        }

        ret = av_frame_make_writable(uni_frame);
        if (ret < 0) {
            return ret;
        }
    }

    av_log(NULL, AV_LOG_DEBUG, "uni_frame pixel %d/%d, linesize %d/%d/%d\n",
           uni_frame->width, uni_frame->height, uni_frame->linesize[0],
           uni_frame->linesize[1], uni_frame->linesize[2]);

    frame = NULL;
    dfn = NULL;
    y_offset = 0;
    for (r = 0; r < p_tile_codec_ctx->row; r++) {
        x_offset = 0;
        for (c = 0; c < p_tile_codec_ctx->col; c++) {
            inst = &instances[r * p_tile_codec_ctx->col + c];
            dfn = frame_fetch(inst);
            av_assert0(dfn && dfn->frame);
            frame = dfn->frame;
            av_assert0(frame->width == inst->dec_ctx->width &&
                       frame->height == inst->dec_ctx->height);

            this_width = frame->width;
            this_height = frame->height;

            av_log(NULL, AV_LOG_DEBUG, "x_offset %d, y_offset %d\n", x_offset, y_offset);
            av_log(NULL, AV_LOG_DEBUG, "tile %d/%d, frame pixel %d/%d, linesize %d/%d/%d\n",
                                       c, r, frame->width, frame->height, frame->linesize[0],
                                       frame->linesize[1], frame->linesize[2]);

            /* assemble frame */
            for (h = 0; h < frame->height; h++) {
                //Y
                memcpy(uni_frame->data[0] + (h + y_offset) * uni_frame->linesize[0] + x_offset,
                       frame->data[0] + h * frame->linesize[0], frame->width);
            }

            for (h = 0; h < frame->height / 2; h++) {
                //U
                memcpy(uni_frame->data[1] + (h + y_offset / 2) * uni_frame->linesize[1] + x_offset / 2,
                       frame->data[1] + h * frame->linesize[1], frame->width / 2);
                //V
                memcpy(uni_frame->data[2] + (h + y_offset / 2) * uni_frame->linesize[2] + x_offset / 2,
                       frame->data[2] + h * frame->linesize[2], frame->width / 2);
            }

            x_offset += this_width;

            av_frame_free(&frame);
            free(dfn);
        }

        y_offset += this_height;
    }

    if (p_tile_codec_ctx->save) {
        write_save(uni_frame, uni_frame->width, uni_frame->height, out_fp);
    }

    return 0;
}

static inline int frame_available(void)
{
    int i;

    for (i = 0; i < p_tile_codec_ctx->nb_tiles; i++) {
        if (p_tile_codec_ctx->instances[i].frm_num == 0) {
            return 0;
        }
    }

    return 1;
}

static void frame_assemble_sync(int assemble, int64_t t0)
{
    int64_t t1;
    int done = 0;

    if (!assemble) {
        return;
    }

    while (!p_tile_codec_ctx->finish) {
        t1 = av_gettime_relative();
        pthread_mutex_lock(&p_tile_codec_ctx->mutex);
        if (p_tile_codec_ctx->active_workers == 0) {
            done = 1;
        }
        pthread_mutex_unlock(&p_tile_codec_ctx->mutex);

        if (frame_available()) {
            reap_frame();
        } else if (done) {
            break;
        }

        print_report(0, t0, t1);
    }
}

static void frame_decode_sync(void)
{
    pthread_mutex_lock(&p_tile_codec_ctx->mutex);
    while (p_tile_codec_ctx->active_workers < p_tile_codec_ctx->total_threads) {
        pthread_cond_wait(&p_tile_codec_ctx->cond, &p_tile_codec_ctx->mutex);
    }
    pthread_mutex_unlock(&p_tile_codec_ctx->mutex);
}

/* Send one packet to the decoder of @inst and drain every frame it has ready.
 *
 * With a non-NULL @pkt the function returns AVERROR(EAGAIN) once the decoder
 * wants more input. With @pkt == NULL the decoder is flushed: the receive
 * loop keeps polling (also across EAGAIN) until the decoder reports
 * AVERROR_EOF. Each received frame is either queued for assembly
 * (assemble_frame set) or dropped right away.
 *
 * The return value is always negative: AVERROR(EAGAIN), AVERROR_EOF, or the
 * error from sending, receiving or queueing.
 */
static int do_decode(struct codec_instance *inst, AVFrame *frame, AVPacket *pkt)
{
    AVCodecContext *dec_ctx = inst->dec_ctx;
    const int flushing = (pkt == NULL);
    int ret;

    if (flushing) {
        av_log(NULL, AV_LOG_DEBUG, "stream %d: flush\n", inst->id);
    } else {
        inst->send_packets++;
        av_log(NULL, AV_LOG_DEBUG, "stream %d: send frame %d\n", inst->id, inst->send_packets);
    }

    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "stream %d: Error sending a packet for decoding\n", inst->id);
        return ret;
    }

    /* Every exit from this loop is one of the returns below. */
    for (;;) {
        ret = avcodec_receive_frame(dec_ctx, frame);

        if (ret == AVERROR(EAGAIN)) {
            /* Nothing ready: go fetch the next packet, or keep polling while flushing. */
            if (!flushing) {
                return ret;
            }
            continue;
        }

        if (ret == AVERROR_EOF) {
            /* Flushing is complete. */
            av_log(NULL, AV_LOG_INFO, "stream %d: decoder EOF reached\n", inst->id);
            av_log(NULL, AV_LOG_WARNING, "Waiting for YUV file writing...\n");
            return ret;
        }

        if (ret < 0) {
            /* Decoding error, possibly a bad packet in the input. */
            av_log(NULL, AV_LOG_ERROR, "stream %d: Error during decoding: %s\n", inst->id, av_err2str(ret));
            return ret;
        }

        inst->decoded_frames++;

        if (!assemble_frame) {
            av_frame_unref(frame);
        } else {
            ret = frame_dispatch(inst, frame);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "stream %d: failed to add frame\n", inst->id);
                return ret;
            }
        }

        av_log(NULL, AV_LOG_DEBUG, "stream %d: receive frame %d\n", inst->id,
               inst->decoded_frames);
    }
}

/*
 * Worker thread entry point; @arg is the worker's struct tile_thread_context.
 *
 * The worker first registers itself in active_workers (waking the thread
 * blocked in frame_decode_sync() once all workers are up). It then
 * repeatedly waits for a work item on inst_list, pops the matching packet
 * from the instance's bs_pkt_list and decodes it. The loop ends when a NULL
 * packet (flush request) arrives or when the finish flag is seen. The last
 * instance handled is then flushed and the worker deregisters itself.
 *
 * Always returns NULL.
 */
static void *decode_routine(void *arg)
{
    struct tile_thread_context *worker = (struct tile_thread_context *) arg;
    /* Stays NULL until the first work item is taken; the finish path can
     * leave the loop before that happens. */
    struct codec_instance *inst = NULL;
    struct bs_pkt_node *bpn;
    struct codec_instance_node *cin;
    struct AVPacket *pkt;
    struct tile_codec_context *codec_ctx = worker->codec_ctx;

    av_log(NULL, AV_LOG_INFO, "tile_thread_context %ld: enter\n", worker->tid);

    // I am ready
    pthread_mutex_lock(&codec_ctx->mutex);
    codec_ctx->active_workers++;
    if (codec_ctx->active_workers == codec_ctx->total_threads) {
        pthread_cond_signal(&codec_ctx->cond);
    }
    pthread_mutex_unlock(&codec_ctx->mutex);

    for (; ;) {
        pthread_mutex_lock(&worker->inst_mtx);

        while (list_empty(&worker->inst_list)) {
            pthread_cond_wait(&worker->inst_cond, &worker->inst_mtx);
        }

        /* The finish flag is only noticed once a work item has arrived. */
        if (codec_ctx->finish) {
            pthread_mutex_unlock(&worker->inst_mtx);
            break;
        }

        cin = list_first_entry(&worker->inst_list, struct codec_instance_node, link);
        av_assert0(cin != NULL && cin->inst != NULL);
        list_del(&cin->link);
        inst = cin->inst;
        worker->nb_instances--;

        pthread_mutex_unlock(&worker->inst_mtx);

        // fetch packet
        /* NOTE(review): bs_pkt_list and pkt_num are touched here without a
         * lock, and packet_dispatch() appends to the same list without one;
         * confirm whether this needs to be serialized. */
        bpn = list_first_entry(&inst->bs_pkt_list, struct bs_pkt_node, link);
        av_assert0(bpn != NULL);
        list_del(&bpn->link);
        inst->pkt_num--;
        pkt = bpn->pkt;
        free(bpn);
        free(cin);

        if (!pkt) {
            /* NOTE(review): the worker stops at the first flush request, so any
             * other instance routed to this worker is not flushed here. */
            av_log(NULL, AV_LOG_INFO, "stream %d: receive flush decoder pkt, pkt_num %d\n",
                   inst->id, inst->pkt_num);
            break;
        } else {
            do_decode(inst, inst->decoded_frame, pkt);
            av_packet_free(&pkt);
        }
    }

    // Flush decoder (only if this worker ever handled an instance)
    if (inst) {
        av_log(NULL, AV_LOG_DEBUG, "stream %d: ready to flush decoder\n", inst->id);
        do_decode(inst, inst->decoded_frame, NULL);
    }

    // Deregister so frame_assemble_sync() can tell when all workers are gone.
    pthread_mutex_lock(&codec_ctx->mutex);
    codec_ctx->active_workers--;
    pthread_mutex_unlock(&codec_ctx->mutex);

    return (void *) 0;
}

/*
 * Create and initialise the "ni_av1_frame_split" bitstream filter for
 * @stream and store it in p_tile_codec_ctx->bsf_ctx.
 *
 * Returns 0 on success, AVERROR(EINVAL) when the filter is not registered,
 * or the error from allocating, configuring or initialising it. On failure
 * after allocation the filter context is freed again.
 */
static int av1_frame_split_bsf_init(AVStream *stream)
{
    const char *bsf_name = "ni_av1_frame_split";
    const AVBitStreamFilter *filter = av_bsf_get_by_name(bsf_name);
    int ret;

    if (filter == NULL) {
        av_log(NULL, AV_LOG_ERROR, "can not find bsf: %s\n", bsf_name);
        return AVERROR(EINVAL);
    }

    ret = av_bsf_alloc(filter, &p_tile_codec_ctx->bsf_ctx);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate bsf\n");
        return ret;
    }

    /* The filter takes its input parameters from the stream it will process. */
    ret = avcodec_parameters_copy(p_tile_codec_ctx->bsf_ctx->par_in, stream->codecpar);
    if (ret >= 0) {
        ret = av_bsf_init(p_tile_codec_ctx->bsf_ctx);
        if (ret >= 0) {
            return 0;
        }
        av_log(NULL, AV_LOG_ERROR, "failed to initialize bitstream filter\n");
    } else {
        av_log(NULL, AV_LOG_ERROR, "failed to copy parameters to bsf\n");
    }

    av_bsf_free(&p_tile_codec_ctx->bsf_ctx);
    return ret;
}

/*
 * Open the decoder called @codec_name for stream @st.
 *
 * The codec parameters of the stream are copied into a new context, after
 * which width/height are overridden with 1920x1080 and the time base and
 * frame rate with 1/25 and 25/1 before the codec is opened. On success the
 * context is stored in *@ctx and 0 is returned; on failure *@ctx is left
 * untouched and a negative AVERROR code is returned.
 */
static int decoder_open(AVCodecContext **ctx, AVStream *st, const char *codec_name)
{
    const AVCodec *codec = avcodec_find_decoder_by_name(codec_name);
    AVCodecContext *avctx;
    int err;

    if (codec == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to find codec %s\n", codec_name);
        return AVERROR(EINVAL);
    }

    avctx = avcodec_alloc_context3(codec);
    if (avctx == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate the codec context\n");
        return AVERROR(ENOMEM);
    }

    err = avcodec_parameters_to_context(avctx, st->codecpar);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to copy codec parameters to decoder context\n");
        goto fail;
    }

    /* Some codecs cannot derive the picture size from the bitstream, so
     * initial values are always supplied here. */
    avctx->width = 1920;
    avctx->height = 1080;
    avctx->time_base = (AVRational){1, 25};
    avctx->framerate = (AVRational){25, 1};

    err = avcodec_open2(avctx, codec, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to open codec\n");
        goto fail;
    }

    *ctx = avctx;
    return 0;

fail:
    avcodec_free_context(&avctx);
    return err;
}

/* Probe @fmt_ctx, select the lowest-indexed stream of media type @type and
 * read the AV1 tile layout from its extradata.
 *
 * On success *@stream_index holds the index of the selected stream, the tile
 * grid is stored in p_tile_codec_ctx->row/col, and *@tile_num (if not NULL)
 * is set to row * col. *@tile_num is 0 when the stream carries no extradata
 * or the tile layout cannot be extracted; the function still returns 0 in
 * that case.
 *
 * Returns 0 on success, the error from avformat_find_stream_info(),
 * AVERROR_STREAM_NOT_FOUND when no stream of @type exists, or
 * AVERROR(EINVAL) when the selected stream is not AV1.
 */
static int input_file_open(AVFormatContext *fmt_ctx, enum AVMediaType type,
                           int *stream_index, int *tile_num)
{
    unsigned int i;
    int ret;
    int found = 0;
    AVStream *st;
    AVCodecParameters *p_codecpar;
    const AVCodecDescriptor *p_desc;

    /* Give the caller a defined value on every path. */
    if (tile_num) {
        *tile_num = 0;
    }

    av_log(NULL, AV_LOG_DEBUG, "%s()\n", __func__);
    ret = avformat_find_stream_info(fmt_ctx, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not find %s stream in input file \n",
               av_get_media_type_string(type));
        return ret;
    }

    // Find lowest indexed stream of the requested type
    for (i = 0; i < fmt_ctx->nb_streams; i++) {
        if (fmt_ctx->streams[i]->codecpar->codec_type == type) {
            av_log(NULL, AV_LOG_INFO, "found first %s stream at index %d\n",
                   av_get_media_type_string(type), (int) i);
            *stream_index = (int) i;
            found = 1;
            break;
        }
    }

    /* Do not fall back to stream 0: it may not exist, and it is not of the
     * requested type. */
    if (!found) {
        av_log(NULL, AV_LOG_ERROR, "no %s stream found in input file\n",
               av_get_media_type_string(type));
        return AVERROR_STREAM_NOT_FOUND;
    }

    st = fmt_ctx->streams[*stream_index];
    p_codecpar = st->codecpar;
    if (p_codecpar->codec_id != AV_CODEC_ID_AV1) {
        av_log(NULL, AV_LOG_ERROR, "this decoder does not support this codec. ");
        p_desc = avcodec_descriptor_get(p_codecpar->codec_id);
        if (p_desc) {
            av_log(NULL, AV_LOG_ERROR, "Name: %s\n", p_desc->name);
        } else {
            av_log(NULL, AV_LOG_ERROR, "ID: %d\n", p_codecpar->codec_id);
        }
        return AVERROR(EINVAL);
    }

    if (p_codecpar->extradata && p_codecpar->extradata_size) {
        ret = av_av1_extract_tiles_from_extradata(p_codecpar->extradata,
                                                  p_codecpar->extradata_size,
                                                  &p_tile_codec_ctx->row,
                                                  &p_tile_codec_ctx->col);

        if (ret < 0) {
            /* Not fatal here: the caller sees a tile count of 0. */
            av_log(NULL, AV_LOG_ERROR, "failed to extract tiles\n");
        } else {
            av_log(NULL, AV_LOG_INFO, "number of tiles is %dx%d\n",
                   p_tile_codec_ctx->col, p_tile_codec_ctx->row);
            if (tile_num) {
                *tile_num = p_tile_codec_ctx->row * p_tile_codec_ctx->col;
            }
        }
    }

    return 0;
}

/*
 * Prepare @inst for decoding @stream: reset its queues and counters, open
 * the decoder named by inst->codec_name, allocate the scratch frame and
 * initialise the frame-queue mutex.
 *
 * 10-bit 4:2:0 pixel formats are rejected with AVERROR(EINVAL).
 *
 * Returns 0 on success or a negative AVERROR code; on failure everything
 * acquired so far has been released again. A pthread_mutex_init() failure is
 * converted with AVERROR() so that every error path returns a negative value.
 */
static int codec_instance_init(struct codec_instance *inst, AVStream *stream)
{
    int ret;

    inst->pkt_num = 0;
    inst->frm_num = 0;
    INIT_LIST_HEAD(&inst->bs_pkt_list);
    INIT_LIST_HEAD(&inst->dec_frm_list);

    ret = decoder_open(&inst->dec_ctx, stream, inst->codec_name);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to open decode codec %d\n", inst->id);
        return ret;
    }

    if (inst->dec_ctx->pix_fmt == AV_PIX_FMT_YUV420P10LE ||
        inst->dec_ctx->pix_fmt == AV_PIX_FMT_YUV420P10BE ||
        inst->dec_ctx->pix_fmt == AV_PIX_FMT_YUV420P10) {
        av_log(NULL, AV_LOG_ERROR, "ERROR: decoding of 10 bit bitstream not supported \n");
        ret = AVERROR(EINVAL);
        goto fail_alloc_frame;
    }

    inst->decoded_frame = av_frame_alloc();
    if (inst->decoded_frame == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate frame %d\n", inst->id);
        ret = AVERROR(ENOMEM);
        goto fail_alloc_frame;
    }

    /* pthread functions report failure as a positive errno value. */
    ret = pthread_mutex_init(&inst->frm_mtx, NULL);
    if (ret) {
        ret = AVERROR(ret);
        goto fail_init_frm_mtx;
    }

    return 0;

fail_init_frm_mtx:
    av_frame_free(&inst->decoded_frame);
fail_alloc_frame:
    avcodec_free_context(&inst->dec_ctx);

    return ret;
}

/*
 * Initialise the mutex and condition variable of @worker.
 *
 * Returns 0 on success or the (positive) error number reported by the
 * failing pthread call. Only objects that were successfully initialised are
 * destroyed on the error path; destroying an uninitialised mutex or
 * condition variable is undefined.
 */
static int tile_thread_context_init(struct tile_thread_context *worker)
{
    int ret;

    ret = pthread_mutex_init(&worker->inst_mtx, NULL);
    if (ret) {
        return ret;
    }

    ret = pthread_cond_init(&worker->inst_cond, NULL);
    if (ret) {
        pthread_mutex_destroy(&worker->inst_mtx);
        return ret;
    }

    return 0;
}

/*
 * Release what codec_instance_init() set up for @inst: the scratch frame,
 * the decoder context and the frame-queue mutex. A NULL @inst is ignored.
 */
static void codec_instance_cleanup(struct codec_instance *inst)
{
    if (inst == NULL) {
        return;
    }

    av_frame_free(&inst->decoded_frame);
    avcodec_free_context(&inst->dec_ctx);
    pthread_mutex_destroy(&inst->frm_mtx);
}

/*
 * Hand @pkt to @inst and notify @worker that the instance has work.
 *
 * The packet is appended to the instance's bs_pkt_list, then a work item
 * naming the instance is appended to the worker's inst_list under inst_mtx
 * and the worker is signalled. A NULL @pkt is queued like any other packet.
 * Allocation failures abort through av_assert0().
 */
static void packet_dispatch(struct tile_thread_context *worker,
                            struct codec_instance *inst, AVPacket *pkt)
{
    struct bs_pkt_node *pkt_node = (struct bs_pkt_node *) malloc(sizeof(*pkt_node));
    struct codec_instance_node *work_item;

    av_assert0(pkt_node);
    pkt_node->pkt = pkt;

    /* NOTE(review): the packet list is updated outside any lock — confirm
     * against the consumer in decode_routine(). */
    list_add_tail(&pkt_node->link, &inst->bs_pkt_list);
    inst->pkt_num += 1;

    work_item = (struct codec_instance_node *) malloc(sizeof(*work_item));
    av_assert0(work_item);
    work_item->inst = inst;

    pthread_mutex_lock(&worker->inst_mtx);
    list_add_tail(&work_item->link, &worker->inst_list);
    worker->nb_instances += 1;
    pthread_cond_signal(&worker->inst_cond);
    pthread_mutex_unlock(&worker->inst_mtx);
}

/*
 * Push @pkt through the frame-split bitstream filter and dispatch every
 * resulting per-tile packet to its codec instance and worker thread.
 * A NULL @pkt flushes the filter.
 *
 * The tile id is taken from the AV_PKT_DATA_SLICE_ADDR side data (0 when it
 * is absent). The id selects instances[id] and workers[id % total_threads],
 * so an id outside [0, nb_tiles) is rejected with AVERROR_INVALIDDATA
 * instead of indexing past the arrays.
 *
 * Returns the code that ended the receive loop, normally AVERROR(EAGAIN) or
 * AVERROR_EOF, or a negative error from the filter.
 */
static int packet_split_bsf(AVPacket *pkt)
{
    int ret;
    int id;
    uint8_t *side_data;
    /* Must start out NULL: the send-failure path reaches av_packet_free()
     * before any packet has been allocated. */
    AVPacket *splited_pkt = NULL;
    struct codec_instance *inst;
    struct tile_thread_context *worker;

    if (pkt) {
        av_log(NULL, AV_LOG_DEBUG, "send bsf: size %d,dts %ld,pts %ld,elems %d\n",
               pkt->size, pkt->dts, pkt->pts, pkt->side_data_elems);
    } else {
        av_log(NULL, AV_LOG_DEBUG, "flush bsf\n");
    }

    ret = av_bsf_send_packet(p_tile_codec_ctx->bsf_ctx, pkt);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to send packet to bsf\n");
        goto out;
    }

    for (; ;) {
        /* A dispatched packet is owned by its queue, so a fresh one is needed
         * for every receive; the unused last one is freed at "out". */
        splited_pkt = av_packet_alloc();
        av_assert0(splited_pkt);

        ret = av_bsf_receive_packet(p_tile_codec_ctx->bsf_ctx, splited_pkt);
        if (ret == AVERROR(EAGAIN)) {
            break;
        } else if (ret < 0) {
            if (ret == AVERROR_EOF) {
                av_log(NULL, AV_LOG_INFO, "Main thread: split bsf eof reached\n");
            } else {
                av_log(NULL, AV_LOG_ERROR, "Failed to receive packet from bsf: %s\n", av_err2str(ret));
            }
            break;
        }

        side_data = av_packet_get_side_data(splited_pkt, AV_PKT_DATA_SLICE_ADDR, NULL);
        id = 0;
        if (side_data) {
            /* memcpy avoids an unaligned / type-punned read of the byte buffer. */
            memcpy(&id, side_data, sizeof(id));
        }
        av_log(NULL, AV_LOG_DEBUG, "recv bsf: size %d,dts %ld,pts %ld,elems %d,tile %d\n",
               splited_pkt->size, splited_pkt->dts, splited_pkt->pts,
               splited_pkt->side_data_elems, id);

        if (id < 0 || id >= p_tile_codec_ctx->nb_tiles) {
            av_log(NULL, AV_LOG_ERROR, "invalid tile id %d from bsf (nb_tiles %d)\n",
                   id, p_tile_codec_ctx->nb_tiles);
            ret = AVERROR_INVALIDDATA;
            goto out;
        }

        worker = &p_tile_codec_ctx->workers[id % p_tile_codec_ctx->total_threads];
        inst = &p_tile_codec_ctx->instances[id];
        packet_dispatch(worker, inst, splited_pkt);
    }

    p_tile_codec_ctx->total_frm_num++;

out:
    av_packet_free(&splited_pkt);
    return ret;
}

/*
 * Return the number of devices of type @dev_type reported by
 * ni_rsrc_list_all_devices(), or -1 when the query or the allocation of its
 * result buffer fails.
 */
static int get_ni_devices_cnt(ni_device_type_t dev_type)
{
    int count;
    /* Heap-allocated and zeroed; the original author notes the structure is
     * large (~450KB), too big for the stack. */
    ni_device_t *devices = calloc(1, sizeof(*devices));

    if (devices == NULL) {
        av_log(NULL, AV_LOG_ERROR, "Error failed to malloc ni_device_t\n");
        return -1;
    }

    if (ni_rsrc_list_all_devices(devices) == NI_RETCODE_SUCCESS) {
        count = devices->xcoder_cnt[dev_type];
    } else {
        av_log(NULL, AV_LOG_ERROR, "Failed to get available xcoders.\n");
        count = -1;
    }

    free(devices);
    return count;
}

/*
 * SIGINT handler: ask the decoding loops to stop by raising the finish flag.
 *
 * The handler is installed before p_tile_codec_ctx is allocated, so the
 * pointer is checked first. Only async-signal-safe calls are made here: the
 * notice is written with write() instead of av_log(), and errno is preserved
 * for the interrupted code.
 */
static void sigint_handler(int signo)
{
    static const char msg[] = "sigint_handler(): signal received\n";
    const int saved_errno = errno;

    (void) signo;

    if (p_tile_codec_ctx) {
        p_tile_codec_ctx->finish = 1;
    }

    if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
        /* Nothing useful can be done about a failed notice inside a handler. */
    }

    errno = saved_errno;
}

int main(int argc, char **argv)
{
    int64_t t0, t1;
    const char *filename = NULL;
    const char *output_filename = NULL;
    AVPacket pkt;
    AVFormatContext *fmt_ctx = NULL;
    int video_stream_idx = -1;
    int i, ret, nb_tiles, nb_procs, nb_threads = 0;
    const char *codec_name = NULL;
    int opt, opt_index;
    const char *opt_string = "ai:o:c:j:h";
    static struct option long_options[] = {
        {"input",    required_argument, NULL, 'i'},
        {"codec",    required_argument, NULL, 'c'},
        {"threads",  required_argument, NULL, 'j'},
        {"output",   optional_argument, NULL, 'o'},
        {"assemble", no_argument,       NULL, 'a'},
        {"help",     no_argument,       NULL, 'h'},
        { NULL,      0,                 NULL,  0 },
    };

    while ((opt = getopt_long(argc, argv, opt_string, long_options, &opt_index)) != -1) {
        switch (opt) {
        case 'i':
            filename = optarg;
            break;
        case 'c':
            codec_name = optarg;
            break;
        case 'o':
            output_filename = optarg;
            assemble_frame = 1;
            break;
        case 'a':
            assemble_frame = 1;
            break;
        case 'j':
            nb_threads = atoi(optarg);
            break;
        case 'h':
            usage();
            return 0;
        default:
            usage();
            return -1;
        }
    }

    if (!filename) {
        usage();
        return -1;
    }

    if (!codec_name) {
        codec_name = "libaom-av1";
    }

    // Signal handler
    if (SIG_ERR == signal(SIGINT, sigint_handler)) {
        av_log(NULL, AV_LOG_ERROR, "failed to set signal handler\n");
        ret = errno;
        goto end;
    }

    av_log_set_level(AV_LOG_INFO);

    p_tile_codec_ctx = av_mallocz(sizeof(*p_tile_codec_ctx));
    if (p_tile_codec_ctx == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate tile_codec_context\n");
        ret = -ENOMEM;
        goto end;
    }

    av_log(NULL, AV_LOG_INFO, "avformat_open_input\n");
    // Open input file and allocate format context
    ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not open source file %s\n", filename);
        return -AVUNERROR(ret);
    }

    av_log(NULL, AV_LOG_INFO, "input_file_open\n");
    // Setup decoder context for video stream from input_file
    ret = input_file_open(fmt_ctx, AVMEDIA_TYPE_VIDEO, &video_stream_idx, &nb_tiles);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not open codec context\n");
        return -AVUNERROR(ret);
    }
    av_log(NULL, AV_LOG_INFO, "input_file_open done\n");

    if (nb_tiles == 0) {
        av_log(NULL, AV_LOG_ERROR, "non tiles video stream. not support yet\n");
        ret = -AVERROR(EINVAL);
        goto end;
    }

    ret = av1_frame_split_bsf_init(fmt_ctx->streams[video_stream_idx]);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "failed to init hevc frame split bsf.\n");
        return -AVUNERROR(ret);
    }

    // Print information regarding input file to stderr
    av_dump_format(fmt_ctx, 0, filename, 0);

    nb_procs = get_nprocs();
    if (nb_threads > 0) {
        if (nb_threads > nb_tiles) {
            av_log(NULL, AV_LOG_WARNING, "The number of threads %d cannot "
                   "exceed number of tiles %d. ", nb_threads, nb_tiles);
            nb_threads = nb_procs < nb_tiles ? nb_procs : nb_tiles;
            av_log(NULL, AV_LOG_WARNING, "Change the number of threads into "
                   "%d.\n ", nb_threads);
        }
    } else {
        nb_threads = nb_procs < nb_tiles ? nb_procs : nb_tiles;
    }

    p_tile_codec_ctx->nb_tiles = nb_tiles;
    p_tile_codec_ctx->total_threads = nb_threads;
    p_tile_codec_ctx->save = 0;

    av_log(NULL, AV_LOG_DEBUG, "number of tiles %d number of threads %d\n", nb_tiles, nb_threads);


    if (output_filename) {
        if (strcmp(output_filename, "null")) {
            p_tile_codec_ctx->save = 1;
            out_fp = fopen(output_filename, "wb");
            if (!out_fp) {
                av_log(NULL, AV_LOG_ERROR, "failed to open output file %s\n", strerror(errno));
                ret = -errno;
                goto end;
            }
        }
    }

    uni_frame = av_frame_alloc();
    if (!uni_frame) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate uni_frame\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    ret = pthread_mutex_init(&p_tile_codec_ctx->mutex, NULL);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "failed to init tile_codec_context mutex\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    ret = pthread_cond_init(&p_tile_codec_ctx->cond, NULL);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "failed to init tile_codec_context condvar\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    p_tile_codec_ctx->instances = av_mallocz(sizeof(struct codec_instance) * nb_tiles);
    if (p_tile_codec_ctx->instances == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate codec_instances\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    av_log(NULL, AV_LOG_DEBUG, "video_stream_idx %d codec_name %s total_threads %d\n", video_stream_idx, codec_name, p_tile_codec_ctx->total_threads);

    for (i = 0; i < nb_tiles; i++) {
        struct codec_instance *inst = &p_tile_codec_ctx->instances[i];
        inst->id = i;
        inst->codec_name = codec_name;
        ret = codec_instance_init(inst, fmt_ctx->streams[video_stream_idx]);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to initialize codec instance %d\n", i);
            ret = -AVUNERROR(ret);
            goto end;
        }
    }

    p_tile_codec_ctx->workers = av_mallocz(sizeof(struct tile_thread_context) * p_tile_codec_ctx->total_threads);
    if (p_tile_codec_ctx->workers == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate codec threads\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    for (i = 0; i < p_tile_codec_ctx->total_threads; i++) {
        struct tile_thread_context *worker = &p_tile_codec_ctx->workers[i];
        worker->nb_instances = 0;
        worker->codec_ctx = p_tile_codec_ctx;
        INIT_LIST_HEAD(&worker->inst_list);

        ret = tile_thread_context_init(worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to initialize codec inst %d\n", i);
            ret = -AVUNERROR(ret);
            goto end;
        }

        ret = pthread_create(&worker->tid, NULL, &decode_routine, worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to create thread %d: %s\n", i, strerror(errno));
            goto end;
        }
    }

    // Wait for all threads running...
    frame_decode_sync();

    // Read packets from file and decode them
    t0 = av_gettime_relative();
    while (!p_tile_codec_ctx->finish && av_read_frame(fmt_ctx, &pkt) >= 0) {
        t1 = av_gettime_relative();
        if (pkt.stream_index == video_stream_idx) {
            ret = packet_split_bsf(&pkt);
            if (ret != AVERROR(EAGAIN)) {
                ret = -AVUNERROR(ret);
                break;
            }
        }
        av_packet_unref(&pkt);
        print_report(0, t0, t1);
    }

    // Flush decoder
    av_log(NULL, AV_LOG_INFO, "Main thread: ready to flush all decoders\n");
    packet_split_bsf(NULL);
    for (i = 0; i < nb_tiles; i++) {
        packet_dispatch(&p_tile_codec_ctx->workers[i % p_tile_codec_ctx->total_threads],
                        &p_tile_codec_ctx->instances[i], NULL);
    }

    // Assemble all decoded frames
    frame_assemble_sync(assemble_frame, t0);
    print_report(1, t0, av_gettime_relative());

    // Finish all threads
    p_tile_codec_ctx->finish = 1;
    for (i = 0; i < p_tile_codec_ctx->total_threads; i++) {
        struct tile_thread_context *worker = &p_tile_codec_ctx->workers[i];
        void *result;

        av_log(NULL, AV_LOG_INFO, "thread %ld is going to exit\n", worker->tid);
        pthread_mutex_lock(&worker->inst_mtx);
        pthread_cond_signal(&worker->inst_cond);
        pthread_mutex_unlock(&worker->inst_mtx);

        if (pthread_join(worker->tid, &result) == 0) {
            if ((long) result != 0) {
                ret = (int)((long) result);
                av_log(NULL, AV_LOG_WARNING, "thread %ld exit result %d\n", worker->tid, ret);
            }
        }

        pthread_mutex_destroy(&worker->inst_mtx);
        pthread_cond_destroy(&worker->inst_cond);
    }

    for (i = 0; i < nb_tiles; i++) {
        codec_instance_cleanup(&p_tile_codec_ctx->instances[i]);
    }

    av_log(NULL, AV_LOG_INFO, "main thread is going to exit\n");
    ret = 0;

end:
    pthread_mutex_destroy(&p_tile_codec_ctx->mutex);
    pthread_cond_destroy(&p_tile_codec_ctx->cond);
    av_bsf_free(&p_tile_codec_ctx->bsf_ctx);
    avformat_close_input(&fmt_ctx);
    av_frame_free(&uni_frame);
    free(p_tile_codec_ctx->instances);
    free(p_tile_codec_ctx->workers);
    free(p_tile_codec_ctx);
    if (out_fp) {
        av_log(NULL, AV_LOG_WARNING, "Waiting for YUV file sync...\n");
        fclose(out_fp);
    }
    av_log(NULL, AV_LOG_INFO, "End. retval = %d\n", ret);
    return ret;
}
