/*
 * Copyright (c) 2010 Nicolas George
 * Copyright (c) 2011 Stefano Sabatini
 * Copyright (c) 2014 Andrey Utkin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @file
 * API example for decoding, ni_xstack filtering and encoding pipeline
 * @example ni_xstack.c
 *
 * use multiple threads to run filtering/transcoding.
 */

#define _GNU_SOURCE
#include <assert.h>
#include <getopt.h>
#include <inttypes.h>
#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/time.h>
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/fifo.h>
#include <libswscale/swscale.h>
#include <ni_device_api.h>

// Capacity limits. The first three size the static worker/pad tables declared
// in this file; NI_MAX_DEC_CAPACITY and NI_MAX_DEMUX_CAPACITY are the initial
// element counts of the decoded-frame and demuxed-packet FIFOs.
// NOTE(review): NI_MAX_ENC_CAPACITY / NI_MAX_MUX_CAPACITY presumably size the
// encoder and muxer FIFOs the same way -- confirm where those are allocated.
#define NI_MAX_XSTACK_INPUTS  50
#define NI_MAX_XSTACK_FILTER  4
#define NI_MAX_XSTACK_OUTPUTS 4
#define NI_MAX_DEC_CAPACITY   3
#define NI_MAX_ENC_CAPACITY   2
#define NI_MAX_MUX_CAPACITY   25
#define NI_MAX_DEMUX_CAPACITY 25

// Resolution bounds in pixels.
// NOTE(review): presumably used to validate user-supplied sizes -- confirm
// against the argument parser.
#define MAX_WIDTH             7680
#define MAX_HEIGHT            4800
#define MIN_WIDTH             128
#define MIN_HEIGHT            96

// Defaults applied to raw YUV input (see open_yuv_file): ticks per second of
// the default time base and the default frame rate.
#define DEFAULT_TIME_BASE     1200000
#define DEFAULT_FPS           25
// PTS increment per frame. Parenthesized so the macro behaves as a single
// operand in any surrounding expression (e.g. x / DEFAULT_YUV_PTS_STEP).
#define DEFAULT_YUV_PTS_STEP  (DEFAULT_TIME_BASE / DEFAULT_FPS)

// True when building against libavfilter 9.12 or newer (the macro name ties
// this to FFmpeg 6.1).
#define IS_FFMPEG_61_AND_ABOVE                                                \
    ((LIBAVFILTER_VERSION_MAJOR > 9) ||                                       \
     (LIBAVFILTER_VERSION_MAJOR == 9 && LIBAVFILTER_VERSION_MINOR >= 12))

// True when building against libavfilter 10.1 or newer (the macro name ties
// this to FFmpeg 7.0).
#define IS_FFMPEG_70_AND_ABOVE                                                \
    ((LIBAVFILTER_VERSION_MAJOR > 10) ||                                      \
     (LIBAVFILTER_VERSION_MAJOR == 10 && LIBAVFILTER_VERSION_MINOR >= 1))

// FIFO type used throughout this file: the AVFifo API on FFmpeg 7.0+, the
// older AVFifoBuffer API otherwise. Every FIFO helper below is written twice
// behind the same condition.
#if IS_FFMPEG_70_AND_ABOVE
#define NIFifo AVFifo
#else
#define NIFifo AVFifoBuffer
#endif

// Release a malloc-family pointer and reset the lvalue to NULL so a repeated
// call is harmless. `ptr` is evaluated more than once, so it must not have
// side effects. free(NULL) is a no-op, hence no explicit NULL test.
#define FREE_AND_NULLIFY(ptr) do { \
    free(ptr);                     \
    (ptr) = NULL;                  \
} while (0)

// Forward declarations: the worker structs defined further down hold pointers
// to one another, so their names must be known before the definitions.
typedef struct decoder_worker decoder_worker;
typedef struct encoder_worker encoder_worker;
typedef struct filter_worker filter_worker;
typedef struct demuxer_worker demuxer_worker;
typedef struct muxer_worker muxer_worker;

// Shared bookkeeping block; every worker struct keeps a `common *` pointer
// to it.
typedef struct common {
    pthread_mutex_t lock; // initialized in alloc_common(), destroyed in free_common()

    // One counter per pipeline stage.
    // NOTE(review): presumably the number of threads of each kind that have
    // exited, updated under `lock` -- confirm at the thread exit paths.
    int exit_filt_num;
    int exit_dec_num;
    int exit_enc_num;
    int exit_demux_num;
    int exit_mux_num;
} common;

// Kind of input attached to a decoder worker (decoder_worker.type).
// NOTE(review): the meanings are inferred from the names and from the hw/sw
// union in decoder_worker (hw: decoder name/params; sw: raw YUV file or
// picture) -- confirm against the option parser.
typedef enum {
    UNKNOW,     // zero value, i.e. the state of a zero-initialized struct
    HW_VIDEO,
    SW_VIDEO,
    SW_PICTURE,
} input_type;

// Program input parameters describing one input.
// NOTE(review): presumably filled from the command line before the workers
// are created -- the parser is not visible in this part of the file.
typedef struct input_info {
    input_type type;
    char input_name[256];
    char decoder_name[32];
    char decoder_params[256];

    // Source size and requested size after scaling, in pixels.
    int input_width;
    int input_height;
    int scaled_width;
    int scaled_height;
} input_info;

// Program parameters for one filter graph: its textual description (same
// capacity as filter_worker.filter_desc).
typedef struct filter_info {
    char filter_desc[2048];
} filter_info;

// Program parameters for one output: its name (same capacity as
// encoder_worker.output_name).
typedef struct output_info {
    char output_name[256];
} output_info;

// Value type of the `should_exit` field of the demuxer, decoder, encoder and
// muxer workers. Workers are initialized to THREAD_STATE_RUNNING.
// NOTE(review): EXIT_ISSUED / EXIT_PROCESSED look like a two-step shutdown
// handshake (requested, then acknowledged) -- confirm in the thread loops.
typedef enum thread_state_t {
    THREAD_STATE_RUNNING = 0,
    THREAD_STATE_EXIT_ISSUED,
    THREAD_STATE_EXIT_PROCESSED,
    INVALID_THREAD_STATE,
} thread_state_t;

// Per-entry frame state stored in ni_xstack_entry_t.frame_status; a new entry
// starts at FILT_DEC_FRAME_NEED.
typedef enum {
    FILT_DEC_FRAME_NEED = 0,  ///< initial state of a newly created entry
    FILT_DEC_FRAME_VALID,     ///< valid frame, send to filter source
    FILT_DEC_FRAME_SKIP,      ///< skip sending frame
    FILT_DEC_FRAME_EOS,       ///< decoder reached EOS, filter gets NULL
    FILT_DEC_FRAME_EXIT       ///< exit filter
} filt_dec_frame_t;

// filter thread parameters
// One xstack entry (source pad): a buffersrc of a filter graph together with
// the FIFO through which one decoder hands frames to that graph.
// Created by create_new_src_pad().
typedef struct _ni_xstack_entry {
    pthread_mutex_t lock;        // held by send_decode_frame() around list_block / dec_frame_fifo
    pthread_cond_t  frame_cond;  // signalled after a frame has been queued
    pthread_cond_t  eos_cond;

    char name[16];               // "src_<filter index>_<decoder index>"
    int eos_flag;

    NIFifo *dec_frame_fifo;      // FIFO of AVFrame* queued by the decoder
    int list_block;              // non-zero: send_decode_frame() drops frames instead of queueing

    AVFrame *first_frame;
    AVFrame *last_frame;
    AVFilterContext *buffersrc_ctx;
    AVBufferSrcParameters *buffersrc_par;

    decoder_worker *worker;      // decoder feeding this entry
    filter_worker *xstack;       // filter graph owning this entry
    enum AVPixelFormat hw_pixfmt; // set to AV_PIX_FMT_NI_QUAD on creation
    int width, height;
    AVRational fps; // source framerate
    AVRational par; // source pixel aspect ratio
    AVRational time_base; // source time base
    int64_t last_pts; // for sw last frame pts
    int pts_step;

    filt_dec_frame_t frame_status;
} ni_xstack_entry_t;

// One xstack exit (destination pad): a buffersink of a filter graph that is
// connected to an encoder. Created by create_new_dst_pad().
typedef struct _ni_xstack_exit {
    AVFilterContext *buffersink_ctx; // NULL until set up by the filter-graph code
    AVFrame *filter_frame;           // frame allocated together with the pad
    encoder_worker *enc_worker;      // encoder fed by this pad
} ni_xstack_exit_t;

// The Quadra xstack filter graph: one filter thread with its source pads
// (fed by decoders) and destination pads (feeding encoders).
typedef struct filter_worker {
    pthread_t tid;
    pthread_mutex_t filter_lock; // held around num_src_pads and input_eos_num accesses
    pthread_mutex_t ret_lock;    // held around filter_ret and flushed accesses
    pthread_cond_t  init_cond;   // signalled once num_src_pads == inputs
    pthread_cond_t  flush_cond;

    int index;                   // position of this graph; also indexes decoder_worker.stack_entry
    int init;
    int inputs;                  // number of source pads expected before init_cond fires
    int outputs;
    int shortest;
    int sync;

    ni_xstack_entry_t *src_pads[NI_MAX_XSTACK_INPUTS];   // indexed by decoder worker index
    int num_src_pads;
    ni_xstack_exit_t *dst_pads[NI_MAX_XSTACK_OUTPUTS];   // indexed by encoder enc_index
    int num_dst_pads;

    char filter_desc[2048]; // full text of filter parameter description
    AVFilterGraph *filter_graph;

    int input_eos_num; // how many end-of-stream of inputs got so far
    int64_t latest_stream_start; // latest stream start time, used for setting Image pts
    int64_t cur_filter_pts;
    int filter_ret;    // read by decoders through get_filter_ret()

    unsigned int filtered_frames;
    /// flag to indicate if filter is in flush state (read via get_filter_flush_state())
    bool flushed;
    common *common;
} filter_worker;

// demuxer thread parameters
// demuxing task description: packets are handed to the consumer through
// demux_packet_fifo (see send_demux_packet()).
typedef struct demuxer_worker {
    pthread_t tid;
    pthread_mutex_t packet_lock; // held around demux_packet_fifo accesses
    pthread_cond_t consume_cond; // signalled after a packet has been queued
    pthread_cond_t produce_cond; // waited on by the producer when the FIFO is full

    thread_state_t should_exit;
    int index;
    int demux_started;
    int input_eos;
    int demux_pkt_num;
    int video_index;             // -1 after init_demuxer_worker()
    int64_t target_dts;          // AV_NOPTS_VALUE after init_demuxer_worker()

    NIFifo *demux_packet_fifo;   // FIFO of AVPacket*
    AVFormatContext *ifmt_ctx;
    AVPacket *demux_packet;
    decoder_worker *dec_worker;
    common *common;
} demuxer_worker;

// decoder thread parameters
// decoding task description. `type` selects which member of the `in` union
// is meaningful (hw: hardware decoder settings; sw: raw YUV / picture input).
typedef struct decoder_worker {
    pthread_t tid;
    pthread_mutex_t frame_lock;
    pthread_mutex_t list_lock;
    pthread_cond_t  list_cond;

    thread_state_t should_exit;
    int index;                  // also the slot used in filter_worker.src_pads
    input_type type;
    char input_file[256];
    int devid;                  // device id passed to av_hwdevice_ctx_create() as a string

    union {
        struct{ // only for hw input
            char decoder_name[32];
            char decoder_params[256];
        } hw;
        struct{
            //only for sw yuv input
            FILE *input_fp;
            unsigned int yuv_frame_cnt;
            int input_width;
            int input_height;
            // for sw yuv and sw picture input
            int width;
            int height;
            bool need_scale;
            enum AVPixelFormat pix_fmt;
            struct SwsContext* sws_ctx;
            AVBufferRef* hwdevice_upload; // device context created by init_hwframe_uploader()
            AVBufferRef* hwctx_upload;    // frames context used by retrieve_hwframe()
            unsigned int sw_loop;
        } sw;
    } in;

    demuxer_worker *demux_worker;
    AVFormatContext *ifmt_ctx;
    AVCodecContext *dec_ctx;

    int64_t last_decoded_pts;
    int64_t last_decoded_dts;
    int64_t start_pts;
    int video_index;
    unsigned int decoded_frames;

    filter_worker **xstack;
    ni_xstack_entry_t *stack_entry[NI_MAX_XSTACK_FILTER]; // indexed by filter graph index
    AVFrame *decoded_frame;
    AVFrame *wait_free_frame;
    NIFifo *wait_to_free_list;  // FIFO of AVFrame*, allocated in init_decoder_worker()

    common *common;
} decoder_worker;

// encoder thread parameters
// encoding task description: filtered frames arrive through enc_frame_fifo
// (see send_encode_frame()).
typedef struct encoder_worker {
    pthread_t tid;
    pthread_mutex_t frame_lock;  // held around enc_frame_fifo accesses
    pthread_cond_t consume_cond; // signalled after a frame has been queued
    pthread_cond_t produce_cond; // waited on by the producer when the FIFO is full

    int filter_index;
    int enc_index;               // also the slot used in filter_worker.dst_pads
    char encoder_name[32];
    char encoder_params[256];
    char output_name[256];
    int width;
    int height;

    AVCodecContext *enc_ctx;
    int64_t last_encoded_pts;
    int64_t last_encoded_dts;

    int device_id;
    AVRational timebase;
    int force_source_keyframe;

    int enc_started;
    thread_state_t should_exit;
    int filter_flush;

    NIFifo *enc_frame_fifo;      // FIFO of AVFrame*
    AVFrame *filtered_frame;

    unsigned long encoded_frames;
    unsigned long encoder_output_frames;

    filter_worker *xstack;
    muxer_worker *mux_worker;
    common *common;
} encoder_worker;

// muxer thread parameters
// muxing task description: encoded packets arrive through mux_packet_fifo
// (see send_mux_packet()).
typedef struct muxer_worker {
    pthread_t tid;
    pthread_mutex_t packet_lock; // held around mux_packet_fifo accesses
    pthread_cond_t consume_cond; // signalled after a packet has been queued
    pthread_cond_t produce_cond; // waited on by the producer when the FIFO is full

    int filter_index;
    int enc_index;
    int mux_pkt_num;
    int mux_started;
    int encoder_flush;
    thread_state_t should_exit;
    int live;
    AVRational timebase;
    NIFifo *mux_packet_fifo;     // FIFO of AVPacket*
    AVFormatContext *ofmt_ctx;
    AVPacket *encoded_packet;
    encoder_worker *enc_worker;
    common *common;
} muxer_worker;

// Global worker tables, all zero-initialized, each paired with a counter.
// NOTE(review): presumably the counters track how many slots are in use or
// still running -- confirm where they are updated.
filter_worker *xstack_workers[NI_MAX_XSTACK_FILTER] = {0};
int filter_num = 0;
demuxer_worker *demuxer_workers[NI_MAX_XSTACK_INPUTS] = {0};
int active_demuxer_workers = 0;
decoder_worker *decoder_workers[NI_MAX_XSTACK_INPUTS] = {0};
int active_decoder_workers = 0;
encoder_worker *encoder_workers[NI_MAX_XSTACK_FILTER][NI_MAX_XSTACK_OUTPUTS] = {0};
int active_encoder_workers = 0;
muxer_worker *muxer_workers[NI_MAX_XSTACK_FILTER][NI_MAX_XSTACK_OUTPUTS] = {0};
int active_muxer_workers = 0;

// Leading file-signature bytes of BMP ("BM"), JPEG (FF D8), PNG
// (89 'P' 'N' 'G' 0D 0A 1A 0A) and GIF ("GIF89a"), written as 16-bit words in
// little-endian byte order.
// NOTE(review): presumably compared against the start of picture inputs to
// detect the image type -- confirm at the use site.
unsigned short  BMP=0x4D42,
                JPG=0xD8FF,
                PNG[4]={0x5089,0x474E,0x0A0D,0x0A1A},
                GIF[3]={0x4947,0x3846,0x6139};

// main thread status
int global_stop = 0;
int print_stat = 1;

// COMMON FUNCTION
// Allocate a zero-filled common block and initialize its mutex.
// Returns the new block, or NULL if the allocation or the mutex
// initialization fails (the block is released in the latter case).
static common *alloc_common(void) {
    common *shared = calloc(1, sizeof(*shared));

    if (!shared) {
        av_log(NULL, AV_LOG_ERROR, "%s failed.\n", __func__);
        return NULL;
    }

    if (pthread_mutex_init(&shared->lock, NULL) != 0) {
        free(shared);
        return NULL;
    }

    return shared;
}

// Tear down the mutex of a common block; a NULL argument is ignored.
// Only the mutex is destroyed here -- the memory of the block itself is not
// released by this function.
static void free_common(common *common) {
    if (!common)
        return;

    pthread_mutex_destroy(&common->lock);
}

// fifo operations (for fifo storing filtered frames to be encoded)
//static inline int is_fifo_empty(NIFifo *fifo) {return av_fifo_size(fifo) < sizeof(AVFrame*);}

//static inline int get_fifo_size(NIFifo *fifo) {return av_fifo_size(fifo) / sizeof(AVFrame*);}

#if IS_FFMPEG_70_AND_ABOVE
static inline int is_fifo_empty(AVFifo *fifo)
{
    return av_fifo_can_read(fifo) ? 0 : 1;
}
#else
static inline int is_fifo_empty(AVFifoBuffer *fifo)
{
    return av_fifo_size(fifo) < sizeof(AVFrame*);
}
#endif

#if IS_FFMPEG_70_AND_ABOVE
static inline int is_fifo_full(AVFifo *fifo)
{
    return av_fifo_can_write(fifo) ? 0 : 1;
}
#else
static inline int is_fifo_full(AVFifoBuffer *fifo)
{
    return av_fifo_space(fifo) < sizeof(AVFrame*);
}
#endif

#if IS_FFMPEG_70_AND_ABOVE
static inline int get_fifo_size(AVFifo *fifo) {
    return av_fifo_can_read(fifo);
}
#else
static inline int get_fifo_size(AVFifoBuffer *fifo) {
    return av_fifo_size(fifo) / sizeof(AVFrame*);
}
#endif

#if IS_FFMPEG_70_AND_ABOVE
static void free_fifo(AVFifo *fifo) {
    av_fifo_freep2(&fifo);
}
#else
static void free_fifo(AVFifoBuffer *fifo) {
    return av_fifo_free(fifo);
}
#endif

// Reference count of the first data buffer of `frame`; 0 when the frame is
// NULL or owns no buffer.
static inline int frame_get_ref_count(AVFrame *frame) {
    if (frame && frame->buf[0])
        return av_buffer_get_ref_count(frame->buf[0]);

    return 0;
}

// Pop and free every AVFrame still queued in `fifo`.
// caller_hint1 / caller_hint2 are optional strings used only to identify the
// caller in the debug log ("?" is printed for NULL). A NULL fifo is ignored.
void drain_fifo(NIFifo *fifo, const char *caller_hint1, const char * caller_hint2) {
    if (!fifo)
        return;

    av_log(NULL, AV_LOG_DEBUG, "%s %s %s size: %d\n", __func__,
           caller_hint1 ? caller_hint1 : "?",
           caller_hint2 ? caller_hint2 : "?", get_fifo_size(fifo));
    while (!is_fifo_empty(fifo)) {
        // Start from NULL so a failed read can never free a garbage pointer.
        AVFrame *frame = NULL;
#if IS_FFMPEG_70_AND_ABOVE
        if (av_fifo_read(fifo, &frame, 1) < 0)
            break;
#else
        av_fifo_generic_read(fifo, &frame, sizeof(AVFrame *), NULL);
#endif
        // The surface index lives in the first buffer; only log it when that
        // buffer actually exists.
        if (frame && frame->buf[0] && frame->buf[0]->data) {
            av_log(NULL, AV_LOG_DEBUG, "force free ui16FrameIdx = [%d] ref_count = %d\n",
                   ((niFrameSurface1_t *)(frame->buf[0]->data))->ui16FrameIdx,
                   frame_get_ref_count(frame));
        }
        av_frame_free(&frame);
    }
}

// Return 1 when the decoded-frame FIFO of every source pad of `f` is empty,
// 0 as soon as any pad still holds a frame. Every pad is inspected.
static int entry_empty(filter_worker *f) {
    int all_empty = 1;
    int pad = 0;

    while (pad < f->num_src_pads) {
        if (!is_fifo_empty(f->src_pads[pad]->dec_frame_fifo))
            all_empty = 0;
        pad++;
    }
    return all_empty;
}

// Create an ni_xstack entry (source pad) for an opened input video stream and
// register it with both the filter graph and the decoder.
//
// f           filter graph receiving the pad
// dec_worker  decoder feeding the pad; its index selects the src_pads slot
// fps/sar/tb  frame rate, sample aspect ratio and time base of the source
//
// Signals f->init_cond once the number of registered pads equals f->inputs.
// Returns 0 on success, AVERROR(EINVAL) for an out-of-range index and
// AVERROR(ENOMEM) when an allocation fails (nothing is registered then).
static int create_new_src_pad(filter_worker *f, decoder_worker *dec_worker,
                              AVRational fps, AVRational sar, AVRational tb) {
    ni_xstack_entry_t *entry;
    int total_pads;

    // Both indices are used as table subscripts below.
    if (dec_worker->index < 0 || dec_worker->index >= NI_MAX_XSTACK_INPUTS ||
        f->index < 0 || f->index >= NI_MAX_XSTACK_FILTER) {
        av_log(NULL, AV_LOG_ERROR, "%s invalid index: filter %d decoder %d\n",
               __func__, f->index, dec_worker->index);
        return AVERROR(EINVAL);
    }

    // Zero-filled so fields not assigned here (first_frame, width, height,
    // last_pts, pts_step) start from a defined value.
    entry = calloc(1, sizeof(*entry));
    if (!entry) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc src pad failed.\n", __func__);
        return AVERROR(ENOMEM);
    }

#if IS_FFMPEG_70_AND_ABOVE
    entry->dec_frame_fifo = av_fifo_alloc2(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*), AV_FIFO_FLAG_AUTO_GROW);
#else
    entry->dec_frame_fifo = av_fifo_alloc_array(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*));
#endif
    entry->last_frame = av_frame_alloc();
    if (!entry->dec_frame_fifo || !entry->last_frame) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc src pad resources failed.\n", __func__);
        if (entry->dec_frame_fifo)
            free_fifo(entry->dec_frame_fifo);
        av_frame_free(&entry->last_frame);
        free(entry);
        return AVERROR(ENOMEM);
    }

    pthread_mutex_init(&entry->lock, NULL);
    pthread_cond_init(&entry->frame_cond, NULL);
    pthread_cond_init(&entry->eos_cond, NULL);
    snprintf(entry->name, sizeof(entry->name), "src_%d_%d", f->index, dec_worker->index);

    entry->eos_flag = 0;
    entry->list_block = 0;
    entry->buffersrc_ctx = NULL;
    entry->buffersrc_par = NULL;
    entry->worker = dec_worker;
    entry->xstack = f;
    entry->hw_pixfmt = AV_PIX_FMT_NI_QUAD;
    entry->fps = fps;
    entry->par = sar;
    entry->time_base = tb;
    entry->frame_status = FILT_DEC_FRAME_NEED;

    dec_worker->stack_entry[f->index] = entry;

    // Publish the pad, bump the count and test the "all inputs ready"
    // condition in one critical section.
    pthread_mutex_lock(&f->filter_lock);
    f->src_pads[dec_worker->index] = entry;
    total_pads = ++f->num_src_pads;
    if (total_pads == f->inputs) {
        av_log(NULL, AV_LOG_DEBUG, "filter init signal\n");
        pthread_cond_signal(&f->init_cond);
    }
    pthread_mutex_unlock(&f->filter_lock);

    av_log(NULL, AV_LOG_DEBUG, "%s Filter graph %d src pad index %d name %s "
           " slot %d created, total pads: %d.\n",
           __func__, f->index, dec_worker->index, entry->name, dec_worker->index, total_pads);

    return 0;
}

// Create an ni_xstack exit (destination pad) for one output video stream and
// store it in f->dst_pads[enc_worker->enc_index].
//
// Called while the filter graph is being initialized, with f->filter_lock
// already held by the caller. f->num_dst_pads is not updated here.
// Returns 0 on success, -1 on an out-of-range index or allocation failure.
static int create_new_dst_pad(filter_worker *f, encoder_worker *enc_worker) {
    ni_xstack_exit_t *pad;

    // enc_index is used as a subscript into dst_pads below.
    if (enc_worker->enc_index < 0 || enc_worker->enc_index >= NI_MAX_XSTACK_OUTPUTS) {
        av_log(NULL, AV_LOG_ERROR, "%s invalid dst pad index %d.\n",
               __func__, enc_worker->enc_index);
        return -1;
    }

    pad = malloc(sizeof(*pad));
    if (!pad) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc dst pad failed.\n",
               __func__);
        return -1;
    }

    pad->filter_frame = av_frame_alloc();
    if (!pad->filter_frame) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc dst pad frame failed.\n",
               __func__);
        free(pad);
        return -1;
    }

    pad->buffersink_ctx = NULL;
    pad->enc_worker = enc_worker;
    f->dst_pads[enc_worker->enc_index] = pad;
    av_log(NULL, AV_LOG_DEBUG, "%s Filter graph %d dst pad %d created.\n",
           __func__, f->index, enc_worker->enc_index);

    return 0;
}

// Read, under filter_lock, how many inputs of `f` have reached end-of-stream.
static int get_decoder_eos_num_total(filter_worker *f) {
    int eos_seen;

    pthread_mutex_lock(&f->filter_lock);
    eos_seen = f->input_eos_num;
    pthread_mutex_unlock(&f->filter_lock);

    return eos_seen;
}

// Append `frame` (the pointer itself, no new reference is taken) to the FIFO.
// The frame must carry a hardware surface in buf[0]; it is read for logging.
// Returns 0 on success, -1 when the FIFO is full, or the negative error
// returned by the FIFO write.
static int list_append_frame(NIFifo *fifo, AVFrame *frame) {
    AVBufferRef *surface_buf = frame->buf[0];
    niFrameSurface1_t *surface = (niFrameSurface1_t *)surface_buf->data;
    // Sampled before the write, as reported in the log line below.
    int refs_before = av_buffer_get_ref_count(surface_buf);
    int written;

    if (is_fifo_full(fifo)) {
        av_log(NULL, AV_LOG_ERROR, "wait to free fifo space has full, it is unexpected!!!\n");
        return -1;
    }

#if IS_FFMPEG_70_AND_ABOVE
    written = av_fifo_write(fifo, &frame, 1);
#else
    written = av_fifo_generic_write(fifo, &frame, sizeof(AVFrame*), NULL);
#endif

    av_log(NULL, AV_LOG_DEBUG, "filt frame %p width %d height %d\n", frame, frame->width, frame->height);
    av_log(NULL, AV_LOG_DEBUG, "%s frame ptr %p ref_cnt %d queued, ui16FrameIdx = [%d] , fifo size: %d\n",
           __func__, frame, refs_before, surface->ui16FrameIdx, get_fifo_size(fifo));

    if (written < 0)
        return written;
    return 0;
}

// Try to release the oldest frame of the wait-to-free FIFO.
//
// The head frame is freed and removed only when this FIFO holds the last
// reference to its buffer (ref count == 1). With two or more references the
// frame stays queued. At most one frame is handled per call.
// Returns the number of frames left in the FIFO after the call.
static int list_recycle_frames(NIFifo *fifo) {
    AVFrame *head = NULL;
    niFrameSurface1_t *surface = NULL;
    int ref_count = -1;
    int fifo_size = get_fifo_size(fifo);

    // Peek only when there is something to look at; peeking an empty FIFO
    // would leave `head` unset.
    if (fifo_size > 0) {
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_peek(fifo, &head, 1, 0);
#else
        av_fifo_generic_peek_at(fifo, &head, 0, sizeof(AVFrame*), NULL);
#endif
    }

    if (head && head->buf[0]) {
        surface = (niFrameSurface1_t*)(head->buf[0]->data);
        ref_count = av_buffer_get_ref_count(head->buf[0]);
    } else {
        av_log(NULL, AV_LOG_ERROR, "ref count is -1\n");
    }

    if (surface && 1 == ref_count) {
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_drain2(fifo, 1);
#else
        av_fifo_drain(fifo, sizeof(AVFrame*));
#endif
        fifo_size = get_fifo_size(fifo);
        av_log(NULL, AV_LOG_DEBUG, "%s ref_cnt == 1, unref frame %p ui16FrameIdx = [%d] DevHandle %d , num_nodes now: %d\n", __func__, head, surface->ui16FrameIdx, surface->device_handle, fifo_size);
        av_frame_free(&head);
    } else if (surface && ref_count >= 2) {
        // Still referenced elsewhere: leave it at the head of the FIFO.
        av_log(NULL, AV_LOG_DEBUG, "%s ref %d  >= 2 %p ui16FrameIdx = [%d] , kept in list, fifo size now: %d\n", __func__, ref_count, head, surface->ui16FrameIdx, fifo_size);
    } else {
        av_log(NULL, AV_LOG_ERROR, "%s %p data error, p_data3 %p ui16FrameIdx = [%d] ref_cnt = %d , NOT drained\n",
                __func__, head, surface, surface ? surface->ui16FrameIdx : -1,
                ref_count);
    }
    return fifo_size;
}

// Queue a new reference to `inframe` for the decoder->filter and
// filter->encoder FIFOs.
//
// A fresh AVFrame is allocated, made to reference inframe's buffers and its
// pointer is written to the FIFO; the caller keeps ownership of `inframe`.
// Returns 0 on success, -1 for a NULL frame, a full FIFO or a failed
// allocation, or the negative error from av_frame_ref() / the FIFO write.
static int enqueue_frame(NIFifo *fifo, const AVFrame *inframe) {
    AVFrame *buffered_frame;
    int ret;

    if (!inframe) {
        av_log(NULL, AV_LOG_ERROR, "input frame is null\n");
        return -1;
    }

    av_log(NULL, AV_LOG_DEBUG, "%s frame %p data %p extended_data %p\n",
           __func__, inframe, inframe->data, inframe->extended_data);

    // The FIFO is never expanded here: a full FIFO is reported as an error.
    if (is_fifo_full(fifo)) {
        av_log(NULL, AV_LOG_ERROR, "dec or enc fifo space has full, it is unexpected!!!\n");
        return -1;
    }

    buffered_frame = av_frame_alloc();
    if (!buffered_frame) {
        av_log(NULL, AV_LOG_ERROR, "create buffered frame failed\n");
        return -1;
    }
    // call av_frame_ref to increase buffer ref count / preserve buffer
    ret = av_frame_ref(buffered_frame, inframe);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "%s: av_frame_ref ERROR %d!!!\n", __func__, ret);
        av_frame_free(&buffered_frame);
        return ret;
    }

#if IS_FFMPEG_70_AND_ABOVE
    ret = av_fifo_write(fifo, &buffered_frame, 1);
#else
    ret = av_fifo_generic_write(fifo, &buffered_frame, sizeof(AVFrame*), NULL);
#endif
    if (ret < 0) {
        // The FIFO did not take the pointer, so the reference is still ours.
        av_log(NULL, AV_LOG_ERROR, "%s: fifo write ERROR %d!!!\n", __func__, ret);
        av_frame_free(&buffered_frame);
        return ret;
    }

    av_log(NULL, AV_LOG_DEBUG, "%s enc frame queued, fifo size: %d\n",
           __func__, get_fifo_size(fifo));
    return 0;
}

// Place a decoded frame into the entry's queue for the filter thread.
//
// When the entry is marked list_block the frame is dropped and 0 is returned.
// Otherwise a new reference is queued under entry->lock and frame_cond is
// signalled. Returns 0 on success or the negative error from enqueue_frame()
// (which includes the case of a NULL dec_frame).
static int send_decode_frame(ni_xstack_entry_t *entry, AVFrame *dec_frame) {
    int ret;

    if (dec_frame) {
        // PRId64 keeps the format correct wherever int64_t is not `long`.
        av_log(NULL, AV_LOG_DEBUG, "%s stream send dec frame -> pts=%" PRId64 ",dts=%" PRId64 " index %d\n",
               __func__, dec_frame->pts, dec_frame->pkt_dts, entry->worker->index);
    }

    pthread_mutex_lock(&entry->lock);
    if (entry->list_block) {
        av_log(NULL, AV_LOG_INFO, "filter %d list has been block\n", entry->xstack->index);
        pthread_mutex_unlock(&entry->lock);
        return 0;
    }

    ret = enqueue_frame(entry->dec_frame_fifo, dec_frame);
    if (ret >= 0)
        pthread_cond_signal(&entry->frame_cond);
    pthread_mutex_unlock(&entry->lock);

    if (ret < 0)
        av_log(NULL, AV_LOG_ERROR, "send_decode_frame failed\n");
    return ret;
}

// Place a filtered frame into the encoder's queue.
//
// If the queue is full the caller blocks once on produce_cond, then queues a
// new reference to the frame and signals consume_cond. Returns 0 on success
// or the negative error from enqueue_frame().
static int send_encode_frame(encoder_worker *enc_worker, AVFrame *filt_frame) {
    int ret = 0;

    if (filt_frame) {
        // PRId64 keeps the format correct wherever int64_t is not `long`.
        av_log(NULL, AV_LOG_DEBUG, "%s: send frame <- pts=%" PRId64 ",dts=%" PRId64 " index %d_%d\n",
               __func__, filt_frame->pts, filt_frame->pkt_dts,
               enc_worker->filter_index, enc_worker->enc_index);
    }

    pthread_mutex_lock(&enc_worker->frame_lock);
    // NOTE(review): a single `if` does not re-check the predicate after a
    // spurious wakeup; in that case enqueue_frame() below reports the FIFO as
    // full. Turning this into a loop needs the shutdown signalling (not
    // visible here) to be checked first, so the wait is left as is.
    if (is_fifo_full(enc_worker->enc_frame_fifo)) {
        av_log(NULL, AV_LOG_DEBUG, "enc fifo space full, wait!\n");
        pthread_cond_wait(&enc_worker->produce_cond, &enc_worker->frame_lock);
    }
    ret = enqueue_frame(enc_worker->enc_frame_fifo, filt_frame);
    if (ret < 0) {
        pthread_mutex_unlock(&enc_worker->frame_lock);
        av_log(NULL, AV_LOG_ERROR, "send_encode_frame failed\n");
        return ret;
    }
    pthread_cond_signal(&enc_worker->consume_cond);
    pthread_mutex_unlock(&enc_worker->frame_lock);

    return ret;
}

// Place a demuxed packet pointer into the demuxer's queue for decoding.
//
// The pointer is stored as is (no copy, no new reference); a NULL `pkt` is
// queued like any other value. If the queue is full the caller blocks once on
// produce_cond. consume_cond is signalled after a successful write.
// Returns 0 on success or the negative error from the FIFO write.
static int send_demux_packet(demuxer_worker *demux_worker, AVPacket *pkt) {
    int ret = 0;

    if (pkt) {
        // PRId64 keeps the format correct wherever int64_t is not `long`.
        av_log(NULL, AV_LOG_DEBUG, "%s: send packet <- pts=%" PRId64 ",dts=%" PRId64 "\n",
               __func__, pkt->pts, pkt->dts);
    }

    pthread_mutex_lock(&demux_worker->packet_lock);
    // NOTE(review): single `if`, so a spurious wakeup is not re-checked;
    // left unchanged because the shutdown signalling is not visible here.
    if (is_fifo_full(demux_worker->demux_packet_fifo)) {
        av_log(NULL, AV_LOG_DEBUG, "demux fifo space full, wait!\n");
        pthread_cond_wait(&demux_worker->produce_cond, &demux_worker->packet_lock);
    }
#if IS_FFMPEG_70_AND_ABOVE
    ret = av_fifo_write(demux_worker->demux_packet_fifo, &pkt, 1);
#else
    ret = av_fifo_generic_write(demux_worker->demux_packet_fifo, &pkt, sizeof(AVPacket*), NULL);
#endif
    if (ret < 0) {
        pthread_mutex_unlock(&demux_worker->packet_lock);
        av_log(NULL, AV_LOG_ERROR, "send_demux_packet failed\n");
        return ret;
    }
    pthread_cond_signal(&demux_worker->consume_cond);
    pthread_mutex_unlock(&demux_worker->packet_lock);
    // The legacy write returns a positive byte count; callers only see 0.
    return 0;
}

// Place an encoded packet pointer into the muxer's queue.
//
// The pointer is stored as is (no copy, no new reference); a NULL `pkt` is
// queued like any other value. If the queue is full the caller blocks once on
// produce_cond. consume_cond is signalled after a successful write.
// Returns 0 on success or the negative error from the FIFO write.
static int send_mux_packet(muxer_worker *mux_worker, AVPacket *pkt) {
    int ret = 0;

    if (pkt) {
        // PRId64 keeps the format correct wherever int64_t is not `long`.
        av_log(NULL, AV_LOG_DEBUG, "%s: send packet <- pts=%" PRId64 ",dts=%" PRId64 "\n",
               __func__, pkt->pts, pkt->dts);
    }

    pthread_mutex_lock(&mux_worker->packet_lock);
    // NOTE(review): single `if`, so a spurious wakeup is not re-checked;
    // left unchanged because the shutdown signalling is not visible here.
    if (is_fifo_full(mux_worker->mux_packet_fifo)) {
        av_log(NULL, AV_LOG_DEBUG, "mux fifo space full, wait!\n");
        pthread_cond_wait(&mux_worker->produce_cond, &mux_worker->packet_lock);
    }
#if IS_FFMPEG_70_AND_ABOVE
    ret = av_fifo_write(mux_worker->mux_packet_fifo, &pkt, 1);
#else
    ret = av_fifo_generic_write(mux_worker->mux_packet_fifo, &pkt, sizeof(AVPacket*), NULL);
#endif
    if (ret < 0) {
        pthread_mutex_unlock(&mux_worker->packet_lock);
        av_log(NULL, AV_LOG_ERROR, "send_mux_packet failed\n");
        return ret;
    }
    pthread_cond_signal(&mux_worker->consume_cond);
    pthread_mutex_unlock(&mux_worker->packet_lock);
    // The legacy write returns a positive byte count; callers only see 0.
    return 0;
}

// DECODER THREAD FUNCTION
// Initialize the synchronization objects and the wait-to-free FIFO of a
// decoding task, and reset its run state.
//
// Returns 0 on success. On failure everything initialized so far is torn
// down again and a non-zero value is returned: the pthread error number for
// a mutex/condition failure, AVERROR(ENOMEM) when the FIFO cannot be
// allocated.
static int init_decoder_worker(decoder_worker *dec_worker) {
    int ret;

    ret = pthread_mutex_init(&dec_worker->frame_lock, NULL);
    if (ret) {
        goto dec_fail_init_frame_lock;
    }

    ret = pthread_mutex_init(&dec_worker->list_lock, NULL);
    if (ret) {
        goto dec_fail_init_list_lock;
    }

    ret = pthread_cond_init(&dec_worker->list_cond, NULL);
    if (ret) {
        goto dec_fail_init_list_cond;
    }

#if IS_FFMPEG_70_AND_ABOVE
    dec_worker->wait_to_free_list = av_fifo_alloc2(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*), AV_FIFO_FLAG_AUTO_GROW);
#else
    dec_worker->wait_to_free_list = av_fifo_alloc_array(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*));
#endif
    if (!dec_worker->wait_to_free_list) {
        // ret is still 0 from the last successful pthread call; without this
        // assignment the failure would be reported as success.
        ret = AVERROR(ENOMEM);
        goto dec_fail_init_fifo;
    }

    dec_worker->should_exit = THREAD_STATE_RUNNING;
    dec_worker->decoded_frames = 0;

    return 0;

dec_fail_init_fifo:
    pthread_cond_destroy(&dec_worker->list_cond);
dec_fail_init_list_cond:
    pthread_mutex_destroy(&dec_worker->list_lock);
dec_fail_init_list_lock:
    pthread_mutex_destroy(&dec_worker->frame_lock);
dec_fail_init_frame_lock:
    return ret;
}

// Destroy the mutexes and condition variable of a decoding task and release
// its wait-to-free FIFO container, if any. A NULL worker is ignored.
static void cleanup_decoder_worker(decoder_worker *worker) {
    NIFifo *pending;

    if (!worker)
        return;

    pthread_mutex_destroy(&worker->frame_lock);
    pthread_mutex_destroy(&worker->list_lock);
    pthread_cond_destroy(&worker->list_cond);

    pending = worker->wait_to_free_list;
    if (!pending)
        return;

    av_log(NULL, AV_LOG_DEBUG, "decoder wait to free list size: %d\n",
           get_fifo_size(pending));
    free_fifo(pending);
}

// Initialize the synchronization objects and the packet FIFO of a demuxing
// task, and reset its state fields.
//
// Returns 0 on success, -1 on any failure; objects initialized before the
// failing step are destroyed again.
static int init_demuxer_worker(demuxer_worker *demux_worker) {
    int ret;

    ret = pthread_mutex_init(&demux_worker->packet_lock, NULL);
    if (ret) {
        goto demux_fail_init_packet_lock;
    }

    ret = pthread_cond_init(&demux_worker->consume_cond, NULL);
    if (ret) {
        goto demux_fail_init_consume_cond;
    }

    ret = pthread_cond_init(&demux_worker->produce_cond, NULL);
    if (ret) {
        goto demux_fail_init_produce_cond;
    }

    // Use the same condition that selects the NIFifo type, so the allocator
    // always matches the declared type of demux_packet_fifo.
#if IS_FFMPEG_70_AND_ABOVE
    demux_worker->demux_packet_fifo = av_fifo_alloc2(NI_MAX_DEMUX_CAPACITY, sizeof(AVPacket*), AV_FIFO_FLAG_AUTO_GROW);
#else
    demux_worker->demux_packet_fifo = av_fifo_alloc_array(NI_MAX_DEMUX_CAPACITY, sizeof(AVPacket*));
#endif
    if (!demux_worker->demux_packet_fifo) {
        goto demux_fail_init_fifo;
    }
    demux_worker->should_exit = THREAD_STATE_RUNNING;
    demux_worker->demux_started = 0;
    demux_worker->input_eos = 0;
    demux_worker->demux_pkt_num = 0;
    demux_worker->video_index = -1;
    demux_worker->target_dts = AV_NOPTS_VALUE;
    return 0;

demux_fail_init_fifo:
    pthread_cond_destroy(&demux_worker->produce_cond);
demux_fail_init_produce_cond:
    pthread_cond_destroy(&demux_worker->consume_cond);
demux_fail_init_consume_cond:
    pthread_mutex_destroy(&demux_worker->packet_lock);
demux_fail_init_packet_lock:
    return -1;
}

// Destroy the mutex and condition variables of a demuxing task and release
// its packet FIFO container, if any. A NULL worker is ignored.
static void cleanup_demuxer_worker(demuxer_worker *worker) {
    if (worker) {
        pthread_mutex_destroy(&worker->packet_lock);
        pthread_cond_destroy(&worker->consume_cond);
        pthread_cond_destroy(&worker->produce_cond);
        if (worker->demux_packet_fifo) {
            // This is the demuxer's FIFO; the message used to say "muxer".
            av_log(NULL, AV_LOG_DEBUG, "demuxer packet list size: %d\n",
                    get_fifo_size(worker->demux_packet_fifo));
            free_fifo(worker->demux_packet_fifo);
        }
    }
}


// Read the filter thread's result code under ret_lock.
// Used by decoders: a negative value means the decoder should exit.
static int get_filter_ret(filter_worker *f) {
    int status;

    pthread_mutex_lock(&f->ret_lock);
    status = f->filter_ret;
    pthread_mutex_unlock(&f->ret_lock);

    return status;
}

// Read the filter thread's flush flag under ret_lock.
// Used by decoders: once flushed, no more frames should be pushed to this
// filter.
static bool get_filter_flush_state(filter_worker *f) {
    bool is_flushed;

    pthread_mutex_lock(&f->ret_lock);
    is_flushed = f->flushed;
    pthread_mutex_unlock(&f->ret_lock);

    return is_flushed;
}

// Set up the Quadra hardware device context and hardware frames context used
// to upload software frames (image / raw YUV input) of one decoder worker.
//
// The device is selected by dec_worker->devid; the frames context takes its
// size and software pixel format from dec_worker->in.sw.
// Returns 0 on success or a negative AVERROR code. On failure both contexts
// are released and their pointers reset to NULL.
static int init_hwframe_uploader(decoder_worker *dec_worker) {
    int ret;
    AVHWFramesContext* hwframe_ctx;
    char buf[8] = { 0 };
    // The device is passed to libavutil as a decimal string.
    snprintf(buf, sizeof(buf), "%d", dec_worker->devid);

    av_log(NULL, AV_LOG_INFO, "init_hwframe_uploader width %d height %d pixfmt %d devid %d\n",
            dec_worker->in.sw.width, dec_worker->in.sw.height, dec_worker->in.sw.pix_fmt, dec_worker->devid);
    ret = av_hwdevice_ctx_create(&dec_worker->in.sw.hwdevice_upload, AV_HWDEVICE_TYPE_NI_QUADRA, buf,
        NULL, 0);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to create AV HW device ctx %d\n", dec_worker->index);
        return ret;
    }

    dec_worker->in.sw.hwctx_upload = av_hwframe_ctx_alloc(dec_worker->in.sw.hwdevice_upload);
    if (!dec_worker->in.sw.hwctx_upload) {
        av_log(NULL, AV_LOG_ERROR, "failed to alloc AV HW frames ctx %d\n", dec_worker->index);
        // Do not keep a device reference that nothing will use.
        av_buffer_unref(&dec_worker->in.sw.hwdevice_upload);
        return AVERROR(ENOMEM);
    }

    hwframe_ctx = (AVHWFramesContext*)dec_worker->in.sw.hwctx_upload->data;
    hwframe_ctx->format = AV_PIX_FMT_NI_QUAD;
    hwframe_ctx->sw_format = dec_worker->in.sw.pix_fmt;
    hwframe_ctx->width = dec_worker->in.sw.width;
    hwframe_ctx->height = dec_worker->in.sw.height;

    ret = av_hwframe_ctx_init(dec_worker->in.sw.hwctx_upload);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to init AV HW device ctx %d\n", dec_worker->index);
        av_buffer_unref(&dec_worker->in.sw.hwctx_upload);
        av_buffer_unref(&dec_worker->in.sw.hwdevice_upload);
        return ret;
    }
    return 0;
}

// Upload a software frame into a hardware frame.
//
// A buffer is taken from the worker's upload frames context into
// dec_worker->decoded_frame and the contents of `dst_sw` (despite its name,
// the software *source* frame) are transferred into it.
// Returns a non-negative value on success. On failure a negative AVERROR code
// is returned and dec_worker->decoded_frame has been freed and set to NULL;
// AVERROR(ENOMEM) is returned if decoded_frame was already NULL on entry.
static int retrieve_hwframe(decoder_worker *dec_worker, AVFrame* dst_sw) {
    int ret = 0;
    niFrameSurface1_t *p_data3;

    if (!dec_worker->decoded_frame)
        return AVERROR(ENOMEM);

    av_log(NULL, AV_LOG_DEBUG, "retrieve_hwframe[%d]\n", dec_worker->index);
    ret = av_hwframe_get_buffer(dec_worker->in.sw.hwctx_upload, dec_worker->decoded_frame, 0);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to get buffer from frames context %d\n", dec_worker->index);
        av_frame_free(&dec_worker->decoded_frame); //implement a free all function instead of this?
        return ret;
    }

    dec_worker->decoded_frame->width = dst_sw->width;
    dec_worker->decoded_frame->height = dst_sw->height;

    ret = av_hwframe_transfer_data(dec_worker->decoded_frame, dst_sw, 0);
    // A transfer that reports success but leaves no surface in data[3] is
    // still a failure; make sure the caller sees a negative code, because the
    // frame is freed below.
    if (ret >= 0 && !dec_worker->decoded_frame->data[3])
        ret = AVERROR_UNKNOWN;
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to transfer sw to hwframe %d\n", dec_worker->index);
        av_frame_free(&dec_worker->decoded_frame); //implement a free all function instead of this?
        return ret;
    }
    p_data3 = (niFrameSurface1_t*)dec_worker->decoded_frame->data[3];
    av_log(NULL, AV_LOG_DEBUG, "input_image_hw_frames[%d]->data[3] = %p ui16FrameIdx %u\n",
           dec_worker->index, p_data3, p_data3->ui16FrameIdx);
    return ret;
}

/**
 * Open a raw yuv420p input file and prepare the worker to feed it.
 *
 * Validates that the file size is a whole number of frames of
 * input_width x input_height, records the frame count, creates a scaler
 * when the target resolution differs from the input resolution, initialises
 * the hw frame uploader and creates one source pad per filter.
 *
 * @param dec_worker worker describing the input (input_file, in.sw.*)
 * @return 0 on success, a negative value on failure. On failure the input
 *         file is closed again and in.sw.input_fp is reset to NULL.
 */
static int open_yuv_file(decoder_worker *dec_worker) {
    int i, ret;
    long file_end;
    unsigned long inputfile_size, frame_size;
    unsigned int yuv_frame_cnt;

    // Open input yuv file
    dec_worker->in.sw.input_fp = fopen(dec_worker->input_file, "rb");
    if (!dec_worker->in.sw.input_fp) {
        av_log(NULL, AV_LOG_ERROR, "Could not to open input file: %s\n", dec_worker->input_file);
        return -1;
    }

    // A zero-sized frame would make the size checks below divide by zero
    if (dec_worker->in.sw.input_width <= 0 || dec_worker->in.sw.input_height <= 0) {
        av_log(NULL, AV_LOG_ERROR, "Invalid input resolution for file: %s\n", dec_worker->input_file);
        ret = -1;
        goto fail;
    }
    // only yuv420p is supported: 1.5 bytes per pixel
    frame_size = (unsigned long)dec_worker->in.sw.input_width *
                 (unsigned long)dec_worker->in.sw.input_height * 3 / 2;
    if (!frame_size) {
        av_log(NULL, AV_LOG_ERROR, "Invalid input resolution for file: %s\n", dec_worker->input_file);
        ret = -1;
        goto fail;
    }

    // Check resolution of input file
    if (fseek(dec_worker->in.sw.input_fp, 0, SEEK_END) != 0 ||
        (file_end = ftell(dec_worker->in.sw.input_fp)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not get size of input file: %s\n", dec_worker->input_file);
        ret = -1;
        goto fail;
    }
    inputfile_size = (unsigned long)file_end;
    if (inputfile_size % frame_size != 0) {
        av_log(NULL, AV_LOG_ERROR, "Size of inputfile is not integer multiple of resolution. "
               "Either input file has partial frames, or input resolution is wrong.\n");
        ret = -1;
        goto fail;
    }

    // calculate frame count in yuv file, only support yuv420p now
    yuv_frame_cnt = inputfile_size / frame_size;
    av_log(NULL, AV_LOG_INFO, "inputfile size=%lu, number of frames = %u.\n", inputfile_size, yuv_frame_cnt);
    if (fseek(dec_worker->in.sw.input_fp, 0, SEEK_SET) != 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not rewind input file: %s\n", dec_worker->input_file);
        ret = -1;
        goto fail;
    }

    dec_worker->in.sw.yuv_frame_cnt = yuv_frame_cnt;
    dec_worker->in.sw.pix_fmt = AV_PIX_FMT_YUV420P;
    // start one step below zero so the first frame gets pts 0
    dec_worker->last_decoded_pts = -DEFAULT_YUV_PTS_STEP;
    dec_worker->last_decoded_dts = 0;
    dec_worker->start_pts = 0;

    // init sws_ctx when input resolution not equal target resolution
    if ((dec_worker->in.sw.width > 0 && dec_worker->in.sw.height > 0) &&
        (dec_worker->in.sw.width != dec_worker->in.sw.input_width ||
        dec_worker->in.sw.height != dec_worker->in.sw.input_height)) {
        dec_worker->in.sw.need_scale = true;
        dec_worker->in.sw.sws_ctx = sws_getContext(
                                            dec_worker->in.sw.input_width, dec_worker->in.sw.input_height,
                                            dec_worker->in.sw.pix_fmt, dec_worker->in.sw.width, dec_worker->in.sw.height,
                                            AV_PIX_FMT_YUV420P, SWS_POINT, NULL, NULL, NULL);
        if (!dec_worker->in.sw.sws_ctx) {
            av_log(NULL, AV_LOG_ERROR, "Could not create scaler for input file: %s\n", dec_worker->input_file);
            ret = AVERROR(EINVAL);
            goto fail;
        }
    } else {
        // no explicit target size (or same size): use the input resolution as is
        dec_worker->in.sw.width = dec_worker->in.sw.input_width;
        dec_worker->in.sw.height = dec_worker->in.sw.input_height;
        dec_worker->in.sw.need_scale = false;
    }

    ret = init_hwframe_uploader(dec_worker);
    if (ret < 0)
        goto fail;

    for (i = 0; i < filter_num; i++) {
        create_new_src_pad(dec_worker->xstack[i], dec_worker, av_make_q(1, DEFAULT_FPS),
                           av_make_q(1, 1), av_make_q(1, DEFAULT_TIME_BASE));
    }
    return 0;

fail:
    // Only the file handle opened here is released; other worker-owned state
    // (scaler, hw upload contexts) is left for the worker's own teardown.
    fclose(dec_worker->in.sw.input_fp);
    dec_worker->in.sw.input_fp = NULL;
    return ret;
}

/**
 * Open a still-image input and prepare the worker to decode and upload it.
 *
 * Opens the file with libavformat, picks the best video stream, allocates a
 * decoder context for it, creates a scaler when the picture is not yuv420p
 * or does not match the requested resolution, initialises the hw frame
 * uploader, creates one source pad per filter and finally opens the decoder.
 *
 * @param dec_worker worker describing the input (input_file, in.sw.*)
 * @return 0 on success, a negative AVERROR code on failure. On failure the
 *         format and codec contexts created here are released; on success
 *         they are stored in dec_worker->ifmt_ctx and dec_worker->dec_ctx.
 */
static int open_image_file(decoder_worker *dec_worker) {
    int i, ret, index;
    const AVCodec *dec;
    AVCodecContext *codec_ctx = NULL;
    AVFormatContext* ifmt_ctx = NULL;
    AVStream *stream;

    ret = avformat_open_input(&ifmt_ctx, dec_worker->input_file, NULL, NULL);
    if (ret != 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open image file '%s'\n",
            dec_worker->input_file);
        return ret;
    }
    ret = avformat_find_stream_info(ifmt_ctx, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't find stream\n");
        goto fail;
    }

    index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
    if (index < 0) {
        // a negative index must never be used to address streams[]
        av_log(NULL, AV_LOG_ERROR, "Can't find video stream in '%s'\n",
            dec_worker->input_file);
        ret = index;
        goto fail;
    }
    stream = ifmt_ctx->streams[index];

    dec = avcodec_find_decoder(stream->codecpar->codec_id);
    if (!dec) {
        av_log(NULL, AV_LOG_ERROR, "Can't find decoder for '%s'\n",
            dec_worker->input_file);
        ret = AVERROR_DECODER_NOT_FOUND;
        goto fail;
    }
    codec_ctx = avcodec_alloc_context3(dec);
    if (!codec_ctx) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate decoder context\n");
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    ret = avcodec_parameters_to_context(codec_ctx, stream->codecpar);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't copy stream parameters to decoder context\n");
        goto fail;
    }

    // Scaling/conversion is needed when the picture is not yuv420p, or when
    // an explicit target resolution differs from the picture resolution.
    if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P ||
        ((dec_worker->in.sw.width > 0 && dec_worker->in.sw.height > 0) &&
        (dec_worker->in.sw.width != codec_ctx->width ||
        dec_worker->in.sw.height != codec_ctx->height))) {
        dec_worker->in.sw.need_scale = true;

        // no target size given: convert the pixel format only
        if (dec_worker->in.sw.width == 0 || dec_worker->in.sw.height == 0) {
            dec_worker->in.sw.width = codec_ctx->width;
            dec_worker->in.sw.height = codec_ctx->height;
        }
        // init sws_ctx
        dec_worker->in.sw.sws_ctx = sws_getContext(
                                            codec_ctx->width, codec_ctx->height, codec_ctx->pix_fmt,
                                            dec_worker->in.sw.width, dec_worker->in.sw.height,
                                            AV_PIX_FMT_YUV420P, SWS_POINT, NULL, NULL, NULL);
        if (!dec_worker->in.sw.sws_ctx) {
            av_log(NULL, AV_LOG_ERROR, "Can't create scaler for '%s'\n",
                dec_worker->input_file);
            ret = AVERROR(EINVAL);
            goto fail;
        }
    } else {
        dec_worker->in.sw.width = codec_ctx->width;
        dec_worker->in.sw.height = codec_ctx->height;
        dec_worker->in.sw.need_scale = false;
    }
    dec_worker->in.sw.pix_fmt = AV_PIX_FMT_YUV420P;

    ret = init_hwframe_uploader(dec_worker);
    if (ret < 0)
        goto fail;

    for (i = 0; i < filter_num; i++) {
        create_new_src_pad(dec_worker->xstack[i], dec_worker, codec_ctx->framerate,
                           codec_ctx->sample_aspect_ratio, stream->time_base);
    }

    // Open codec
    ret = avcodec_open2(codec_ctx, dec, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not open codec\n");
        goto fail;
    }
    dec_worker->dec_ctx = codec_ctx;
    // start one step below zero so the first frame gets pts 0
    dec_worker->last_decoded_pts = -DEFAULT_YUV_PTS_STEP;
    dec_worker->last_decoded_dts = 0;
    dec_worker->start_pts = 0;
    dec_worker->ifmt_ctx = ifmt_ctx;
    dec_worker->video_index = index;
    return 0;

fail:
    // Release only what this function created locally; worker-owned state
    // (scaler, hw upload contexts) is left for the worker's own teardown.
    avcodec_free_context(&codec_ctx);
    avformat_close_input(&ifmt_ctx);
    return ret;
}

/**
 * Open the input of a hardware-decoded worker and locate its video stream.
 *
 * On success the opened format context is stored in
 * dec_worker->demux_worker->ifmt_ctx and the index of the first video stream
 * is stored in both the decoder worker and its demuxer worker. The demuxer
 * is not started here; open_hw_input_file() signals it once the decoder has
 * been opened.
 *
 * @param dec_worker worker whose input_file is to be opened
 * @return 0 on success, a negative AVERROR code on failure (the format
 *         context is closed again in that case).
 */
static int init_input_fmt_ctx(decoder_worker *dec_worker) {
    unsigned int i;
    int ret = 0;
    int video_index = -1;
    AVFormatContext *ifmt_ctx = NULL;
    AVDictionary *open_opts = NULL;
    demuxer_worker *demux_worker = dec_worker->demux_worker;
    const char *input_file = dec_worker->input_file;

    // The RTSP transport is negotiated while the input is being opened, so
    // the option has to be handed to avformat_open_input(); setting it on the
    // context afterwards has no effect on the established session.
    if (strstr(input_file, "rtsp://")) {
        ret = av_dict_set(&open_opts, "rtsp_transport", "tcp", 0);
        if (ret < 0) {
            av_dict_free(&open_opts);
            return ret;
        }
    }

    ret = avformat_open_input(&ifmt_ctx, input_file, NULL, &open_opts);
    av_dict_free(&open_opts);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open input file %s\n", input_file);
        return ret;
    }
    if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
        goto free_ifmt;
    }

    // use the first video stream of the input
    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
        if (ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            video_index = (int)i;
            break;
        }
    }
    if (video_index < 0) {
        av_log(NULL, AV_LOG_ERROR, "can not find video stream\n");
        ret = AVERROR_STREAM_NOT_FOUND;
        goto free_ifmt;
    }

    dec_worker->video_index = video_index;
    demux_worker->video_index = video_index;
    demux_worker->ifmt_ctx = ifmt_ctx;
    return 0;

free_ifmt:
    // an opened input must be closed, not merely freed, so that the
    // demuxer and its I/O context are shut down as well
    avformat_close_input(&ifmt_ctx);
    return ret;
}

/**
 * Open an input that is decoded by a hardware decoder.
 *
 * Opens the container through init_input_fmt_ctx(), selects the decoder
 * (the name configured in in.hw.decoder_name, or a Quadra decoder chosen
 * from the stream codec id), applies the Quadra specific options, creates
 * one source pad per filter, opens the decoder and finally signals the
 * demuxer worker that demuxing may start.
 *
 * @param dec_worker worker describing the input
 * @return 0 on success, a negative AVERROR code on failure. The decoder
 *         context allocated here is freed on failure; the format context
 *         stays in dec_worker->demux_worker->ifmt_ctx.
 *         NOTE(review): presumably the worker teardown closes that format
 *         context - confirm against the cleanup code.
 */
static int open_hw_input_file(decoder_worker *dec_worker) {
    int ret;
    unsigned int i;
    const char *codec_name;
    AVCodecContext *codec_ctx = NULL;
    const AVCodec *dec = NULL;
    AVFormatContext *ifmt_ctx;
    AVStream *stream;
    demuxer_worker *demux_worker = dec_worker->demux_worker;

    ret = init_input_fmt_ctx(dec_worker);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR,"fail to open file\n");
        return ret;
    }

    ifmt_ctx = demux_worker->ifmt_ctx;
    stream = ifmt_ctx->streams[dec_worker->video_index];
    av_log(NULL, AV_LOG_DEBUG, "%d stream time base %d:%d\n", dec_worker->index, stream->time_base.num, stream->time_base.den);

    if (dec_worker->in.hw.decoder_name[0]) {
        codec_name = dec_worker->in.hw.decoder_name;
    } else {
        // no decoder requested: pick the Quadra decoder matching the stream
        switch (stream->codecpar->codec_id) {
        case AV_CODEC_ID_H264:
            codec_name = "h264_ni_quadra_dec";
            break;
        case AV_CODEC_ID_HEVC:
            codec_name = "h265_ni_quadra_dec";
            break;
        case AV_CODEC_ID_VP9:
            codec_name = "vp9_ni_quadra_dec";
            break;
        default:
            av_log(NULL, AV_LOG_ERROR, "dec_ctx %d unsupported dec id %d\n", dec_worker->index, stream->codecpar->codec_id);
            return AVERROR_DECODER_NOT_FOUND;
        }
        av_log(NULL, AV_LOG_INFO, "dec_ctx %d auto find dec name %s\n", dec_worker->index, codec_name);
    }

    dec = avcodec_find_decoder_by_name(codec_name);
    if (!dec) {
        av_log(NULL, AV_LOG_ERROR, "dec_ctx %d failed to find decoder\n", dec_worker->index);
        return AVERROR_DECODER_NOT_FOUND;
    }
    if (stream->codecpar->codec_id != dec->id) {
        av_log(NULL, AV_LOG_ERROR, "dec %d codec %s does not match with "
                "stream id %d\n", dec_worker->index, codec_name, stream->codecpar->codec_id);
        return AVERROR_DECODER_NOT_FOUND;
    }

    codec_ctx = avcodec_alloc_context3(dec);
    if (!codec_ctx) {
        av_log(NULL, AV_LOG_ERROR, "Failed to allocate the decoder context for stream\n");
        return AVERROR(ENOMEM);
    }
    ret = avcodec_parameters_to_context(codec_ctx, stream->codecpar);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to copy decoder parameters to input decoder context for stream\n");
        goto fail;
    }

    av_log(NULL, AV_LOG_DEBUG, "stream.time_base=%d/%d, avg_frame_rate=%d/%d.\n",
            stream->time_base.num, stream->time_base.den,
            stream->avg_frame_rate.num, stream->avg_frame_rate.den);

    // process quadra parameters
    if (strstr(dec->name, "quadra")) {
        // large enough for any int, so the device id is never truncated
        char str_devid[16] = {0};
        char *dec_params = dec_worker->in.hw.decoder_params;
        snprintf(str_devid, sizeof(str_devid), "%d", dec_worker->devid);
        av_opt_set(codec_ctx->priv_data, "dec", str_devid, 0);

        // make sure "out=hw" is part of the xcoder parameters
        // NOTE(review): 256 is assumed to be the size of decoder_params - confirm
        if (!strstr(dec_params, "out=hw")) {
            if (strlen(dec_params)) {
                snprintf(dec_params + strlen(dec_params),
                         256 - strlen(dec_params), "%s", ":out=hw");
            } else {
                snprintf(dec_params, 256, "%s", "out=hw");
            }
        }
        av_opt_set(codec_ctx->priv_data, "xcoder-params", dec_params, 0);

        av_opt_set(codec_ctx->priv_data, "keep_alive_timeout", "30", 0);
    }

    if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
        codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, stream, NULL);

    ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to copy decoder parameters from codec context for stream\n");
        goto fail;
    }

    if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        for (i = 0; i < filter_num; i++) {
            create_new_src_pad(dec_worker->xstack[i], dec_worker, codec_ctx->framerate,
                               codec_ctx->sample_aspect_ratio, stream->time_base);
        }

        /* Open decoder */
        ret = avcodec_open2(codec_ctx, dec, NULL);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Failed to open decoder for stream #%d\n", dec_worker->video_index);
            goto fail;
        }
    }
    dec_worker->dec_ctx = codec_ctx;
    dec_worker->last_decoded_pts = AV_NOPTS_VALUE;
    dec_worker->last_decoded_dts = AV_NOPTS_VALUE;
    dec_worker->start_pts = AV_NOPTS_VALUE;

    // the decoder is ready: let the demuxer worker start producing packets
    pthread_mutex_lock(&demux_worker->packet_lock);
    demux_worker->demux_started = 1;
    pthread_cond_signal(&demux_worker->produce_cond);
    pthread_mutex_unlock(&demux_worker->packet_lock);
    return 0;

fail:
    avcodec_free_context(&codec_ctx);
    return ret;
}

static int read_yuv_frame(decoder_worker *dec_worker, AVFrame *frame) {
    int i, ret = 0;
    unsigned int read_size;
    // Read data for Y into frame buffer
    for (i = 0; i < dec_worker->in.sw.input_height; i++) {
        read_size = fread(&frame->data[0][0] + i * frame->linesize[0], dec_worker->in.sw.input_width, 1, dec_worker->in.sw.input_fp);
        if (read_size != 1) {
            av_log(NULL, AV_LOG_ERROR, "Failed to read Y. read_size=%u.\n", read_size);
            ret = -1;
            goto end;
        }
    }
    // Read data for U into frame buffer
    for (i = 0; i < dec_worker->in.sw.input_height / 2; i++) {
        read_size = fread(&frame->data[1][0] + i * frame->linesize[1], dec_worker->in.sw.input_width / 2, 1, dec_worker->in.sw.input_fp);
        if (read_size != 1) {
            av_log(NULL, AV_LOG_ERROR, "Failed to read U. read_size=%u.\n", read_size);
            ret = -1;
            goto end;
        }
    }
    // Read data for V into frame buffer
    for (i = 0; i < dec_worker->in.sw.input_height / 2; i++) {
        read_size = fread(&frame->data[2][0] + i * frame->linesize[2], dec_worker->in.sw.input_width / 2, 1, dec_worker->in.sw.input_fp);
        if (read_size != 1) {
            av_log(NULL, AV_LOG_ERROR, "Failed to read V. read_size=%u.\n", read_size);
            ret = -1;
            goto end;
        }
    }
end:
    return ret;
}

/**
 * Send one packet to the decoder and forward every frame it produces.
 *
 * Each received frame gets its pts rebased against the first decoded pts,
 * is referenced into the worker's wait_to_free_list and is sent to every
 * filter that is not flushed yet. When the list reaches NI_MAX_DEC_CAPACITY
 * the function waits on list_cond before it continues. After each frame the
 * filter return codes are polled; AVERROR_EOF from a filter makes the worker
 * exit without reporting an error.
 *
 * @param dec_worker decoder worker
 * @param packet     packet to decode, or NULL to flush the decoder
 * @return >= 0 on success (EAGAIN from the decoder is reported as 0),
 *         a negative value on failure or decoder EOF
 */
static int decoder_get_frame(decoder_worker *dec_worker, AVPacket *packet) {
    int i, ret = 0;
    niFrameSurface1_t* p_data3;
    AVFrame *frame;
    bool all_filters_flushed = false;
    bool flushed[NI_MAX_XSTACK_FILTER] = {0};
    av_log(NULL, AV_LOG_DEBUG, "%s worker %d pkt %p\n",
           __func__, dec_worker->index, packet);

    ret = avcodec_send_packet(dec_worker->dec_ctx, packet);
    if (ret < 0 && ret != AVERROR_EOF) {
        av_log(NULL, AV_LOG_ERROR, "decoder %d: failed to send packet. ret %d\n",
               dec_worker->index, ret);
        goto end;
    }
    frame = dec_worker->decoded_frame;
    while (!dec_worker->should_exit && (ret >= 0 || !packet)) {
        av_log(NULL, AV_LOG_DEBUG, "ret %d packet %p\n", ret, packet);
        ret = avcodec_receive_frame(dec_worker->dec_ctx, frame);
        if (ret < 0) {
            if (ret != AVERROR(EAGAIN)) {
                if (ret != AVERROR_EOF) {
                    av_log(NULL, AV_LOG_ERROR, "decoder %d failed to receive "
                           "frame, ret %d\n", dec_worker->index, ret);
                } else
                    av_log(NULL, AV_LOG_DEBUG, "decoder %d got AVERROR_EOF\n",
                           dec_worker->index);
            } else {
                // the decoder only needs more input: not an error
                av_log(NULL, AV_LOG_DEBUG, "avcodec_receive_frame ret EAGAIN but changed to 0 and return\n");
                ret = 0;
            }
            goto end;
        }

        // once we got a frame, save this one
        // so that the last frame of this decoding can continuously be used to
        // feed into xstack together with other longer inputs

        av_log(NULL, AV_LOG_DEBUG, "%s %d frame->best_effort_timestamp %ld pts %ld\n",
               __func__, dec_worker->index, frame->best_effort_timestamp, frame->pts);

#if IS_FFMPEG_61_AND_ABOVE
        frame->time_base = dec_worker->dec_ctx->pkt_timebase;
#endif
        frame->pts = frame->best_effort_timestamp;
        if (frame->pts == AV_NOPTS_VALUE) {
            // no timestamp: continue from the previous frame plus its duration
#if IS_FFMPEG_70_AND_ABOVE
            frame->pts = dec_worker->last_decoded_pts == AV_NOPTS_VALUE ? 0 : dec_worker->last_decoded_pts + frame->duration;
#else
            frame->pts = dec_worker->last_decoded_pts == AV_NOPTS_VALUE ? 0 : dec_worker->last_decoded_pts + frame->pkt_duration;
#endif
            av_log(NULL, AV_LOG_DEBUG, "%s frame->pts == AV_NOPTS_VALUE, adjust to %ld\n", __func__, frame->pts);
        }

        // rebase timestamps so that the first decoded frame has pts 0
        if (dec_worker->start_pts == AV_NOPTS_VALUE) {
            dec_worker->start_pts = frame->pts;
            frame->pts = 0;
        } else {
            frame->pts -= dec_worker->start_pts;
        }

        if (packet) {
            av_log(NULL, AV_LOG_DEBUG, "dec %d, pts=%ld, dts=%ld, "
                   "best_effort=%ld\n",
                    dec_worker->index, frame->pts, frame->pkt_dts,
                    frame->best_effort_timestamp);
        }

        if ((dec_worker->last_decoded_pts != AV_NOPTS_VALUE) &&
            (frame->pts == dec_worker->last_decoded_pts)) {
            av_log(NULL, AV_LOG_ERROR, "flush decoder: same pts!!!\n");
        }
        if ((dec_worker->last_decoded_dts != AV_NOPTS_VALUE) &&
            (frame->pkt_dts == dec_worker->last_decoded_dts)) {
            av_log(NULL, AV_LOG_ERROR, "flush decoder: same dts!!!\n");
        }

        dec_worker->last_decoded_pts = frame->pts;
        dec_worker->last_decoded_dts = frame->pkt_dts;

        pthread_mutex_lock(&dec_worker->list_lock);
        if (get_fifo_size(dec_worker->wait_to_free_list) >= NI_MAX_DEC_CAPACITY) {
            av_log(NULL, AV_LOG_ERROR, "wait list frame > %d\n", NI_MAX_DEC_CAPACITY);
            ret = -1;
            pthread_mutex_unlock(&dec_worker->list_lock);
            goto end;
        }
        pthread_mutex_unlock(&dec_worker->list_lock);

        dec_worker->decoded_frames++;

        // Query the flush state of every filter. The scan must not stop at
        // the first unflushed filter: flushed[] is used below to decide, per
        // filter, whether the frame is sent, so every entry has to be fresh.
        all_filters_flushed = true;
        for (i = 0; i < filter_num; i++) {
            flushed[i] = get_filter_flush_state(xstack_workers[i]);
            if (!flushed[i])
                all_filters_flushed = false;
        }
        if (all_filters_flushed) {
            av_log(NULL, AV_LOG_DEBUG, "%s all filters flushed, skip send frame\n", __func__);
            goto filters_flushed;
        }

        // call av_frame_ref to increase buffer ref count / preserve buffer
        dec_worker->wait_free_frame = av_frame_alloc();
        if (!dec_worker->wait_free_frame) {
            av_log(NULL, AV_LOG_ERROR, "%s: av_frame_alloc ERROR !!!\n", __func__);
            ret = AVERROR(ENOMEM);
            goto end;
        }
        ret = av_frame_ref(dec_worker->wait_free_frame, frame);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s: av_frame_ref ERROR !!!\n", __func__);
            // the frame never reached the list, so it has to be released here
            av_frame_free(&dec_worker->wait_free_frame);
            goto end;
        }
        pthread_mutex_lock(&dec_worker->list_lock);
        //add the buffered frame to wait to free
        ret = list_append_frame(dec_worker->wait_to_free_list, dec_worker->wait_free_frame);
        if (ret < 0) {
            pthread_mutex_unlock(&dec_worker->list_lock);
            // NOTE(review): ownership of wait_free_frame after a failed
            // append is not visible here, so the frame is left untouched.
            av_log(NULL, AV_LOG_ERROR, "%s: add to wait free list failed !!!\n", __func__);
            goto end;
        }
        av_log(NULL, AV_LOG_DEBUG, "append list size %d index %d\n",
               get_fifo_size(dec_worker->wait_to_free_list), dec_worker->index);
        pthread_mutex_unlock(&dec_worker->list_lock);

        for (i = 0; i < filter_num; i++) {
            //don't send frame to filter if it is flushed
            if (flushed[i])
                continue;
            ret = send_decode_frame(dec_worker->stack_entry[i], frame);
            if (ret < 0 || dec_worker->stack_entry[i]->list_block) {
                av_log(NULL, AV_LOG_ERROR, "send_decode_frame failed %d\n", ret);
                goto end;
            }
        }

        pthread_mutex_lock(&dec_worker->list_lock);
        // wait dec hw frame free
        // every decoder thread can preserve at most NI_MAX_DEC_CAPACITY hw
        // frames at the same time; if the list is full, wait until the
        // filter thread unrefs a hw frame in the list and signals list_cond
        // NOTE(review): the wait is not re-checked in a loop, so a spurious
        // wakeup continues with a full list - confirm this is acceptable.
        if (get_fifo_size(dec_worker->wait_to_free_list) > NI_MAX_DEC_CAPACITY) {
            pthread_mutex_unlock(&dec_worker->list_lock);
            av_log(NULL, AV_LOG_ERROR, "wait list frame > %d\n", NI_MAX_DEC_CAPACITY);
            ret = -1;
            goto end;
        } else if (get_fifo_size(dec_worker->wait_to_free_list) == NI_MAX_DEC_CAPACITY) {
            av_log(NULL, AV_LOG_DEBUG, "before recycle list size %d entry size %d index %d\n",
                   get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
            pthread_cond_wait(&dec_worker->list_cond, &dec_worker->list_lock);
            av_log(NULL, AV_LOG_DEBUG, "after recycle list size %d entry size %d index %d.\n",
                   get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
        }
        pthread_mutex_unlock(&dec_worker->list_lock);

filters_flushed:
        p_data3 = NULL;
        if (frame->buf[0]) {
            p_data3 = (niFrameSurface1_t*)(frame->buf[0]->data);
        }
        av_log(NULL, AV_LOG_DEBUG, "%s while loop, before unref frame %p "
               "ui16FrameIdx = [%d] ref_cnt %d\n", __func__, frame,
               p_data3 ? p_data3->ui16FrameIdx : -1,
               p_data3 ? frame_get_ref_count(frame) : -1);

        av_frame_unref(frame);

        // poll the filter threads: a negative value means this decoder has
        // to stop, EOF being the regular way for a filter to end the run
        for (i = 0; i < filter_num; i++) {
            ret = get_filter_ret(xstack_workers[i]);
            if (ret < 0) {
                if (AVERROR_EOF == ret) {
                    dec_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
                    av_log(NULL, AV_LOG_INFO, "%s decoder %d process_filter_graph get eof\n",
                        __func__, dec_worker->index);
                    ret = 0;
                } else {
                    av_log(NULL, AV_LOG_ERROR, "%s decoder %d process_filter_graph return %d\n",
                        __func__, dec_worker->index, ret);
                }
                goto end;
            } else {
                av_log(NULL, AV_LOG_DEBUG, "%s decoder %d process_filter_graph "
                    "return > 0: %d.\n",
                    __func__, dec_worker->index, ret);
            }
        }
    }

end:
    return ret;
}

/**
 * Handle end-of-stream of one decoder worker.
 *
 * Steps, in order:
 *  - count this input as finished on every filter (input_eos_num) and set
 *    eos_flag on each of the worker's stack entries;
 *  - mark the worker as THREAD_STATE_EXIT_PROCESSED unless an exit state is
 *    already set;
 *  - for every filter that is not flushed, wake it through the entry's
 *    frame_cond and wait on the filter's flush_cond;
 *  - release all frames still held in wait_to_free_list;
 *  - for HW_VIDEO workers, discard queued demuxer packets and drain the
 *    decoder;
 *  - when the eos count of filter 0 equals its number of source pads, ask
 *    the still running SW_PICTURE workers to exit.
 *
 * @param dec_worker decoder worker that reached end-of-stream
 */
static void process_decoder_eos(decoder_worker *dec_worker) {
    int i, j, eos_num;
    int remaining;
    filter_worker *f;
    ni_xstack_entry_t *entry;
    decoder_worker *dec_other;
    int ret;
    demuxer_worker *demux_worker = dec_worker->demux_worker;

    for (i = 0; i < filter_num; i++) {
        f = xstack_workers[i];
        pthread_mutex_lock(&f->filter_lock);
        eos_num = ++f->input_eos_num;
        pthread_mutex_unlock(&f->filter_lock);

        av_log(NULL, AV_LOG_INFO, "%s %d , eos_num: %d xstack shortest: %d\n",
            __func__, dec_worker->index, eos_num, f->shortest);
    }

    // for shortest=1, it will exit if one of decoder thread get eof
    // for shortest=0, it will exit until all decoder threads get eof
    for (i = 0; i < filter_num; i++) {
        dec_worker->stack_entry[i]->eos_flag = 1;
    }

    pthread_mutex_lock(&dec_worker->frame_lock);
    if (!dec_worker->should_exit) {
        dec_worker->should_exit = THREAD_STATE_EXIT_PROCESSED;
    }
    pthread_mutex_unlock(&dec_worker->frame_lock);

    //Unlock filter for this input in case it started waiting before seeing the eos flag
    for (j = 0; j < filter_num; j++) {
        entry = dec_worker->stack_entry[j];
        f = xstack_workers[j];
        if (get_filter_flush_state(xstack_workers[j])) {
            av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d is flushed, skip sending last frame\n", __func__, dec_worker->index, j);
            continue;
        }
        // signal if filter is wait for next frame
        pthread_mutex_lock(&entry->lock);
        pthread_cond_signal(&entry->frame_cond);
        pthread_mutex_unlock(&entry->lock);

        // NOTE(review): this wait has no predicate; if flush_cond is
        // signalled before ret_lock is taken here the wakeup is lost -
        // confirm against the filter thread.
        pthread_mutex_lock(&f->ret_lock);
        pthread_cond_wait(&f->flush_cond, &f->ret_lock);
        pthread_mutex_unlock(&f->ret_lock);
    }

    pthread_mutex_lock(&dec_worker->list_lock);
    while (get_fifo_size(dec_worker->wait_to_free_list)) {
        drain_fifo(dec_worker->wait_to_free_list, __func__, NULL);
    }
    // sample the size while the list is still protected by its lock
    remaining = get_fifo_size(dec_worker->wait_to_free_list);
    pthread_mutex_unlock(&dec_worker->list_lock);
    av_log(NULL, AV_LOG_INFO, "fifo size %d dec %d drain wait to free fifo\n", remaining, dec_worker->index);

    if (dec_worker->type == HW_VIDEO) {
        // throw away the packets the demuxer has queued but nobody will decode
        while (!is_fifo_empty(demux_worker->demux_packet_fifo)) {
            pthread_mutex_lock(&demux_worker->packet_lock);
#if IS_FFMPEG_70_AND_ABOVE
            av_fifo_read(demux_worker->demux_packet_fifo, &demux_worker->demux_packet, 1);
#else
            av_fifo_generic_read(demux_worker->demux_packet_fifo, &demux_worker->demux_packet, sizeof(AVPacket*), NULL);
#endif
            pthread_cond_signal(&demux_worker->produce_cond);
            pthread_mutex_unlock(&demux_worker->packet_lock);
            av_packet_free(&demux_worker->demux_packet);
        }

        // put the decoder into draining mode and discard what it still holds
        ret = avcodec_send_packet(dec_worker->dec_ctx, NULL);
        if (ret < 0 && ret != AVERROR_EOF) {
            av_log(NULL, AV_LOG_ERROR, "decoder %d: failed to send packet. ret %d\n",
                dec_worker->index, ret);
        }
        if (ret >= 0) {
            // one scratch frame is enough for the whole drain
            AVFrame *frame = av_frame_alloc();
            if (!frame) {
                av_log(NULL, AV_LOG_ERROR, "decoder %d: failed to allocate frame to drain decoder\n",
                    dec_worker->index);
            } else {
                while (ret >= 0) {
                    ret = avcodec_receive_frame(dec_worker->dec_ctx, frame);
                    if (ret < 0) {
                        if (ret == AVERROR_EOF) {
                            av_log(NULL, AV_LOG_DEBUG, "decoder %d got AVERROR_EOF\n",
                                dec_worker->index);
                        } else {
                            av_log(NULL, AV_LOG_DEBUG, "avcodec_receive_frame ret %d\n", ret);
                        }
                    }
                    av_frame_unref(frame);
                }
                av_frame_free(&frame);
            }
        }
    }

    // send the last frame to filter to tell filter all decoder threads ready to exit
    if (get_decoder_eos_num_total(xstack_workers[0]) == xstack_workers[0]->num_src_pads) {
        for (i = 0; i < active_decoder_workers; i++) {
            dec_other = decoder_workers[i];
            if (dec_other->type == SW_PICTURE && !dec_other->should_exit) {
                pthread_mutex_lock(&dec_other->frame_lock);
                dec_other->should_exit = THREAD_STATE_EXIT_ISSUED;
                pthread_mutex_unlock(&dec_other->frame_lock);
                av_log(NULL, AV_LOG_DEBUG, "%s %d: signal image decoder %d to exit.\n",
                       __func__, dec_worker->index, dec_other->index);
            }
        }
    }
}

/**
 * Decoder loop for a raw yuv file input.
 *
 * Reads the file frame by frame (in.sw.sw_loop passes over the file),
 * optionally scales each frame, uploads it into a hw frame with
 * retrieve_hwframe(), references it into wait_to_free_list and sends it to
 * every filter that is not flushed yet. When the list reaches
 * NI_MAX_DEC_CAPACITY the loop waits on list_cond. After all passes
 * process_decoder_eos() is called; error paths skip it.
 *
 * @param dec_worker decoder worker of type sw input
 * @return the last status code: negative on failure, otherwise >= 0.
 *         The locally allocated frames are released on every path.
 */
static int sw_decoder_run(decoder_worker *dec_worker) {
    int i, j, ret;
    // dst stays NULL until it is really allocated so that the cleanup at
    // `end` never sees an indeterminate pointer
    AVFrame *frame, *src, *dst = NULL;
    niFrameSurface1_t *p_data3;
    bool flushed[NI_MAX_XSTACK_FILTER] = {0};
    bool all_filters_flushed = false;

    // Allocate frame object
    src = av_frame_alloc();
    if (!src) {
      av_log(NULL, AV_LOG_ERROR, "Could not allocate src AVFrame\n");
      return -1;
    }
    src->format = dec_worker->in.sw.pix_fmt;
    src->width  = dec_worker->in.sw.input_width;
    src->height = dec_worker->in.sw.input_height;

    ret = av_frame_get_buffer(src, 32);
    if (ret < 0) {
      av_log(NULL, AV_LOG_ERROR, "Could not allocate the src AVFrame buffers ret = %d\n", ret);
      goto end;
    }
    av_log(NULL, AV_LOG_INFO, "Input line sizes: Y=%d, U=%d, V=%d count %d.\n", src->linesize[0],
           src->linesize[1], src->linesize[2], dec_worker->in.sw.yuv_frame_cnt);

    // Make sure the frame data is writable
    ret = av_frame_make_writable(src);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "av_frame_make_writable() error %d.\n", ret);
        goto end;
    }

    if (dec_worker->in.sw.need_scale) {
        dst = av_frame_alloc();
        if (!dst) {
            av_log(NULL, AV_LOG_ERROR, "Could not allocate dst AVFrame\n");
            ret = AVERROR(ENOMEM);
            goto end;
        }
        dst->format = AV_PIX_FMT_YUV420P;
        dst->width = dec_worker->in.sw.width;
        dst->height = dec_worker->in.sw.height;
        ret = av_frame_get_buffer(dst, 32);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s Could not allocate the dst AVFrame buffers ret = %d\n",
                   __func__, ret);
            goto end;
        }
    }

    while (dec_worker->in.sw.sw_loop) {
        // read yuv frame file and send to list
        for (i = 0; i < dec_worker->in.sw.yuv_frame_cnt; i++) {
            if (dec_worker->should_exit) {
                break;
            }
            ret = read_yuv_frame(dec_worker, src);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "read yuv frame fail index %d\n", i);
                goto end;
            }

            if (dec_worker->in.sw.need_scale) {
                sws_scale(dec_worker->in.sw.sws_ctx, (const uint8_t* const*)src->data,
                        src->linesize, 0, src->height, dst->data, dst->linesize);
            } else {
                // no scaling: upload the frame that was just read
                dst = src;
            }

            pthread_mutex_lock(&dec_worker->list_lock);
            if (get_fifo_size(dec_worker->wait_to_free_list) >= NI_MAX_DEC_CAPACITY) {
                // never leave with list_lock held
                pthread_mutex_unlock(&dec_worker->list_lock);
                av_log(NULL, AV_LOG_ERROR, "wait list frame > %d\n", NI_MAX_DEC_CAPACITY);
                ret = -1;
                goto end;
            }
            pthread_mutex_unlock(&dec_worker->list_lock);

            // upload the sw frame to hw frame, place in dec_worker->decoded_frame
            ret = retrieve_hwframe(dec_worker, dst);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "retrieve hwframe[%d] for image failed\n", i);
                goto end;
            }

            frame = dec_worker->decoded_frame;
            frame->pts = DEFAULT_YUV_PTS_STEP + dec_worker->last_decoded_pts;
            dec_worker->decoded_frames++;

            // Query the flush state of every filter. The scan must not stop
            // at the first unflushed filter: flushed[] is used below to
            // decide, per filter, whether the frame is sent, so every entry
            // has to be fresh.
            all_filters_flushed = true;
            for (j = 0; j < filter_num; j++) {
                flushed[j] = get_filter_flush_state(xstack_workers[j]);
                if (!flushed[j])
                    all_filters_flushed = false;
            }
            if (all_filters_flushed) {
                av_log(NULL, AV_LOG_DEBUG, "%s all filters flushed, skip send frame\n", __func__);
                goto filters_flushed;
            }

            // call av_frame_ref to increase buffer ref count / preserve buffer
            dec_worker->wait_free_frame = av_frame_alloc();
            if (!dec_worker->wait_free_frame) {
                av_log(NULL, AV_LOG_ERROR, "%s: av_frame_alloc ERROR !!!\n", __func__);
                ret = AVERROR(ENOMEM);
                goto end;
            }
            ret = av_frame_ref(dec_worker->wait_free_frame, frame);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "%s: av_frame_ref ERROR !!!\n", __func__);
                // the frame never reached the list, so it has to be released here
                av_frame_free(&dec_worker->wait_free_frame);
                goto end;
            }
            pthread_mutex_lock(&dec_worker->list_lock);
            //add the buffered frame to wait to free
            ret = list_append_frame(dec_worker->wait_to_free_list, dec_worker->wait_free_frame);
            if (ret < 0) {
                pthread_mutex_unlock(&dec_worker->list_lock);
                // NOTE(review): ownership of wait_free_frame after a failed
                // append is not visible here, so the frame is left untouched.
                av_log(NULL, AV_LOG_ERROR, "%s: add to wait free list failed !!!\n", __func__);
                goto end;
            }
            av_log(NULL, AV_LOG_DEBUG, "append list size %d index %d\n",
                   get_fifo_size(dec_worker->wait_to_free_list), dec_worker->index);
            pthread_mutex_unlock(&dec_worker->list_lock);

            for (j = 0; j < filter_num; j++) {
                //don't send frame to filter if it is flushed
                if (flushed[j])
                    continue;
                ret = send_decode_frame(dec_worker->stack_entry[j], frame);
                if (ret < 0 || dec_worker->stack_entry[j]->list_block) {
                    av_log(NULL, AV_LOG_ERROR, "send_decode_frame failed %d\n", ret);
                    goto end;
                }
            }

            pthread_mutex_lock(&dec_worker->list_lock);
            // wait dec hw frame free
            // every decoder thread can preserve at most NI_MAX_DEC_CAPACITY
            // hw frames at the same time; if the list is full, wait until
            // the filter thread unrefs a hw frame in the list and signals
            // list_cond
            // NOTE(review): the wait is not re-checked in a loop, so a
            // spurious wakeup continues with a full list - confirm this is
            // acceptable.
            if (get_fifo_size(dec_worker->wait_to_free_list) > NI_MAX_DEC_CAPACITY) {
                pthread_mutex_unlock(&dec_worker->list_lock);
                av_log(NULL, AV_LOG_ERROR, "wait to free list frame %d > %d\n",
                    get_fifo_size(dec_worker->wait_to_free_list), NI_MAX_DEC_CAPACITY);
                ret = -1;
                goto end;
            } else if (get_fifo_size(dec_worker->wait_to_free_list) == NI_MAX_DEC_CAPACITY) {
                av_log(NULL, AV_LOG_DEBUG, "before recycle list size %d entry size %d index %d\n",
                    get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
                pthread_cond_wait(&dec_worker->list_cond, &dec_worker->list_lock);
                av_log(NULL, AV_LOG_DEBUG, "after recycle list size %d entry size %d index %d\n",
                    get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
            }
            pthread_mutex_unlock(&dec_worker->list_lock);

    filters_flushed:
            dec_worker->last_decoded_pts = frame->pts;
            p_data3 = NULL;
            if (frame->buf[0]) {
                p_data3 = (niFrameSurface1_t*)(frame->buf[0]->data);
            }
            av_log(NULL, AV_LOG_DEBUG, "%s while loop, before unref frame %p "
                "ui16FrameIdx = [%d] ref_cnt %d\n", __func__, frame,
                p_data3 ? p_data3->ui16FrameIdx : -1,
                p_data3 ? frame_get_ref_count(frame) : -1);

            av_frame_unref(frame);

            // get ret from filter thread, if ret < 0 means need filter thread failed
            // and decoder thread need to exit now
            for (j = 0; j < filter_num; j++) {
                ret = get_filter_ret(xstack_workers[j]);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "%s decoder %d process_filter_graph "
                        "return < 0: %d, could be eos.\n",
                        __func__, dec_worker->index, ret);
                    if (AVERROR_EOF == ret) {
                        dec_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
                    }
                    goto end;
                } else {
                    av_log(NULL, AV_LOG_DEBUG, "%s decoder %d process_filter_graph "
                        "return > 0: %d.\n",
                        __func__, dec_worker->index, ret);
                }
            }
        }

        // one pass over the file is done: rewind for the next one, if any
        dec_worker->in.sw.sw_loop--;
        if (dec_worker->in.sw.sw_loop && !dec_worker->should_exit) {
            av_log(NULL, AV_LOG_DEBUG, "%s Decoder %d end of one iteration.\n",
                __func__, dec_worker->index);
            fseek(dec_worker->in.sw.input_fp, 0, SEEK_SET);
            av_log(NULL, AV_LOG_INFO, "%s: dec %d remaining loop: %u\n",
                __func__, dec_worker->index, dec_worker->in.sw.sw_loop);
        } else {
            break;
        }
    }
    av_log(NULL, AV_LOG_INFO, "%s Decoder %d completed decoding, notify eos.\n",
           __func__, dec_worker->index);
    process_decoder_eos(dec_worker);
end:
    if (src) {
        av_frame_free(&src);
        src = NULL;
    }
    // dst is a separate allocation only when scaling; otherwise it aliases src
    if (dec_worker->in.sw.need_scale) {
        av_frame_free(&dst);
        dst = NULL;
    }
    return ret;
}

// Decoder loop for picture (still image) input.
//
// Reads and decodes the image into a sw frame, optionally rescales it to
// YUV420P at the configured size, uploads it through retrieve_hwframe() (which
// places the result in dec_worker->decoded_frame), flags eos for this input on
// every filter, and then keeps re-sending the same frame to the filters until
// told to stop.
//
// Returns a negative AVERROR code on failure, otherwise the last non-negative
// status. The packet and every locally allocated frame are released on all
// exit paths.
static int image_decoder_run(decoder_worker *dec_worker) {
    int i, ret = 0;
    AVPacket *packet = av_packet_alloc();
    AVFrame *src = av_frame_alloc();
    AVFrame *dst = NULL;
    AVFrame *frame = NULL;
    ni_xstack_entry_t *entry;
    int eos_num;
    filter_worker *f;
    int first_frames_to_filters = 0; // count to track first frames to filter

    if (!packet || !src) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for AVPacket/AVFrame\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    while (av_read_frame(dec_worker->ifmt_ctx, packet) >= 0 && !dec_worker->should_exit) {
        if (packet->stream_index != dec_worker->video_index) {
            // packets returned by av_read_frame must be unreferenced even
            // when they are not consumed
            av_packet_unref(packet);
            continue;
        }
        ret = avcodec_send_packet(dec_worker->dec_ctx, packet);
        av_packet_unref(packet);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "avcodec_send_packet failed\n");
            goto end;
        }
        ret = avcodec_receive_frame(dec_worker->dec_ctx, src);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "avcodec_receive_frame failed\n");
            goto end;
        }
    }

    if (dec_worker->in.sw.need_scale) {
        dst = av_frame_alloc();
        if (!dst) {
            av_log(NULL, AV_LOG_ERROR, "Could not allocate dst AVFrame\n");
            ret = AVERROR(ENOMEM);
            goto end;
        }
        dst->format = AV_PIX_FMT_YUV420P;
        dst->width = dec_worker->in.sw.width;
        dst->height = dec_worker->in.sw.height;
        ret = av_frame_get_buffer(dst, 32);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s Could not allocate the dst AVFrame buffers ret = %d\n",
                    __func__, ret);
            goto end;
        }
        sws_scale(dec_worker->in.sw.sws_ctx, (const uint8_t* const*)src->data,
                src->linesize, 0, src->height, dst->data, dst->linesize);
    } else {
        // no scaling: dst is only an alias of src and must not be freed twice
        dst = src;
    }

    // upload the sw frame to hw frame, place in dec_worker->decoded_frame
    ret = retrieve_hwframe(dec_worker, dst);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "retrieve hwframe for image failed\n");
        goto end;
    }
    frame = dec_worker->decoded_frame;
    dec_worker->decoded_frames++;

    // a picture has a single frame, so flag eos on every filter right away
    for (i = 0; i < filter_num; i++) {
        entry = dec_worker->stack_entry[i];
        entry->eos_flag = 1;
        f = xstack_workers[i];
        pthread_mutex_lock(&f->filter_lock);
        eos_num = ++f->input_eos_num;
        pthread_mutex_unlock(&f->filter_lock);

        av_log(NULL, AV_LOG_INFO, "%s %d , eos_num: %d xstack shortest: %d\n",
            __func__, dec_worker->index, eos_num, f->shortest);
    }

    // Continuously send the last frame even when shortest is 1;
    // another decoder thread has to tell the image thread to exit
    do {
        frame->pts = 0;
        for (i = 0; i < filter_num; i++) {
            entry = dec_worker->stack_entry[i];
            if (first_frames_to_filters == filter_num && //first frames sent to all filters
                     !xstack_workers[i]->shortest && //filter is following longest video
                     !dec_worker->should_exit) {
                // shortest == 0, do not send anymore and exit
                goto end;
            }
            if (dec_worker->should_exit) {
                goto end;
            }
            if (get_filter_flush_state(xstack_workers[i])) {
                av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d is flushed, skip sending last frame. Exit: %d\n",
                       __func__, dec_worker->index, i, dec_worker->should_exit);
                continue;
            }
            if (get_fifo_size(entry->dec_frame_fifo) >= NI_MAX_DEC_CAPACITY) {
                av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d fifo full skip sending frame.\n",
                       __func__, dec_worker->index, i);
                usleep(100);
                continue;
            }
            ret = send_decode_frame(entry, frame);
            if (ret < 0 || entry->list_block) {
                av_log(NULL, AV_LOG_ERROR, "send_decode_frame failed %d\n", ret);
                goto end;
            }
            if (first_frames_to_filters < filter_num) {
                first_frames_to_filters++;
                av_log(NULL, AV_LOG_DEBUG, "%s %d, first_frames_to_filters: %d\n",
                       __func__, dec_worker->index, first_frames_to_filters);
            }
        }
    } while (!dec_worker->should_exit);

end:
    av_log(NULL, AV_LOG_DEBUG, "sw pic finish\n");
    if (frame) {
        av_frame_unref(frame);
    }
    av_packet_free(&packet);
    if (dst != src) {
        // dst owns its own buffers only when scaling was requested
        av_frame_free(&dst);
    }
    av_frame_free(&src);
    return ret;
}

// Decoder loop for hw input.
//
// Pops packets from the demuxer worker's demux_packet_fifo and passes them to
// decoder_get_frame(). When the fifo is empty and the demuxer has flagged
// input_eos, the decoder is flushed by calling decoder_get_frame(NULL) until
// it returns a negative value. process_decoder_eos() is always called before
// returning.
//
// Returns 0 after a normal eos flush, otherwise the last status from
// decoder_get_frame().
static int hw_decoder_run(decoder_worker *dec_worker) {
    int ret = 0;
    demuxer_worker *demux_worker = dec_worker->demux_worker;

    while (!dec_worker->should_exit) {
        pthread_mutex_lock(&demux_worker->packet_lock);
        while (is_fifo_empty(demux_worker->demux_packet_fifo)) {
            // flush the decoder if ifmt get eos
            if (demux_worker->input_eos) {
                av_log(NULL, AV_LOG_INFO, "%s input eos demuxer %d ready to flush\n",
                       __func__, demux_worker->index);
                pthread_mutex_unlock(&demux_worker->packet_lock);
                do {
                    ret = decoder_get_frame(dec_worker, NULL);
                } while (ret >= 0);
                ret = 0;
                goto eos;
            }
            if (!demux_worker->should_exit) {
                pthread_cond_wait(&demux_worker->consume_cond, &demux_worker->packet_lock);
            } else {
                pthread_mutex_unlock(&demux_worker->packet_lock);
                goto eos;
            }
        }
        if (!demux_worker->demux_started) {
            av_log(NULL, AV_LOG_ERROR, "muxer not init yet, it is unexpected\n");
            // packet_lock is still held here; release it before leaving the
            // loop so the demuxer thread is not blocked forever
            pthread_mutex_unlock(&demux_worker->packet_lock);
            break;
        }

        // pop the next demuxed packet from the demuxer fifo
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_read(demux_worker->demux_packet_fifo, &demux_worker->demux_packet, 1);
#else
        av_fifo_generic_read(demux_worker->demux_packet_fifo, &demux_worker->demux_packet, sizeof(AVPacket*), NULL);
#endif
        // a slot was freed in the fifo: wake the producer side
        pthread_cond_signal(&demux_worker->produce_cond);
        pthread_mutex_unlock(&demux_worker->packet_lock);

        ret = decoder_get_frame(dec_worker, demux_worker->demux_packet);
        av_packet_free(&demux_worker->demux_packet);
        if (ret < 0) {
            break;
        }
    }
eos:
    av_log(NULL, AV_LOG_DEBUG, "%s Decoder %d completed decoding, notify eos.\n",
           __func__, dec_worker->index);
    process_decoder_eos(dec_worker);

    return ret;
}

// demux thread only for hw input
static void *demuxer_thread_run(void *thread_data) {
    int ret = 0;
    demuxer_worker *demux_worker = (demuxer_worker *)thread_data;
    decoder_worker *dec_worker = demux_worker->dec_worker;
    int64_t duration_dts = 0;
    int saw_first_ts = 0;

    pthread_mutex_lock(&demux_worker->packet_lock);
    if (demux_worker->input_eos) {
        pthread_mutex_unlock(&demux_worker->packet_lock);
        goto eos;
    }
    if (!demux_worker->demux_started) {
        pthread_cond_wait(&demux_worker->produce_cond, &demux_worker->packet_lock);
    }
    if (demux_worker->input_eos) {
        pthread_mutex_unlock(&demux_worker->packet_lock);
        goto eos;
    }
    pthread_mutex_unlock(&demux_worker->packet_lock);
    av_log(NULL, AV_LOG_INFO, "demux %d start\n", demux_worker->index);

    AVStream *st = demux_worker->ifmt_ctx->streams[demux_worker->video_index];
    AVCodecContext *dec_ctx = dec_worker->dec_ctx;

    while (!dec_worker->should_exit) {
        AVPacket *packet = av_packet_alloc();
        ret = av_read_frame(demux_worker->ifmt_ctx, packet);
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                av_packet_free(&packet);
                av_log(NULL, AV_LOG_INFO, "%s stream=%d av_read_frame got "
                        "EOF\n", __func__, demux_worker->index);
                pthread_mutex_lock(&demux_worker->packet_lock);
                demux_worker->input_eos = 1;
                pthread_cond_signal(&demux_worker->consume_cond);
                pthread_mutex_unlock(&demux_worker->packet_lock);
                ret = 0;
                goto eos;
            } else {
                goto eos;
            }
        }
        if (packet->stream_index == demux_worker->video_index) {
            if (!saw_first_ts) {
                demux_worker->target_dts = st->avg_frame_rate.num ? - dec_ctx->has_b_frames * AV_TIME_BASE / av_q2d(st->avg_frame_rate) : 0;
                if (packet && packet->pts != AV_NOPTS_VALUE) {
                    demux_worker->target_dts += av_rescale_q(packet->pts, st->time_base, AV_TIME_BASE_Q);
                }
                saw_first_ts = 1;
            }

            // set pkt dts here
            if (demux_worker->target_dts != AV_NOPTS_VALUE) {
                packet->dts = av_rescale_q(demux_worker->target_dts, AV_TIME_BASE_Q, st->time_base);
                av_log(NULL, AV_LOG_DEBUG, "send pkt dts %ld target_dts %ld\n", packet->dts, demux_worker->target_dts);
            }
            demux_worker->demux_pkt_num++;
            ret = send_demux_packet(demux_worker, packet);

            // update the target_dts
            if (packet && packet->duration) {
                duration_dts = av_rescale_q(packet->duration, st->time_base, AV_TIME_BASE_Q);
            } else if(dec_ctx->framerate.num != 0 && dec_ctx->framerate.den != 0) {
#if LIBAVCODEC_VERSION_MAJOR >= 62
                AVRational field_rate = av_mul_q(dec_ctx->framerate, (AVRational){ 2, 1 });
                int ticks = 2;
                ticks = av_stream_get_parser(st) ? av_stream_get_parser(st)->repeat_pict+1 : ticks;
                duration_dts = av_rescale_q(ticks, av_inv_q(field_rate), AV_TIME_BASE_Q);
#else
                int ticks= av_stream_get_parser(st) ? av_stream_get_parser(st)->repeat_pict+1 : dec_ctx->ticks_per_frame;
                duration_dts = ((int64_t)AV_TIME_BASE *
                                              dec_ctx->framerate.den * ticks) /
                                              dec_ctx->framerate.num / dec_ctx->ticks_per_frame;
#endif
            }

            if(demux_worker->target_dts != AV_NOPTS_VALUE && duration_dts) {
                demux_worker->target_dts += duration_dts;
            }else
                demux_worker->target_dts = AV_NOPTS_VALUE;

            av_log(NULL, AV_LOG_DEBUG, "mux one packet finish\n");
        } else {
            av_packet_free(&packet);
        }
    } // while

eos:
    avformat_close_input(&demux_worker->ifmt_ctx);
    av_log(NULL, AV_LOG_ERROR, "demuxer %d demux num %d exit ret=0x%x.\n",
           demux_worker->index, demux_worker->demux_pkt_num, ret);
    demux_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
    pthread_mutex_lock(&demux_worker->common->lock);
    demux_worker->common->exit_demux_num++;
    av_log(NULL, AV_LOG_DEBUG, "exit demux num %d\n", demux_worker->common->exit_demux_num);
    pthread_mutex_unlock(&demux_worker->common->lock);
    return (void *)((long)ret);
}

// decoder thread routine
static void *decoder_thread_run(void *thread_data) {
    decoder_worker *dec_worker = (decoder_worker *)thread_data;
    int i, ret = 0;

    switch (dec_worker->type) {
    case SW_VIDEO:
        ret = open_yuv_file(dec_worker);
        break;
    case SW_PICTURE:
        ret = open_image_file(dec_worker);
        break;
    case HW_VIDEO:
        ret = open_hw_input_file(dec_worker);
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "invalid decoder type %d.\n",
                dec_worker->type);
        ret = -1;
        break;
    }
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "fail to open input file.\n");
        for (i = 0; i < filter_num; i++) {
            pthread_mutex_lock(&dec_worker->xstack[i]->filter_lock);
            pthread_cond_signal(&dec_worker->xstack[i]->init_cond);
            dec_worker->xstack[i]->flushed = 1;
            pthread_mutex_unlock(&dec_worker->xstack[i]->filter_lock);
        }
        if (dec_worker->type == HW_VIDEO) {
            pthread_mutex_lock(&dec_worker->demux_worker->packet_lock);
            pthread_cond_signal(&dec_worker->demux_worker->produce_cond);
            dec_worker->demux_worker->input_eos = 1;
            pthread_mutex_unlock(&dec_worker->demux_worker->packet_lock);
        }
        goto end;
    }

    switch (dec_worker->type) {
    case SW_VIDEO:
        ret = sw_decoder_run(dec_worker);
        break;
    case SW_PICTURE:
        ret = image_decoder_run(dec_worker);
        break;
    case HW_VIDEO:
        ret = hw_decoder_run(dec_worker);
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "invalid decoder type %d.\n",
                dec_worker->type);
        break;
    }
    av_log(NULL, AV_LOG_ERROR, "decoder %d total dec num %d exit: ret=0x%x.\n",
           dec_worker->index, dec_worker->decoded_frames, ret);

end:
    if (dec_worker->type != SW_PICTURE) {
        //shared memory debt can incurred if decoder closes before frames have
        //been recycled
        av_log(NULL, AV_LOG_DEBUG, "%s: stream=%d waiting for recycle list to be empty\n",
               __func__, dec_worker->index);
        pthread_mutex_lock(&dec_worker->list_lock);
        while (get_fifo_size(dec_worker->wait_to_free_list) != 0) {
            pthread_cond_wait(&dec_worker->list_cond, &dec_worker->list_lock);
        }
        pthread_mutex_unlock(&dec_worker->list_lock);
        av_log(NULL, AV_LOG_DEBUG, "%s: stream=%d recycle list empty\n",
               __func__, dec_worker->index);
    }
    if (dec_worker->type != HW_VIDEO) {
        if (dec_worker->in.sw.sws_ctx) {
            sws_freeContext(dec_worker->in.sw.sws_ctx);
        }
        if (dec_worker->in.sw.hwctx_upload) {
            av_buffer_unref(&dec_worker->in.sw.hwctx_upload);
        }
        if (dec_worker->in.sw.hwdevice_upload) {
            av_buffer_unref(&dec_worker->in.sw.hwdevice_upload);
        }
    }
    if (dec_worker->decoded_frame) {
        av_frame_free(&dec_worker->decoded_frame);
        dec_worker->decoded_frame = NULL;
    }
    if (dec_worker->dec_ctx) {
        avcodec_free_context(&dec_worker->dec_ctx);
    }
    if (dec_worker->ifmt_ctx) {
        avformat_close_input(&dec_worker->ifmt_ctx);
    }

    pthread_mutex_lock(&dec_worker->common->lock);
    dec_worker->common->exit_dec_num++;
    av_log(NULL, AV_LOG_ERROR, "exit dec num %d\n", dec_worker->common->exit_dec_num);
    pthread_mutex_unlock(&dec_worker->common->lock);
    return (void *)((long)ret);
}

// FILTER THREAD FUNCTION
// ni_xstack init, return 0 if all successful, -1 otherwise
//
// Resets the bookkeeping fields of the filter worker, records the number of
// inputs (entries) and outputs (exits), enables "shortest" mode when the
// filter description contains "shortest=1", and creates the worker's mutexes
// and condition variables. Primitives that were already created are destroyed
// again if a later one fails.
static int init_xstack(filter_worker *f, int entries, int exits) {
    int i, ret;
    f->init = 0;
    f->inputs = entries;
    f->outputs = exits;
    f->shortest = 0;
    f->sync = 0;

    if (strstr(f->filter_desc, "shortest=1")) {
        f->shortest = 1;
    }

    ret = pthread_mutex_init(&f->filter_lock, NULL);
    if (ret) {
        goto fail_init_filter;
    }
    ret = pthread_mutex_init(&f->ret_lock, NULL);
    if (ret) {
        goto fail_init_ret;
    }
    ret = pthread_cond_init(&f->init_cond, NULL);
    if (ret) {
        goto fail_init_cond;
    }
    ret = pthread_cond_init(&f->flush_cond, NULL);
    if (ret) {
        goto fail_flush_cond;
    }
    for (i = 0; i < NI_MAX_XSTACK_INPUTS; i++) {
        f->src_pads[i] = NULL;
    }
    f->num_src_pads = 0;
    for (i = 0; i < NI_MAX_XSTACK_OUTPUTS; i++) {
        f->dst_pads[i] = NULL;
    }
    f->num_dst_pads = 0;

    f->filter_graph = NULL;
    f->input_eos_num = 0;
    f->filter_ret = 0;
    f->flushed = false;

    return 0;

fail_flush_cond:
    pthread_cond_destroy(&f->init_cond);
fail_init_cond:
    pthread_mutex_destroy(&f->ret_lock);
fail_init_ret:
    pthread_mutex_destroy(&f->filter_lock);
fail_init_filter:
    // pthread init functions report failure with a positive error number;
    // log it and return the documented -1 so "ret < 0" checks also catch it
    av_log(NULL, AV_LOG_ERROR, "%s failed to init lock/cond: %d\n", __func__, ret);
    return -1;
}

// Destroy the locks and condition variables of a filter worker and free its
// filter graph if one exists. A NULL worker is ignored.
static void cleanup_filter_worker(filter_worker *worker) {
    if (!worker) {
        return;
    }

    pthread_mutex_destroy(&worker->filter_lock);
    pthread_mutex_destroy(&worker->ret_lock);
    pthread_cond_destroy(&worker->init_cond);
    pthread_cond_destroy(&worker->flush_cond);

    if (worker->filter_graph != NULL) {
        avfilter_graph_free(&worker->filter_graph);
    }
}

// ni_xstack filter graph initialization
// use filter description to init the filter graph
// link the inputs to entries and outputs to exits
//
// Each open input of the parsed graph is matched to the source pad whose
// decoder index equals the number at the start of the input label, and gets a
// "buffer" source carrying the hw frames context of that pad's first frame.
// Each open output gets a "buffersink". The graph is then configured.
//
// Returns a non-negative value on success and a negative AVERROR code on any
// failure. The parsed in/out lists and the temporary buffersrc parameters are
// released on every path.
static int init_filter_graph2(filter_worker *f) {
    int i, j, ret = 0;
    char args[512] = { 0 };
    char name[32] = { 0 };
    // start as NULL so the cleanup at "end" is safe on early failures
    AVFilterInOut *inputs = NULL, *outputs = NULL, *cur;

    f->filter_graph = avfilter_graph_alloc();
    if (f->filter_graph == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate filter graph\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    // parse filter description and generate inputs and outputs
    // each input will link to a decoder
    // each output will link to a encoder
    ret = avfilter_graph_parse2(f->filter_graph, f->filter_desc, &inputs, &outputs);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to parse graph\n");
        goto end;
    }

    // process filter inputs
    // link entry to src pad
    for (cur = inputs, i = 0; cur && i < f->num_src_pads; cur = cur->next, i++) {
        ni_xstack_entry_t *entry = NULL;
        // get the pad whose decoder index matches the input label;
        // an unlabeled input (name == NULL) cannot be matched
        if (cur->name) {
            int wanted = atoi(cur->name);
            for (j = 0; j < f->num_src_pads; j++) {
                if (wanted == f->src_pads[j]->worker->index) {
                    entry = f->src_pads[j];
                    break;
                }
            }
        }
        if (!entry) {
            av_log(NULL, AV_LOG_ERROR, "%s failed to get pad %d\n",
                   __func__, i);
            ret = AVERROR(EINVAL);
            goto end;
        }

        snprintf(name, sizeof(name), "in_%d_%d", f->index, entry->worker->index);
        snprintf (args, sizeof (args),
                  "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d:frame_rate=%d/%d",
                  entry->width, entry->height, entry->hw_pixfmt,
                  entry->time_base.num, entry->time_base.den,
                  entry->par.num, entry->par.den,
                  entry->fps.num, entry->fps.den);

        av_log(NULL, AV_LOG_DEBUG, "input filter args: %s\n", args);
        ret = avfilter_graph_create_filter(&entry->buffersrc_ctx, avfilter_get_by_name("buffer"),
                                           name, args, NULL, f->filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to create input filter: %d\n", i);
            goto end;
        }

        // set buffer src HW frames context
        entry->buffersrc_par = av_buffersrc_parameters_alloc();
        if (!entry->buffersrc_par) {
            av_log(NULL, AV_LOG_ERROR, "%s failed to allocate buffersrc parameters"
                   " for entity %s\n", __func__, entry->name);
            ret = AVERROR(ENOMEM);
            goto end;
        }
        entry->buffersrc_par->hw_frames_ctx = entry->first_frame->hw_frames_ctx;
        ret = av_buffersrc_parameters_set(entry->buffersrc_ctx, entry->buffersrc_par);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_parameters_set failed"
                   " for entity %s\n", __func__, entry->name);
            goto end;
        }

        // connect buffer src (index 0) pad to one of ni_xstack's src
        // (index i) pads
        ret = avfilter_link(entry->buffersrc_ctx, 0, cur->filter_ctx, cur->pad_idx);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to link input filter: %d\n", i);
            goto end;
        }
    }

    // process filter outputs
    // count the dst pads if not done yet, this only happens one time
    if (!f->num_dst_pads) {
        for (cur = outputs; cur; cur = cur->next) {
            f->num_dst_pads++;
        }
    }

    if (f->num_dst_pads != f->outputs) {
        av_log(NULL, AV_LOG_ERROR, "filter outputs not equal to file outputs dst_pad %d output %d\n",
               f->num_dst_pads, f->outputs);
        ret = AVERROR(EINVAL);
        goto end;
    }
    // link entry to dst pad; dst_pads is walked from the last index down to 0
    // while the parsed output list is walked forward
    for (cur = outputs, i = f->num_dst_pads - 1; cur && i >= 0; cur = cur->next, i--) {

        ni_xstack_exit_t *exit = f->dst_pads[i];
        snprintf(name, sizeof(name), "out_%d_%d", f->index, i);
        ret = avfilter_graph_create_filter(&exit->buffersink_ctx, avfilter_get_by_name("buffersink"),
                                           name, NULL, NULL, f->filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to create output filter: %d\n", i);
            goto end;
        }

        // connect ni_xstack's dst (index i) pads to one of buffer sink
        // (index 0) pad
        ret = avfilter_link(cur->filter_ctx, cur->pad_idx, exit->buffersink_ctx, 0);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to link output filter: %d\n", i);
            goto end;
        }
    }

    // configure and validate the filter graph
    ret = avfilter_graph_config(f->filter_graph, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "%s failed to config graph filter\n",
               __func__);
        goto end;
    } else {
        av_log(NULL, AV_LOG_INFO, "%s success config graph filter %d %s\n",
               __func__, f->index, f->filter_desc);
    }

end:
    // only the parameter struct is freed here; hw_frames_ctx inside it is
    // borrowed from the entry's first frame
    for (i = 0; i < f->num_src_pads; i++) {
        av_freep (&f->src_pads[i]->buffersrc_par);
    }

    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    return ret;
}

// Run once at init time.
// The caller must already hold f->filter_lock.
// Picks the f->sync'th non-picture source pad as the timing reference (and
// stores that pad's index back into f->sync), then resets the pts of every
// picture input and gives it the reference frame rate and time base. When no
// non-picture pad is selected, DEFAULT_FPS / DEFAULT_TIME_BASE are used.
void normalize_pts_for_sw_decoder(filter_worker* f) {
    AVRational ref_fps = av_make_q(DEFAULT_FPS, 1);
    AVRational ref_tb = av_make_q(1, DEFAULT_TIME_BASE);
    int videos_seen = 0;
    int idx;

    // locate the reference (non-picture) input
    for (idx = 0; idx < f->num_src_pads; idx++) {
        ni_xstack_entry_t *pad = f->src_pads[idx];

        if (pad->worker->type == SW_PICTURE) {
            continue;
        }
        if (videos_seen != f->sync) {
            videos_seen++;
            continue;
        }
        ref_fps = pad->fps;
        ref_tb = pad->time_base;
        f->sync = idx;
        av_log(NULL, AV_LOG_INFO, "%s filter sync %d\n",
               __func__, f->sync);
        break;
    }

    // apply the reference timing to every picture input
    for (idx = 0; idx < f->num_src_pads; idx++) {
        ni_xstack_entry_t *pad = f->src_pads[idx];

        if (pad->worker->type != SW_PICTURE) {
            continue;
        }
        pad->first_frame->pts = 0;
        pad->last_pts = 0;
        // skip when either numerator is 0, which would make the divisor 0
        if (!ref_fps.num || !ref_tb.num) {
            continue;
        }
        pad->fps = ref_fps;
        pad->time_base = ref_tb;
        pad->pts_step = (ref_tb.den * ref_fps.den) / (ref_tb.num * ref_fps.num);
    }
}

// Classify an input by its file name suffix (case-insensitive).
// Returns SW_PICTURE for .jpg/.jpeg/.png/.bmp/.gif and UNKNOW otherwise,
// including when the name is NULL or contains no '.'.
static input_type get_input_type(const char *file_name) {
    static const char *const picture_suffixes[] = {
        ".jpg", ".jpeg", ".png", ".bmp", ".gif"
    };
    const char *suffix = file_name ? strrchr(file_name, '.') : NULL;
    size_t k;

    // strrchr returns NULL when there is no '.', which must not reach strcasecmp
    if (!suffix) {
        return UNKNOW;
    }
    for (k = 0; k < sizeof(picture_suffixes) / sizeof(picture_suffixes[0]); k++) {
        if (strcasecmp(suffix, picture_suffixes[k]) == 0) {
            return SW_PICTURE;
        }
    }
    return UNKNOW;
}

// flush the filter at the end of filter
//
// Steps:
//  1. For every source pad: set list_block, free all frames still queued in
//     dec_frame_fifo, unref last_frame and, if the graph was initialized,
//     push a NULL frame into the pad's buffer source.
//  2. For every non-picture pad: repeatedly call list_recycle_frames() on the
//     decoder's wait_to_free_list, giving up after more than
//     NI_MAX_DEC_CAPACITY attempts.
//  3. If the graph was initialized: pull frames from each buffersink and pass
//     them to send_encode_frame() until a sink reports EAGAIN/EOF or an
//     error occurs.
//  4. Mark the filter as flushed, and once all filters have flushed drain the
//     wait lists of the non-picture decoders and signal their list_cond.
//
// Returns 0 on success (also when the graph was never initialized) or the
// negative error that aborted the flush.
static int flush_filter(filter_worker *f) {
    // ret must start at 0: when f->init is not set nothing below assigns it
    int i, ret = 0, flush_num;
    ni_xstack_entry_t *entry;
    ni_xstack_exit_t *exit;

    for (i = 0; i < f->num_src_pads; i++) {
        entry = f->src_pads[i];
        pthread_mutex_lock(&entry->lock);
        entry->list_block = 1;
        av_log(NULL, AV_LOG_DEBUG, "%s filter %d dec_frame_fifo %d size %d empty %d\n", __func__,
               f->index, entry->worker->index,
               get_fifo_size(entry->dec_frame_fifo),
               is_fifo_empty(entry->dec_frame_fifo));
        // drop every decoded frame that is still waiting in the fifo
        while (!is_fifo_empty(entry->dec_frame_fifo)) {
#if IS_FFMPEG_70_AND_ABOVE
            av_fifo_read(entry->dec_frame_fifo, &(entry->first_frame), 1);
#else
            av_fifo_generic_read(entry->dec_frame_fifo, &(entry->first_frame), sizeof(AVFrame*), NULL);
#endif
            av_log(NULL, AV_LOG_DEBUG, "%s filter %d index %d stream frame -> pts=%ld,dts=%ld\n",
                   __func__, f->index, entry->worker->index, entry->first_frame->pts, entry->first_frame->pkt_dts);
            av_frame_free(&entry->first_frame);
        }
        av_frame_unref(entry->last_frame);
        pthread_mutex_unlock(&entry->lock);
        if (f->init) {
            // a NULL frame signals end of stream to the buffer source
            ret = av_buffersrc_add_frame_flags(
                    entry->buffersrc_ctx, NULL,
                    AV_BUFFERSRC_FLAG_KEEP_REF | AV_BUFFERSRC_FLAG_PUSH);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_add_frame_flags got error %d\n",
                    __func__, ret);
                goto end;
            }
        }
    }

    for (i = 0; i < f->num_src_pads; i++) {
        int attempt_cnt = 0;

        entry = f->src_pads[i];
        if (entry->worker->type == SW_PICTURE) {
            continue;
        }
        pthread_mutex_lock(&entry->worker->list_lock);
        while (get_fifo_size(entry->worker->wait_to_free_list)) {
            if (list_recycle_frames(entry->worker->wait_to_free_list) < NI_MAX_DEC_CAPACITY) {
                av_log(NULL, AV_LOG_DEBUG, "fifo size %d index %d\n",
                        get_fifo_size(entry->worker->wait_to_free_list), entry->worker->index);
            }
            // bounded retry: whatever is left gets drained at the end
            if (++attempt_cnt > NI_MAX_DEC_CAPACITY) {
                av_log(NULL, AV_LOG_INFO, "filter thread fail to recycle the last frame now, recycle later, fifo size %d index %d\n",
                        get_fifo_size(entry->worker->wait_to_free_list), entry->worker->index);
                break;
            }
        }
        pthread_mutex_unlock(&entry->worker->list_lock);
    }

    while (f->init) {
        for (i = 0; i < f->num_dst_pads; i++) {
            exit = f->dst_pads[i];
            ret = av_buffersink_get_frame(exit->buffersink_ctx, exit->filter_frame);

            if (ret < 0 && ret != AVERROR (EAGAIN) && ret != AVERROR_EOF) {
                av_log(NULL, AV_LOG_ERROR, "%s av_buffersink_get_frame got error %d\n",
                       __func__, ret);
                goto end;
            } else if (ret == AVERROR (EAGAIN) || ret == AVERROR_EOF) {
                av_log(NULL, AV_LOG_DEBUG, "%s av_buffersink_get_frame got %s\n",
                        __func__, ret == AVERROR (EAGAIN) ? "EAGAIN" : "EOF");
                ret = 0;
                goto end;
            } else {
                ret = send_encode_frame(exit->enc_worker, exit->filter_frame);

                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "%s: send_encode_frame ERROR !!!\n",
                           __func__);
                    goto end;
                }
                av_frame_unref(exit->filter_frame);
                f->filtered_frames++;
            }
        }
    }
end:
    // publish the flushed state and wake everyone waiting on flush_cond
    pthread_mutex_lock(&f->ret_lock);
    f->flushed = true;
    pthread_cond_broadcast(&f->flush_cond);
    pthread_mutex_unlock(&f->ret_lock);
    pthread_mutex_lock(&f->common->lock);
    flush_num = ++(f->common->exit_filt_num);
    pthread_mutex_unlock(&f->common->lock);
    if (flush_num < filter_num) {
        av_log(NULL, AV_LOG_INFO, "%s %d / %d filters remaining. Shouldn't drain wait list\n",
               __func__, (filter_num - flush_num), filter_num);
        return ret;
    }
    // this was the last filter to flush: drain the wait lists
    av_log(NULL, AV_LOG_INFO, "%s All %d / %d filters flushed. Drain wait list\n",
           __func__, flush_num, filter_num);
    for (i = 0; i < f->num_src_pads; i++) {
        entry = f->src_pads[i];
        if (entry->worker->type == SW_PICTURE) {
            continue;
        }
        pthread_mutex_lock(&entry->worker->list_lock);
        drain_fifo(entry->worker->wait_to_free_list, __func__, entry->name);
        pthread_cond_signal(&entry->worker->list_cond);
        pthread_mutex_unlock(&entry->worker->list_lock);
    }
    return ret;
}

// Tell every encoder attached to this filter that the filter has finished:
// set filter_flush under the encoder's frame_lock and, when its frame fifo is
// empty, signal consume_cond. Always returns 0.
static int finish_filter(filter_worker *f) {
    int out_idx = 0;

    while (out_idx < f->outputs) {
        encoder_worker *enc = f->dst_pads[out_idx]->enc_worker;

        pthread_mutex_lock(&enc->frame_lock);
        enc->filter_flush = 1;
        if (is_fifo_empty(enc->enc_frame_fifo)) {
            pthread_cond_signal(&enc->consume_cond);
        }
        pthread_mutex_unlock(&enc->frame_lock);

        out_idx++;
    }

    return 0;
}

// Fetch the next decoded frame of one filter input into entry->first_frame.
//
// Returns:
//   FILT_DEC_FRAME_EOS   - the input is at eos and its fifo is empty;
//                          entry->first_frame has been freed and set to NULL
//   FILT_DEC_FRAME_SKIP  - the fifo is empty and the decoder's
//                          wait_to_free_list holds at least
//                          NI_MAX_DEC_CAPACITY entries, so the function
//                          returns instead of waiting
//   FILT_DEC_FRAME_VALID - a frame was popped from the fifo
//
// entry->lock is held for the whole call and released before every return.
static filt_dec_frame_t filter_dec_frame_read(filter_worker *f, ni_xstack_entry_t *entry) {
    int wait_list_size;

    pthread_mutex_lock(&entry->lock);
    // nothing queued and the decoder already reported eos
    if (entry->eos_flag && is_fifo_empty(entry->dec_frame_fifo)) {
        av_log(NULL, AV_LOG_DEBUG, "%s dec %s %s eos %d. eos current/total  %d/%d\n",
               __func__, entry->name, entry->worker->input_file, entry->eos_flag,
               f->input_eos_num, f->num_src_pads);
        av_frame_free(&entry->first_frame);
        entry->first_frame = NULL;
        pthread_mutex_unlock(&entry->lock);
        return FILT_DEC_FRAME_EOS;
    }
    if (is_fifo_empty(entry->dec_frame_fifo)) {
        // sample the size of the decoder's recycle list under its own lock
        pthread_mutex_lock(&entry->worker->list_lock);
        wait_list_size = get_fifo_size(entry->worker->wait_to_free_list);
        pthread_mutex_unlock(&entry->worker->list_lock);
        // check for deadlock conditions: only wait for a new frame while the
        // wait list is below NI_MAX_DEC_CAPACITY
        if (wait_list_size < NI_MAX_DEC_CAPACITY) {
            av_log(NULL, AV_LOG_DEBUG, "%s dec frame wait %s %s %d\n",
                   __func__, entry->name, entry->worker->input_file, entry->eos_flag);
            // NOTE(review): this is a single wait with no predicate loop. If
            // the thread wakes while the fifo is still empty and eos_flag is
            // not set, execution falls through to the fifo read below --
            // confirm that frame_cond is only signalled after a frame was
            // queued or eos_flag was set.
            pthread_cond_wait(&entry->frame_cond, &entry->lock);
            av_log(NULL, AV_LOG_DEBUG, "%s dec frame finish wait %s %s %d\n",
                   __func__, entry->name, entry->worker->input_file, entry->eos_flag);
            // woken up because of eos rather than a new frame
            if (entry->eos_flag && is_fifo_empty(entry->dec_frame_fifo)) {
                av_log(NULL, AV_LOG_INFO, "%s dec frame unavailable %s %s. eos %d/%d\n",
                       __func__, entry->name, entry->worker->input_file,
                       f->input_eos_num, f->num_src_pads);
                av_frame_free(&entry->first_frame);
                entry->first_frame = NULL;
                pthread_mutex_unlock(&entry->lock);
                return FILT_DEC_FRAME_EOS;
            }
            else {
                av_log(NULL, AV_LOG_DEBUG, "%s dec frame available %s %s\n",
                       __func__, entry->name, entry->worker->input_file);
            }
        } else {
            av_log(NULL, AV_LOG_DEBUG, "%s read dec frame skip for %s %s. Wait list is full dec wont generate a frame. "
                                       "Continue to allow frame to be unreferenced and wait other thread dec frame.\n",
                   __func__, entry->name, entry->worker->input_file);
            pthread_mutex_unlock(&entry->lock);
            return FILT_DEC_FRAME_SKIP;
        }
    }
    // pop one frame pointer from the fifo into entry->first_frame
#if IS_FFMPEG_70_AND_ABOVE
    av_fifo_read(entry->dec_frame_fifo, &(entry->first_frame), 1);
#else
    av_fifo_generic_read(entry->dec_frame_fifo, &(entry->first_frame), sizeof(AVFrame *), NULL);
#endif

    // point extended_data back at the frame's own data array
    entry->first_frame->extended_data = entry->first_frame->data;
    av_log(NULL, AV_LOG_DEBUG,
           "%s dec frame get %s %s ui16FrameIdx = [%d] ref_count = %d from fifo %p\n",
           __func__, entry->name, entry->worker->input_file,
           ((niFrameSurface1_t *)(entry->first_frame->buf[0]->data))->ui16FrameIdx,
           av_buffer_get_ref_count(entry->first_frame->buf[0]), entry->dec_frame_fifo);

    // recalculate pts for picture input: each frame advances the previous pts
    // by pts_step (only SW_PICTURE is handled here)
    if (entry->worker->type == SW_PICTURE) {
        entry->first_frame->pts = entry->last_pts + entry->pts_step;
        entry->last_pts = entry->first_frame->pts;
        av_log(NULL, AV_LOG_DEBUG, "pts %ld step %d\n", entry->last_pts, entry->pts_step);
    }

    pthread_mutex_unlock(&entry->lock);
    return FILT_DEC_FRAME_VALID;
}

// filter thread routine
// central filtering processing
// a decoded frame getting into filter graph to be processed
//
// thread_data is the filter_worker owned by this thread.
//
// Phase 1 (while f->init is 0): wait on f->init_cond, pop the first frame of
// every source pad from its dec_frame_fifo, build the graph with
// init_filter_graph2() and feed those first frames to the buffer sources.
// Phase 2 (every later iteration): read a frame for each pad that needs one,
// find the smallest pts rescaled to 1/DEFAULT_TIME_BASE, feed the pads whose
// frame carries that pts, recycle decoder frames, then pull whatever the
// buffer sinks produced and hand it to the encoder workers.
//
// The value returned by finish_filter() is returned, cast to a pointer.
static void *filter_thread_run(void *thread_data) {
    filter_worker *f = (filter_worker *)thread_data;
    // ret starts at 0 so that the early "goto end" paths below never publish
    // an indeterminate value through f->filter_ret
    int ret = 0, i, eof_cnt;
    ni_xstack_entry_t *entry;
    ni_xstack_exit_t *exit;
    int64_t last_pts = 0;
    int64_t min_pts;
    int64_t convert_pts;
    AVRational base_tb = av_make_q(1, DEFAULT_TIME_BASE);
    while (1) {
        pthread_mutex_lock(&f->filter_lock);
        // filter thread exit when all the decoder threads finish
        if (f->init && f->input_eos_num == f->num_src_pads) {
            if (entry_empty(f)) {
                av_log(NULL, AV_LOG_INFO, "%s all %d video decoders flushed, filter %d ready to flush\n",
                       __func__, f->input_eos_num, f->index);
                pthread_mutex_unlock(&f->filter_lock);
                goto end;
            }
            av_log(NULL, AV_LOG_INFO, "%s need to flush the filter\n", __func__);
        }
        pthread_mutex_unlock(&f->filter_lock);

        // init filter and send frame to filter
        if (!f->init) {
            pthread_mutex_lock(&f->filter_lock);
            if (f->flushed) {
                pthread_mutex_unlock(&f->filter_lock);
                goto end;
            }
            av_log(NULL, AV_LOG_DEBUG, "filter init wait\n");
            // NOTE(review): single wait without a predicate loop; a wakeup
            // that is not an init request is only distinguished through
            // f->flushed - confirm against the code signalling init_cond
            pthread_cond_wait(&f->init_cond, &f->filter_lock);
            if (f->flushed) {
                pthread_mutex_unlock(&f->filter_lock);
                goto end;
            }
            av_log(NULL, AV_LOG_DEBUG, "src pad number %d\n", f->num_src_pads);
            pthread_mutex_unlock(&f->filter_lock);

            // filter get first frame and try to init filter
            for (i = 0; i < f->num_src_pads; i++) {
                entry = f->src_pads[i];
                pthread_mutex_lock(&entry->lock);
                if (is_fifo_empty(entry->dec_frame_fifo)) {
                    av_log(NULL, AV_LOG_DEBUG, "%s init dec frame wait %s %s\n", __func__, entry->name, entry->worker->input_file);
                    // NOTE(review): waits once; the fifo is assumed to hold a
                    // frame after the wakeup - confirm with the producer side
                    pthread_cond_wait(&entry->frame_cond, &entry->lock);
                    av_log(NULL, AV_LOG_DEBUG, "%s init dec frame available %s %s\n", __func__, entry->name, entry->worker->input_file);
                }
#if IS_FFMPEG_70_AND_ABOVE
                av_fifo_read(entry->dec_frame_fifo, &(entry->first_frame), 1);
#else
                av_fifo_generic_read(entry->dec_frame_fifo, &(entry->first_frame), sizeof(AVFrame*), NULL);
#endif
                entry->first_frame->extended_data = entry->first_frame->data;
                // ppu may change the resolution of frame, get resolution from frame instead of dec_ctx
                entry->width = entry->first_frame->width;
                entry->height = entry->first_frame->height;
                av_log(NULL, AV_LOG_DEBUG,
                       "%s init dec frame get %s %s ui16FrameIdx = [%d] ref_count = %d from fifo %p\n",
                       __func__, entry->name, entry->worker->input_file,
                       ((niFrameSurface1_t *)(entry->first_frame->buf[0]->data))->ui16FrameIdx,
                       av_buffer_get_ref_count(entry->first_frame->buf[0]), entry->dec_frame_fifo);
                pthread_mutex_unlock(&entry->lock);
            }
            // init pts for yuv and image inputs
            normalize_pts_for_sw_decoder(f);

            ret = init_filter_graph2(f);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "init filter graph failed\n");
                // release the first frame of EVERY pad; entry must be
                // re-selected per iteration, otherwise only the last pad's
                // frame is freed and the others leak
                for (i = 0; i < f->num_src_pads; i++) {
                    entry = f->src_pads[i];
                    av_frame_free(&entry->first_frame);
                }
                goto end;
            }
            else {
                av_log(NULL, AV_LOG_INFO, "init filter graph success\n");
            }
            f->init = 1;

            for (i = 0; i < f->num_src_pads; i++) {
                entry = f->src_pads[i];
                ret = av_buffersrc_add_frame_flags(
                    entry->buffersrc_ctx, entry->first_frame,
                    AV_BUFFERSRC_FLAG_KEEP_REF | AV_BUFFERSRC_FLAG_PUSH);
                if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR (EAGAIN)) {
                    av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_add_frame_flags"
                           " first frame failed for %s %d\n", __func__, entry->name, ret);
                    goto end;
                }

                // record the last frame if decoder not reach eof
                if (!entry->eos_flag) {
                    // skip the new reference when last_frame already points
                    // at the same underlying buffer
                    if (!(entry->last_frame->buf[0] && entry->first_frame->buf[0] &&
                        entry->last_frame->buf[0]->data == entry->first_frame->buf[0]->data)) {
                        av_frame_ref(entry->last_frame, entry->first_frame);
                    }
                }
                av_frame_free(&entry->first_frame);

                av_log(NULL, AV_LOG_DEBUG, "%s av_buffersrc_add_frame_flags"
                       " first frames fed to filter\n", __func__);
            }
            av_log(NULL, AV_LOG_INFO, "filter init_finish\n");
        } else {
            // pass 1: fetch a frame for each pad that needs one and find the
            // smallest pts among the pads holding a valid frame
            min_pts = INT64_MAX;
            for (i = 0; i < f->num_src_pads; i++) {
                entry = f->src_pads[i];
                if (entry->frame_status == FILT_DEC_FRAME_NEED) {
                    entry->frame_status = filter_dec_frame_read(f, entry);
                    if (FILT_DEC_FRAME_EXIT == entry->frame_status) {
                        goto finish;
                    }
                }
                if (FILT_DEC_FRAME_VALID == entry->frame_status) {
                    convert_pts = av_rescale_q(entry->first_frame->pts, entry->time_base, base_tb);
                    av_log(NULL, AV_LOG_DEBUG, "entry %d pts %ld\n", i, convert_pts);
                    if (convert_pts < min_pts) {
                        min_pts = convert_pts;
                    }
                }
            }
            av_log(NULL, AV_LOG_DEBUG, "min pts %ld\n", min_pts);
            // pass 2: feed the buffer sources. Skipped pads are re-armed for
            // the next round, valid frames with a later pts are held back,
            // every other pad hands its first_frame to the graph
            for (i = 0; i < f->num_src_pads; i++) {
                entry = f->src_pads[i];
                if (FILT_DEC_FRAME_SKIP == entry->frame_status) {
                    entry->frame_status = FILT_DEC_FRAME_NEED;
                    continue;
                }
                if (FILT_DEC_FRAME_VALID == entry->frame_status &&
                    min_pts != av_rescale_q(entry->first_frame->pts, entry->time_base, base_tb)) {
                    continue;
                }
                ret = av_buffersrc_add_frame_flags(
                    entry->buffersrc_ctx, entry->first_frame,
                    AV_BUFFERSRC_FLAG_KEEP_REF | AV_BUFFERSRC_FLAG_PUSH);
                if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR (EAGAIN)) {
                    av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_add_frame_flags"
                           " add frame failed for %s %d\n", __func__, entry->name, ret);
                    goto end;
                }
                entry->frame_status = FILT_DEC_FRAME_NEED;

                // record the last frame if decoder not reach eof
                if (!entry->eos_flag) {
                    pthread_mutex_lock(&entry->lock);
                    // pts is sampled before the unref below resets the frame
                    last_pts = entry->last_frame->pts;
                    av_frame_unref(entry->last_frame);
                    if (!(entry->last_frame->buf[0] && entry->first_frame->buf[0] &&
                        entry->last_frame->buf[0]->data == entry->first_frame->buf[0]->data)) {
                        if (last_pts != -1 && entry->first_frame != NULL)//decoder signaled exit dont save a reference
                            av_frame_ref(entry->last_frame, entry->first_frame);
                    }
                    pthread_mutex_unlock(&entry->lock);
                }
                av_frame_free(&entry->first_frame);
            }

            // recycle and unref the hw frame if it not ref by filter anymore,
            // it will signal to decoder threads
            for (i = 0; i < f->num_src_pads; i++) {
                entry = f->src_pads[i];
                if (entry->worker->type == SW_PICTURE) {
                    continue;
                }
                if (entry->frame_status == FILT_DEC_FRAME_NEED) {
                    pthread_mutex_lock(&entry->worker->list_lock);
                    if (!is_fifo_empty(entry->worker->wait_to_free_list) &&
                        list_recycle_frames(entry->worker->wait_to_free_list) < NI_MAX_DEC_CAPACITY) {
                        av_log(NULL, AV_LOG_DEBUG, "%s fifo size %d index %d\n", __func__,
                            get_fifo_size(entry->worker->wait_to_free_list), entry->worker->index);
                        pthread_cond_signal(&entry->worker->list_cond);
                    }
                    pthread_mutex_unlock(&entry->worker->list_lock);
                }
            }
        }

        // try to pull filtered frames from every filter exit
        // put all filtered frames to encoder fifo buffer
        eof_cnt = 0;
        for (i = 0; i < f->num_dst_pads; i++) {
            exit = f->dst_pads[i];
            ret = av_buffersink_get_frame(exit->buffersink_ctx, exit->filter_frame);

            if (ret < 0 && ret != AVERROR (EAGAIN) && ret != AVERROR_EOF) {
                av_log(NULL, AV_LOG_ERROR, "%s av_buffersink_get_frame got error %d\n",
                    __func__, ret);
                goto end;
            } else if (ret == AVERROR (EAGAIN) || ret == AVERROR_EOF) {
                av_log(NULL, AV_LOG_DEBUG, "%s av_buffersink_get_frame got %s\n",
                       __func__,
                       ret == AVERROR(EAGAIN) ? "EAGAIN" : "EOF");
                if (ret == AVERROR_EOF) {
                    eof_cnt++;
                }
                if (!f->src_pads[0]->last_frame) {
                    av_log(NULL, AV_LOG_ERROR, "%s av_buffersink_get_frame got "
                        "%s src frame NULL, return 0 !\n", __func__,
                        ret == AVERROR (EAGAIN) ? "EAGAIN" : "EOF");
                    ret = 0;
                    goto end;
                }
            }
            else {
                niFrameSurface1_t *p_data3 = NULL;
                if (exit->filter_frame && exit->filter_frame->data[3]) {
                    p_data3 = (niFrameSurface1_t*)(exit->filter_frame->data[3]);
                }
                // only dereference the surface descriptor when data[3] is set
                if (p_data3) {
                    av_log(NULL, AV_LOG_DEBUG, "%s av_buffersink_get_frame got "
                        "one frame out: %p ui16FrameIdx = [%d], %d x %d pts %ld\n",
                        __func__, p_data3, p_data3->ui16FrameIdx, p_data3->ui16width, p_data3->ui16height, exit->filter_frame->pts);
                } else {
                    av_log(NULL, AV_LOG_DEBUG, "%s av_buffersink_get_frame got "
                        "one frame out without surface data, pts %ld\n",
                        __func__, exit->filter_frame->pts);
                }

                // the encoder time base is taken from the sink until the
                // encoder worker reports that it has started
                if (!exit->enc_worker->enc_started) {
                    exit->enc_worker->timebase = av_buffersink_get_time_base(exit->buffersink_ctx);
                    av_log(NULL, AV_LOG_DEBUG, "%s av_buffersink_get_time_base %d/%d\n",
                        __func__, exit->enc_worker->timebase.num,
                        exit->enc_worker->timebase.den);
                }

                ret = send_encode_frame(exit->enc_worker, exit->filter_frame);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "%s: send_encode_frame ERROR !!!\n", __func__);
                }
                f->filtered_frames++;
                av_frame_unref(exit->filter_frame);
            }
        }
        if (eof_cnt == f->num_dst_pads) {
            av_log(NULL, AV_LOG_INFO, "%s all %d encoder flushed, filter %d ready to flush\n",
                   __func__, eof_cnt, f->index);
            goto end;
        }
    }
end:
    // publish the last status, then wake every worker blocked on list_cond
    pthread_mutex_lock(&f->ret_lock);
    f->filter_ret = ret;
    pthread_mutex_unlock(&f->ret_lock);
    for (i = 0; i < f->num_src_pads; i++) {
        entry = f->src_pads[i];
        pthread_mutex_lock(&entry->worker->list_lock);
        pthread_cond_broadcast(&entry->worker->list_cond);
        pthread_mutex_unlock(&entry->worker->list_lock);
    }
finish:
    flush_filter(f);
    // wake anything waiting on eos_cond for pads that already reached eos
    for (i = 0; i < f->num_src_pads; i++) {
        entry = f->src_pads[i];
        if (entry->eos_flag) {
            pthread_mutex_lock(&entry->lock);
            pthread_cond_broadcast(&entry->eos_cond);
            pthread_mutex_unlock(&entry->lock);
        }
    }
    ret = finish_filter(f);
    av_log(NULL, AV_LOG_ERROR, "filter %d filter num %d exit: ret=0x%x.\n", f->index, f->filtered_frames, ret);
    return (void *)((long)ret);
}


// ENCODER THREAD FUNCTION
// Initialise an encoder worker: frame lock, consume/produce condition
// variables, the AVFrame pointer fifo, counters and state flags, and finally
// create_new_dst_pad() for the worker's xstack.
// Returns 0 on success. On any failure everything set up so far is released
// in reverse order and -1 is returned.
static int init_encoder_worker(encoder_worker *enc_worker) {
    if (pthread_mutex_init(&enc_worker->frame_lock, NULL)) {
        goto undo_nothing;
    }

    if (pthread_cond_init(&enc_worker->consume_cond, NULL)) {
        goto undo_frame_lock;
    }

    if (pthread_cond_init(&enc_worker->produce_cond, NULL)) {
        goto undo_consume_cond;
    }

    // fifo of AVFrame pointers with NI_MAX_ENC_CAPACITY initial slots
#if LIBAVCODEC_VERSION_MAJOR >= 61 //7.0
    enc_worker->enc_frame_fifo = av_fifo_alloc2(NI_MAX_ENC_CAPACITY, sizeof(AVFrame*), AV_FIFO_FLAG_AUTO_GROW);
#else
    enc_worker->enc_frame_fifo = av_fifo_alloc_array(NI_MAX_ENC_CAPACITY, sizeof(AVFrame*));
#endif
    if (enc_worker->enc_frame_fifo == NULL) {
        goto undo_produce_cond;
    }

    // start from a clean state
    enc_worker->encoder_output_frames = 0;
    enc_worker->encoded_frames = 0;
    enc_worker->filter_flush = 0;
    enc_worker->should_exit = THREAD_STATE_RUNNING;
    enc_worker->enc_started = 0;

    if (create_new_dst_pad(enc_worker->xstack, enc_worker) == 0) {
        return 0;
    }

    // dst pad creation failed: unwind in reverse order of construction
    if (enc_worker->enc_frame_fifo) {
        free_fifo(enc_worker->enc_frame_fifo);
    }
undo_produce_cond:
    pthread_cond_destroy(&enc_worker->produce_cond);
undo_consume_cond:
    pthread_cond_destroy(&enc_worker->consume_cond);
undo_frame_lock:
    pthread_mutex_destroy(&enc_worker->frame_lock);
undo_nothing:
    return -1;
}

// Initialise a muxer worker: packet lock, consume/produce condition
// variables, the AVPacket pointer fifo and the state flags/counters.
// Returns 0 on success. On failure the primitives created so far are
// destroyed in reverse order and -1 is returned.
static int init_muxer_worker(muxer_worker *mux_worker) {
    if (pthread_mutex_init(&mux_worker->packet_lock, NULL)) {
        goto undo_nothing;
    }

    if (pthread_cond_init(&mux_worker->consume_cond, NULL)) {
        goto undo_packet_lock;
    }

    if (pthread_cond_init(&mux_worker->produce_cond, NULL)) {
        goto undo_consume_cond;
    }

    // fifo of AVPacket pointers with NI_MAX_MUX_CAPACITY initial slots
#if LIBAVCODEC_VERSION_MAJOR >= 61 //7.0
    mux_worker->mux_packet_fifo = av_fifo_alloc2(NI_MAX_MUX_CAPACITY, sizeof(AVPacket*), AV_FIFO_FLAG_AUTO_GROW);
#else
    mux_worker->mux_packet_fifo = av_fifo_alloc_array(NI_MAX_MUX_CAPACITY, sizeof(AVPacket*));
#endif
    if (mux_worker->mux_packet_fifo == NULL) {
        goto undo_produce_cond;
    }

    // start from a clean state
    mux_worker->should_exit = THREAD_STATE_RUNNING;
    mux_worker->encoder_flush = 0;
    mux_worker->live = 0;
    mux_worker->mux_started = 0;
    mux_worker->mux_pkt_num = 0;
    return 0;

undo_produce_cond:
    pthread_cond_destroy(&mux_worker->produce_cond);
undo_consume_cond:
    pthread_cond_destroy(&mux_worker->consume_cond);
undo_packet_lock:
    pthread_mutex_destroy(&mux_worker->packet_lock);
undo_nothing:
    return -1;
}

// Release everything owned by an encoder worker: the frame lock, both
// condition variables, the frame fifo and a frame that may still be held in
// worker->filtered_frame. A NULL worker is ignored.
static void cleanup_encoder_worker(encoder_worker *worker) {
    if (!worker) {
        return;
    }

    pthread_mutex_destroy(&worker->frame_lock);
    pthread_cond_destroy(&worker->consume_cond);
    pthread_cond_destroy(&worker->produce_cond);
    if (worker->enc_frame_fifo) {
        av_log(NULL, AV_LOG_DEBUG, "encoder frame list size: %d\n",
               get_fifo_size(worker->enc_frame_fifo));
        free_fifo(worker->enc_frame_fifo);
    }
    // filtered_frame is an AVFrame: av_frame_free() drops its buffer
    // references as well as the struct, whereas av_freep() would only free
    // the struct and leak the referenced data. It accepts a NULL frame and
    // resets the pointer to NULL.
    av_frame_free(&worker->filtered_frame);
}

// Release everything owned by a muxer worker: the packet lock, both
// condition variables, the packet fifo and a packet that may still be held in
// worker->encoded_packet. A NULL worker is ignored.
static void cleanup_muxer_worker(muxer_worker *worker) {
    if (!worker) {
        return;
    }

    pthread_mutex_destroy(&worker->packet_lock);
    pthread_cond_destroy(&worker->consume_cond);
    pthread_cond_destroy(&worker->produce_cond);
    if (worker->mux_packet_fifo) {
        av_log(NULL, AV_LOG_DEBUG, "muxer packet list size: %d\n",
                get_fifo_size(worker->mux_packet_fifo));
        free_fifo(worker->mux_packet_fifo);
    }
    // encoded_packet is an AVPacket: av_packet_free() unreferences its
    // payload and side data before freeing the struct, whereas av_freep()
    // would leak them. It accepts a NULL packet and resets the pointer.
    av_packet_free(&worker->encoded_packet);
}

// init AVFormatContext for output muxer
//
// Picks the container from output_file ("null", rtmp:// -> flv,
// rtsp:// -> rtsp, udp:// -> mpegts, otherwise guessed from the file name),
// adds one stream carrying enc_ctx's parameters, opens the output when the
// muxer needs a file and writes the header. rtmp/rtsp/udp outputs mark the
// muxer worker as live.
//
// On success the context is stored in enc_worker->mux_worker->ofmt_ctx and 0
// is returned. On failure a negative value is returned and everything
// allocated here is released.
static int init_output_fmt_ctx(encoder_worker *enc_worker, const char *output_file, AVCodecContext *enc_ctx) {
    int ret = 0;
    AVFormatContext *ofmt_ctx = NULL;
    AVStream *out_stream = NULL;

    if (!strcmp(output_file, "null")) {
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, output_file, NULL);
    } else if (strstr(output_file, "rtmp://")) {
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, "flv", output_file);
        enc_worker->mux_worker->live = 1;
    } else if (strstr(output_file, "rtsp://")) {
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, "rtsp", output_file);
        enc_worker->mux_worker->live = 1;
    } else if (strstr(output_file, "udp://")) {
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, "mpegts", output_file);
        enc_worker->mux_worker->live = 1;
    } else {
        /* Note: The file extension string should be in output_file here for
                 avformat_alloc_output_context2() to auto-detect output format
        */
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, output_file);
    }

    // without this check a failed allocation is dereferenced just below
    if (ret < 0 || !ofmt_ctx) {
        av_log(NULL, AV_LOG_ERROR, "Could not create output context for '%s'\n", output_file);
        return ret < 0 ? ret : AVERROR_UNKNOWN;
    }

    if (strstr(output_file, "rtsp")) {
        av_opt_set(ofmt_ctx->priv_data, "rtsp_transport", "tcp", 0);
    }

    out_stream = avformat_new_stream(ofmt_ctx, NULL);
    if (!out_stream) {
        av_log(NULL, AV_LOG_ERROR,"Failed allocating output stream\n");
        // go through fail_init so the format context is not leaked
        ret = -1;
        goto fail_init;
    }
    ret = avcodec_parameters_from_context(out_stream->codecpar, enc_ctx);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR,"Failed to copy codec parameters\n");
        goto fail_init;
    }
    out_stream->time_base = enc_ctx->time_base;

    if (!(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ofmt_ctx->pb, output_file, AVIO_FLAG_WRITE);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Could not open output file '%s'", output_file);
            goto fail_init;
        }
    }

    /* init muxer, write output file header */
    ret = avformat_write_header(ofmt_ctx, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error occurred when opening output file\n");
        goto fail_init;
    }

    enc_worker->mux_worker->ofmt_ctx = ofmt_ctx;
    enc_worker->mux_worker->timebase = enc_worker->timebase;
    return 0;
fail_init:
    // close the I/O context opened above; avformat_free_context() does not
    // close it
    if (ofmt_ctx->pb && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        avio_closep(&ofmt_ctx->pb);
    }
    avformat_free_context(ofmt_ctx);
    return ret;
}

// init encoder and muxer
//
// Looks up the encoder named by enc_worker->encoder_name, configures a codec
// context from the first filtered frame (size, aspect ratio, hw pixel format)
// and the worker's time base, opens it and then creates the output muxer via
// init_output_fmt_ctx().
//
// On success the opened context is stored in enc_worker->enc_ctx and 0 is
// returned. On failure a non-zero value is returned and the codec context
// allocated here is freed.
static int open_output_file(encoder_worker *enc_worker, const AVFrame *frame) {
    int ret = 0;
    const char *codec_name = enc_worker->encoder_name;
    const char *output_file = enc_worker->output_name;
    const AVCodec *enc = NULL;
    AVCodecContext *enc_ctx = NULL;

    av_log(NULL, AV_LOG_DEBUG, "%s open encoder %s output file %s index %d_%d\n",
           __func__, codec_name, output_file, enc_worker->filter_index, enc_worker->enc_index);

    // Find video encoder codec selected
    enc = avcodec_find_encoder_by_name(codec_name);
    if (!enc) {
        av_log(NULL, AV_LOG_ERROR,"Codec '%s' not found\n", codec_name);
        return AVERROR_ENCODER_NOT_FOUND;
    }

    // Allocate codec context for encoding
    enc_ctx = avcodec_alloc_context3(enc);
    if (!enc_ctx) {
        av_log(NULL, AV_LOG_ERROR, "Could not allocate video codec context\n");
        return AVERROR(ENOMEM);
    }

    enc_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
    enc_ctx->width = frame->width;
    enc_ctx->height = frame->height;
    enc_ctx->sample_aspect_ratio = frame->sample_aspect_ratio;
    enc_ctx->time_base = enc_worker->timebase;
    enc_ctx->pix_fmt = AV_PIX_FMT_NI_QUAD;
    if (frame->hw_frames_ctx) {
        AVHWFramesContext* pAVHFWCtx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
        enc_ctx->sw_pix_fmt = pAVHFWCtx->sw_format;
    } else {
        av_log(NULL, AV_LOG_ERROR, "encode frame should be hw frame\n");
    }

    // for .mkv outputs append GenHdrs=1 to the xcoder parameters, using ':'
    // as separator when parameters are already present
    if (strstr(output_file, ".mkv")) {
        if (enc_worker->encoder_params[0]) {
            snprintf(enc_worker->encoder_params + strlen(enc_worker->encoder_params),
                     sizeof(enc_worker->encoder_params) - strlen(enc_worker->encoder_params),
                     "%s", ":GenHdrs=1");
        } else {
            snprintf(enc_worker->encoder_params + strlen(enc_worker->encoder_params),
                     sizeof(enc_worker->encoder_params) - strlen(enc_worker->encoder_params),
                     "%s", "GenHdrs=1");
        }
    }

    if ((enc->id == AV_CODEC_ID_H264) || (enc->id == AV_CODEC_ID_H265) || (enc->id == AV_CODEC_ID_AV1)) {
        if(strlen(enc_worker->encoder_params)) {
            av_opt_set(enc_ctx->priv_data, "xcoder-params", enc_worker->encoder_params, 0);
        }
        if(enc_worker->device_id >= 0) {
            // large enough for any int, so ids with four or more digits are
            // no longer truncated
            char str_devid[16] = {0};
            snprintf(str_devid, sizeof(str_devid), "%d", enc_worker->device_id);
            av_opt_set(enc_ctx->priv_data, "enc", str_devid, 0);
        }
    }
    else {
        av_log(NULL, AV_LOG_ERROR, "codec id %d not supported.\n", enc->id);
        ret = -1;
        goto fail;
    }

    // Open encoder
    ret = avcodec_open2(enc_ctx, enc, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not open enc\n");
        goto fail;
    }

    ret = init_output_fmt_ctx(enc_worker, output_file, enc_ctx);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR,"fail to open file\n");
        goto fail;
    }
    // NOTE(review): the flag is set after avcodec_open2() and after the
    // header was written, so it presumably no longer affects the encoder
    // output - confirm whether it is still needed
    if (enc_worker->mux_worker->ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
        enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    enc_worker->enc_ctx = enc_ctx;

    return ret;

fail:
    // the context was never handed to enc_worker, so it must be freed here
    avcodec_free_context(&enc_ctx);
    return ret;
}

// write a frame to encoder and try to get an encoded frame back
//
// Sends filt_frame to the encoder (NULL puts the encoder into draining mode)
// and then receives packets in a loop, forwarding each one to the muxer
// worker with send_mux_packet(), until the encoder reports EAGAIN/EOF or an
// error occurs.
//
// got_frame is cleared on entry and set to 1 once a non-empty packet has been
// received; it is cleared again when the loop ends on EAGAIN/EOF.
// Returns 0 when the loop ends on EAGAIN/EOF, otherwise the negative error.
static int encoder_write_frame(encoder_worker *enc_worker, AVFrame *filt_frame, int *got_frame) {
    int ret;
    av_log(NULL, AV_LOG_DEBUG, "%s frame %p data %p extended_data %p\n",
           __func__, filt_frame, filt_frame ? filt_frame->data : NULL, filt_frame ? filt_frame->extended_data : NULL);

    *got_frame = 0;

    // unless source key frames are forced, let the encoder choose the
    // picture type
    if (filt_frame && !enc_worker->force_source_keyframe) {
        filt_frame->pict_type = AV_PICTURE_TYPE_NONE;
    }

    ret = avcodec_send_frame(enc_worker->enc_ctx, filt_frame);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "%s avcodec_send_frame fail to send frame %d"
               "\n",  __func__, ret);
        return ret;
    }

    while (1) {
        AVPacket *enc_pkt = av_packet_alloc();
        // a failed allocation must not reach avcodec_receive_packet()
        if (!enc_pkt) {
            av_log(NULL, AV_LOG_ERROR, "%s av_packet_alloc failed\n", __func__);
            ret = AVERROR(ENOMEM);
            break;
        }
        ret = avcodec_receive_packet(enc_worker->enc_ctx, enc_pkt);
        if (ret >= 0) {
            // NOTE(review): *got_frame is not reset per iteration, so an
            // empty packet following a valid one is still forwarded -
            // confirm whether that is intended
            if (enc_pkt->size && enc_pkt->data) {
                *got_frame = 1;
            }
        } else if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_log(NULL, AV_LOG_DEBUG, "%s: avcodec_receive_packet got %s "
                   " %d\n",  __func__, ret == AVERROR(EAGAIN) ? "AGAIN" : "EOF",
                   ret);
            *got_frame = 0;
            ret = 0;
        } else {
            av_log(NULL, AV_LOG_ERROR, "%s: avcodec_receive_packet fail to "
                   "receive packet %d\n", __func__, ret);
        }

        // nothing to forward: drop the scratch packet and stop
        if (ret < 0 || !(*got_frame)) {
            av_packet_free(&enc_pkt);
            break;
        }

        enc_worker->encoder_output_frames++;

        if (filt_frame) {
            av_log(NULL, AV_LOG_DEBUG, "encoder mux -> pts=%ld,dts=%ld\n",
                   enc_pkt->pts, enc_pkt->dts);
        } else {
            av_log(NULL, AV_LOG_DEBUG, "encoder mux -> flush pts=%ld,dts=%ld\n",
                   enc_pkt->pts, enc_pkt->dts);
        }

        // timestamp sanity checks: report only, the packet is still forwarded
        if ((enc_worker->last_encoded_pts != AV_NOPTS_VALUE) &&
            (enc_pkt->pts == enc_worker->last_encoded_pts)) {
            av_log(NULL, AV_LOG_ERROR, "%s same pts!!! pts=%ld,last_pts=%ld\n",
                   __func__, enc_pkt->pts, enc_worker->last_encoded_pts);
        }
        if ((enc_worker->last_encoded_dts != AV_NOPTS_VALUE) &&
            (enc_pkt->dts <= enc_worker->last_encoded_dts)) {
            av_log(NULL, AV_LOG_ERROR, "%s Non-monotonically increasing dts!!! "
                   "dts=%ld,last_dts=%ld\n",
                   __func__, enc_pkt->dts, enc_worker->last_encoded_dts);
        }

        enc_worker->last_encoded_pts = enc_pkt->pts;
        enc_worker->last_encoded_dts = enc_pkt->dts;

        /* send encoded frame */
        // NOTE(review): packet ownership after a failed send_mux_packet() is
        // not visible here - confirm who frees enc_pkt on that path
        ret = send_mux_packet(enc_worker->mux_worker, enc_pkt);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "encoder stream send_mux_packet error\n");
            break;
        }
    }

    return ret;
}

// Drain the encoder by submitting a NULL frame through encoder_write_frame().
// Does nothing and returns 0 when the encoder was never started or the codec
// lacks AV_CODEC_CAP_DELAY. Otherwise returns the negative error from
// encoder_write_frame(), 0 when no frame was reported, or its non-negative
// result.
static int flush_encoder(encoder_worker *enc_worker) {
    int got_frame;
    int status;

    if (!enc_worker->enc_started ||
        !(enc_worker->enc_ctx->codec->capabilities & AV_CODEC_CAP_DELAY)) {
        return 0;
    }

    av_log(NULL, AV_LOG_DEBUG, "Flushing stream encoder\n");
    status = encoder_write_frame(enc_worker, NULL, &got_frame);
    if (status >= 0 && !got_frame) {
        return 0;
    }
    return status;
}

// encoder thread routine
static void *encoder_thread_run(void *thread_data) {
    int ret = 0, got_frame = 0;
    encoder_worker *enc_worker = (encoder_worker *)thread_data;
    while (1) {
        pthread_mutex_lock(&enc_worker->frame_lock);
        while (is_fifo_empty(enc_worker->enc_frame_fifo)) {
            // flush the encoder if filter has flushed
            if (enc_worker->filter_flush) {
                av_log(NULL, AV_LOG_INFO, "%s filter flushed, encoder %d_%d ready to flush\n",
                       __func__, enc_worker->filter_index, enc_worker->enc_index);
                pthread_mutex_unlock(&enc_worker->frame_lock);
                goto flush;
            }

            if (!enc_worker->should_exit) {
                pthread_cond_wait(&enc_worker->consume_cond, &enc_worker->frame_lock);
            } else {
                pthread_mutex_unlock(&enc_worker->frame_lock);
                goto flush;
            }
        }

        // read encode frame from encoder fifo buffer
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_read(enc_worker->enc_frame_fifo, &enc_worker->filtered_frame, 1);
#else
        av_fifo_generic_read(enc_worker->enc_frame_fifo, &enc_worker->filtered_frame, sizeof(AVFrame*), NULL);
#endif
        pthread_cond_signal(&enc_worker->produce_cond);
        pthread_mutex_unlock(&enc_worker->frame_lock);
        if (enc_worker->filtered_frame->data != enc_worker->filtered_frame->extended_data) {
            av_log(NULL, AV_LOG_DEBUG, "%s frame %p data %p != extended_data %p\n",
                   __func__, enc_worker->filtered_frame, enc_worker->filtered_frame->data,
                   enc_worker->filtered_frame->extended_data);
            enc_worker->filtered_frame->extended_data = enc_worker->filtered_frame->data;
        }

        if (!enc_worker->enc_started) {
            if (open_output_file(enc_worker, enc_worker->filtered_frame)) {
                pthread_mutex_unlock(&enc_worker->frame_lock);
                break;
            }
            enc_worker->last_encoded_pts = AV_NOPTS_VALUE;
            enc_worker->last_encoded_dts = AV_NOPTS_VALUE;
            enc_worker->enc_started = 1;
            enc_worker->mux_worker->mux_started = 1;
        }

        enc_worker->encoded_frames++;
        av_log(NULL, AV_LOG_DEBUG, "encoder encoding total=%lu, output total="
               "%lu, %dx%d,pts=%lu,dts=%lu\n",
               enc_worker->encoded_frames, enc_worker->encoder_output_frames,
               enc_worker->filtered_frame->width, enc_worker->filtered_frame->height,
               enc_worker->filtered_frame->pts, enc_worker->filtered_frame->pkt_dts);

        ret = encoder_write_frame(enc_worker, enc_worker->filtered_frame, &got_frame);

        av_frame_free(&enc_worker->filtered_frame);
        if (ret < 0) {
            goto end;
        }
        av_log(NULL, AV_LOG_DEBUG, "encode one frame finish\n");
    } // while

flush:
    ret = flush_encoder(enc_worker);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "encoder flushing encoder failed\n");
    }
    muxer_worker *mux_worker = enc_worker->mux_worker;
    pthread_mutex_lock(&mux_worker->packet_lock);
    mux_worker->encoder_flush = 1;
    if (is_fifo_empty(mux_worker->mux_packet_fifo)) {
        pthread_cond_signal(&mux_worker->consume_cond);
    }
    pthread_mutex_unlock(&mux_worker->packet_lock);

end:
    if (enc_worker->enc_started) {
        avcodec_free_context(&enc_worker->enc_ctx);
    }
    av_log(NULL, AV_LOG_ERROR, "encoder %d_%d enc num %ld exit ret=0x%x.\n",
           enc_worker->filter_index, enc_worker->enc_index, enc_worker->encoded_frames, ret);
    enc_worker->should_exit = THREAD_STATE_EXIT_ISSUED;

    pthread_mutex_lock(&enc_worker->common->lock);
    enc_worker->common->exit_enc_num++;
    av_log(NULL, AV_LOG_DEBUG, "exit enc num %d\n", enc_worker->common->exit_enc_num);
    pthread_mutex_unlock(&enc_worker->common->lock);

    return (void *)((long)ret);
}

/**
 * Muxer thread entry point.
 *
 * Loops taking one encoded packet at a time from mux_worker->mux_packet_fifo,
 * rescales its timestamps from mux_worker->timebase to the time base of output
 * stream 0 and hands it to av_interleaved_write_frame(). When the FIFO is
 * empty the thread waits on consume_cond unless encoder_flush or should_exit
 * is set, in which case it leaves the loop. On the way out it writes the
 * trailer, closes the output and frees the format context (only if the muxer
 * was started), then bumps common->exit_mux_num under common->lock.
 *
 * @param thread_data  the muxer_worker this thread operates on
 * @return the last status held in ret (0, or the negative value returned by
 *         av_interleaved_write_frame()), cast to void *
 */
static void *muxer_thread_run(void *thread_data) {
    int ret = 0;
    int64_t mux_last_time = 0;
    int64_t mux_last_dts = AV_NOPTS_VALUE;
    int64_t mux_current_time = 0;
    AVRational us_tb = av_make_q(1, 1000000);
    int64_t usleep_time = 0;
    muxer_worker *mux_worker = (muxer_worker *)thread_data;
    while (1) {
        pthread_mutex_lock(&mux_worker->packet_lock);
        while (is_fifo_empty(mux_worker->mux_packet_fifo)) {
            // flush the encoder if filter has flushed
            if (mux_worker->encoder_flush) {
                av_log(NULL, AV_LOG_INFO, "%s encoder flushed, muxer %d_%d ready to flush\n",
                       __func__, mux_worker->filter_index, mux_worker->enc_index);
                pthread_mutex_unlock(&mux_worker->packet_lock);
                goto flush;
            }
            if (!mux_worker->should_exit) {
                pthread_cond_wait(&mux_worker->consume_cond, &mux_worker->packet_lock);
            } else {
                pthread_mutex_unlock(&mux_worker->packet_lock);
                goto flush;
            }
        }
        if (!mux_worker->mux_started) {
            av_log(NULL, AV_LOG_ERROR, "muxer not init yet, it is unexpected\n");
            // packet_lock is still held here; release it before leaving the
            // loop so that whoever locks it next does not block forever
            pthread_mutex_unlock(&mux_worker->packet_lock);
            break;
        }

        // read encode frame from encoder fifo buffer
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_read(mux_worker->mux_packet_fifo, &mux_worker->encoded_packet, 1);
#else
        av_fifo_generic_read(mux_worker->mux_packet_fifo, &mux_worker->encoded_packet, sizeof(AVPacket*), NULL);
#endif
        // one FIFO slot was released: wake whoever waits on produce_cond
        pthread_cond_signal(&mux_worker->produce_cond);
        pthread_mutex_unlock(&mux_worker->packet_lock);

        av_packet_rescale_ts(mux_worker->encoded_packet, mux_worker->timebase,
                             mux_worker->ofmt_ctx->streams[0]->time_base);
        /* prepare packet for muxing */
        // indices are printed filter first, encoder second, like the other logs of this thread
        av_log(NULL, AV_LOG_DEBUG, "%s encoder stream %d_%d mux <- pts=%ld,dts=%ld timebase %d:%d\n",
               __func__, mux_worker->filter_index, mux_worker->enc_index,
                mux_worker->encoded_packet->pts, mux_worker->encoded_packet->dts,
                mux_worker->ofmt_ctx->streams[0]->time_base.num, mux_worker->ofmt_ctx->streams[0]->time_base.den);

        // need to control mux speed when output is live stream
        if (mux_worker->live) {
            if (mux_last_time) {
                mux_current_time = av_gettime_relative();
                // dts distance to the previous packet (in microseconds) minus the
                // wall-clock time spent since then; a negative rest is carried over
                usleep_time += av_rescale_q(mux_worker->encoded_packet->dts - mux_last_dts,
                              mux_worker->ofmt_ctx->streams[0]->time_base, us_tb) -
                              (mux_current_time - mux_last_time);
                mux_last_time = mux_current_time;
                mux_last_dts = mux_worker->encoded_packet->dts;
                // sleep and reset sleep time, otherwise record sleep and add to next sleep time
                if (usleep_time > 0) {
                    usleep(usleep_time);
                    usleep_time = 0;
                }
            } else {
                // first packet: only record the reference point
                mux_last_time = av_gettime_relative();
                mux_last_dts = mux_worker->encoded_packet->dts;
            }
        }
        ret = av_interleaved_write_frame(mux_worker->ofmt_ctx, mux_worker->encoded_packet);
        av_packet_free(&mux_worker->encoded_packet);
        mux_worker->mux_pkt_num++;
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "encoder stream interleaved write error\n");
            break;
        }
        av_log(NULL, AV_LOG_DEBUG, "mux one packet finish\n");
    } // while

flush:
    // check the context before touching it, not after
    if (mux_worker->mux_started && mux_worker->ofmt_ctx) {
        av_write_trailer(mux_worker->ofmt_ctx);
        if (!(mux_worker->ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
            avio_closep(&mux_worker->ofmt_ctx->pb);
        }
        avformat_free_context(mux_worker->ofmt_ctx);
    }
    av_log(NULL, AV_LOG_ERROR, "muxer %d_%d exit mux num %d ret=0x%x.\n",
           mux_worker->filter_index, mux_worker->enc_index, mux_worker->mux_pkt_num, ret);
    mux_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
    pthread_mutex_lock(&mux_worker->common->lock);
    mux_worker->common->exit_mux_num++;
    av_log(NULL, AV_LOG_DEBUG, "exit mux num %d\n", mux_worker->common->exit_mux_num);
    pthread_mutex_unlock(&mux_worker->common->lock);
    return (void *)((long)ret);
}


// MAIN THREAD FUNCTION
/**
 * Validate a picture size.
 *
 * @return 0 when both dimensions are even and lie inside
 *         [MIN_WIDTH, MAX_WIDTH] / [MIN_HEIGHT, MAX_HEIGHT], -1 otherwise
 */
static inline int check_resolution(int width, int height) {
    const int both_even = (width % 2 == 0) && (height % 2 == 0);
    const int width_ok  = (width >= MIN_WIDTH) && (width <= MAX_WIDTH);
    const int height_ok = (height >= MIN_HEIGHT) && (height <= MAX_HEIGHT);

    return (both_even && width_ok && height_ok) ? 0 : -1;
}

// get resolution from args, format like widthxheight
static int read_resolution(const char *args, int *width, int *height) {
    char *ch = NULL;
    *width = strtoul(args, &ch, 10);
    if (*ch != 'x') {
        av_log(NULL, AV_LOG_ERROR, "invalid resolution format %s\n",
                args);
        return -1;
    }
    *height = strtoul(ch + 1, NULL, 10);
    if (check_resolution(*width, *height) < 0) {
        av_log(NULL, AV_LOG_ERROR, "invalid resolution value %s\n",
                args);
        return -1;
    }
    return 0;
}

/**
 * Obtain a filter description either inline or from a file.
 *
 * If filter_desc contains both "inputs=" and ":layout=" it is taken as the
 * description itself and copied to dest. Otherwise filter_desc is treated as
 * a file name and the first line of that file (at most 1535 characters, line
 * break included if present) is copied to dest.
 *
 * @param filter_desc  inline description or path of a description file
 * @param dest         output buffer; its capacity is not visible here, the
 *                     caller must provide enough room (NOTE(review): the
 *                     inline copy is unbounded - confirm the buffer size)
 * @return 0 on success, -1 if the file cannot be opened or is empty
 */
static int read_filter(const char *filter_desc, char *dest) {
    FILE *file;
    char one_line[1536] = {0};
    int ret = -1;

    if (strstr(filter_desc, "inputs=") && strstr(filter_desc, ":layout=")) {
        strcpy(dest, filter_desc);
        return 0;
    }

    file = fopen(filter_desc, "r");
    if (!file) {
        av_log(NULL, AV_LOG_ERROR, "ERROR: %s: Cannot open filter file: %s\n",
            __func__, filter_desc);
        return -1;
    }

    if (fgets(one_line, sizeof(one_line), file)) {
        strcpy(dest, one_line);
        ret = 0;
    }

    // the stream is released on both the success and the failure path
    fclose(file);
    return ret;
}

// judge the input name of -I is image or not
// only support format: bmp, png, jpg
static int is_image(const char *name) {
    FILE *file;
    unsigned short pos[5];
    file = fopen(name,"r");
    if(!file) {
        av_log(NULL, AV_LOG_ERROR, "ERROR: %s: Cannot open filter file: %s\n",
                __func__, name);
        return -1;
    }

    fread(pos, 8, 1, file);
    fclose(file);

    if(pos[0]==BMP) {
        return 1;
    }
    else if(pos[0]==JPG) {
        return 2;
    }
    else if(PNG[0]==pos[0]&&PNG[1]==pos[1]&&PNG[2]==pos[2]&&PNG[3]==pos[3]) {
        return 3;
    }
    else if(GIF[0]==pos[0]&&GIF[1]==pos[1]&&GIF[2]==pos[2]) {
        return 0;
    }
    return -1;
}

/**
 * Sum encoder_output_frames over every encoder worker of every filter.
 *
 * The sum is kept in an unsigned long, the type that is returned, so a long
 * run cannot overflow a signed int on the way.
 *
 * @return total of encoder_output_frames over all encoder workers
 */
static inline unsigned long cur_total_frames(void) {
    unsigned long total_frame = 0;
    int i, j;

    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < xstack_workers[i]->outputs; j++) {
            total_frame += encoder_workers[i][j]->encoder_output_frames;
        }
    }
    return total_frame;
}

/**
 * Print the command line help to stdout.
 * The list mirrors the options accepted by the getopt_long() table in main().
 */
static void help_usage(void) {
    printf("Usage: \n"
            "-i | --input                    input video file name.\n"
            "-I | --input_image              input image file name.\n"
            "-d | --decoder                  decoder name.\n"
            "-p | --decoder_params           decoder parameters.\n"
            "-S | --input_res                only for yuv input, yuv resolution, must set for yuv input\n"
            "-s | --scale_res                for sw input like yuv and picture , scaled resolution, if use default, "
                                             "it will not do scale in most situation\n"
            "-l | --loop                     number of input cycles.\n"
            "-e | --encoder                  encoder name.\n"
            "-x | --encoder_params           encoder parameters.\n"
            "-o | --output                   output file name.\n"
            "-f | --filter                   ni_quadra_xstack filter description.\n"
            "-n | --devid                    device id.\n"
            "-k | --force-keyframe           set the force_source_keyframe flag on every encoder.\n"
            "-v | --loglevel                 available debug level: error, warning, info, debug, trace.\n"
            "-h | --help                     print this help information.\n");
}

void setup_loglevel(char *loglevel) {
    if (loglevel) {
        if (!strcmp(loglevel, "error")) {
            av_log_set_level(AV_LOG_ERROR);
        } else if (!strcmp(loglevel, "warning")) {
            av_log_set_level(AV_LOG_WARNING);
        } else if (!strcmp(loglevel, "info")) {
            av_log_set_level(AV_LOG_INFO);
        } else if (!strcmp(loglevel, "debug")) {
            av_log_set_level(AV_LOG_DEBUG);
        } else if (!strcmp(loglevel, "trace")) {
            av_log_set_level(AV_LOG_TRACE);
        } else {
            av_log_set_level(AV_LOG_INFO);
        }
    } else {
        av_log_set_level(AV_LOG_INFO);
    }
}

/**
 * Print a one-line progress report (frame count, elapsed time, fps).
 *
 * Nothing is printed while print_stat is zero. Intermediate reports are
 * throttled: the first call only records cur_time, and later calls are
 * dropped until at least 500000 time units (microseconds, given the division
 * by 1000000.0 below) have passed since the last accepted one. The last
 * report is never throttled and ends with a newline instead of a carriage
 * return.
 *
 * @param is_last_report  non-zero for the final report
 * @param timer_start     start time, same unit as cur_time
 * @param cur_time        current time
 * @param frame_number    number of frames to report
 */
static void print_report(int is_last_report, int64_t timer_start, int64_t cur_time,
                         unsigned long frame_number) {
    static int64_t last_time = -1;
    char line[1024];
    float elapsed;
    float rate;
    char terminator;

    if (!print_stat)
        return;

    if (!is_last_report) {
        const int first_call = (last_time == -1);

        // too early for another intermediate report
        if (!first_call && (cur_time - last_time) < 500000)
            return;
        last_time = cur_time;
        // the very first call only arms the throttle
        if (first_call)
            return;
    }

    elapsed = (cur_time - timer_start) / 1000000.0;

    // no rate is reported during the first second
    if (elapsed > 1)
        rate = frame_number / elapsed;
    else
        rate = 0;

    // print_stat is a variable from outside this function; it is read again
    // here exactly like the original flow does
    if (!print_stat)
        return;

    terminator = is_last_report ? '\n' : '\r';

    // one decimal is shown only for rates below 9.95
    snprintf(line, sizeof(line), "frame=%5lu spend time=%f fps=%3.*f",
             frame_number, elapsed, (rate < 9.95), rate);

    // when the log level would hide INFO messages, write to stderr directly
    if (av_log_get_level() < AV_LOG_INFO) {
        fprintf(stderr, "%s   %c", line, terminator);
    } else {
        av_log(NULL, AV_LOG_INFO, "%s   %c", line, terminator);
    }
    fflush(stderr);
}


int main(int argc, char **argv) {
    int ret, i, j;
    int state = EXIT_SUCCESS;

    int input_num = 0;
    int picture_num = 0;
    int temp_out_num = 0;
    int output_num[NI_MAX_XSTACK_FILTER] = {0};
    input_info in_info[NI_MAX_XSTACK_INPUTS] = {0};
    filter_info f_info[NI_MAX_XSTACK_FILTER] = {0};
    output_info out_info[NI_MAX_XSTACK_FILTER][NI_MAX_XSTACK_OUTPUTS] = {0};

    unsigned int dec_loop = 1;
    char *encoder_name = NULL;
    char *encoder_params = NULL;
    int force_source_keyframe = 0;
    int devid = 0;
    char *loglevel = NULL;

    pthread_attr_t attr;
    void *result;

    char *suffix;
    int opt, opt_index;
    const char *opt_string = "i:I:d:p:S:s:l:o:e:x:f:n:v:hk";
    static struct option long_options[] = {
        {"input",                 required_argument, NULL, 'i'},
        {"input_image",           required_argument, NULL, 'I'},
        {"decoder",               required_argument, NULL, 'd'},
        {"decoder_params",        required_argument, NULL, 'p'},
        {"input_res",             required_argument, NULL, 'S'},
        {"scale_res",             required_argument, NULL, 's'},
        {"loop",                  required_argument, NULL, 'l'},
        {"encoder",               required_argument, NULL, 'e'},
        {"encoder_params",        required_argument, NULL, 'x'},
        {"output",                required_argument, NULL, 'o'},
        {"filter",                required_argument, NULL, 'f'},
        {"devid",                 required_argument, NULL, 'n'},
        {"loglevel",              required_argument, NULL, 'v'},
        {"help",                  no_argument,       NULL, 'h'},
        {"force-keyframe",        no_argument,       NULL, 'k'},
        { NULL,                   0,                 NULL,  0 },
    };

    while ((opt = getopt_long(argc, argv, opt_string, long_options, &opt_index)) != -1) {
        switch (opt) {
            case 'i':
                input_num++;
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(in_info[input_num - 1].input_name, optarg);
                suffix = strrchr(optarg, '.');
                if (strcasecmp(suffix, ".yuv") == 0) {
                    in_info[input_num - 1].type = SW_VIDEO;
                } else {
                    in_info[input_num - 1].type = HW_VIDEO;
                }
                break;
            case 'I':
                if (is_image(optarg) <= 0 || get_input_type(optarg) != SW_PICTURE) {
                    av_log(NULL, AV_LOG_ERROR, "Not support this kind of picture or gif %s\n",
                           optarg);
                    return EXIT_FAILURE;
                }
                input_num++;
                picture_num++;
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(in_info[input_num - 1].input_name, optarg);
                in_info[input_num - 1].type = SW_PICTURE;
                break;
            case 'd':
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d decoders\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(in_info[input_num - 1].decoder_name, optarg);
                break;
            case 'p':
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d decoder_params\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(in_info[input_num - 1].decoder_params, optarg);
                break;
            case 'S':
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (in_info[input_num - 1].type != SW_VIDEO) {
                    av_log(NULL, AV_LOG_ERROR, "input is not yuv\n");
                    break;
                }
                if (read_resolution(optarg, &in_info[input_num - 1].input_width,
                    &in_info[input_num - 1].input_height) < 0) {
                    return EXIT_FAILURE;
                }
                break;
            case 's':
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (read_resolution(optarg, &in_info[input_num - 1].scaled_width,
                    &in_info[input_num - 1].scaled_height) < 0) {
                    return EXIT_FAILURE;
                }
                break;
            case 'f':
                filter_num++;
                if (filter_num < 1 || filter_num > NI_MAX_XSTACK_FILTER) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d filter\n",
                           NI_MAX_XSTACK_FILTER);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (read_filter(optarg, f_info[filter_num - 1].filter_desc) < 0) {
                    av_log(NULL, AV_LOG_ERROR, "fail to read filter description or filter description invalid\n");
                    return EXIT_FAILURE;
                }
                break;
            case 'e':
                encoder_name = optarg;
                break;
            case 'o':
                output_num[filter_num - 1]++;
                temp_out_num = output_num[filter_num - 1] - 1;
                if (output_num[filter_num - 1] > NI_MAX_XSTACK_OUTPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d output files\n",
                           NI_MAX_XSTACK_OUTPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (output_num[filter_num - 1] < 1) {
                    av_log(NULL, AV_LOG_ERROR, "invalid output number %d\n", output_num[filter_num - 1]);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(out_info[filter_num - 1][temp_out_num].output_name, optarg);
                break;
            case 'x':
                encoder_params = optarg;
                break;
            case 'l':
                dec_loop = strtoul(optarg, NULL, 10);
                break;
            case 'n':
                devid = atoi(optarg);
                break;
            case 'v':
                loglevel = optarg;
                break;
            case 'h':
                help_usage();
                return EXIT_SUCCESS;
            case 'k':
                force_source_keyframe = 1;
                break;
            default:
                av_log(NULL, AV_LOG_ERROR, "can not parse the arg '-%c %s'\n",
                       opt, optarg);
                help_usage();
                return EXIT_FAILURE;
        }
    }

    setup_loglevel(loglevel);

    // check the input parameters
    if (input_num < 2) {
        av_log(NULL, AV_LOG_ERROR,
               "Error number of input files less than 2\n");
        return EXIT_FAILURE;
    }

    if (input_num == picture_num) {
        av_log(NULL, AV_LOG_ERROR,
               "Error, must have a video input.\n");
        return EXIT_FAILURE;
    }

    for (i = 0; i < input_num; i++) {
        // if (!in_info[i].decoder_name[0] && in_info[i].type == HW_VIDEO) {
        //     av_log(NULL, AV_LOG_ERROR,
        //         "Error, didn't get decoder name for input file %d.\n", i);
        //     return EXIT_FAILURE;
        // }
        if (in_info[i].type == SW_VIDEO && (in_info[i].input_width == 0 || in_info[i].input_height == 0)) {
            av_log(NULL, AV_LOG_ERROR,
                "Error, didn't get width or height for yuv input file %d.\n", i);
            return EXIT_FAILURE;
        }
    }

    if (!filter_num) {
        av_log(NULL, AV_LOG_ERROR, "Error missing -f filter description.\n");
        return EXIT_FAILURE;
    }

    for (i = 0; i < filter_num; i++) {
        if (output_num[i] < 1) {
            av_log(NULL, AV_LOG_ERROR,
                   "Error number of output files\n");
            return EXIT_FAILURE;
        }
    }

    if (!encoder_name) {
        av_log(NULL, AV_LOG_ERROR, "Error no encode name specified.\n");
        return EXIT_FAILURE;
    }

    //alloc common
    common *common = alloc_common();
    if (common == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate common data.\n");
        state = EXIT_FAILURE;
        goto end;
    }

    //alloc demuxer_workers
    for (i = 0; i < input_num; i++) {
        demuxer_workers[i] = calloc(1, sizeof(demuxer_worker));
        if (demuxer_workers[i] == NULL) {
            av_log(NULL, AV_LOG_ERROR, "Error alloc decoder worker.\n");
            state = EXIT_FAILURE;
            goto end;
        }
        memset(demuxer_workers[i], 0, sizeof(demuxer_worker));
    }

    //alloc decoder_workers
    for (i = 0; i < input_num; i++) {
        decoder_workers[i] = calloc(1, sizeof(decoder_worker));
        if (decoder_workers[i] == NULL) {
            av_log(NULL, AV_LOG_ERROR, "Error alloc decoder worker.\n");
            state = EXIT_FAILURE;
            goto end;
        }
        memset(decoder_workers[i], 0, sizeof(decoder_worker));
    }

    // init filter description
    for (i = 0; i < filter_num; i++) {
        xstack_workers[i] = calloc(1, sizeof(filter_worker));
        if (xstack_workers[i] == NULL) {
            av_log(NULL, AV_LOG_ERROR, "Error alloc xstack worker.\n");
            state = EXIT_FAILURE;
            goto end;
        }
        memset(xstack_workers[i], 0, sizeof(filter_worker));
    }

    //alloc encoder_workers
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            encoder_workers[i][j] = calloc(1, sizeof(encoder_worker));
            if (encoder_workers[i][j] == NULL) {
                av_log(NULL, AV_LOG_ERROR, "Error alloc encoder worker.\n");
                state = EXIT_FAILURE;
                goto end;
            }
            memset(encoder_workers[i][j], 0, sizeof(encoder_worker));
        }
    }

    //alloc muxer_workers
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            muxer_workers[i][j] = calloc(1, sizeof(muxer_worker));
            if (muxer_workers[i][j] == NULL) {
                av_log(NULL, AV_LOG_ERROR, "Error alloc encoder worker.\n");
                state = EXIT_FAILURE;
                goto end;
            }
            memset(muxer_workers[i][j], 0, sizeof(muxer_worker));
        }
    }

    // init thread attr
    ret = pthread_attr_init(&attr);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "fail to initialize attr: %s.\n", strerror(ret));
        state = EXIT_FAILURE;
        goto end;
    }

    ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "fail to set attr detachstate: %s.\n", strerror(ret));
        pthread_attr_destroy(&attr);
        state = EXIT_FAILURE;
        goto end;
    }

    //run filter thread
    for (i = 0; i < filter_num; i++) {
        filter_worker *xstack_worker = xstack_workers[i];
        xstack_worker->index = i;
        xstack_worker->filtered_frames = 0;
        xstack_worker->common = common;
        strcpy(xstack_worker->filter_desc, f_info[i].filter_desc);

        // init_xstack
        if (init_xstack(xstack_worker, input_num, output_num[i])) {
            av_log(NULL, AV_LOG_ERROR, "init_xstack failed !\n");
            state = EXIT_FAILURE;
            goto end;
        }

        ret = pthread_create(&xstack_worker->tid, &attr, &filter_thread_run, xstack_worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to create xstack thread %d: %s.\n", i, strerror(ret));
            pthread_attr_destroy(&attr);
            cleanup_filter_worker(xstack_worker);
            state = EXIT_FAILURE;
            goto end;
        }
    }

    //run encode thread
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            encoder_worker *enc_worker = encoder_workers[i][j];
            muxer_worker *mux_worker = muxer_workers[i][j];
            enc_worker->xstack = xstack_workers[i];
            enc_worker->filter_index = i;
            enc_worker->enc_index = j;

            strcpy(enc_worker->output_name, out_info[i][j].output_name);
            strcpy(enc_worker->encoder_name, encoder_name);
            if (encoder_params) {
                strcpy(enc_worker->encoder_params, encoder_params);
            }
            enc_worker->device_id = devid;
            enc_worker->force_source_keyframe = force_source_keyframe;
            enc_worker->common = common;
            enc_worker->mux_worker = mux_worker;
            if (init_encoder_worker(enc_worker)) {
                av_log(NULL, AV_LOG_ERROR, "init_encoder_worker failed !\n");
                state = EXIT_FAILURE;
                goto end;
            }
            mux_worker->filter_index = i;
            mux_worker->enc_index = j;
            mux_worker->common = common;
            mux_worker->enc_worker = enc_worker;
            if (init_muxer_worker(mux_worker)) {
                av_log(NULL, AV_LOG_ERROR, "init_muxer_worker failed !\n");
                state = EXIT_FAILURE;
                goto end;
            }

            ret = pthread_create(&enc_worker->tid, &attr, &encoder_thread_run, enc_worker);
            if (ret) {
                av_log(NULL, AV_LOG_ERROR, "failed to create codec thread %d: %s.\n", i, strerror(ret));
                pthread_attr_destroy(&attr);
                cleanup_encoder_worker(enc_worker);
                state = EXIT_FAILURE;
                goto end;
            }
            active_encoder_workers++;

            ret = pthread_create(&mux_worker->tid, &attr, &muxer_thread_run, mux_worker);
            if (ret) {
                av_log(NULL, AV_LOG_ERROR, "failed to create codec thread %d: %s.\n", i, strerror(ret));
                pthread_attr_destroy(&attr);
                cleanup_muxer_worker(mux_worker);
                state = EXIT_FAILURE;
                goto end;
            }
            active_muxer_workers++;
        }
    }

    //run decode thread
    for (i = 0; i < input_num; i++) {
        decoder_worker *dec_worker = decoder_workers[i];
        demuxer_worker *demux_worker = demuxer_workers[i];
        dec_worker->xstack = xstack_workers;

        dec_worker->index = i;
        strcpy(dec_worker->input_file, in_info[i].input_name);
        if (in_info[i].type == SW_VIDEO) {
            dec_worker->type = SW_VIDEO;
            if (check_resolution(in_info[i].input_width, in_info[i].input_height) < 0) {
                av_log(NULL, AV_LOG_ERROR, "invalid input resolution\n");
                goto end;
            }
            dec_worker->in.sw.input_width = in_info[i].input_width;
            dec_worker->in.sw.input_height = in_info[i].input_height;
            dec_worker->in.sw.width = in_info[i].scaled_width;
            dec_worker->in.sw.height = in_info[i].scaled_height;
            dec_worker->in.sw.sw_loop = dec_loop;
        } else if (in_info[i].type == SW_PICTURE) {
            dec_worker->type = SW_PICTURE;
            dec_worker->in.sw.width = in_info[i].scaled_width;
            dec_worker->in.sw.height = in_info[i].scaled_height;
            dec_worker->in.sw.sw_loop = 0;
        } else {
            dec_worker->type = HW_VIDEO;
            if (in_info[i].decoder_name[0]) {
                strcpy(dec_worker->in.hw.decoder_name, in_info[i].decoder_name);
            }
            if (in_info[i].decoder_params[0]) {
                strcpy(dec_worker->in.hw.decoder_params, in_info[i].decoder_params);
            }
            dec_worker->demux_worker = demux_worker;
        }
        dec_worker->common = common;
        dec_worker->devid  = devid;

        dec_worker->decoded_frame = av_frame_alloc();
        if (!dec_worker->decoded_frame) {
            av_log(NULL, AV_LOG_ERROR, "failed to allocate decoded frame for codec worker %d\n", i);
            goto end;
        }

        ret = init_decoder_worker(dec_worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to init_decoder_worker %d.\n", i);
            pthread_attr_destroy(&attr);
            state = EXIT_FAILURE;
            goto end;
        }

        if (dec_worker->type != HW_VIDEO) {
            FREE_AND_NULLIFY(demuxer_workers[i]);
        } else {
            demux_worker->index = i;
            demux_worker->common = common;
            demux_worker->dec_worker = dec_worker;

            ret = init_demuxer_worker(demux_worker);
            if (ret) {
                av_log(NULL, AV_LOG_ERROR, "failed to init_decoder_worker %d.\n", i);
                pthread_attr_destroy(&attr);
                state = EXIT_FAILURE;
                goto end;
            }
        }

        ret = pthread_create(&dec_worker->tid, &attr, &decoder_thread_run, dec_worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to create codec thread %d: %s.\n", i, strerror(ret));
            pthread_attr_destroy(&attr);
            cleanup_decoder_worker(dec_worker);
            state = EXIT_FAILURE;
            goto end;
        }
        active_decoder_workers++;

        if (decoder_workers[i]->type == HW_VIDEO) {
            ret = pthread_create(&demux_worker->tid, &attr, &demuxer_thread_run, demux_worker);
            if (ret) {
                av_log(NULL, AV_LOG_ERROR, "failed to create codec thread %d: %s.\n", i, strerror(ret));
                pthread_attr_destroy(&attr);
                cleanup_demuxer_worker(demux_worker);
                state = EXIT_FAILURE;
                goto end;
            }
            active_demuxer_workers++;
        }
    }

    ret = pthread_attr_destroy(&attr);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "failed to destroy attr: %s.\n", strerror(ret));
        state = EXIT_FAILURE;
        goto end;
    }

    // start calculate and print fps
    int print = 0;
    int64_t timer_start = av_gettime_relative();
    while (global_stop == 0) {
        if (common->exit_dec_num == active_decoder_workers &&
            common->exit_enc_num == active_encoder_workers &&
            common->exit_mux_num == active_muxer_workers &&
            common->exit_demux_num == active_demuxer_workers) {
            global_stop = 1;
            break;
        }
        usleep(100000);
        print++;
        if (print == 10) {
            print = 0;
            print_report(0, timer_start, av_gettime_relative(),
                        cur_total_frames());
        }
    }
    print_report(1, timer_start, av_gettime_relative(),
                 cur_total_frames());

    av_log(NULL, AV_LOG_INFO, "main thread is going to exit.\n");
end:
    // free active_demuxer_workers
    for (i = 0; i < input_num; i++) {
        if (!demuxer_workers[i]) {
            continue;
        }
        demuxer_worker *demux_worker = demuxer_workers[i];

        av_log(NULL, AV_LOG_DEBUG, "demuxer thread %d ready to exit.\n",
               demux_worker->index);

        pthread_mutex_lock(&demux_worker->packet_lock);
        demux_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
        pthread_mutex_unlock(&demux_worker->packet_lock);
        if (pthread_join(demux_worker->tid, &result) == 0) {
            if ((long)result != 0) {
                av_log(NULL, AV_LOG_INFO, "pthread_join dec_worker %d ret %ld"
                       "\n", demux_worker->index, (long)result);
                state = EXIT_FAILURE;
            }
        }
        cleanup_demuxer_worker(demux_worker);
        FREE_AND_NULLIFY(demuxer_workers[i]);
    }

    // free active_decoder_workers
    for (i = 0; i < input_num; i++) {
        decoder_worker *dec_worker = decoder_workers[i];

        av_log(NULL, AV_LOG_DEBUG, "decoder thread %d ready to exit.\n",
               dec_worker->index);

        pthread_mutex_lock(&dec_worker->frame_lock);
        dec_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
        pthread_mutex_unlock(&dec_worker->frame_lock);
        if (pthread_join(dec_worker->tid, &result) == 0) {
            if ((long)result != 0) {
                av_log(NULL, AV_LOG_INFO, "pthread_join dec_worker %d ret %ld"
                       "\n", dec_worker->index, (long)result);
                state = EXIT_FAILURE;
            }
        }
        cleanup_decoder_worker(dec_worker);
        FREE_AND_NULLIFY(decoder_workers[i]);
    }

    // free active_encoder_workers
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            encoder_worker *enc_worker = encoder_workers[i][j];

            av_log(NULL, AV_LOG_DEBUG, "encoder thread %d_%d ready to exit.\n",
                   enc_worker->filter_index, enc_worker->enc_index);

            pthread_mutex_lock(&enc_worker->frame_lock);
            enc_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
            pthread_mutex_unlock(&enc_worker->frame_lock);
            if (pthread_join(enc_worker->tid, &result) == 0) {
                if ((long)result != 0) {
                    av_log(NULL, AV_LOG_INFO, "pthread_join encoder worker ret %ld\n",
                        (long)result);
                    state = EXIT_FAILURE;
                }
            }
            cleanup_encoder_worker(enc_worker);
            FREE_AND_NULLIFY(encoder_workers[i][j]);
        }
    }

    // free active_muxer_workers
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            muxer_worker *mux_worker = muxer_workers[i][j];

            av_log(NULL, AV_LOG_DEBUG, "muxer thread %d_%d ready to exit.\n",
                   mux_worker->filter_index, mux_worker->enc_index);

            pthread_mutex_lock(&mux_worker->packet_lock);
            mux_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
            pthread_mutex_unlock(&mux_worker->packet_lock);
            if (pthread_join(mux_worker->tid, &result) == 0) {
                if ((long)result != 0) {
                    av_log(NULL, AV_LOG_INFO, "pthread_join encoder worker ret %ld\n",
                        (long)result);
                    state = EXIT_FAILURE;
                }
            }
            cleanup_muxer_worker(mux_worker);
            FREE_AND_NULLIFY(muxer_workers[i][j]);
        }
    }

    // free xstack entry and exit
    ni_xstack_entry_t *entry;
    ni_xstack_exit_t *exit;
    for (i = 0; i < filter_num; i++) {
        filter_worker *xstack_worker = xstack_workers[i];
        av_log(NULL, AV_LOG_DEBUG, "filter thread %d ready to exit.\n", xstack_worker->index);

        if (pthread_join(xstack_worker->tid, &result) == 0) {
            if ((long)result != 0) {
                av_log(NULL, AV_LOG_INFO, "pthread_join encoder worker ret %ld\n",
                    (long)result);
                state = EXIT_FAILURE;
            }
        }

        for (j = 0; j < xstack_worker->num_src_pads; j++) {
            entry = xstack_worker->src_pads[j];
            if (entry->dec_frame_fifo) {
                av_log(NULL, AV_LOG_DEBUG, "dec_frame_fifo list size %d\n",
                       get_fifo_size(entry->dec_frame_fifo));
                free_fifo(entry->dec_frame_fifo);
            }
            if (entry->first_frame) {
                av_frame_free(&entry->first_frame);
                entry->first_frame = NULL;
            }
            if (entry->last_frame) {
                av_frame_free(&entry->last_frame);
                entry->last_frame = NULL;
            }
            pthread_mutex_destroy(&entry->lock);
            pthread_cond_destroy(&entry->frame_cond);
            pthread_cond_destroy(&entry->eos_cond);
            FREE_AND_NULLIFY(xstack_worker->src_pads[j]);
        }

        for (j = 0; j < xstack_worker->outputs; j++) {
            exit = xstack_worker->dst_pads[j];
            av_frame_free(&exit->filter_frame);
            FREE_AND_NULLIFY(xstack_worker->dst_pads[j]);
        }
        // free filter
        cleanup_filter_worker(xstack_worker);
        FREE_AND_NULLIFY(xstack_workers[i]);
    }
    free_common(common);
    FREE_AND_NULLIFY(common);

    av_log(NULL, AV_LOG_INFO, "EXIT.. state=0x%x.\n", state);
    return state ? 1 : 0;
}
