/*
 * Copyright (c) 2010 Nicolas George
 * Copyright (c) 2011 Stefano Sabatini
 * Copyright (c) 2014 Andrey Utkin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @file
 * API example for decoding, ni_xstack filtering and encoding pipeline
 * @example ni_xstack.c
 *
 * @added by zhong.wang@netint.ca
 * use multiple threads to run filtering/transcoding.
 */

#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <string.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/time.h>
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/fifo.h>
#include <libswscale/swscale.h>
#include <ni_device_api.h>

/* Capacity limits: these size the static worker tables and the pad arrays. */
#define NI_MAX_XSTACK_INPUTS  50
#define NI_MAX_XSTACK_FILTER  33
#define NI_MAX_XSTACK_OUTPUTS 4
/* Initial element count of the decoded-frame and wait-to-free fifos. */
#define NI_MAX_DEC_CAPACITY   3

/* Resolution bounds (presumably used to validate user-supplied sizes; the
 * checks are outside this section). */
#define MAX_WIDTH             7680
#define MAX_HEIGHT            4800
#define MIN_WIDTH             128
#define MIN_HEIGHT            96

#define DEFAULT_YUV_PTS_STEP  48000

/* Default path stored in filter_common.reconfig_file. */
#define NI_XSTACK_RECONFIG_FILE_NAME "reconf.xstack"
/*
 * Advance the char pointer `cur` past any leading blanks (as per isblank()).
 * The argument is parenthesized so that any pointer lvalue expression works,
 * and the character is converted to unsigned char because passing a negative
 * char value to a <ctype.h> function is undefined behavior.
 * `cur` is evaluated more than once: do not pass an expression with side
 * effects.
 */
#define skip_blank(cur) while (*(cur) && isblank((unsigned char)*(cur))) { (cur)++; }

/* True when building against libavfilter >= 9.12. */
#define IS_FFMPEG_61_AND_ABOVE                                                \
    ((LIBAVFILTER_VERSION_MAJOR > 9) ||                                        \
     (LIBAVFILTER_VERSION_MAJOR == 9 && LIBAVFILTER_VERSION_MINOR >= 12))

/* True when building against libavfilter >= 10.1; selects the AVFifo API
 * instead of the older AVFifoBuffer API throughout this file. */
#define IS_FFMPEG_70_AND_ABOVE                                                \
    ((LIBAVFILTER_VERSION_MAJOR > 10) ||                                        \
     (LIBAVFILTER_VERSION_MAJOR == 10 && LIBAVFILTER_VERSION_MINOR >= 1))

// Forward declarations: the pad and worker structs below hold pointers to
// one another, so the type names must exist before the full definitions.
typedef struct decoder_worker decoder_worker;
typedef struct encoder_worker encoder_worker;
typedef struct filter_worker filter_worker;

// common struct
// Synchronization state shared by the worker threads; decoder_worker and
// encoder_worker each keep a pointer to one of these.
// NOTE(review): the per-field notes are inferred from the field names — the
// code that updates them is outside this section.
typedef struct common
{
    pthread_mutex_t lock;      // initialized in alloc_common()
    pthread_cond_t ready_cond; // initialized in alloc_common()
    int ready_num;             // presumably: workers that have reported ready
    int total_dec_threads;     // presumably: number of decoder threads started

    int exit_dec_num;          // presumably: decoder threads that have exited
    int exit_enc_num;          // presumably: encoder threads that have exited
} common;

// Kind of input handled by a decoder worker.
// NOTE(review): meanings are inferred from the names and from the
// "only for hw input" / "only for sw input" field groups in decoder_worker.
typedef enum
{
    HW_VIDEO,   // presumably: video stream decoded by a hardware decoder
    SW_VIDEO,   // presumably: raw (yuv) video read in software and uploaded
    SW_PICTURE, // presumably: still image read in software and uploaded
} input_type;

// Program input parameters for decoders filters encoders
// input_info describes one input given on the command line.
// NOTE(review): how the fields are filled in is outside this section; the
// notes below are inferred from the field names.
typedef struct input_info
{
    input_type type;

    char input_name[256];     // input file path
    char decoder_name[32];    // decoder codec name
    bool got_decoder;         // presumably: set once decoder_name was supplied
    char decoder_params[256]; // decoder parameter string
    bool got_params;          // presumably: set once decoder_params was supplied

    int input_width;          // source resolution
    int input_height;
    int scaled_width;         // presumably: target resolution when scaling is requested
    int scaled_height;
} input_info;

// Filter description strings for one filter graph.
// NOTE(review): field usage is outside this section; notes are inferred from
// the field names.
typedef struct filter_info
{
    char filter_desc[1536];         // main filter description text
    char drawtext_filter_desc[256]; // optional drawtext filter description
    bool get_drawtext;              // presumably: true when a drawtext description was given
    char pad_filter_desc[32];       // optional pad filter description
    bool need_pad;                  // presumably: true when the pad filter must be inserted
} filter_info;

// Description of one output.
// NOTE(review): field usage is outside this section; notes are inferred from
// the field names.
typedef struct output_info
{
    char output_name[256]; // output file path
    int width;             // output resolution
    int height;
    bool specific_res;     // presumably: true when width/height were given explicitly
} output_info;

// Used only for filter reconfiguration; yuv and image inputs are not
// supported there.
// Entry formats in the reconfig file (add participant, remove participant,
// new layout):
//   '+ <source video file path> <decoder codec name>', for example:
//     + Dinner-1080p30-500s-NI-2.h264 h264_ni_quadra_dec
//   '- <source video file path>', for example:
//     - Dinner-1080p30-500s-NI-2.h264
//   'f <ni_xstack layout specification>', for example:
//     f inputs=3:layout=0_0|w0_0|0_h0:size=540_360|540_360|540_360:fill=BLACK:shortest=1

// Description of one source named in a reconfig entry.
typedef struct _ni_src_desc
{
    char file_name[256];      // source video file path
    char decoder_name[32];    // decoder codec name
    char decoder_params[256]; // decoder parameter string
} ni_src_desc_t;

// Reconfiguration state kept in filter_common.reconfig (initially
// NO_RECONFIG, see alloc_filter_common()).
// NOTE(review): the transitions between states are outside this section.
typedef enum
{
    NO_RECONFIG,
    DO_RECONFIG,
    WAIT_RECONFIG,
    DONE_RECONFIG,
} reconfig_status;

// Exit handshake state of a worker thread (the `should_exit` field of
// decoder_worker and encoder_worker). init_decoder_worker() starts a worker
// in THREAD_STATE_RUNNING.
// NOTE(review): who moves a worker to the EXIT_* states is outside this section.
typedef enum thread_state_t
{
    THREAD_STATE_RUNNING = 0,
    THREAD_STATE_EXIT_ISSUED,
    THREAD_STATE_EXIT_PROCESSED,
    INVALID_THREAD_STATE,
} thread_state_t;

// Disposition of a decoded frame from the filter's point of view.
typedef enum
{
    FILT_DEC_FRAME_VALID = 0, ///< valid frame, send to filter source
    FILT_DEC_FRAME_SKIP,      ///< skip sending frame
    FILT_DEC_FRAME_EXIT       ///< exit filter
} filt_dec_frame_t;

// State shared by all filter threads; filter_worker and decoder_worker each
// keep a pointer to it. Created by alloc_filter_common().
typedef struct filter_common
{
    pthread_mutex_t lock;
    pthread_cond_t start_cond;
    pthread_cond_t finish_cond;
    pthread_cond_t reconfig_cond;

    reconfig_status reconfig; // starts as NO_RECONFIG
    char *reconfig_file;      // points at NI_XSTACK_RECONFIG_FILE_NAME by default
    int ready_num;            // starts at 0; presumably counts filters that are ready
    /// @brief number of filters that have exited
    int flush_num;
} filter_common;

// filter thread parameters
// one xstack entry contains a buffersrc that is connected to a decoder
// Entries are created by create_new_src_pad(); decoded frames are queued into
// dec_frame_fifo by send_decode_frame() under `lock`.
typedef struct _ni_xstack_entry
{
    pthread_mutex_t lock;        // guards dec_frame_fifo / buffered_frame in send_decode_frame()
    pthread_cond_t  frame_cond;  // signalled after each send_decode_frame() call
    pthread_cond_t  eos_cond;

    char name[64];               // "src_<filter index>_<decoder index>"
    int eos_flag;                // starts at 0
#if IS_FFMPEG_70_AND_ABOVE
    AVFifo *dec_frame_fifo;      // fifo of AVFrame* waiting for the filter
#else
    AVFifoBuffer *dec_frame_fifo; // fifo of AVFrame* waiting for the filter
#endif

    AVFrame *first_frame;
    AVFrame *buffered_frame;     // most recent frame allocated by send_decode_frame()
    AVFrame *last_frame;         // allocated when the entry is created
    AVFilterContext *buffersrc_ctx;
    AVBufferSrcParameters *buffersrc_par;

    decoder_worker *worker;      // decoder feeding this entry
    filter_worker *xstack;       // filter graph owning this entry
    enum AVPixelFormat pixfmt, hw_pixfmt; // hw_pixfmt is set to AV_PIX_FMT_NI_QUAD on creation
    int width, height;
    AVRational fps; // source framerate
    AVRational par; // source pixel aspect ratio
    AVRational time_base; // source time base
    int64_t last_pts; //for sw last frame pts
} ni_xstack_entry_t;

// one xstack exit contains a buffersink that is connected to a encoder
// Exits are created by create_new_dst_pad().
typedef struct _ni_xstack_exit
{
    AVFilterContext *buffersink_ctx; // NULL until set elsewhere (outside this section)
    AVFrame *filter_frame;           // allocated when the exit is created
    encoder_worker *enc_worker;      // taken from encoder_workers[filter index][output index]
} ni_xstack_exit_t;

// the Quadra xstack filter graph
// One filter_worker describes one filter thread together with its source
// pads (decoder side) and destination pads (encoder side).
typedef struct filter_worker
{
    pthread_t tid;
    pthread_mutex_t filter_lock; // held while registering src pads and reading input_eos_num
    pthread_mutex_t ret_lock;    // held while reading filter_ret and flushed
    pthread_cond_t  init_cond;   // signalled once num_src_pads == inputs

    int index;                   // position of this graph in xstack_workers[]
    int init;
    // flag indicating it is time to read reconfig and apply changes
    int need_reconfig;
    int inputs;                  // number of src pads expected before init_cond is signalled
    int outputs;
    int shortest;

    ni_xstack_entry_t *src_pads[NI_MAX_XSTACK_INPUTS];
    int num_src_pads;
    ni_xstack_exit_t *dst_pads[NI_MAX_XSTACK_OUTPUTS];
    int num_dst_pads;

    char filter_desc[2048]; // full text of filter parameter description
    char desc_preix[512];   // prefix for filter parameter description (field name is misspelled; kept for compatibility)
    char desc_suffix[1024]; // suffix for filter parameter description

    unsigned int filters_sequence[NI_MAX_XSTACK_OUTPUTS]; // filter sequence corresponding to the output sequence
    AVFilterGraph *filter_graph;

    int input_eos_num; // how many end-of-stream of inputs got so far
    int64_t latest_stream_start; // latest stream start time, used for setting Image pts
    int64_t last_filter_pts;
    int filter_ret;    // read by decoders through get_filter_ret()

    unsigned int filtered_frames;

    filter_common *filter_common;
    ///flag to indicate if filter is in flush state
    bool flushed;
} filter_worker;

// Global table of filter graphs; filter_num is the number of graphs in use.
filter_worker *xstack_workers[NI_MAX_XSTACK_FILTER] = {0};
int filter_num = 0;


// decoder thread parameters
// input stream context
// Holds the decoder context of one input stream plus the timestamps of the
// last decoded frame.
typedef struct InputStream {
    AVCodecContext *dec_ctx;
    int64_t last_decoded_pts;
    int64_t last_decoded_dts;
} InputStream;

// decoding task description
// One decoder_worker describes one input (decoder thread). Its locks, the
// condition variable and wait_to_free_list are set up by init_decoder_worker().
typedef struct decoder_worker {
    pthread_t tid;
    pthread_mutex_t frame_lock;
    pthread_mutex_t list_lock;
    pthread_cond_t  list_cond;

    int index;                  // index of this decoder; also used as src pad slot
    input_type type;
    // only for hw input
    char input_file[256];
    char decoder_name[32];
    char decoder_params[256];
    AVFormatContext *ifmt_ctx;
    InputStream *input_stream;
    int stream_index;
    int picture_num;

    //only for sw input
    FILE *input_fp;               // raw input file, opened by open_yuv_file()
    int frame_cnt;                // number of frames in the yuv file
    struct SwsContext* sws_ctx;   // created only when scaling is needed
    AVBufferRef* hwdevice_upload; // HW device context created by init_hwframe_uploader()
    AVBufferRef* hwctx_upload;    // HW frames context created by init_hwframe_uploader()
    int input_width;              // resolution of the source file
    int input_height;
    int width;                    // resolution uploaded to the device
    int height;
    bool need_scale;
    enum AVPixelFormat pix_fmt;
    int pts_step;
    int video_index;

    unsigned int decoded_frames;

    thread_state_t should_exit;
    // have to stop now, e.g. participant removal
    int force_exit;
    // whether input file has already opened at decode thread start; used for
    // newly added decoder since input open has to be done earlier
    int input_file_already_opened;
    // timestamp (pts) of last frame of this stream; this is usually set to
    // the last frame pts of existing streams when adding this as a new decoder
    // stream, and is used for the new stream starting pts to be in sync with
    // other streams in progress
    int64_t last_decoded_pts;

    int encode_exited;

    filter_worker **xstack;
    ni_xstack_entry_t *stack_entry[NI_MAX_XSTACK_FILTER]; // per filter graph src pad, set by create_new_src_pad()
    AVFrame *decoded_frame;       // receives the uploaded HW frame in retrieve_hwframe()
    AVFrame *buffered_frame;
#if IS_FFMPEG_70_AND_ABOVE
    AVFifo *wait_to_free_list;    // fifo of AVFrame* waiting to be released
#else
    AVFifoBuffer *wait_to_free_list; // fifo of AVFrame* waiting to be released
#endif

    int devid;                    // device id passed to av_hwdevice_ctx_create()
    unsigned int loop;
    int nb_streams;

    common *common;
    filter_common *filter_common;
} decoder_worker;

// Global table of decoder workers.
// NOTE(review): active_decoder_workers presumably counts the live entries.
decoder_worker *decoder_workers[NI_MAX_XSTACK_INPUTS] = {0};
int active_decoder_workers = 0;


// encoder thread parameters
// output stream context
// Holds the encoder context of one output stream plus the timestamps of the
// last encoded packet.
typedef struct OutputStream {
    AVCodecContext *enc_ctx;
    int64_t last_encoded_pts;
    int64_t last_encoded_dts;
} OutputStream;

// encoding task description
// One encoder_worker describes one output (encoder thread). Filtered frames
// reach it through enc_frame_fifo, see send_encode_frame().
typedef struct encoder_worker {
    pthread_t tid;
    pthread_mutex_t frame_lock;   // guards enc_frame_fifo
    pthread_cond_t consume_cond;  // signalled by the producer after queueing a frame
    pthread_cond_t produce_cond;  // the producer waits on this while the fifo is full

    int index;
    const char *encoder_name;
    char *output_name;
    int width;
    int height;
    const char *encoder_params;

    AVFormatContext *ofmt_ctx;
    OutputStream *output_stream;

    int device_id;
    AVRational timebase;
    int force_source_keyframe;

    int started;
    thread_state_t should_exit;
    int filter_flush;

    int nb_decoders;

#if IS_FFMPEG_70_AND_ABOVE
    AVFifo *enc_frame_fifo;       // fifo of AVFrame* waiting to be encoded
#else
    AVFifoBuffer *enc_frame_fifo; // fifo of AVFrame* waiting to be encoded
#endif
    AVFrame *buffered_frame;      // frame that holds the reference queued by send_encode_frame()
    AVFrame *filtered_frame;

    unsigned long encoded_frames;
    unsigned long encoder_output_frames;

    common *common;
} encoder_worker;

// Global table of encoder workers, indexed by [filter graph][output pad].
// NOTE(review): active_encoder_workers presumably counts the live entries.
encoder_worker *encoder_workers[NI_MAX_XSTACK_FILTER][NI_MAX_XSTACK_OUTPUTS] = {0};
int active_encoder_workers = 0;


// main thread status
// NOTE(review): these globals are written and read outside this section; the
// meanings below are inferred from the names.
int global_stop = 0;          // presumably: set to request shutdown of all threads
int print_stat = 1;           // presumably: enables periodic statistics output
int reconfig_wait_count = 0;

// COMMON FUNCTION
/**
 * Allocate a zeroed `common` object and initialize its mutex and condition
 * variable.
 *
 * @return the new object, or NULL when the allocation or one of the pthread
 *         initializations fails (anything set up so far is released again).
 */
static common *alloc_common(void)
{
    common *c = malloc(sizeof(*c));

    if (!c) {
        av_log(NULL, AV_LOG_ERROR, "%s failed.\n", __func__);
        return NULL;
    }
    memset(c, 0, sizeof(*c));

    if (pthread_mutex_init(&c->lock, NULL) != 0) {
        free(c);
        return NULL;
    }

    if (pthread_cond_init(&c->ready_cond, NULL) != 0) {
        pthread_mutex_destroy(&c->lock);
        free(c);
        return NULL;
    }

    return c;
}

/**
 * Destroy the synchronization objects of a `common` object and free it.
 * A NULL argument is ignored.
 */
static void free_common(common *common)
{
    if (!common)
        return;

    pthread_mutex_destroy(&common->lock);
    pthread_cond_destroy(&common->ready_cond);
    free(common);
}

/**
 * Allocate a zeroed `filter_common` object, initialize its mutex and its
 * three condition variables and set the defaults (no reconfig pending,
 * default reconfig file name).
 *
 * @return the new object, or NULL on failure; on failure everything that was
 *         initialized so far is torn down in reverse order.
 */
static filter_common *alloc_filter_common(void)
{
    filter_common *fc = malloc(sizeof(*fc));

    if (!fc) {
        av_log(NULL, AV_LOG_ERROR, "%s failed.\n", __func__);
        return NULL;
    }
    memset(fc, 0, sizeof(*fc));

    if (pthread_mutex_init(&fc->lock, NULL))
        goto err_free;
    if (pthread_cond_init(&fc->start_cond, NULL))
        goto err_lock;
    if (pthread_cond_init(&fc->finish_cond, NULL))
        goto err_start_cond;
    if (pthread_cond_init(&fc->reconfig_cond, NULL))
        goto err_finish_cond;

    fc->ready_num = 0;
    fc->reconfig = NO_RECONFIG;
    fc->reconfig_file = NI_XSTACK_RECONFIG_FILE_NAME;
    return fc;

err_finish_cond:
    pthread_cond_destroy(&fc->finish_cond);
err_start_cond:
    pthread_cond_destroy(&fc->start_cond);
err_lock:
    pthread_mutex_destroy(&fc->lock);
err_free:
    free(fc);
    return NULL;
}

/**
 * Destroy every synchronization object created by alloc_filter_common() and
 * free the object. A NULL argument is ignored.
 *
 * reconfig_cond is destroyed here as well; it is initialized in
 * alloc_filter_common() and was previously never released.
 * reconfig_file is not freed: by default it points at a string literal.
 */
static void free_filter_common(filter_common *filter_common)
{
    if (filter_common) {
        pthread_cond_destroy(&filter_common->reconfig_cond);
        pthread_cond_destroy(&filter_common->finish_cond);
        pthread_cond_destroy(&filter_common->start_cond);
        pthread_mutex_destroy(&filter_common->lock);
        free(filter_common);
    }
}

// fifo operations (for fifo storing filtered frames to be encoded)
/**
 * Tell whether a fifo of AVFrame pointers holds no complete element.
 *
 * @return 1 when the fifo is empty, 0 otherwise
 */
#if IS_FFMPEG_70_AND_ABOVE
static inline int is_fifo_empty(AVFifo *fifo)
{
    return !av_fifo_can_read(fifo);
}
#else
static inline int is_fifo_empty(AVFifoBuffer *fifo)
{
    return (size_t)av_fifo_size(fifo) < sizeof(AVFrame *);
}
#endif

/**
 * Tell whether a fifo of AVFrame pointers has no room for one more element
 * at its current size.
 *
 * @return 1 when the fifo is full, 0 otherwise
 */
#if IS_FFMPEG_70_AND_ABOVE
static inline int is_fifo_full(AVFifo *fifo)
{
    return !av_fifo_can_write(fifo);
}
#else
static inline int is_fifo_full(AVFifoBuffer *fifo)
{
    return (size_t)av_fifo_space(fifo) < sizeof(AVFrame *);
}
#endif

/**
 * Number of AVFrame pointers currently stored in the fifo.
 */
#if IS_FFMPEG_70_AND_ABOVE
static inline int get_fifo_size(AVFifo *fifo) {
    return (int)av_fifo_can_read(fifo);
}
#else
static inline int get_fifo_size(AVFifoBuffer *fifo) {
    return (int)((size_t)av_fifo_size(fifo) / sizeof(AVFrame *));
}
#endif

/**
 * Release a fifo. The elements still stored in it are not freed; drain the
 * fifo first if it owns its frames. NULL is accepted.
 *
 * The fifo pointer is passed by value, so the caller's own pointer is not
 * cleared and must not be used afterwards.
 */
#if IS_FFMPEG_70_AND_ABOVE
static void free_fifo(AVFifo *fifo) {
    av_fifo_freep2(&fifo);
}
#else
static void free_fifo(AVFifoBuffer *fifo) {
    // no `return <expr>;` here: that is not allowed in a void function
    av_fifo_free(fifo);
}
#endif

/**
 * Reference count of the first data buffer of a frame.
 *
 * @return the count reported by av_buffer_get_ref_count(), or 0 when the
 *         frame is NULL or has no buffer in buf[0]
 */
static inline int frame_get_ref_count(AVFrame *frame) {
    // frame->buf is an array member, so only the frame itself and its first
    // element can actually be NULL
    if (frame && frame->buf[0]) {
        return av_buffer_get_ref_count(frame->buf[0]);
    }
    return 0;
}

/**
 * Pop every frame left in a fifo of AVFrame pointers and free it.
 *
 * @param fifo          fifo to empty; NULL is ignored
 * @param caller_hint1  optional text identifying the caller in the debug log
 * @param caller_hint2  optional second text for the debug log
 *
 * Entries without a data buffer are freed too; for them the frame index is
 * logged as -1 instead of dereferencing a NULL buffer.
 */
#if IS_FFMPEG_70_AND_ABOVE
void drain_fifo(AVFifo *fifo, const char *caller_hint1,
#else
void drain_fifo(AVFifoBuffer *fifo, const char *caller_hint1,
#endif
                const char * caller_hint2)
{
    if (!fifo) {
        return;
    }

    av_log(NULL, AV_LOG_DEBUG, "%s %s %s size: %d\n", __func__,
           caller_hint1 ? caller_hint1 : "?",
           caller_hint2 ? caller_hint2 : "?", get_fifo_size(fifo));
    while (!is_fifo_empty(fifo))
    {
        AVFrame *frame = NULL;
        int frame_idx = -1;

#if IS_FFMPEG_70_AND_ABOVE
        if (av_fifo_read(fifo, &frame, 1) < 0) {
            // nothing was consumed; stop rather than spin on the same entry
            break;
        }
#else
        av_fifo_generic_read(fifo, &frame, sizeof(AVFrame *), NULL);
#endif
        if (!frame) {
            continue;
        }
        // the log arguments are evaluated at every log level, so the buffer
        // must be checked before it is dereferenced
        if (frame->buf[0] && frame->buf[0]->data) {
            frame_idx = ((niFrameSurface1_t *)(frame->buf[0]->data))->ui16FrameIdx;
        }
        av_log(NULL, AV_LOG_DEBUG, "force free ui16FrameIdx = [%d] ref_count = %d\n",
               frame_idx, frame_get_ref_count(frame));
        av_frame_free(&frame);
    }
}

/**
 * Log (debug level) the reference count and surface information of the frame
 * stored at position `fifo_index` of a fifo, without removing it.
 *
 * @param fifo          fifo of AVFrame pointers
 * @param name          unused, kept for interface compatibility
 * @param worker_index  unused, kept for interface compatibility
 * @param hint          unused, kept for interface compatibility
 * @param fifo_index    element position, 0 is the oldest element
 *
 * An out-of-range index, a failed peek or a frame without a data buffer is
 * reported with the "ref_count is -1" error line.
 */
#if IS_FFMPEG_70_AND_ABOVE
void fifo_peek_print(AVFifo *fifo, const char* name, int worker_index, const char* hint, int fifo_index)
#else
void fifo_peek_print(AVFifoBuffer *fifo, const char* name, int worker_index, const char* hint, int fifo_index)
#endif
{
    AVFrame *tmp_frame = NULL;
    int peek_ret = -1;

    (void)name;
    (void)worker_index;
    (void)hint;

    // never peek outside the stored elements: the result would be garbage
    if (fifo_index >= 0 && fifo_index < get_fifo_size(fifo)) {
#if IS_FFMPEG_70_AND_ABOVE
        peek_ret = av_fifo_peek(fifo, &tmp_frame, 1, fifo_index );
#else
        peek_ret = av_fifo_generic_peek_at(fifo, &tmp_frame, fifo_index * sizeof(AVFrame *), sizeof(AVFrame *), NULL);
#endif
    }

    if (peek_ret >= 0 && tmp_frame && tmp_frame->buf[0])
    {
        niFrameSurface1_t *p_data3 = (niFrameSurface1_t *)(tmp_frame->buf[0]->data);
        int ref_count = av_buffer_get_ref_count(tmp_frame->buf[0]);

        av_log(NULL, AV_LOG_DEBUG, "== ref_cnt = %d, frame %p ui16FrameIdx = [%d] DevHandle %d ==\n",
               ref_count, tmp_frame, p_data3->ui16FrameIdx, p_data3->device_handle);
    }
    else
    {
        av_log(NULL, AV_LOG_ERROR, "== ref_count is -1 ==\n");
    }
}

/**
 * Dump (debug level) every frame currently stored in a fifo, oldest first,
 * framed by a header and a trailer line.
 *
 * @param fifo          fifo of AVFrame pointers
 * @param name          label printed in the header line
 * @param worker_index  forwarded to fifo_peek_print()
 * @param hint          optional label, "-" is printed when NULL
 */
#if IS_FFMPEG_70_AND_ABOVE
void fifo_print(AVFifo *fifo, const char *name, int worker_index, const char *hint)
#else
void fifo_print(AVFifoBuffer *fifo, const char *name, int worker_index, const char *hint)
#endif
{
    // the element count is sampled once, before the walk
    const int count = get_fifo_size(fifo);
    int pos = 0;

    av_log(NULL, AV_LOG_DEBUG, "==== %s %s name %s, fifo size %d ====\n", __func__, hint ? hint : "-", name, count);
    while (pos < count)
    {
        fifo_peek_print(fifo, name, worker_index, hint, pos);
        pos++;
    }
    av_log(NULL, AV_LOG_DEBUG, "====*******====\n");
}

/**
 * Check whether every source pad of a filter graph has an empty decoded-frame
 * fifo.
 *
 * @return 1 when no source pad has a frame queued (also when there are no
 *         source pads), 0 otherwise
 */
static int entry_empty(filter_worker *f) {
    int all_empty = 1;
    int pad = 0;

    while (pad < f->num_src_pads) {
        if (!is_fifo_empty(f->src_pads[pad]->dec_frame_fifo)) {
            all_empty = 0;
        }
        pad++;
    }
    return all_empty;
}

/**
 * Create an ni_xstack source entry for an opened input stream and register it
 * with the filter graph (f->src_pads) and with the decoder worker
 * (dec_worker->stack_entry[f->index]). Runs under f->filter_lock.
 *
 * Slot selection: a decoder whose input was opened ahead of its thread
 * (input_file_already_opened) takes the first free slot, every other decoder
 * uses its own index.
 *
 * init_cond is signalled whenever the number of source pads equals f->inputs
 * at the end of the call.
 *
 * @param f           filter graph that receives the entry
 * @param dec_worker  decoder feeding the entry
 * @param width       frame width
 * @param height      frame height
 * @param pix_fmt     pixel format recorded in the entry
 * @param fps         source frame rate
 * @param sar         source pixel aspect ratio
 * @param tb          source time base
 * @return 0 on success; AVERROR(ENOMEM) when the entry, its fifo or its frame
 *         cannot be allocated; AVERROR(EINVAL) when no valid slot exists.
 *         Nothing is registered on failure.
 */
static int create_new_src_pad(filter_worker *f,
                              decoder_worker *dec_worker,
                              int width, int height,
                              enum AVPixelFormat pix_fmt, AVRational fps,
                              AVRational sar, AVRational tb)
{
    ni_xstack_entry_t *e;
    int i, slot;
    int ret = 0;

    pthread_mutex_lock(&f->filter_lock);

    e = malloc(sizeof(*e));
    if (!e) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc src pad failed.\n", __func__);
        ret = AVERROR(ENOMEM);
        goto out;
    }
    // start from a fully defined state: first_frame, buffered_frame, last_pts
    // and friends are not assigned below
    memset(e, 0, sizeof(*e));

#if IS_FFMPEG_70_AND_ABOVE
    e->dec_frame_fifo = av_fifo_alloc2(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*), AV_FIFO_FLAG_AUTO_GROW);
#else
    e->dec_frame_fifo = av_fifo_alloc_array(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*));
#endif
    e->last_frame = av_frame_alloc();
    if (!e->dec_frame_fifo || !e->last_frame) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc src pad fifo/frame failed.\n", __func__);
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    slot = f->num_src_pads;
    if (dec_worker->input_file_already_opened) {
        for (i = 0; i < NI_MAX_XSTACK_INPUTS; i++) {
            if (f->src_pads[i] == NULL) {
                slot = i;
                assert(i == f->num_src_pads);
                break;
            }
        }
    } else {
        slot = dec_worker->index;
    }
    if (slot < 0 || slot >= NI_MAX_XSTACK_INPUTS) {
        av_log(NULL, AV_LOG_ERROR, "%s no valid src pad slot (%d).\n", __func__, slot);
        ret = AVERROR(EINVAL);
        goto fail;
    }

    pthread_mutex_init(&e->lock, NULL);
    pthread_cond_init(&e->frame_cond, NULL);
    pthread_cond_init(&e->eos_cond, NULL);
    snprintf(e->name, sizeof(e->name), "src_%d_%d", f->index, dec_worker->index);

    e->eos_flag = 0;
    e->buffersrc_ctx = NULL;
    e->buffersrc_par = NULL;
    e->worker = dec_worker;
    e->xstack = f;
    e->pixfmt = pix_fmt;
    e->hw_pixfmt = AV_PIX_FMT_NI_QUAD;
    e->width = width;
    e->height = height;
    e->fps = fps;
    e->par = sar;
    e->time_base = tb;

    dec_worker->stack_entry[f->index] = e;
    f->src_pads[slot] = e;
    f->num_src_pads++;

    av_log(NULL, AV_LOG_INFO, "%s Filter graph %d src pad index %d name %s "
           " slot %d created, total pads: %d.\n",
           __func__, f->index, dec_worker->index, e->name, slot, f->num_src_pads);
    goto out;

fail:
    free_fifo(e->dec_frame_fifo);
    av_frame_free(&e->last_frame);
    free(e);

out:
    if (f->num_src_pads == f->inputs) {
        av_log(NULL, AV_LOG_DEBUG, "filter init signal\n");
        pthread_cond_signal(&f->init_cond);
    }

    pthread_mutex_unlock(&f->filter_lock);

    return ret;
}

/**
 * Create an ni_xstack exit for output `index` of a filter graph and store it
 * in f->dst_pads[index]. The exit is bound to
 * encoder_workers[f->index][index], which must already exist.
 *
 * Per the original note, this is called while the filter graph is being
 * initialized, with filter_lock already held by the caller.
 * f->num_dst_pads is not updated here.
 *
 * @return 0 on success, -1 when the encoder worker is missing or an
 *         allocation fails (nothing is stored in that case)
 */
static int create_new_dst_pad(filter_worker *f, int index)
{
    ni_xstack_exit_t *e;

    e = malloc(sizeof(*e));
    if (!e) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc dst pad failed.\n",
               __func__);
        return -1;
    }

    e->buffersink_ctx = NULL;
    e->enc_worker = encoder_workers[f->index][index];
    if (!e->enc_worker) {
        av_log(NULL, AV_LOG_ERROR, "enc is null !!!\n");
        free(e);
        return -1;
    }

    // a dst pad without its frame would only fail later, so reject it here
    e->filter_frame = av_frame_alloc();
    if (!e->filter_frame) {
        av_log(NULL, AV_LOG_ERROR, "%s alloc dst pad frame failed.\n",
               __func__);
        free(e);
        return -1;
    }

    f->dst_pads[index] = e;
    av_log(NULL, AV_LOG_DEBUG, "%s Filter graph %d dst pad %d created.\n",
           __func__, f->index, index);

    return 0;
}

/**
 * Read, under filter_lock, how many inputs of the filter graph have reached
 * end of stream so far.
 */
static int get_decoder_eos_num_total(filter_worker *f)
{
    int eos_count;

    pthread_mutex_lock(&f->filter_lock);
    eos_count = f->input_eos_num;
    pthread_mutex_unlock(&f->filter_lock);

    return eos_count;
}

/**
 * Append a frame pointer to a wait-to-free fifo. The fifo stores the pointer
 * itself; no new reference is taken.
 *
 * @param fifo   fifo of AVFrame pointers
 * @param frame  frame to queue; it must carry a data buffer in buf[0]
 * @return -1 when the frame has no buffer or the fifo is full; otherwise the
 *         result of the fifo write (note: the AVFifoBuffer API reports the
 *         number of bytes written, the AVFifo API reports 0 on success)
 */
#if IS_FFMPEG_70_AND_ABOVE
static int list_append_frame(AVFifo *fifo, AVFrame *frame)
#else
static int list_append_frame(AVFifoBuffer *fifo, AVFrame *frame)
#endif
{
    int ret;
    niFrameSurface1_t* p_data3;
    int ref_count;

    // same guard as the other fifo helpers: never dereference a missing buffer
    if (!frame || !frame->buf[0]) {
        av_log(NULL, AV_LOG_ERROR, "%s: frame has no buffer, not queued\n", __func__);
        return -1;
    }
    p_data3 = (niFrameSurface1_t*)(frame->buf[0]->data);
    ref_count = av_buffer_get_ref_count(frame->buf[0]);

    if (is_fifo_full(fifo))
    {
        av_log(NULL, AV_LOG_ERROR, "wait to free fifo space has full, it is unexpected!!!\n");
        return -1;
    }
#if IS_FFMPEG_70_AND_ABOVE
    ret = av_fifo_write(fifo, &frame, 1);
#else
    ret = av_fifo_generic_write(fifo, &frame, sizeof(AVFrame*), NULL);
#endif
    av_log(NULL, AV_LOG_DEBUG, "filt frame %p width %d height %d\n", frame, frame->width, frame->height);
    av_log(NULL, AV_LOG_DEBUG, "%s frame ptr %p ref_cnt %d queued, ui16FrameIdx = [%d] , fifo size: %d\n",
           __func__, frame, ref_count, p_data3->ui16FrameIdx, get_fifo_size(fifo));
    return ret;
}

/**
 * Try to release the oldest frame of a wait-to-free fifo.
 *
 * The head frame is removed and freed only when the fifo holds the last
 * reference to its buffer (reference count 1). A head frame that is still
 * referenced elsewhere (count >= 2) stays in the fifo untouched.
 *
 * @param fifo  fifo of AVFrame pointers
 * @param name  label used in the debug log
 * @return number of frames left in the fifo after the call
 */
#if IS_FFMPEG_70_AND_ABOVE
static int list_recycle_frames(AVFifo *fifo, const char* name)
#else
static int list_recycle_frames(AVFifoBuffer *fifo, const char* name)
#endif
{
    AVFrame *tmp_frame = NULL;
    int ref_count = -1;
    niFrameSurface1_t* p_data3 = NULL;
    int fifo_size = get_fifo_size(fifo);

    // only look at the head when there is one; peeking an empty fifo leaves
    // tmp_frame undefined (or stale with the old fifo API)
    if (fifo_size > 0) {
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_peek(fifo, &tmp_frame, 1, 0);
#else
        av_fifo_generic_peek_at(fifo, &tmp_frame, 0, sizeof(AVFrame*), NULL);
#endif
    }
    if (tmp_frame && tmp_frame->buf[0]) {
        p_data3 = (niFrameSurface1_t*)(tmp_frame->buf[0]->data);
        ref_count = av_buffer_get_ref_count(tmp_frame->buf[0]);
    }
    else {
        av_log(NULL, AV_LOG_ERROR, "ref count is -1\n");
    }
    av_log(NULL, AV_LOG_DEBUG, "%s name %s, ref_cnt == %d , num_nodes now: %d\n", __func__, name, ref_count, fifo_size);
    if (p_data3 && 1 == ref_count) {
        // the fifo owns the last reference: drop the entry and free the frame
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_drain2(fifo, 1);
#else
        av_fifo_drain(fifo, sizeof(AVFrame*));
#endif
        fifo_size = get_fifo_size(fifo);
        av_log(NULL, AV_LOG_DEBUG, "%s ref_cnt == 1, unref frame %p ui16FrameIdx = [%d] DevHandle %d , num_nodes now: %d\n", __func__, tmp_frame, p_data3->ui16FrameIdx, p_data3->device_handle, fifo_size);
        av_frame_free(&tmp_frame);
    } else if (p_data3 && ref_count >= 2) {
        // still referenced elsewhere: despite the wording of the message,
        // nothing is removed from the fifo in this branch
        av_log(NULL, AV_LOG_DEBUG, "%s ref %d  >= 2 %p ui16FrameIdx = [%d] , removed from list, fifo size now: %d\n", __func__, ref_count, tmp_frame, p_data3->ui16FrameIdx, fifo_size);
    } else {
        av_log(NULL, AV_LOG_ERROR, "%s %p data error, p_data3 %p ui16FrameIdx = [%d] ref_cnt = %d , NOT drained\n",
                __func__, tmp_frame, p_data3, p_data3 ? p_data3->ui16FrameIdx : -1,
                p_data3 ? av_buffer_get_ref_count(tmp_frame->buf[0]) : -1);
    }
    return fifo_size;
}

/**
 * Take a new reference to `inframe` in `buffered_frame` and queue the
 * `buffered_frame` pointer in the fifo (used by decoder and filter threads).
 *
 * @param fifo            fifo of AVFrame pointers; the caller is expected to
 *                        hold the lock protecting it
 * @param buffered_frame  frame that receives the reference; its pointer is
 *                        what gets stored in the fifo
 * @param inframe         frame to reference
 * @return 0 on success, a negative value on failure: -1 for a NULL frame or a
 *         full fifo, otherwise the error of av_frame_ref() or of the fifo
 *         write. On failure no reference is left in `buffered_frame`.
 */
#if IS_FFMPEG_70_AND_ABOVE
static int enqueue_frame(AVFifo *fifo, AVFrame *buffered_frame, const AVFrame *inframe)
#else
static int enqueue_frame(AVFifoBuffer *fifo, AVFrame *buffered_frame, const AVFrame *inframe)
#endif
{
    int ret;
    if (!inframe) {
        av_log(NULL, AV_LOG_ERROR, "input frame is null\n");
        return -1;
    }
    if (!buffered_frame) {
        av_log(NULL, AV_LOG_ERROR, "%s: buffered frame is null\n", __func__);
        return -1;
    }

    av_log(NULL, AV_LOG_DEBUG, "%s frame %p data %p extended_data %p\n",
           __func__, inframe, inframe->data, inframe->extended_data);

    // the fifo is never grown here: a full fifo is reported as an error
    if (is_fifo_full(fifo))
    {
        av_log(NULL, AV_LOG_ERROR, "dec or enc fifo space has full, it is unexpected!!!\n");
        return -1;
    }

    // call av_frame_ref to increase buffer ref count / preserve buffer
    ret = av_frame_ref(buffered_frame, inframe);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "%s: av_frame_ref ERROR %d!!!\n", __func__, ret);
        return ret;
    }

#if IS_FFMPEG_70_AND_ABOVE
    ret = av_fifo_write(fifo, &buffered_frame, 1);
#else
    ret = av_fifo_generic_write(fifo, &buffered_frame, sizeof(AVFrame*), NULL);
#endif
    if (ret < 0) {
        // the pointer did not make it into the fifo: give the reference back
        av_log(NULL, AV_LOG_ERROR, "%s: fifo write ERROR %d!!!\n", __func__, ret);
        av_frame_unref(buffered_frame);
        return ret;
    }
    // the old fifo API reports the number of bytes written
    ret = 0;

    av_log(NULL, AV_LOG_DEBUG, "%s enc frame queued, fifo size: %d\n",
           __func__, get_fifo_size(fifo));
    return ret;
}

/**
 * Queue a decoded frame on a source entry for the filter thread.
 *
 * A fresh AVFrame is allocated for every call because earlier frames may
 * still sit in the entry's fifo; it takes a reference to `dec_frame` and is
 * released later by the filter thread. frame_cond is signalled in every case.
 *
 * @param e          source entry to feed
 * @param dec_frame  decoded frame to reference
 * @return 0 on success, a negative value when the frame could not be
 *         allocated or queued; on failure the freshly allocated frame is
 *         freed and e->buffered_frame is left NULL
 */
static int send_decode_frame(ni_xstack_entry_t *e, AVFrame *dec_frame)
{
    int ret;
    if (dec_frame) {
        av_log(NULL, AV_LOG_DEBUG, "%s stream send dec frame -> pts=%ld,dts=%ld index %d\n",
               __func__, dec_frame->pts, dec_frame->pkt_dts, e->worker->index);
    }
    pthread_mutex_lock(&e->lock);
    e->buffered_frame = av_frame_alloc();
    if (!e->buffered_frame) {
        av_log(NULL, AV_LOG_ERROR, "%s: av_frame_alloc failed\n", __func__);
        ret = AVERROR(ENOMEM);
    } else {
        ret = enqueue_frame(e->dec_frame_fifo, e->buffered_frame, dec_frame);
        if (ret < 0) {
            // not queued, so nobody else will ever free this frame
            av_frame_free(&e->buffered_frame);
        }
    }
    pthread_cond_signal(&e->frame_cond);
    pthread_mutex_unlock(&e->lock);
    return ret;
}

// place a filtered frame into queue for encoding
//
// Takes enc_worker->frame_lock, waits on produce_cond if the encoder fifo is
// full, queues a reference to filt_frame (held in enc_worker->buffered_frame)
// and signals consume_cond.
// Returns the result of enqueue_frame(): 0 on success, negative on failure
// (including a NULL filt_frame).
//
// NOTE(review): the full-fifo wait is a single `if`, not a loop. If the wait
// returns while the fifo is still full (spurious wakeup, or a signal sent
// for another reason), enqueue_frame() fails with the "fifo space has full"
// error. Whether a `while` is safe depends on how produce_cond is signalled
// at shutdown, which is outside this section.
// NOTE(review): the same enc_worker->buffered_frame object is reused for every
// call; this looks like it assumes at most one queued frame — confirm.
static int send_encode_frame(encoder_worker *enc_worker, AVFrame *filt_frame)
{
    int ret = 0;

    if (filt_frame) {
        av_log(NULL, AV_LOG_DEBUG, "%s: send frame <- pts=%ld,dts=%ld\n",
               __func__, filt_frame->pts, filt_frame->pkt_dts);
        av_log(NULL, AV_LOG_DEBUG, "%s: %p filt_frame->data %p filt_frame->extended_data %p\n", __func__, filt_frame, filt_frame->data, filt_frame->extended_data);
    }

    pthread_mutex_lock(&enc_worker->frame_lock);
    if (is_fifo_full(enc_worker->enc_frame_fifo))
    {
        av_log(NULL, AV_LOG_DEBUG, "enc fifo space full, wait!\n");
        // releases frame_lock while blocked, re-acquires it before returning
        pthread_cond_wait(&enc_worker->produce_cond, &enc_worker->frame_lock);
    }
    ret = enqueue_frame(enc_worker->enc_frame_fifo, enc_worker->buffered_frame, filt_frame);
    // signalled even when enqueue_frame() failed
    pthread_cond_signal(&enc_worker->consume_cond);
    pthread_mutex_unlock(&enc_worker->frame_lock);

    return ret;
}



// DECODER THREAD FUNCTION
/**
 * Initialize the synchronization objects and the wait-to-free fifo of a
 * decoding task and mark it as running.
 *
 * @param dec_worker  worker to initialize
 * @return 0 on success; a non-zero value on failure: the pthread error code
 *         when a mutex/condition variable cannot be initialized, or
 *         AVERROR(ENOMEM) when the fifo cannot be allocated. Everything that
 *         was initialized before the failure is destroyed again.
 */
static int init_decoder_worker(decoder_worker *dec_worker)
{
    int ret;

    ret = pthread_mutex_init(&dec_worker->frame_lock, NULL);
    if (ret) {
        goto fail_init_frame_lock;
    }

    ret = pthread_mutex_init(&dec_worker->list_lock, NULL);
    if (ret) {
        goto fail_init_list_lock;
    }

    ret = pthread_cond_init(&dec_worker->list_cond, NULL);
    if (ret) {
        goto fail_init_list_cond;
    }

#if IS_FFMPEG_70_AND_ABOVE
    dec_worker->wait_to_free_list = av_fifo_alloc2(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*), AV_FIFO_FLAG_AUTO_GROW);
#else
    dec_worker->wait_to_free_list = av_fifo_alloc_array(NI_MAX_DEC_CAPACITY, sizeof(AVFrame*));
#endif
    if (!dec_worker->wait_to_free_list) {
        // ret still holds 0 from the successful pthread_cond_init() above;
        // without this the failure would be reported as success
        ret = AVERROR(ENOMEM);
        goto fail_init_fifo;
    }

    dec_worker->should_exit = THREAD_STATE_RUNNING;

    return 0;

fail_init_fifo:
    pthread_cond_destroy(&dec_worker->list_cond);
fail_init_list_cond:
    pthread_mutex_destroy(&dec_worker->list_lock);
fail_init_list_lock:
    pthread_mutex_destroy(&dec_worker->frame_lock);
fail_init_frame_lock:
    return ret;
}

/**
 * Destroy the two mutexes and the condition variable of a decoding task.
 * A NULL argument is ignored. The wait-to-free fifo and the worker object
 * itself are not released here.
 */
static void cleanup_decoder_worker(decoder_worker *worker)
{
    if (!worker)
        return;

    pthread_mutex_destroy(&worker->frame_lock);
    pthread_mutex_destroy(&worker->list_lock);
    pthread_cond_destroy(&worker->list_cond);
}

/**
 * Read, under ret_lock, the status most recently published by the filter
 * thread. Decoders call this; a negative value means the decoder should exit.
 */
static int get_filter_ret(filter_worker *f)
{
    int status;

    pthread_mutex_lock(&f->ret_lock);
    status = f->filter_ret;
    pthread_mutex_unlock(&f->ret_lock);

    return status;
}

/**
 * Read, under ret_lock, whether the filter thread is in flush state.
 * Decoders call this; once flushed, no more frames should be pushed to the
 * filter.
 */
static bool get_filter_flush_state(filter_worker *f)
{
    bool is_flushed;

    pthread_mutex_lock(&f->ret_lock);
    is_flushed = f->flushed;
    pthread_mutex_unlock(&f->ret_lock);

    return is_flushed;
}

/**
 * Set up the HW device context and the HW frames context used to upload
 * software frames of one input to the device.
 *
 * The device is selected by dec_worker->devid (passed as a decimal string);
 * the frames context uses AV_PIX_FMT_NI_QUAD with dec_worker->pix_fmt as the
 * software format and dec_worker->width x dec_worker->height as the size.
 *
 * @return 0 on success, a negative AVERROR code on failure. Contexts created
 *         before a failure stay stored in dec_worker.
 */
static int init_hwframe_uploader(decoder_worker *dec_worker) {
    char dev_name[8] = { 0 };
    AVHWFramesContext *frames;
    int err;

    snprintf(dev_name, sizeof(dev_name), "%d", dec_worker->devid);

    av_log(NULL, AV_LOG_INFO, "init_hwframe_uploader width %d height %d pixfmt %d devid %d\n",
            dec_worker->width, dec_worker->height, dec_worker->pix_fmt, dec_worker->devid);

    err = av_hwdevice_ctx_create(&dec_worker->hwdevice_upload,
                                 AV_HWDEVICE_TYPE_NI_QUADRA, dev_name, NULL, 0);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to create AV HW device ctx %d\n", dec_worker->index);
        return err;
    }

    dec_worker->hwctx_upload = av_hwframe_ctx_alloc(dec_worker->hwdevice_upload);
    if (dec_worker->hwctx_upload == NULL) {
        return AVERROR(ENOMEM);
    }

    // describe the frame pool before it is initialized
    frames = (AVHWFramesContext *)dec_worker->hwctx_upload->data;
    frames->width = dec_worker->width;
    frames->height = dec_worker->height;
    frames->sw_format = dec_worker->pix_fmt;
    frames->format = AV_PIX_FMT_NI_QUAD;

    err = av_hwframe_ctx_init(dec_worker->hwctx_upload);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to init AV HW device ctx %d\n", dec_worker->index);
        return err;
    }

    return 0;
}

/**
 * Upload a software frame to the device: get a HW buffer from the worker's
 * frames context into dec_worker->decoded_frame and transfer the content of
 * `dst_sw` into it.
 *
 * @param dec_worker  worker owning the upload context and decoded_frame
 * @param dst_sw      software frame to upload (the source of the transfer)
 * @return a non-negative value on success; a negative AVERROR code on
 *         failure: AVERROR(ENOMEM) when decoded_frame is missing,
 *         AVERROR(EINVAL) when `dst_sw` is NULL or the transfer yields no HW
 *         surface in data[3], otherwise the FFmpeg error. When getting the
 *         buffer or the transfer fails, dec_worker->decoded_frame is freed
 *         and set to NULL.
 */
static int retrieve_hwframe(decoder_worker *dec_worker, AVFrame* dst_sw) {
    int ret = 0;
    niFrameSurface1_t *p_data3;

    if (!dec_worker->decoded_frame)
        return AVERROR(ENOMEM);
    if (!dst_sw)
        return AVERROR(EINVAL);

    av_log(NULL, AV_LOG_DEBUG, "retrieve_hwframe[%d]\n", dec_worker->index);
    ret = av_hwframe_get_buffer(dec_worker->hwctx_upload, dec_worker->decoded_frame, 0);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to get buffer from frames context %d\n", dec_worker->index);
        av_frame_free(&dec_worker->decoded_frame); //implement a free all function instead of this?
        return ret;
    }

    dec_worker->decoded_frame->width = dst_sw->width;
    dec_worker->decoded_frame->height = dst_sw->height;

    ret = av_hwframe_transfer_data(dec_worker->decoded_frame, dst_sw, 0);
    if (ret >= 0 && !dec_worker->decoded_frame->data[3]) {
        // the transfer reported success but produced no surface: make sure
        // the caller sees an error, since the frame is freed below
        ret = AVERROR(EINVAL);
    }
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to transfer sw to hwframe %d\n", dec_worker->index);
        av_frame_free(&dec_worker->decoded_frame); //implement a free all function instead of this?
        return ret;
    }

    p_data3 = (niFrameSurface1_t*)dec_worker->decoded_frame->data[3];
    av_log(NULL, AV_LOG_INFO, "input_image_hw_frames[%d]->data[3] = %p ui16FrameIdx %u\n",
           dec_worker->index, p_data3, p_data3->ui16FrameIdx);
    return ret;
}

/**
 * Open a raw YUV420P input file and prepare the worker for uploading it.
 *
 * Steps performed:
 *  - validate the configured input resolution and open the file,
 *  - derive the number of frames from the file size (only yuv420p, i.e.
 *    width * height * 3 / 2 bytes per frame, is supported),
 *  - create a software scaler when a target resolution is set and differs
 *    from the input resolution, otherwise adopt the input resolution,
 *  - initialise the HW frame uploader,
 *  - create one source pad per filter.
 *
 * When the function fails after the file was opened, the FILE handle stays
 * stored in dec_worker->input_fp.
 *
 * @param dec_worker decoder worker; input_file, input_width, input_height and
 *                   the optional target width/height are read, input_fp,
 *                   frame_cnt, pix_fmt, last_decoded_pts, need_scale, sws_ctx,
 *                   width and height are written
 * @return 0 on success, a negative value on failure
 */
static int open_yuv_file(decoder_worker *dec_worker) {
    int i, ret;
    long file_pos;
    unsigned long inputfile_size, frame_size;
    unsigned int frame_cnt;

    // A zero dimension would make the frame size 0 and the size check below
    // divide by zero, so reject it up front.
    if (dec_worker->input_width <= 0 || dec_worker->input_height <= 0) {
        av_log(NULL, AV_LOG_ERROR, "Invalid input resolution %dx%d\n",
               (int)dec_worker->input_width, (int)dec_worker->input_height);
        return -1;
    }
    // bytes per yuv420p frame
    frame_size = (unsigned long)dec_worker->input_width *
                 (unsigned long)dec_worker->input_height * 3 / 2;

    // Open input yuv file
    dec_worker->input_fp = fopen(dec_worker->input_file, "rb");
    if (!dec_worker->input_fp) {
        av_log(NULL, AV_LOG_ERROR, "Could not to open input file: %s\n", dec_worker->input_file);
        return -1;
    }

    // Check resolution of input file
    if (fseek(dec_worker->input_fp, 0, SEEK_END) != 0 ||
        (file_pos = ftell(dec_worker->input_fp)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not determine size of input file: %s\n",
               dec_worker->input_file);
        return -1;
    }
    inputfile_size = (unsigned long)file_pos;
    if (inputfile_size % frame_size != 0)
    {
      av_log(NULL, AV_LOG_ERROR, "Size of inputfile is not integer multiple of resolution. "
             "Either input file has partial frames, or input resolution is wrong.\n");
      return -1;
    }

    // calculate frame count in yuv file, only support yuv420p now
    frame_cnt = inputfile_size / frame_size;
    av_log(NULL, AV_LOG_INFO, "inputfile size=%lu, number of frames = %u.\n", inputfile_size, frame_cnt);
    if (fseek(dec_worker->input_fp, 0, SEEK_SET) != 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not rewind input file: %s\n", dec_worker->input_file);
        return -1;
    }

    dec_worker->frame_cnt = frame_cnt;
    dec_worker->pix_fmt = AV_PIX_FMT_YUV420P;
    dec_worker->last_decoded_pts = 0;

    // init sws_ctx when input resolution not equal target resolution
    if ((dec_worker->width > 0 && dec_worker->height > 0) &&
        (dec_worker->width != dec_worker->input_width ||
        dec_worker->height != dec_worker->input_height)) {
        dec_worker->need_scale = true;
        dec_worker->sws_ctx = sws_getContext(dec_worker->input_width, dec_worker->input_height,
                        dec_worker->pix_fmt, dec_worker->width, dec_worker->height,
                        AV_PIX_FMT_YUV420P, SWS_POINT, NULL, NULL, NULL);
        if (!dec_worker->sws_ctx) {
            av_log(NULL, AV_LOG_ERROR, "Could not create scaler for input file: %s\n",
                   dec_worker->input_file);
            return AVERROR(EINVAL);
        }
    } else {
        // no (valid) target size requested: frames are uploaded as they are
        dec_worker->width = dec_worker->input_width;
        dec_worker->height = dec_worker->input_height;
        dec_worker->need_scale = false;
    }

    // Without a working uploader no frame can reach the filters, so a
    // failure here must stop the setup.
    ret = init_hwframe_uploader(dec_worker);
    if (ret < 0)
        return ret;

    for (i = 0; i < filter_num; i++) {
        create_new_src_pad(dec_worker->xstack[i], dec_worker,
                            dec_worker->width, dec_worker->height,
                            dec_worker->pix_fmt, av_make_q(1, 25),
                            av_make_q(1, 1), av_make_q(1, 1200000));
    }
    return 0;
}

/**
 * Open a still image input, set up its decoder and prepare the worker for
 * uploading the decoded picture.
 *
 * Steps performed:
 *  - open the file with libavformat and pick the best video stream,
 *  - allocate a decoder context for that stream,
 *  - create a software scaler when the picture is not YUV420P or a target
 *    resolution is set and differs from the picture size; otherwise adopt the
 *    picture size,
 *  - initialise the HW frame uploader and create one source pad per filter,
 *  - open the decoder and publish the contexts in dec_worker.
 *
 * The format context, the decoder context and the InputStream are only stored
 * in dec_worker on success; on failure they are released here.
 *
 * @param dec_worker decoder worker; input_file and the optional target
 *                   width/height are read, need_scale, sws_ctx, width, height,
 *                   ifmt_ctx, input_stream, nb_streams and stream_index are
 *                   written
 * @return 0 on success, a negative AVERROR code on failure
 */
static int open_image_file(decoder_worker *dec_worker) {
    int i, ret, index;
    const AVCodec *dec;
    AVCodecContext *codec_ctx = NULL;
    AVFormatContext* ifmt_ctx = NULL;
    InputStream *input_stream = NULL;

    ret = avformat_open_input(&ifmt_ctx, dec_worker->input_file, NULL, NULL);
    if (ret != 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open image file '%s'\n",
            dec_worker->input_file);
        return ret;
    }
    ret = avformat_find_stream_info(ifmt_ctx, NULL);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "Can't find stream\n");
        goto fail;
    }
    input_stream = av_calloc(1, sizeof(*input_stream));
    if (!input_stream) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    // A negative result is an error code and must not be used as an index.
    index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
    if (index < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't find video stream in '%s'\n",
               dec_worker->input_file);
        ret = index;
        goto fail;
    }

    dec = avcodec_find_decoder(ifmt_ctx->streams[index]->codecpar->codec_id);
    if (!dec) {
        av_log(NULL, AV_LOG_ERROR, "Can't find decoder for image file '%s'\n",
               dec_worker->input_file);
        ret = AVERROR_DECODER_NOT_FOUND;
        goto fail;
    }
    codec_ctx = avcodec_alloc_context3(dec);
    if (!codec_ctx) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    ret = avcodec_parameters_to_context(codec_ctx, ifmt_ctx->streams[index]->codecpar);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't copy decoder parameters\n");
        goto fail;
    }

    if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P ||
        ((dec_worker->width > 0 && dec_worker->height > 0) &&
        (dec_worker->width != codec_ctx->width ||
        dec_worker->height != codec_ctx->height))) {
        dec_worker->need_scale = true;

        // only a pixel format conversion is needed: keep the picture size
        if (dec_worker->width == 0 || dec_worker->height == 0) {
            dec_worker->width = codec_ctx->width;
            dec_worker->height = codec_ctx->height;
        }
        // init sws_ctx
        dec_worker->sws_ctx = sws_getContext(codec_ctx->width, codec_ctx->height,
                        codec_ctx->pix_fmt, dec_worker->width, dec_worker->height,
                        AV_PIX_FMT_YUV420P, SWS_POINT, NULL, NULL, NULL);
        if (!dec_worker->sws_ctx) {
            av_log(NULL, AV_LOG_ERROR, "Can't create scaler for image file '%s'\n",
                   dec_worker->input_file);
            ret = AVERROR(EINVAL);
            goto fail;
        }
    } else {
        dec_worker->width = codec_ctx->width;
        dec_worker->height = codec_ctx->height;
        dec_worker->need_scale = false;
    }

    ret = init_hwframe_uploader(dec_worker);
    if (ret < 0)
        goto fail;

    // NOTE(review): the pads are created with the decoder's size and pixel
    // format, not with the (possibly scaled) dec_worker->width/height that
    // open_yuv_file passes - confirm this is what create_new_src_pad expects.
    for (i = 0; i < filter_num; i++) {
        create_new_src_pad(dec_worker->xstack[i], dec_worker,
                            codec_ctx->width, codec_ctx->height,
                            codec_ctx->pix_fmt, codec_ctx->framerate,
                            codec_ctx->sample_aspect_ratio,
                            ifmt_ctx->streams[index]->time_base);
    }

    // Open codec
    ret = avcodec_open2(codec_ctx, dec, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not open codec\n");
        goto fail;
    }
    input_stream->dec_ctx = codec_ctx;
    dec_worker->ifmt_ctx = ifmt_ctx;
    dec_worker->input_stream = input_stream;
    dec_worker->nb_streams = ifmt_ctx->nb_streams;
    dec_worker->stream_index = index;
    return 0;

fail:
    // Nothing below was handed to dec_worker yet, so release it here.
    avcodec_free_context(&codec_ctx);
    av_free(input_stream);
    avformat_close_input(&ifmt_ctx);
    return ret;
}

/**
 * Open a media file for decoding and set up the decoder of its video stream.
 *
 * Only video is handled. When the file holds several video streams the LAST
 * one is selected (the search loop does not stop at the first match).
 *
 * For decoders whose name contains "quadra" the device id, the decoder
 * parameters (with "out=hw" appended unless already present) and a keep-alive
 * timeout of 30 are set as private options before the decoder is opened.
 *
 * The format context, the decoder context and the InputStream are only stored
 * in dec_worker on success; on failure they are released here.
 *
 * @param dec_worker decoder worker; devid and decoder_params are read,
 *                   ifmt_ctx, video_index, input_stream and nb_streams are
 *                   written
 * @param codec_name decoder to use, or NULL to pick one from the stream's
 *                   codec id
 * @param input_file path/URL of the input
 * @return 0 on success, a negative value on failure
 */
static int open_hw_input_file(decoder_worker *dec_worker,
                              const char *codec_name, const char *input_file)
{
    int ret;
    unsigned int i, j;
    int video_index = -1;
    AVFormatContext *ifmt_ctx = NULL;
    InputStream *input_stream = NULL;
    AVCodecContext *codec_ctx = NULL;
    const AVCodec *dec;
    AVStream *stream;

    if ((ret = avformat_open_input(&ifmt_ctx, input_file, NULL, NULL)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
        return ret;
    }

    if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
        goto fail;
    }

    input_stream = av_calloc(1, sizeof(*input_stream));
    if (!input_stream) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    // handle video stream only for now
    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
        if (ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            video_index = (int)i;
        }
    }
    if (video_index < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find video stream\n");
        ret = -1;
        goto fail;
    }

    // From here on the stream is identified by video_index; the loop counter
    // i equals nb_streams and must not be used in messages.
    stream = ifmt_ctx->streams[video_index];

    if (codec_name) {
        dec = avcodec_find_decoder_by_name(codec_name);
        if (dec && stream->codecpar->codec_id != dec->id) {
            av_log(NULL, AV_LOG_ERROR, "codec %s does not match with "
                    "stream id %d\n", codec_name, stream->codecpar->codec_id);
            ret = AVERROR_DECODER_NOT_FOUND;
            goto fail;
        }
    } else {
        dec = avcodec_find_decoder(stream->codecpar->codec_id);
    }

    if (!dec) {
        // codec_name may be NULL here; fall back to the codec id's name
        av_log(NULL, AV_LOG_ERROR, "Failed to find decoder %s for stream "
                "#%u\n",
                codec_name ? codec_name : avcodec_get_name(stream->codecpar->codec_id),
                (unsigned int)video_index);
        ret = AVERROR_DECODER_NOT_FOUND;
        goto fail;
    }

    codec_ctx = avcodec_alloc_context3(dec);
    if (!codec_ctx) {
        av_log(NULL, AV_LOG_ERROR, "Failed to allocate the decoder context for stream #%u\n",
               (unsigned int)video_index);
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    ret = avcodec_parameters_to_context(codec_ctx, stream->codecpar);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to copy decoder parameters to input decoder context "
                "for stream #%u\n", (unsigned int)video_index);
        goto fail;
    }

    av_log(NULL, AV_LOG_DEBUG, "#%d: stream.time_base=%d/%d, avg_frame_rate=%d/%d.\n",
            video_index, stream->time_base.num, stream->time_base.den,
            stream->avg_frame_rate.num, stream->avg_frame_rate.den);

    // process quadra parameters
    if (strstr(dec->name, "quadra")) {
        char str_devid[16] = {0};   // large enough for any int, sign included
        char dec_params[256] = {0};
        snprintf(str_devid, sizeof(str_devid), "%d", dec_worker->devid);
        av_opt_set(codec_ctx->priv_data, "dec", str_devid, 0);

        // by default decode in HW frame and decode session timeout 30s
        if (strlen(dec_worker->decoder_params)) {
            snprintf(dec_params, sizeof(dec_params), "%s",
                        dec_worker->decoder_params);
        }
        if (! strstr(dec_params, "out=hw")) {
            size_t used = strlen(dec_params);

            // append, separated by ':' when user parameters are present
            snprintf(dec_params + used, sizeof(dec_params) - used, "%s",
                     used ? ":out=hw" : "out=hw");
        }
        av_opt_set(codec_ctx->priv_data, "xcoder-params", dec_params, 0);

        av_opt_set(codec_ctx->priv_data, "keep_alive_timeout", "30", 0);
    }

    if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
        codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, stream, NULL);

    ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to copy decoder parameters from codec context for stream #%u\n",
               (unsigned int)video_index);
        goto fail;
    }

    if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        for (j = 0; j < filter_num; j++) {
            create_new_src_pad(dec_worker->xstack[j], dec_worker,
                                codec_ctx->width, codec_ctx->height,
                                codec_ctx->pix_fmt, codec_ctx->framerate,
                                codec_ctx->sample_aspect_ratio,
                                stream->time_base);
        }
    }

    /* Open decoder */
    if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
            codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
        ret = avcodec_open2(codec_ctx, dec, NULL);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Failed to open decoder for stream #%u\n",
                   (unsigned int)video_index);
            goto fail;
        }
    }
    input_stream->dec_ctx = codec_ctx;
    // Both "last decoded" timestamps start out unset; decoder_get_frame
    // compares each of them against AV_NOPTS_VALUE.
    input_stream->last_decoded_pts = AV_NOPTS_VALUE;
    input_stream->last_decoded_dts = AV_NOPTS_VALUE;

    av_dump_format(ifmt_ctx, 0, input_file, 0);
    dec_worker->ifmt_ctx = ifmt_ctx;
    dec_worker->video_index = video_index;
    dec_worker->input_stream = input_stream;
    dec_worker->nb_streams = ifmt_ctx->nb_streams;
    return 0;

fail:
    // Nothing below was handed to dec_worker yet, so release it here.
    avcodec_free_context(&codec_ctx);
    av_free(input_stream);
    avformat_close_input(&ifmt_ctx);
    return ret;
}

/**
 * Read one planar 4:2:0 picture from dec_worker->input_fp into frame.
 *
 * The planes are read in the order Y, U, V. Every row is read with its own
 * fread() call so that the file's tightly packed rows land at the frame's
 * linesize stride. The Y plane has input_height rows of input_width bytes,
 * the U and V planes have half as many rows of half the width.
 *
 * @param dec_worker decoder worker providing input_fp, input_width and
 *                   input_height
 * @param frame      destination frame; data[0..2] and linesize[0..2] are used
 * @return 0 when the whole picture was read, -1 as soon as one row could not
 *         be read completely
 */
static int read_yuv_frame(decoder_worker *dec_worker, AVFrame *frame) {
    unsigned int got;
    int plane, row;

    for (plane = 0; plane < 3; plane++) {
        // plane 0 is luma (full size), planes 1 and 2 are chroma (half size)
        for (row = 0;
             row < (plane ? dec_worker->input_height / 2 : dec_worker->input_height);
             row++) {
            uint8_t *line = &frame->data[plane][0] + row * frame->linesize[plane];

            got = fread(line,
                        plane ? dec_worker->input_width / 2 : dec_worker->input_width,
                        1, dec_worker->input_fp);
            if (got == 1)
                continue;

            switch (plane) {
            case 0:
                av_log(NULL, AV_LOG_ERROR, "Failed to read Y. read_size=%u.\n", got);
                break;
            case 1:
                av_log(NULL, AV_LOG_ERROR, "Failed to read U. read_size=%u.\n", got);
                break;
            default:
                av_log(NULL, AV_LOG_ERROR, "Failed to read V. read_size=%u.\n", got);
                break;
            }
            return -1;
        }
    }
    return 0;
}

/**
 * Park the calling decoder thread until the reconfig condition is signalled.
 *
 * Before going to sleep the frame condition of each of this decoder's stack
 * entries is signalled once, so that a filter thread already waiting on it is
 * released. The thread then waits on filter_common->reconfig_cond; the global
 * reconfig_wait_count is incremented for the duration of that wait.
 *
 * @param dec_worker decoder worker that has to wait
 */
static void dec_reconfig_wait(decoder_worker *dec_worker)
{
    pthread_mutex_t *common_lock;
    int idx = 0;

    // Release filter threads that may already be blocked on one of our entries
    while (idx < filter_num) {
        pthread_mutex_t *entry_lock = &dec_worker->stack_entry[idx]->lock;

        pthread_mutex_lock(entry_lock);
        pthread_cond_signal(&dec_worker->stack_entry[idx]->frame_cond);
        pthread_mutex_unlock(entry_lock);
        idx++;
    }

    common_lock = &dec_worker->filter_common->lock;
    pthread_mutex_lock(common_lock);
    av_log(NULL, AV_LOG_INFO, "%s decoder=%d reconfig wait\n", __func__, dec_worker->index);
    // the counter is only touched while the common lock is held
    reconfig_wait_count++;
    pthread_cond_wait(&dec_worker->filter_common->reconfig_cond, common_lock);
    reconfig_wait_count--;
    av_log(NULL, AV_LOG_INFO, "%s decoder=%d reconfig finish. force_exit %d\n", __func__, dec_worker->index, dec_worker->force_exit);
    pthread_mutex_unlock(common_lock);
}

/**
 * Main decode function: feed one packet to the decoder and forward every
 * frame it produces to the filters.
 *
 * For each received frame the function
 *  - blocks while a filter reconfiguration is pending,
 *  - offsets the timestamp by dec_worker->last_decoded_pts and synthesises a
 *    pts when the decoder provided none,
 *  - takes an extra reference (buffered_frame) that is parked in
 *    wait_to_free_list,
 *  - sends the frame to every filter that is not flushed yet,
 *  - blocks on list_cond when the wait list has reached NI_MAX_DEC_CAPACITY,
 *  - checks the result reported by each filter worker.
 *
 * @param dec_worker decoder worker
 * @param packet     packet to decode, or NULL to drain the decoder
 * @return 0 when the decoder wants more input, AVERROR_EOF when the decoder is
 *         drained, a filter reported end of stream or force_exit was seen
 *         after a reconfig wait, another negative value on error
 */
static int decoder_get_frame(decoder_worker *dec_worker, AVPacket *packet)
{
    int i, ret = 0;
    InputStream *input_stream = dec_worker->input_stream;
    niFrameSurface1_t* p_data3;
    AVFrame *frame;
    bool all_filters_flushed = false;
    av_log(NULL, AV_LOG_DEBUG, "%s worker %d pkt %p\n",
           __func__, dec_worker->index, packet);

    ret = avcodec_send_packet(input_stream->dec_ctx, packet);
    if (ret < 0 && ret != AVERROR_EOF) {
        av_log(NULL, AV_LOG_ERROR, "decoder %d: failed to send packet. ret %d\n",
               dec_worker->index, ret);
        goto end;
    }
    frame = dec_worker->decoded_frame;
    // keep pulling frames; when draining (packet == NULL) also continue after
    // a negative ret from avcodec_send_packet
    while (!dec_worker->force_exit && !dec_worker->should_exit && (ret >= 0 || !packet)) {
        if (dec_worker->filter_common->reconfig == DO_RECONFIG ||
            dec_worker->filter_common->reconfig ==  WAIT_RECONFIG)
        {
            dec_reconfig_wait(dec_worker);

            if (dec_worker->force_exit)
            {
                return AVERROR_EOF;
            }
            // In case wait was interrupted during reconfig, continue waiting. This will be cleaned up in filter initialization
            pthread_mutex_lock(&dec_worker->list_lock);
            if (get_fifo_size(dec_worker->wait_to_free_list) == NI_MAX_DEC_CAPACITY)
            {
                av_log(NULL, AV_LOG_INFO, "%s continue waiting after reconfig. before recycle list size %d entry size %d index %d\n", __func__,
                       get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
                pthread_cond_wait(&dec_worker->list_cond, &dec_worker->list_lock);
                av_log(NULL, AV_LOG_INFO, "%s done waiting after reconfig. after recycle list size %d entry size %d index %d. reconfig %d\n", __func__,
                       get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index, dec_worker->filter_common->reconfig);
            }
            pthread_mutex_unlock(&dec_worker->list_lock);
        }
        av_log(NULL, AV_LOG_DEBUG, "dec_worker->force_exit %d ret %d packet %p\n", dec_worker->force_exit, ret, packet);
        ret = avcodec_receive_frame(input_stream->dec_ctx, frame);
        if (ret < 0) {
            if (ret != AVERROR(EAGAIN)) {
                if (ret != AVERROR_EOF) {
                    av_log(NULL, AV_LOG_ERROR, "decoder %d failed to receive "
                           "frame, ret %d\n", dec_worker->index, ret);
                } else
                    av_log(NULL, AV_LOG_DEBUG, "decoder %d got AVERROR_EOF\n",
                           dec_worker->index);
            } else {
                // "need more input" is not an error for the caller
                av_log(NULL, AV_LOG_DEBUG, "avcodec_receive_frame ret EAGAIN but changed to 0 and return\n");
                ret = 0;
            }
            goto end;
        }

        // once we got a frame, save this one
        // so that the last frame of this decoding can continuously be used to
        // feed into xstack together with other longer inputs

        av_log(NULL, AV_LOG_DEBUG, "%s %d frame->best_effort_timestamp %ld pts %ld "
               "will be adjusted by: +=  last_decoded_pts %ld to = %ld \n",
               __func__, dec_worker->index, frame->best_effort_timestamp, frame->pts,
               dec_worker->last_decoded_pts, frame->best_effort_timestamp + dec_worker->last_decoded_pts);
        //If new input is added via reconfig then catch up the pts of this thread
        //by adding the last decoded pts of the older input
#if IS_FFMPEG_61_AND_ABOVE
        frame->time_base = input_stream->dec_ctx->pkt_timebase;
#endif
        // Only offset a real timestamp: adding the offset to AV_NOPTS_VALUE
        // would turn the "unset" marker into a bogus pts and the fallback
        // below would never run.
        if (frame->best_effort_timestamp != AV_NOPTS_VALUE)
            frame->best_effort_timestamp += dec_worker->last_decoded_pts;
        frame->pts = frame->best_effort_timestamp;
        if (frame->pts == AV_NOPTS_VALUE) {
            // no timestamp from the decoder: continue from the previous frame
#if IS_FFMPEG_70_AND_ABOVE
            frame->pts = input_stream->last_decoded_pts == AV_NOPTS_VALUE ? 0 : input_stream->last_decoded_pts + frame->duration;
#else
            frame->pts = input_stream->last_decoded_pts == AV_NOPTS_VALUE ? 0 : input_stream->last_decoded_pts + frame->pkt_duration;
#endif
            av_log(NULL, AV_LOG_DEBUG, "%s frame->pts == AV_NOPTS_VALUE, adjust to %ld\n", __func__, frame->pts);
        }

        if (packet) {
            av_log(NULL, AV_LOG_DEBUG, "decoded frame: stream=%d, pts=%ld, dts=%ld, "
                   "duration=%ld, best_effort=%ld\n",
                    dec_worker->index, frame->pts, frame->pkt_dts,
#if IS_FFMPEG_70_AND_ABOVE
                   frame->duration,
#else
                   frame->pkt_duration,
#endif
                   frame->best_effort_timestamp);
        } else {
            av_log(NULL, AV_LOG_DEBUG, "flush decoded frame: stream=%d, pts=%ld, dts=%ld, "
                   "duration=%ld, best_effort=%ld\n",
                    dec_worker->index, frame->pts, frame->pkt_dts,
#if IS_FFMPEG_70_AND_ABOVE
                   frame->duration,
#else
                   frame->pkt_duration,
#endif
                   frame->best_effort_timestamp);
        }

        // repeated timestamps are only reported, not treated as fatal
        if ((input_stream->last_decoded_pts != AV_NOPTS_VALUE) &&
            (frame->pts == input_stream->last_decoded_pts)) {
            av_log(NULL, AV_LOG_ERROR, "flush decoder: same pts!!!\n");
        }
        if ((input_stream->last_decoded_dts != AV_NOPTS_VALUE) &&
            (frame->pkt_dts == input_stream->last_decoded_dts)) {
            av_log(NULL, AV_LOG_ERROR, "flush decoder: same dts!!!\n");
        }

        input_stream->last_decoded_pts = frame->pts;
        input_stream->last_decoded_dts = frame->pkt_dts;

        pthread_mutex_lock(&dec_worker->list_lock);
        if (get_fifo_size(dec_worker->wait_to_free_list) >= NI_MAX_DEC_CAPACITY) {
            av_log(NULL, AV_LOG_ERROR, "wait list frame > %d\n", NI_MAX_DEC_CAPACITY);
            ret = -1;
            pthread_mutex_unlock(&dec_worker->list_lock);
            goto end;
        }
        pthread_mutex_unlock(&dec_worker->list_lock);

        dec_worker->decoded_frames++;

        // call av_frame_ref to increase buffer ref count / preserve buffer
        dec_worker->buffered_frame = av_frame_alloc();
        if (!dec_worker->buffered_frame)
        {
            av_log(NULL, AV_LOG_ERROR, "%s: av_frame_alloc ERROR !!!\n", __func__);
            ret = AVERROR(ENOMEM);
            goto end;
        }
        ret = av_frame_ref(dec_worker->buffered_frame, frame);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "%s: av_frame_ref ERROR !!!\n", __func__);
            // the empty frame was never handed to anyone: release it here
            av_frame_free(&dec_worker->buffered_frame);
            goto end;
        }
        //check if all filters are flushed
        all_filters_flushed = true;
        for (i = 0; i < filter_num; i++) {
            if (!get_filter_flush_state(xstack_workers[i])) {
                all_filters_flushed = false;
                break;
            }
        }
        if (all_filters_flushed) {
            av_log(NULL, AV_LOG_DEBUG, "%s all filters flushed, skip send frame\n", __func__);
            // The extra reference is not appended to the wait list on this
            // path, so nobody else would release it: drop it now.
            av_frame_free(&dec_worker->buffered_frame);
            goto filters_flushed;
        }

        pthread_mutex_lock(&dec_worker->list_lock);
        //add the buffered frame to wait to free
        ret = list_append_frame(dec_worker->wait_to_free_list, dec_worker->buffered_frame);
        if (ret < 0) {
            pthread_mutex_unlock(&dec_worker->list_lock);
            av_log(NULL, AV_LOG_ERROR, "%s: add to wait free list failed !!!\n", __func__);
            goto end;
        }
        av_log(NULL, AV_LOG_DEBUG, "append list size %d index %d\n",
               get_fifo_size(dec_worker->wait_to_free_list), dec_worker->index);
        pthread_mutex_unlock(&dec_worker->list_lock);

        for (i = 0; i < filter_num; i++)
        {
            //don't send frame to filter if it is flushed
            if (get_filter_flush_state(xstack_workers[i]))
                continue;
            ret = send_decode_frame(dec_worker->stack_entry[i], frame);
            if (ret < 0)
            {
                av_log(NULL, AV_LOG_ERROR, "send_decode_frame failed %d\n", ret);
                goto end;
            }
        }

        pthread_mutex_lock(&dec_worker->list_lock);
        // wait dec hw frame free
        // every decoder thread can preserve at most NI_MAX_DEC_CAPACITY hw
        // frames at the same time; if the list is full we need to wait until
        // the filter thread unrefs a hw frame and signals list_cond.
        // The filter thread keeps its ref to the hw frames while doing xstack;
        // it unrefs the hw frame in the list and signals once the next hw
        // frame is sent to the filter
        if (get_fifo_size(dec_worker->wait_to_free_list) > NI_MAX_DEC_CAPACITY) {
            pthread_mutex_unlock(&dec_worker->list_lock);
            av_log(NULL, AV_LOG_ERROR, "wait list frame > %d\n", NI_MAX_DEC_CAPACITY);
            ret = -1;
            goto end;
        } else if (get_fifo_size(dec_worker->wait_to_free_list) == NI_MAX_DEC_CAPACITY) {
            av_log(NULL, AV_LOG_DEBUG, "before recycle list size %d entry size %d index %d\n",
                   get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
            pthread_cond_wait(&dec_worker->list_cond, &dec_worker->list_lock);
            av_log(NULL, AV_LOG_DEBUG, "after recycle list size %d entry size %d index %d. reconfig %d\n",
                   get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index, dec_worker->filter_common->reconfig);
        }
        pthread_mutex_unlock(&dec_worker->list_lock);

filters_flushed:
        p_data3 = NULL;
        if (frame->buf[0]) {
            p_data3 = (niFrameSurface1_t*)(frame->buf[0]->data);
        }
        av_log(NULL, AV_LOG_DEBUG, "%s while loop, before unref frame %p "
               "ui16FrameIdx = [%d] ref_cnt %d\n", __func__, frame,
               p_data3 ? p_data3->ui16FrameIdx : -1,
               p_data3 ? frame_get_ref_count(frame) : -1);

        // the decoder's frame is reused for the next avcodec_receive_frame()
        av_frame_unref(frame);

        for (i = 0; i < filter_num; i++) {
            ret = get_filter_ret(xstack_workers[i]);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "%s decoder %d process_filter_graph "
                    "return < 0: %d, could be eos.\n",
                    __func__, dec_worker->index, ret);
                if (AVERROR_EOF == ret) {
                    dec_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
                }
                goto end;
            } else {
                av_log(NULL, AV_LOG_DEBUG, "%s decoder %d process_filter_graph "
                    "return > 0: %d.\n",
                    __func__, dec_worker->index, ret);
            }
        }
    }

end:
    return ret;
}

/**
 * Handle end-of-stream of one decoder.
 *
 * The function
 *  1. counts this decoder as finished in every filter worker
 *     (input_eos_num) and sets eos_flag on all of its stack entries,
 *  2. when at least one filter does not use "shortest", waits on the entry's
 *     eos_cond until all source pads of that filter have reached EOS; a
 *     pending filter reconfiguration interrupts the wait, after which the
 *     EOS counters are incremented again and the entry's last frame is
 *     re-sent,
 *  3. marks the worker as exited and sends its last frame with pts -1 to
 *     every filter that is not flushed,
 *  4. when it is the one that observed "all inputs finished", tells picture
 *     decoders to exit and sends the last frame with pts -1 on behalf of the
 *     other decoders as well.
 *
 * @param dec_worker decoder worker that reached end of stream
 */
static void process_decoder_eos(decoder_worker *dec_worker)
{
    int i, j, ret, eos_num;
    filter_worker *f;
    ni_xstack_entry_t *e;
    ni_xstack_entry_t *entry;
    decoder_worker *dec_other;

    for (i = 0; i < filter_num; i++) {
        f = xstack_workers[i];
        pthread_mutex_lock(&f->filter_lock);
        eos_num = ++f->input_eos_num;
        pthread_mutex_unlock(&f->filter_lock);

        av_log(NULL, AV_LOG_INFO, "%s %d , eos_num: %d xstack shortest: %d\n",
            __func__, dec_worker->index, eos_num, f->shortest);
    }

    // for shortest=1, it will exit if one of decoder thread get eof
    // for shortest=0, it will exit until all decoder threads get eof
    bool exit_now = true;
    for (i = 0; i < filter_num; i++)
    {
        dec_worker->stack_entry[i]->eos_flag = 1;
    }
    for (i = 0; i < filter_num; i++) {
        if (!xstack_workers[i]->shortest) {
            exit_now = false;
            break;
        }
    }
    if (exit_now || dec_worker->force_exit) {
        av_log(NULL, AV_LOG_DEBUG, "goto exit now\n");
        goto exit;
    }

    do {
        for (i = 0; i < filter_num; i++) {
            if (dec_worker->force_exit) {
                goto exit;
            }
            e = dec_worker->stack_entry[i];
            if (! xstack_workers[i]->shortest) {
                av_log(NULL, AV_LOG_DEBUG, "%s %d EOS. Waiting for the longest input\n",
                    __func__, dec_worker->index);

                av_log(NULL, AV_LOG_DEBUG, "dec index %d fifo %d, flag %d\n",
                       dec_worker->index, get_fifo_size(e->dec_frame_fifo), e->eos_flag);
                pthread_mutex_lock(&e->lock);
                while (get_decoder_eos_num_total(xstack_workers[i]) != xstack_workers[i]->num_src_pads)
                {
                    pthread_cond_wait(&e->eos_cond, &e->lock);
                    // if filter needs to do reconfig, decoder threads need to block until
                    // reconfig is finished
                    if (dec_worker->filter_common->reconfig == DO_RECONFIG ||
                        dec_worker->filter_common->reconfig == WAIT_RECONFIG)
                    {
                        // the entry lock is released for the whole reconfig
                        // wait and re-taken at the end of this branch
                        pthread_mutex_unlock(&e->lock);

                        dec_reconfig_wait(dec_worker);
                        if (dec_worker->force_exit)
                            goto exit;

                        //reconfig resets eos num
                        for (j = 0; j < filter_num; j++)
                        {
                            f = xstack_workers[j];
                            pthread_mutex_lock(&f->filter_lock);
                            eos_num = ++f->input_eos_num;
                            pthread_mutex_unlock(&f->filter_lock);

                            av_log(NULL, AV_LOG_INFO, "%s %d , eos_num: %d after reconfig\n",
                                   __func__, dec_worker->index, eos_num);
                        }
                        //after reconfig send a frame to initialize the filter
                        ret = send_decode_frame(e, e->last_frame);
                        if (ret < 0)
                        {
                            av_log(NULL, AV_LOG_ERROR, "add temp frame failed\n");
                            return;
                        }
                        pthread_mutex_lock(&e->lock);
                    }
                }
                pthread_mutex_unlock(&e->lock);
                // all decoder threads have finished, send one last frame with pts = -1 to exit
                exit_now = true;
                goto exit;
            }
        }
    } while (get_decoder_eos_num_total(xstack_workers[0]) < xstack_workers[0]->num_src_pads);

exit:
    av_log(NULL, AV_LOG_DEBUG, "%s %d, exit_now %d\n", __func__, dec_worker->index, exit_now);
    pthread_mutex_lock(&dec_worker->frame_lock);
    // keep an exit state that was already set (e.g. THREAD_STATE_EXIT_ISSUED)
    if (!dec_worker->should_exit) {
        dec_worker->should_exit = THREAD_STATE_EXIT_PROCESSED;
    }
    pthread_mutex_unlock(&dec_worker->frame_lock);

    /*
    Unlock filter for this input in case it started waiting before seeing the eos flag
    */
    for (j = 0; j < filter_num; j++)
    {
        entry = dec_worker->stack_entry[j];
        if (get_filter_flush_state(xstack_workers[j]))
        {
            av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d is flushed, skip sending last frame\n", __func__, dec_worker->index, j);
            continue;
        }
        pthread_mutex_lock(&entry->lock);
        // pts == -1 marks a last frame that has already been sent
        if (entry->last_frame && entry->last_frame->buf[0] && entry->last_frame->pts != -1)
        {
            av_log(NULL, AV_LOG_DEBUG, "%s dec%d sending last frame with pts -1 to filter %d\n", __func__,
                   dec_worker->index, j);
            entry->last_frame->pts = -1;
            pthread_mutex_unlock(&entry->lock);
            ret = send_decode_frame(entry, entry->last_frame);
            if (ret < 0)
            {
                av_log(NULL, AV_LOG_ERROR, "add temp frame failed\n");
                return;
            }
        }
        else
        {
            // The entry belongs to this decoder, so report this decoder's
            // index (the loop variable i is stale at this point).
            av_log(NULL, AV_LOG_DEBUG, "%s dec%d not sending last frame with pts -1 to dec%d of filter %d, someone else already did "
                                       "or buf freed: last_frame  %p buf[0] freed %p\n",
                   __func__, dec_worker->index, dec_worker->index, j, entry->last_frame, entry->last_frame ? entry->last_frame->buf[0] : NULL);
            pthread_mutex_unlock(&entry->lock);
        }
    }

    // send the last frame to filter to tell filter all decoder threads ready to exit
    if (exit_now && get_decoder_eos_num_total(xstack_workers[0]) == xstack_workers[0]->num_src_pads) {
        for (i = 0; i < active_decoder_workers; i++) {
            dec_other = decoder_workers[i];
            if (dec_other->type == SW_PICTURE) {
                // picture inputs are only flagged to exit; no last frame is
                // sent for them here
                pthread_mutex_lock(&dec_other->frame_lock);
                dec_other->should_exit = THREAD_STATE_EXIT_ISSUED;
                for (j = 0; j < filter_num; j++) {
                    dec_other->stack_entry[j]->eos_flag = 1;
                }
                pthread_mutex_unlock(&dec_other->frame_lock);
                av_log(NULL, AV_LOG_DEBUG, "%s %d: signal image decoder %d to exit.\n",
                       __func__, dec_worker->index, dec_other->index);
                continue;
            }
            for (j = 0; j < filter_num; j++) {
                if (get_filter_flush_state(xstack_workers[j]))
                {
                    av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d is flushed, skip sending last frame\n", __func__, dec_worker->index, j);
                    continue;
                }
                entry = dec_other->stack_entry[j];
                pthread_mutex_lock(&entry->lock);
                if (entry->last_frame && entry->last_frame->buf[0] && entry->last_frame->pts != -1)
                {
                    av_log(NULL, AV_LOG_DEBUG, "%s dec%d sending last frame with pts -1 to dec%d of filter %d\n", __func__,
                           dec_worker->index, i, j);
                    entry->last_frame->pts = -1;
                    pthread_mutex_unlock(&entry->lock);
                    ret = send_decode_frame(entry, entry->last_frame);
                    if (ret < 0)
                    {
                        av_log(NULL, AV_LOG_ERROR, "add temp frame failed\n");
                        return;
                    }
                }
                else
                {
                    av_log(NULL, AV_LOG_DEBUG, "%s dec%d not sending last frame with pts -1 to dec%d of filter %d, someone else already did "
                                               "or buf freed: last_frame  %p buf[0] freed %p\n",
                           __func__, dec_worker->index, i, j, entry->last_frame, entry->last_frame ? entry->last_frame->buf[0] : NULL);
                    pthread_mutex_unlock(&entry->lock);
                }
            }
        }
    }
}

/**
 * @brief decoder loop for sw (raw YUV) input
 *
 * Reads up to dec_worker->frame_cnt frames with read_yuv_frame(), optionally
 * rescales each one with sws_scale(), uploads it through retrieve_hwframe()
 * and sends the resulting frame to every filter that is not flushed. A
 * reference to every sent frame is appended to wait_to_free_list; when that
 * list reaches NI_MAX_DEC_CAPACITY the loop blocks on list_cond.
 * After the loop the iteration counter is decremented and, unless more
 * iterations remain or force_exit is set, process_decoder_eos() is called.
 *
 * All temporary frames allocated here are released on every exit path.
 *
 * @param dec_worker decoder worker context
 *
 * @return negative value on failure, otherwise the (non-negative) result of
 *         the last operation performed
 */
static int sw_decoder_run(decoder_worker *dec_worker) {
    int i, j, ret;
    int list_size;
    AVFrame *frame, *src;
    // dst must start as NULL: the cleanup code may run before it is assigned
    AVFrame *dst = NULL;
    bool all_filters_flushed = false;

    // Allocate frame object
    src = av_frame_alloc();
    if (!src)
    {
        av_log(NULL, AV_LOG_ERROR, "Could not allocate src AVFrame\n");
        return -1;
    }
    src->format = dec_worker->pix_fmt;
    src->width  = dec_worker->input_width;
    src->height = dec_worker->input_height;

    ret = av_frame_get_buffer(src, 32);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "Could not allocate the src AVFrame buffers ret = %d\n", ret);
        goto end;
    }
    av_log(NULL, AV_LOG_INFO, "Input line sizes: Y=%d, U=%d, V=%d count %d.\n", src->linesize[0],
           src->linesize[1], src->linesize[2], dec_worker->frame_cnt);

    // Make sure the frame data is writable
    ret = av_frame_make_writable(src);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "av_frame_make_writable() error %d.\n", ret);
        goto end;
    }

    if (dec_worker->need_scale) {
        dst = av_frame_alloc();
        if (!dst) {
            av_log(NULL, AV_LOG_ERROR, "Could not allocate dst AVFrame\n");
            // report the failure instead of returning the previous success code
            ret = AVERROR(ENOMEM);
            goto end;
        }
        dst->format = AV_PIX_FMT_YUV420P;
        dst->width = dec_worker->width;
        dst->height = dec_worker->height;
        ret = av_frame_get_buffer(dst, 32);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "%s Could not allocate the dst AVFrame buffers ret = %d\n",
                   __func__, ret);
            goto end;
        }
    }

    // read yuv frame file and send to list
    for (i = 0; i < dec_worker->frame_cnt; i++) {
        if (dec_worker->should_exit || dec_worker->encode_exited || dec_worker->force_exit) {
            break;
        }
        ret = read_yuv_frame(dec_worker, src);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "read yuv frame fail index %d\n", i);
            goto end;
        }

        if (dec_worker->need_scale) {
            sws_scale(dec_worker->sws_ctx, (const uint8_t* const*)src->data,
                    src->linesize, 0, src->height, dst->data, dst->linesize);
        } else {
            // no scaling: upload the source frame directly (dst only aliases src)
            dst = src;
        }

        pthread_mutex_lock(&dec_worker->list_lock);
        if (get_fifo_size(dec_worker->wait_to_free_list) >= NI_MAX_DEC_CAPACITY) {
            // never leave the function with list_lock held
            pthread_mutex_unlock(&dec_worker->list_lock);
            av_log(NULL, AV_LOG_ERROR, "wait list frame > %d\n", NI_MAX_DEC_CAPACITY);
            ret = -1;
            goto end;
        }
        pthread_mutex_unlock(&dec_worker->list_lock);

        // upload the sw frame to hw frame, place in dec_worker->decoded_frame
        ret = retrieve_hwframe(dec_worker, dst);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "retrieve hwframe[%d] for image failed\n", i);
            goto end;
        }

        frame = dec_worker->decoded_frame;
        frame->pts = DEFAULT_YUV_PTS_STEP + dec_worker->last_decoded_pts;
        dec_worker->decoded_frames++;
        all_filters_flushed = true;
        for (j = 0; j < filter_num; j++) {
            // don't send frame to filter if it is flushed
            if (get_filter_flush_state(xstack_workers[j]))
                continue;
            ret = send_decode_frame(dec_worker->stack_entry[j], frame);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "send_decode_frame failed %d\n", ret);
                goto end;
            }
            all_filters_flushed = false;
        }
        if (all_filters_flushed) {
            av_log(NULL, AV_LOG_DEBUG, "%s all filters flushed, skip send frame\n", __func__);
            goto filters_flushed;
        }

        // call av_frame_ref to increase buffer ref count / preserve buffer
        dec_worker->buffered_frame = av_frame_alloc();
        if (!dec_worker->buffered_frame) {
            av_log(NULL, AV_LOG_ERROR, "%s: Could not allocate buffered AVFrame\n", __func__);
            ret = AVERROR(ENOMEM);
            goto end;
        }
        ret = av_frame_ref(dec_worker->buffered_frame, frame);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "%s: av_frame_ref ERROR !!!\n", __func__);
            // the frame was never appended to any list, release it here
            av_frame_free(&dec_worker->buffered_frame);
            goto end;
        }
        pthread_mutex_lock(&dec_worker->list_lock);
        //add the buffered frame to wait to free
        ret = list_append_frame(dec_worker->wait_to_free_list, dec_worker->buffered_frame);
        if (ret < 0) {
            pthread_mutex_unlock(&dec_worker->list_lock);
            av_log(NULL, AV_LOG_ERROR, "%s: add to wait free list failed !!!\n", __func__);
            // NOTE(review): buffered_frame ownership after a failed append is not
            // visible here, so it is intentionally left untouched - confirm.
            goto end;
        }
        // sample the size once while the lock is held
        list_size = get_fifo_size(dec_worker->wait_to_free_list);
        av_log(NULL, AV_LOG_DEBUG, "append list size %d index %d\n",
               list_size, dec_worker->index);

        // wait dec hw frame free
        // every decoder thread can preserve at most NI_MAX_DEC_CAPACITY hw frames
        // at the same time; if the list is full we need to wait until the filter
        // thread unrefs a hw frame and signals list_cond
        if (list_size > NI_MAX_DEC_CAPACITY) {
            pthread_mutex_unlock(&dec_worker->list_lock);
            av_log(NULL, AV_LOG_ERROR, "wait to free list frame %d > %d\n",
                   list_size, NI_MAX_DEC_CAPACITY);
            ret = -1;
            goto end;
        } else if (list_size == NI_MAX_DEC_CAPACITY) {
            av_log(NULL, AV_LOG_DEBUG, "before recycle list size %d entry size %d index %d\n",
                   get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
            // NOTE(review): single wait without a predicate loop; a spurious
            // wakeup continues with a full list. Kept as-is because the
            // signalling conditions are defined outside this function.
            pthread_cond_wait(&dec_worker->list_cond, &dec_worker->list_lock);
            av_log(NULL, AV_LOG_DEBUG, "after recycle list size %d entry size %d index %d\n",
                   get_fifo_size(dec_worker->wait_to_free_list), get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);
        }
        pthread_mutex_unlock(&dec_worker->list_lock);

filters_flushed:
        dec_worker->last_decoded_pts = frame->pts;
        niFrameSurface1_t *p_data3 = NULL;
        if (frame->buf[0]) {
            p_data3 = (niFrameSurface1_t*)(frame->buf[0]->data);
        }
        av_log(NULL, AV_LOG_DEBUG, "%s while loop, before unref frame %p "
               "ui16FrameIdx = [%d] ref_cnt %d\n", __func__, frame,
               p_data3 ? p_data3->ui16FrameIdx : -1,
               p_data3 ? frame_get_ref_count(frame) : -1);

        av_frame_unref(frame);

        // get ret from filter thread, ret < 0 means the filter thread failed
        // (or reached eos) and the decoder thread needs to exit now
        for (j = 0; j < filter_num; j++) {
            ret = get_filter_ret(xstack_workers[j]);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "%s decoder %d process_filter_graph "
                    "return < 0: %d, could be eos.\n",
                    __func__, dec_worker->index, ret);
                if (AVERROR_EOF == ret) {
                    dec_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
                }
                goto end;
            } else {
                av_log(NULL, AV_LOG_DEBUG, "%s decoder %d process_filter_graph "
                    "return > 0: %d.\n",
                    __func__, dec_worker->index, ret);
            }
        }
    }

    dec_worker->loop--;
    /*
    Don't skip eos processing if there are loops left but exit has been signalled.
    If eos processing is skipped the decoder session would be closed. This would lead
    to an issue where the filter thread gets stuck waiting for a frame when decoder
    threads have started exiting. This is because if any eos is skipped then the eos
    count is not incremented and the last decoder to exit never sends the last frame
    with pts = -1 to the filter thread to unblock it and exit.
    */
    if (dec_worker->loop &&
        THREAD_STATE_RUNNING == dec_worker->should_exit)
    {
        goto one_loop_end;
    }

    av_log(NULL, AV_LOG_INFO, "%s Decoder %d completed decoding, or force_exit"
           ": %d, notify eos.\n",
           __func__, dec_worker->index, dec_worker->force_exit);

    /*
    Force exit is signalled when reconfig is dropping this input. In this case
    only this thread needs to exit so skip eos process as it sets exit flag for
    all decoder threads.
    */
    if (!dec_worker->force_exit)
        process_decoder_eos(dec_worker);

one_loop_end:
    av_log(NULL, AV_LOG_DEBUG, "%s Decoder %d end of one iteration.\n",
           __func__, dec_worker->index);
end:
    if (src) {
        av_frame_free(&src);
        src = NULL;
    }
    // dst is a separate allocation only when scaling; otherwise it aliases src
    if (dec_worker->need_scale) {
        av_frame_free(&dst);
        dst = NULL;
    }
    return ret;
}

/**
 * @brief check if fifo has capacity to add a new frame
 *
 * If the entry's dec_frame_fifo is full, block once on e->eos_cond, then
 * re-evaluate the fifo size. The size is sampled while e->lock is still held
 * so the result is consistent with the state observed after the wakeup.
 *
 * @param e xstack context
 * @param dec_worker decoder worker context (used for logging only)
 *
 * @return true if has no capacity, false otherwise
*/
bool dec_fifo_eos_wait(ni_xstack_entry_t *e, decoder_worker *dec_worker)
{
    bool fifo_full;

    pthread_mutex_lock(&e->lock);
    if (get_fifo_size(e->dec_frame_fifo) >= NI_MAX_DEC_CAPACITY){
        av_log(NULL, AV_LOG_DEBUG, "%s %d, fifo full, wait.\n",
                __func__, dec_worker->index);
        /*
        EOS is signalled by filter thread if:
        1. dec_frame_fifo has space and eos_flag is set
        2. reconfig is signalled
        3. filter exits
        */
        pthread_cond_wait(&e->eos_cond, &e->lock);
        av_log(NULL, AV_LOG_DEBUG, "%s %d, eos signalled. Fifo size %d. Exit: %d.\n",
                __func__, dec_worker->index, get_fifo_size(e->dec_frame_fifo),
                dec_worker->should_exit);
    }
    // read the fifo size before releasing the lock to avoid racing with the
    // filter thread
    fifo_full = get_fifo_size(e->dec_frame_fifo) >= NI_MAX_DEC_CAPACITY;
    pthread_mutex_unlock(&e->lock);
    return fifo_full;
}

/**
 * @brief decoder loop for picture input
 *
 * Decodes the image with avcodec_send_packet()/avcodec_receive_frame(),
 * optionally rescales it, uploads it through retrieve_hwframe() and then keeps
 * re-sending the same frame with an increasing pts to the filters until
 * should_exit is set (see the comments inside the send loop for the
 * shortest/longest handling).
 *
 * The packet and the temporary frames are released on every exit path.
 *
 * @param dec_worker decoder worker context
 *
 * @return negative value on failure, otherwise the (non-negative) result of
 *         the last operation performed
 */
static int image_decoder_run(decoder_worker *dec_worker) {
    int i, ret = 0;
    AVPacket* packet = av_packet_alloc();
    AVFrame *src = av_frame_alloc();
    // dst starts as NULL so the cleanup code is safe before it is assigned
    AVFrame *dst = NULL;
    AVFrame *frame;
    ni_xstack_entry_t *e;
    int eos_num;
    filter_worker *f;
    bool all_longest = true;
    int first_frames_to_filters = 0;//count to track first frames to filter

    if (!packet) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for AVPacket\n");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }
    if (!src) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for AVFrame\n");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }

    while (av_read_frame(dec_worker->ifmt_ctx, packet) >= 0 && !dec_worker->should_exit &&
            !dec_worker->encode_exited && !dec_worker->force_exit) {
        if (packet->stream_index != dec_worker->stream_index) {
            // release packets of other streams, otherwise their buffers leak
            av_packet_unref(packet);
            continue;
        }
        ret = avcodec_send_packet(dec_worker->input_stream->dec_ctx, packet);
        av_packet_unref(packet);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "avcodec_send_packet failed");
            goto cleanup;
        }
        ret = avcodec_receive_frame(dec_worker->input_stream->dec_ctx, src);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "avcodec_receive_frame failed");
            goto cleanup;
        }
    }
    av_packet_free(&packet);

    if (dec_worker->need_scale) {
        dst = av_frame_alloc();
        if (!dst) {
            av_log(NULL, AV_LOG_ERROR, "Could not allocate dst AVFrame\n");
            ret = AVERROR(ENOMEM);
            goto cleanup;
        }
        dst->format = AV_PIX_FMT_YUV420P;
        dst->width = dec_worker->width;
        dst->height = dec_worker->height;
        ret = av_frame_get_buffer(dst, 32);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s Could not allocate the dst AVFrame buffers ret = %d\n",
                    __func__, ret);
            goto cleanup;
        }
        sws_scale(dec_worker->sws_ctx, (const uint8_t* const*)src->data,
                src->linesize, 0, src->height, dst->data, dst->linesize);
    } else {
        // no scaling: upload the decoded frame directly (dst only aliases src)
        dst = src;
    }

    // upload the sw frame to hw frame, place in dec_worker->decoded_frame
    ret = retrieve_hwframe(dec_worker, dst);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "retrieve hwframe for image failed\n");
        goto cleanup;
    }
    frame = dec_worker->decoded_frame;

    // register this input as "eos" with every filter; for filters that are not
    // "shortest" the entry's eos_flag is set as well
    for (i = 0; i < filter_num; i++) {
        dec_worker->stack_entry[i]->eos_flag = 0;
        f = xstack_workers[i];
        if (f->shortest) {
            all_longest = false;
        } else {
            dec_worker->stack_entry[i]->eos_flag = 1;
        }
        pthread_mutex_lock(&f->filter_lock);
        eos_num = ++f->input_eos_num;
        pthread_mutex_unlock(&f->filter_lock);

        av_log(NULL, AV_LOG_INFO, "%s %d , eos_num: %d xstack shortest: %d\n",
            __func__, dec_worker->index, eos_num, f->shortest);
    }

    // Continuously send the last frame even if shortest is 1;
    // another decoder thread has to tell the image thread to exit
    do {
        frame->pts = DEFAULT_YUV_PTS_STEP + dec_worker->last_decoded_pts;
        dec_worker->decoded_frames++;
        for (i = 0; i < filter_num; i++) {
            e = dec_worker->stack_entry[i];
            // if filter needs to do reconfig, decoder threads need to block until
            // reconfig is finished
            if (dec_worker->filter_common->reconfig == DO_RECONFIG ||
                dec_worker->filter_common->reconfig == WAIT_RECONFIG)
            {
                dec_reconfig_wait(dec_worker);
                first_frames_to_filters = 0;
                break;//start from first filter
            }
            else if (first_frames_to_filters == filter_num && //first frames sent to all filters
                     !xstack_workers[i]->shortest && //filter is following longest video
                     !dec_worker->should_exit)
            {
                /*
                This is to have the application always follow video inputs for EOS. This is done for:
                    Shortest=0 by making image input end early(only send one frame) then EOS
                    Shortest=1 by making the image input longest continuously sending frames with updated pts.
                If filter is shortest then send frames continuously to not let sync
                mistake an eos on empty fifo, longest is taken care of
                automatically by frame sync logic.
                */
                av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d is longest, skip sending another frame.\n",
                    __func__, dec_worker->index, i);
                /*
                If no filter needs a second frame i.e. all filters are longest
                then it is more efficient to use a wait cond.
                */
                if (all_longest)
                {
                    pthread_mutex_lock(&e->lock);
                    pthread_cond_wait(&e->eos_cond, &e->lock);
                    pthread_mutex_unlock(&e->lock);
                }
                continue;
            }
            if (dec_worker->should_exit) {
                goto end;
            }
            if (get_filter_flush_state(xstack_workers[i]))
            {
                av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d is flushed, skip sending last frame. Exit: %d\n",
                       __func__, dec_worker->index, i, dec_worker->should_exit);
                continue;
            }
            if (dec_fifo_eos_wait(e, dec_worker))
            {
                av_log(NULL, AV_LOG_DEBUG, "%s %d, filter %d fifo full skip sending frame. reconfig %d\n",
                       __func__, dec_worker->index, i, dec_worker->filter_common->reconfig);
                continue;
            }
            ret = send_decode_frame(e, frame);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "send_decode_frame failed %d\n", ret);
                goto end;
            }
            if (first_frames_to_filters < filter_num) {
                first_frames_to_filters++;
                av_log(NULL, AV_LOG_DEBUG, "%s %d, first_frames_to_filters: %d\n",
                       __func__, dec_worker->index, first_frames_to_filters);
            }
        }
        dec_worker->last_decoded_pts = frame->pts;
    } while (!dec_worker->should_exit);

end:
    // only reached once the hw frame exists
    av_frame_unref(frame);
    dec_worker->loop--;
cleanup:
    // early failures land here directly: nothing was sent yet, only release
    // the temporaries (av_packet_free() accepts an already freed packet)
    av_packet_free(&packet);
    if (src) {
        av_frame_free(&src);
        src = NULL;
    }
    // dst is a separate allocation only when scaling; otherwise it aliases src
    if (dec_worker->need_scale) {
        av_frame_free(&dst);
        dst = NULL;
    }
    return ret;
}

/**
 * @brief decoder main loop for hw decoded video input
 *
 * Reads packets with av_read_frame() and passes the packets of the selected
 * video stream to decoder_get_frame() until EOF, an error, or one of the exit
 * flags (should_exit / encode_exited / force_exit) is observed. Blocks in
 * dec_reconfig_wait() while a filter reconfig is pending. After the loop the
 * iteration counter is decremented and, unless more iterations remain or
 * force_exit is set, process_decoder_eos() is called.
 *
 * @param dec_worker decoder worker context
 *
 * @return 0 when the loop ended on EOF or was never entered, otherwise the
 *         result of the last read/decode operation (negative on error)
 */
static int hw_decoder_run(decoder_worker *dec_worker)
{
    AVPacket packet = { 0 };
    AVFormatContext *ifmt_ctx = dec_worker->ifmt_ctx;
    InputStream *input_stream = dec_worker->input_stream;
    // ret must be defined even if the loop body never runs (exit flag already
    // set) or "goto eos" is taken before the first read
    int ret = 0, err_ret;

    av_log(NULL, AV_LOG_INFO, "%s decoder=%d should_exit=%d force_exit=%d encode_exited=%d\n", __func__, dec_worker->index,
           dec_worker->should_exit,
           dec_worker->force_exit, dec_worker->encode_exited);

    while (!dec_worker->should_exit && !dec_worker->encode_exited &&
           !dec_worker->force_exit) {
        // if filter needs to do reconfig, decoder threads need to block until
        // reconfig is finished
        if (dec_worker->filter_common->reconfig == DO_RECONFIG ||
            dec_worker->filter_common->reconfig ==  WAIT_RECONFIG)
        {
            dec_reconfig_wait(dec_worker);
            if (dec_worker->force_exit)
                goto eos;
        }

        ret = av_read_frame(ifmt_ctx, &packet);
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                av_log(NULL, AV_LOG_DEBUG, "%s stream=%d av_read_frame got "
                        "EOF\n", __func__, dec_worker->index);
                if (dec_worker->loop == 1)
                {
                    // last iteration: drain the decoder before finishing
                    err_ret = 0;
                    do {
                        ret = decoder_get_frame(dec_worker, NULL);
                    } while (ret >= 0);

                    avcodec_flush_buffers(input_stream->dec_ctx);
                    if (ret < 0 && ret != AVERROR_EOF) {
                        av_log(NULL, AV_LOG_ERROR, "%s flush decoder: %d "
                                "error\n", __func__, dec_worker->index);
                        err_ret = ret;
                    }

                    if (err_ret < 0) {
                        goto eos;
                    }
                    // NOTE(review): on a clean drain control falls through to the
                    // stream_index check below with a packet that was not filled
                    // by av_read_frame(); confirm this is intended rather than
                    // jumping to skip_get_frame.
                } else goto skip_get_frame;
            } else {
                goto eos;
            }
        }

        if (packet.stream_index != dec_worker->video_index) {
            av_log(NULL, AV_LOG_DEBUG, "Demuxer gave frame of stream_index %u\n",
                   packet.stream_index);
            // release packets of other streams, otherwise their buffers leak
            av_packet_unref(&packet);
            continue;
        }
        ret = decoder_get_frame(dec_worker, &packet);
        av_packet_unref(&packet);

skip_get_frame:
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                av_log(NULL, AV_LOG_DEBUG, "%s stream=%d __decode got EOF\n",
                        __func__, dec_worker->index);
                ret = 0;
                break;
            } else {
                goto eos;
            }
        }

        av_log(NULL, AV_LOG_DEBUG, "Decoder %d while loop, should_exit=%d, "
                "encode_exited=%d.\n", dec_worker->index,
                dec_worker->should_exit, dec_worker->encode_exited);
    }

    dec_worker->loop--;
    /*
    Don't skip eos processing if there are loops left but exit has been signalled.
    If eos processing is skipped the decoder session would be closed. This would lead
    to an issue where the filter thread gets stuck waiting for a frame when decoder
    threads have started exiting. This is because if any eos is skipped then the eos
    count is not incremented and the last decoder to exit never sends the last frame
    with pts = -1 to the filter thread to unblock it and exit.
    */
    if (dec_worker->loop &&
        THREAD_STATE_RUNNING == dec_worker->should_exit)
    {
        goto one_loop_end;
    }
eos:
    av_log(NULL, AV_LOG_INFO, "%s Decoder %d completed decoding, or force_exit"
           ": %d, notify eos.\n",
           __func__, dec_worker->index, dec_worker->force_exit);
    /*
    Force exit is signalled when reconfig is dropping this input. In this case
    only this thread needs to exit so skip eos process as it sets exit flag for
    all decoder threads.
    */
    if(!dec_worker->force_exit)
        process_decoder_eos(dec_worker);

one_loop_end:
    av_log(NULL, AV_LOG_DEBUG, "%s Decoder %d end of one iteration.\n",
           __func__, dec_worker->index);

    return ret;
}

/**
 * @brief decoder thread routine
 *
 * Announces readiness on common->ready_cond, opens the input (unless it is
 * already opened), then runs the type specific decoder loop once per
 * iteration, rewinding the input between iterations. On exit it waits for
 * wait_to_free_list to drain (non-picture inputs without force_exit),
 * releases the decoder resources and either increments common->exit_dec_num
 * or, after a force_exit, frees its own stack entries and worker context.
 *
 * @param thread_data pointer to the decoder_worker of this thread
 *
 * @return the last result code, cast to a pointer
 */
static void *dec_worker_thread_run(void *thread_data)
{
    decoder_worker *dec_worker = (decoder_worker *)thread_data;
    // ret must be defined even when no decoder loop assigns it
    int i, ret = 0;

    pthread_mutex_lock(&dec_worker->common->lock);
    dec_worker->common->ready_num++;
    if (dec_worker->common->ready_num >= dec_worker->common->total_dec_threads) {
        pthread_cond_signal(&dec_worker->common->ready_cond);
    }
    pthread_mutex_unlock(&dec_worker->common->lock);

    if (! dec_worker->input_file_already_opened) {
        switch (dec_worker->type)
        {
        case SW_VIDEO:
            ret = open_yuv_file(dec_worker);
            break;
        case SW_PICTURE:
            ret = open_image_file(dec_worker);
            break;
        case HW_VIDEO:
            ret = open_hw_input_file(dec_worker, dec_worker->decoder_name,
                                dec_worker->input_file);
            break;
        default:
            av_log(NULL, AV_LOG_ERROR, "invalid decoder type %d.\n",
                   dec_worker->type);
            ret = -1;
            break;
        }
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "fail to open input file.\n");
            goto end;
        }
    }

    for (;;) {
        av_log(NULL, AV_LOG_INFO, "%s: stream=%d remaining iterations: %u\n",
               __func__, dec_worker->index, dec_worker->loop);

        switch (dec_worker->type)
        {
        case SW_VIDEO:
            ret = sw_decoder_run(dec_worker);
            break;
        case SW_PICTURE:
            ret = image_decoder_run(dec_worker);
            // picture input is never looped
            dec_worker->loop = 0;
            break;
        case HW_VIDEO:
            ret = hw_decoder_run(dec_worker);
            break;
        default:
            av_log(NULL, AV_LOG_ERROR, "invalid decoder type %d.\n",
                   dec_worker->type);
            // report the failure so the loop below terminates
            ret = -1;
            break;
        }

        if (ret >= 0 && dec_worker->loop && !dec_worker->force_exit && !dec_worker->should_exit) {
            // another iteration requested: rewind the input
            if (dec_worker->type == SW_VIDEO) {
                fseek(dec_worker->input_fp, 0, SEEK_SET);
            } else {
                ret = av_seek_frame(dec_worker->ifmt_ctx,
                                    -1,
                                    INT64_MIN,
                                    AVSEEK_FLAG_BYTE|AVSEEK_FLAG_BACKWARD);
            }
            av_log(NULL, AV_LOG_DEBUG, "%s: stream=%d Rereading input: %d\n",
                __func__, dec_worker->index, ret);
        } else
            break;
    }
    av_log(NULL, AV_LOG_INFO, "decoder %d total dec num %d.\n",
           dec_worker->index, dec_worker->decoded_frames);
    av_log(NULL, AV_LOG_ERROR, "decoder %d exit: ret=0x%x.\n",
           dec_worker->index, ret);

end:
    if (dec_worker->type != SW_PICTURE && !dec_worker->force_exit)
    {
        //shared memory debt can be incurred if decoder closes before frames have
        //been recycled. after force_exit on reconfig there might be some frames left.
        av_log(NULL, AV_LOG_DEBUG, "%s: stream=%d waiting for recycle list to be empty\n",
               __func__, dec_worker->index);
        pthread_mutex_lock(&dec_worker->list_lock);
        while (get_fifo_size(dec_worker->wait_to_free_list) != 0)
        {
            pthread_cond_wait(&dec_worker->list_cond, &dec_worker->list_lock);
        }
        pthread_mutex_unlock(&dec_worker->list_lock);
        av_log(NULL, AV_LOG_DEBUG, "%s: stream=%d recycle list empty\n",
               __func__, dec_worker->index);
    }
    if (dec_worker->type != HW_VIDEO) {
        if (dec_worker->sws_ctx) {
            sws_freeContext(dec_worker->sws_ctx);
        }
        if (dec_worker->hwctx_upload) {
            av_buffer_unref(&dec_worker->hwctx_upload);
        }
        if (dec_worker->hwdevice_upload) {
            av_buffer_unref(&dec_worker->hwdevice_upload);
        }
    }

    if (dec_worker->decoded_frame) {
        av_frame_free(&dec_worker->decoded_frame);
        dec_worker->decoded_frame = NULL;
    }

    // input_stream may still be NULL here (e.g. when opening the input failed),
    // so check it before looking at its decoder context
    if (dec_worker->type != SW_VIDEO && dec_worker->input_stream &&
        dec_worker->input_stream->dec_ctx) {
        avcodec_free_context(&dec_worker->input_stream->dec_ctx);
    }

    if (dec_worker->input_stream) {
        av_free(dec_worker->input_stream);
    }

    if (dec_worker->ifmt_ctx) {
        avformat_close_input(&dec_worker->ifmt_ctx);
    }

    if (! dec_worker->force_exit) {
        pthread_mutex_lock(&dec_worker->common->lock);
        dec_worker->common->exit_dec_num++;
        av_log(NULL, AV_LOG_INFO, "exit dec %d\n", dec_worker->common->exit_dec_num);
        pthread_mutex_unlock(&dec_worker->common->lock);
    } else {
        // if dec_work is force exit
        // clean up and self destroy if early end, since main thread won't track
        // us any more
        ni_xstack_entry_t *e;
        int nb_entries;
        for (i = 0; i < filter_num; i++) {
            e = dec_worker->stack_entry[i];
            av_log(NULL, AV_LOG_DEBUG, "%s force exit recycle list size %d entry size %d index %d\n",
                   __func__,
                   get_fifo_size(dec_worker->wait_to_free_list),
                   get_fifo_size(dec_worker->stack_entry[0]->dec_frame_fifo), dec_worker->index);

            nb_entries = 0;

            av_log(NULL, AV_LOG_DEBUG, "src %s has %d entries.\n",
                e->name, nb_entries);

            av_frame_free(&e->first_frame);
            av_frame_free(&e->last_frame);

            pthread_mutex_lock(&dec_worker->list_lock);
            drain_fifo(dec_worker->wait_to_free_list, __func__, e->name);
            pthread_mutex_unlock(&dec_worker->list_lock);

            free(e);
        }

        cleanup_decoder_worker(dec_worker);

        free(dec_worker);
    }

    return (void *)((long)ret);
}



// FILTER THREAD FUNCTION
/**
 * @brief init filter description and output sequence
 *
 * Builds the full filter graph description as prefix + f_info.filter_desc +
 * suffix and stores the three parts in f->desc_preix, f->filter_desc and
 * f->desc_suffix:
 *  - prefix: one "[i:v]" label per input followed by "ni_quadra_xstack="
 *  - suffix, single output: optional scale, drawtext and pad stages chained
 *    one after another through "[internalN]" links
 *  - suffix, multiple outputs: a ni_quadra_split with one "[outN]" per output,
 *    going through an extra ni_quadra_scale for outputs with a specific
 *    resolution
 * f->filters_sequence records the order in which the outputs appear in the
 * description.
 *
 * @param f        filter worker that receives the description
 * @param input    number of xstack inputs
 * @param output   number of outputs
 * @param f_info   xstack / drawtext / pad descriptions
 * @param out_info per-output resolution settings (at least @p output entries)
 */
static void init_desc(filter_worker *f, int input, int output,
               filter_info f_info, output_info *out_info) {
    int i, sequence_index = 0;
    char suffix[1024] = {0};
    char prefix[512] = {0};
    char filter_description[3072] = {0};

    // generate the prefix of filter description
    for (i = 0; i < input; i++) {
        snprintf(prefix + strlen(prefix), sizeof(prefix) - strlen(prefix),"[%d:v]",i);
    }
    snprintf(prefix + strlen(prefix), sizeof(prefix) - strlen(prefix),"ni_quadra_xstack=");

    // generate the suffix of filter description
    if (output == 1) {
        int internal_link = 0;
        // every enabled stage is appended to the suffix so that the stages
        // form a chain; writing at the start of the buffer each time would
        // drop all but the last enabled stage
        if (out_info[0].specific_res) {
            snprintf(suffix + strlen(suffix), sizeof(suffix) - strlen(suffix),
                     "[internal%d];[internal%d]ni_quadra_scale=%d:%d",
                     internal_link, internal_link, out_info[0].width, out_info[0].height);
            internal_link++;
        }
        if (f_info.get_drawtext) {
            snprintf(suffix + strlen(suffix), sizeof(suffix) - strlen(suffix),
                     "[internal%d];[internal%d]ni_quadra_drawtext=%s",
                     internal_link, internal_link,f_info.drawtext_filter_desc);
            internal_link++;
        }
        if (f_info.need_pad) {
            snprintf(suffix + strlen(suffix), sizeof(suffix) - strlen(suffix),
                     "[internal%d];[internal%d]ni_quadra_pad=%s",
                     internal_link, internal_link,f_info.pad_filter_desc);
            internal_link++;
        }
        f->filters_sequence[sequence_index] = 0;
        sequence_index++;
    } else { // drawtext and pad are not supported for multiple outputs yet
        snprintf(suffix, sizeof(suffix),"[in];[in]ni_quadra_split=%d:0:0",output);
        // first pass: outputs without a specific resolution leave the split directly
        for (i = 0; i < output; i++) {
            if (!out_info[i].specific_res) {
                snprintf(suffix + strlen(suffix), sizeof(suffix) - strlen(suffix),"[out%d]",i);
                f->filters_sequence[sequence_index] = i;
                sequence_index++;
            } else {
                snprintf(suffix + strlen(suffix), sizeof(suffix) - strlen(suffix),"[in%d]",i);
            }
        }
        // second pass: outputs with a specific resolution get their own scale filter
        for (i = 0; i < output; i++) {
            if (!out_info[i].specific_res) {
                //do nothing
            } else {
                snprintf(suffix + strlen(suffix), sizeof(suffix) - strlen(suffix),";[in%d]ni_quadra_scale=%d:%d[out%d]",
                         i, out_info[i].width, out_info[i].height, i);
                f->filters_sequence[sequence_index] = i;
                sequence_index++;
            }
        }
    }

    snprintf(filter_description, sizeof(filter_description),"%s%s%s", prefix, f_info.filter_desc, suffix);
    // NOTE(review): unbounded copies; assumes the destination fields are at
    // least as large as the local buffers above - confirm against the
    // filter_worker definition.
    strcpy(f->desc_preix, prefix);
    strcpy(f->desc_suffix, suffix);
    strcpy(f->filter_desc, filter_description);
    av_log(NULL, AV_LOG_INFO, "filter_desc: %s.\n", f->filter_desc);
}

/**
 * @brief ni_xstack filter worker initialization
 *
 * Resets the bookkeeping fields of the worker, detects "shortest=1" in the
 * filter description and creates the worker's two mutexes and its condition
 * variable. If one of the pthread objects cannot be created, the ones that
 * were already created are destroyed again.
 *
 * @param f       filter worker to initialize (f->filter_desc must be set)
 * @param entries number of inputs
 * @param exits   number of outputs
 *
 * @return 0 if all successful, otherwise the non-zero code returned by the
 *         failing pthread init call
 */
static int init_xstack(filter_worker *f, int entries, int exits)
{
    int rc;
    int pad;

    f->init = 0;
    f->need_reconfig = 0;
    f->inputs = entries;
    f->outputs = exits;
    f->shortest = strstr(f->filter_desc, "shortest=1") ? 1 : 0;

    rc = pthread_mutex_init(&f->filter_lock, NULL);
    if (rc != 0) {
        return rc;
    }

    rc = pthread_mutex_init(&f->ret_lock, NULL);
    if (rc != 0) {
        pthread_mutex_destroy(&f->filter_lock);
        return rc;
    }

    rc = pthread_cond_init(&f->init_cond, NULL);
    if (rc != 0) {
        // roll back in reverse order of creation
        pthread_mutex_destroy(&f->ret_lock);
        pthread_mutex_destroy(&f->filter_lock);
        return rc;
    }

    // no pads are linked yet
    f->num_src_pads = 0;
    for (pad = 0; pad < NI_MAX_XSTACK_INPUTS; pad++) {
        f->src_pads[pad] = NULL;
    }
    f->num_dst_pads = 0;
    for (pad = 0; pad < NI_MAX_XSTACK_OUTPUTS; pad++) {
        f->dst_pads[pad] = NULL;
    }

    f->filter_graph = NULL;
    f->input_eos_num = 0;
    f->filter_ret = 0;
    f->flushed = false;

    return 0;
}

/**
 * @brief destroy the synchronization objects of a filter worker
 *
 * @param worker filter worker to clean up; NULL is accepted and ignored
 */
static void cleanup_filter_worker(filter_worker *worker) {
    if (!worker) {
        return;
    }

    pthread_mutex_destroy(&worker->filter_lock);
    pthread_mutex_destroy(&worker->ret_lock);
    pthread_cond_destroy(&worker->init_cond);
}

/**
 * @brief free the filter graph and release frames still held for its inputs
 *
 * Does nothing when the worker has no filter graph. Otherwise the graph is
 * freed and, for every source pad that is neither a picture input nor marked
 * eos, the entry's last frame is unreferenced and the owning decoder's
 * wait_to_free_list is recycled, signalling list_cond whenever the list is
 * below NI_MAX_DEC_CAPACITY afterwards.
 *
 * @param f filter worker whose graph is torn down
 */
static void uninit_filter_graph(filter_worker *f)
{
    int pad, pass;

    if (!f->filter_graph)
    {
        return;
    }

    avfilter_graph_free(&f->filter_graph);

    for (pad = 0; pad < f->num_src_pads; pad++)
    {
        ni_xstack_entry_t *entry = f->src_pads[pad];

        // picture inputs are not cleaned up here
        if (entry->worker->type == SW_PICTURE)
        {
            continue;
        }

        if (entry->eos_flag)
        {
            av_log(NULL, AV_LOG_DEBUG, "%s %d %s %d eos stream, skip cleanup. last frame %p buf[0] %p data %p\n",
                   __func__, entry->worker->index, entry->name, entry->eos_flag, entry->last_frame,
                   entry->last_frame ? entry->last_frame->buf[0] : NULL,
                   entry->last_frame && entry->last_frame->buf[0] ? entry->last_frame->buf[0]->data : NULL);
            continue;
        }

        fifo_print(entry->dec_frame_fifo, entry->name, entry->worker->index, "uninit_filter_graph after graph free dec_frame_fifo");

        pthread_mutex_lock(&entry->worker->list_lock);

        if (entry->last_frame && entry->last_frame->buf[0] && entry->last_frame->buf[0]->data)
        {
            av_log(NULL, AV_LOG_DEBUG, "%s free last ui16FrameIdx = [%d] ref_count = %d\n", __func__,
                   ((niFrameSurface1_t *)(entry->last_frame->buf[0]->data))->ui16FrameIdx,
                   frame_get_ref_count(entry->last_frame));
        }
        av_frame_unref(entry->last_frame);

        /*
        Walk the whole list to try and drop any remaining references.
        Only frames from before the reconfig are cleared by this, since newer
        frames still have a ref count of 2 at this point: one held by
        dec_frame_fifo and one held by wait_to_free_list.
        */
        for (pass = 0; pass < NI_MAX_DEC_CAPACITY; pass++)
        {
            if (list_recycle_frames(entry->worker->wait_to_free_list, entry->name) < NI_MAX_DEC_CAPACITY)
            {
                av_log(NULL, AV_LOG_DEBUG, "%s fifo size %d index %d\n", __func__,
                       get_fifo_size(entry->worker->wait_to_free_list), entry->worker->index);
                pthread_cond_signal(&entry->worker->list_cond);
            }
        }

        fifo_print(entry->worker->wait_to_free_list, entry->name, entry->worker->index, "uninit_filter_graph after graph free wait_to_free_list");

        pthread_mutex_unlock(&entry->worker->list_lock);
    }

    f->filter_graph = NULL;
}

// ni_xstack filter graph initialization
// Allocates f->filter_graph and parses f->filter_desc into it, then
//  - creates one "buffer" source per parsed graph input, bound to a src pad
//    (entry) and carrying that entry's HW frames context, and
//  - creates one "buffersink" per parsed graph output, bound to the dst pad
//    (exit) selected through f->filters_sequence,
// and finally configures the graph.
// Returns the non-negative result of avfilter_graph_config() on success and
// a negative AVERROR code on any failure. The temporary buffersrc parameters
// and the parsed in/out lists are released on every path.
static int init_filter_graph2(filter_worker *f)
{
    int i, j, ret = 0;
    char args[512] = { 0 };
    char name[32] = { 0 };
    // must start out NULL: the shared cleanup below frees both lists even
    // when we bail out before avfilter_graph_parse2() has filled them in
    AVFilterInOut *inputs = NULL, *outputs = NULL, *cur;

    f->filter_graph = avfilter_graph_alloc();
    if (f->filter_graph == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate filter graph\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    // parse filter description and generate inputs and outputs
    // each input will link to a decoder
    // each output will link to a encoder through filters_sequence
    ret = avfilter_graph_parse2(f->filter_graph, f->filter_desc, &inputs, &outputs);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "failed to parse graph\n");
        goto end;
    }

    // process filter inputs
    // link entry to src pad
    int curr_worker_idx = -1;
    for (cur = inputs, i = 0; cur && i < f->num_src_pads; cur = cur->next, i++) {
        ni_xstack_entry_t *e = NULL;
        // pick the first pad whose worker index is above the one chosen for
        // the previous input (and not below i), so that the inputs are
        // assigned in increasing worker-index order
        for (j = 0; j < f->num_src_pads; j++) {
            if (f->src_pads[j]->worker->index > curr_worker_idx &&
                f->src_pads[j]->worker->index >= i) {
                e = f->src_pads[j];
                curr_worker_idx = f->src_pads[j]->worker->index;
                break;
            }
        }
        if (! e) {
            av_log(NULL, AV_LOG_ERROR, "%s failed to get pad %d\n",
                   __func__, i);
            // ret still holds the success code of the previous call
            ret = AVERROR(EINVAL);
            goto end;
        }

        snprintf(name, sizeof(name), "in_%d_%d", f->index, i);
        snprintf (args, sizeof (args),
                  "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d:frame_rate=%d/%d",
                  e->width, e->height, e->hw_pixfmt,
                  e->time_base.num, e->time_base.den,
                  e->par.num, e->par.den,
                  e->fps.num, e->fps.den);

        av_log(NULL, AV_LOG_DEBUG, "input filter args: %s\n", args);
        ret = avfilter_graph_create_filter(&e->buffersrc_ctx, avfilter_get_by_name("buffer"),
                                           name, args, NULL, f->filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to create input filter: %d\n", i);
            goto end;
        }

        // set buffer src HW frames context
        e->buffersrc_par = av_buffersrc_parameters_alloc();
        if (!e->buffersrc_par) {
            av_log(NULL, AV_LOG_ERROR, "%s failed to alloc buffersrc parameters"
                   " for entity %s\n", __func__, e->name);
            ret = AVERROR(ENOMEM);
            goto end;
        }
        e->buffersrc_par->hw_frames_ctx = e->first_frame->hw_frames_ctx;
        ret = av_buffersrc_parameters_set(e->buffersrc_ctx, e->buffersrc_par);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_parameters_set failed"
                   " for entity %s\n", __func__, e->name);
            goto end;
        }

        // connect buffer src (index 0) pad to one of ni_xstack's src
        // (index i) pads
        ret = avfilter_link(e->buffersrc_ctx, 0, cur->filter_ctx, cur->pad_idx);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to link input filter: %d\n", i);
            goto end;
        }
    }

    // process filter outputs
    // create dst pad if not been created, it will only create for one time
    if (!f->num_dst_pads) {
        for (cur = outputs, i = 0; cur; cur = cur->next, i++) {
            f->num_dst_pads++;
        }
    }

    if (f->num_dst_pads != f->outputs) {
        av_log(NULL, AV_LOG_ERROR, "filter outputs not equal to file outputs dst_pad %d output %d",
        f->num_dst_pads, f->outputs);
        // a mismatch is a configuration error, never a success
        ret = AVERROR(EINVAL);
        goto end;
    }
    // link entry to dst pad
    // the parsed outputs are walked while i counts down, so the first parsed
    // output is bound to filters_sequence[num_dst_pads - 1]
    for (cur = outputs, i = f->num_dst_pads - 1; cur && i >= 0; cur = cur->next, i--) {

        ni_xstack_exit_t *e = f->dst_pads[f->filters_sequence[i]];
        snprintf(name, sizeof(name), "out_%d_%d", f->index, i);
        ret = avfilter_graph_create_filter(&e->buffersink_ctx, avfilter_get_by_name("buffersink"),
                                           name, NULL, NULL, f->filter_graph);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to create output filter: %d\n", i);
            goto end;
        }

        // connect ni_xstack's dst (index i) pads to one of buffer sink
        // (index 0) pad
        ret = avfilter_link(cur->filter_ctx, cur->pad_idx, e->buffersink_ctx, 0);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "failed to link output filter: %d\n", i);
            goto end;
        }
    }

    // configure and validate the filter graph
    ret = avfilter_graph_config(f->filter_graph, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "%s failed to config graph filter\n",
               __func__);
        goto end;
    } else {
        av_log(NULL, AV_LOG_INFO, "%s success config graph filter %d %s\n",
               __func__, f->index, f->filter_desc);
    }

end:
    // the buffersrc parameters are only needed while the graph is being
    // built; av_freep() tolerates pads whose parameters were never allocated
    for (i = 0; i < f->num_src_pads; i++) {
        av_freep (&f->src_pads[i]->buffersrc_par);
    }

    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    return ret;
}

// run once for init
// Before calling this function, caller should have already locked f->filter_lock
// Pass 1 scans the HW_VIDEO pads: the largest first-frame pts is stored in
// f->latest_stream_start and the fps / time base of the first HW_VIDEO pad
// become the reference timing (25 fps and 1/1200000 when there is none).
// Pass 2 aligns every non-HW_VIDEO pad to that start pts and, when the
// reference numerators are non-zero, gives it the reference fps / time base
// and the matching per-frame pts step.
void normalize_pts_for_sw_decoder(filter_worker* f)
{
    AVRational ref_fps = av_make_q(25, 1);
    AVRational ref_tb = av_make_q(1, 1200000);
    bool have_ref = false;
    int pad;

    // pass 1: hardware decoded streams define start time and timing
    for (pad = 0; pad < f->num_src_pads; pad++) {
        ni_xstack_entry_t *hw = f->src_pads[pad];

        if (hw->worker->type != HW_VIDEO) {
            continue;
        }
        if (hw->first_frame->pts > f->latest_stream_start) {
            f->latest_stream_start = hw->first_frame->pts;
        }
        if (have_ref) {
            continue;
        }
        ref_fps = hw->fps;
        ref_tb = hw->time_base;
        have_ref = true;
    }

    // pass 2: yuv / picture inputs follow the hardware streams
    for (pad = 0; pad < f->num_src_pads; pad++) {
        ni_xstack_entry_t *sw = f->src_pads[pad];

        if (sw->worker->type == HW_VIDEO) {
            continue;
        }
        sw->first_frame->pts = f->latest_stream_start;
        sw->last_pts = f->latest_stream_start;
        if (!ref_fps.num || !ref_tb.num) {
            continue;
        }
        sw->fps = ref_fps;
        sw->time_base = ref_tb;
        // number of time-base ticks covered by one frame
        sw->worker->pts_step = (ref_tb.den * ref_fps.den) / (ref_tb.num * ref_fps.num);
    }

    av_log(NULL, AV_LOG_DEBUG, "%s latest_stream_start %ld s\n",
           __func__, f->latest_stream_start);
}

// Classify an input by the (case-insensitive) extension of its file name:
//   ".yuv"                                    -> SW_VIDEO
//   ".jpg" ".jpeg" ".png" ".bmp" ".gif"       -> SW_PICTURE
//   anything else                             -> HW_VIDEO
// A NULL name or a name without any '.' has no extension to inspect and is
// therefore also reported as HW_VIDEO.
static input_type get_input_type(const char *file_name)
{
    static const char *const picture_suffixes[] = {
        ".jpg", ".jpeg", ".png", ".bmp", ".gif"
    };
    const char *suffix = file_name ? strrchr(file_name, '.') : NULL;
    size_t i;

    // strrchr() yields NULL when there is no '.', which must not reach
    // strcasecmp()
    if (!suffix) {
        return HW_VIDEO;
    }
    if (strcasecmp(suffix, ".yuv") == 0)  {
        return SW_VIDEO;
    }
    for (i = 0; i < sizeof(picture_suffixes) / sizeof(picture_suffixes[0]); i++) {
        if (strcasecmp(suffix, picture_suffixes[i]) == 0) {
            return SW_PICTURE;
        }
    }
    return HW_VIDEO;
}

// apply participant addition/removal and xstack layout changes to the
// stack filter graph
//
// Steps, in order:
//  1. remember the pts of src pad 0's last frame in every filter worker;
//  2. detach the decoder workers whose input_file matches src_to_remove from
//     decoder_workers[], flag them force_exit and drop their src pads from
//     every filter worker;
//  3. allocate, initialise and open a decoder worker for each src_to_add
//     entry (skipped with an error log when the total would exceed
//     NI_MAX_XSTACK_INPUTS);
//  4. rebuild each filter worker's filter_desc from new_xstack_desc[] and
//     clear its init flag;
//  5. spawn a thread for each newly added decoder worker.
// Returns 0 on success and a negative value on failure.
//
// NOTE(review): the original header said this runs "protected by
// f->filter_lock", but the body acquires f->filter_lock itself, so callers
// presumably must not hold it - confirm against the caller.
static int reconfig_xstack_layout(ni_src_desc_t src_to_remove[NI_MAX_XSTACK_INPUTS],
                                  int num_src_to_remove,
                                  ni_src_desc_t src_to_add[NI_MAX_XSTACK_INPUTS],
                                  int num_src_to_add,
                                  char new_xstack_desc[NI_MAX_XSTACK_FILTER][2048])
{
    filter_worker *f = NULL;
    decoder_worker *to_remove[NI_MAX_XSTACK_INPUTS] = {0};
    decoder_worker *dec;
    int i, j, k, index, ret;
    int num_real_remove = 0;
    int total_active = active_decoder_workers;
    int total_pads = active_decoder_workers;

    for (index = 0; index < filter_num; index++) {
        f = xstack_workers[index];
        f->last_filter_pts = f->src_pads[0]->last_frame->pts;
    }
    av_log(NULL, AV_LOG_INFO, "%s to_remove %d to_add %d!\n",
           __func__, num_src_to_remove, num_src_to_add);

    // remove input src
    for (i = 0; i < num_src_to_remove; i++) {
        for (j = 0; j < total_active; j++) {
            if (0 == strcmp(decoder_workers[j]->input_file,
                            src_to_remove[i].file_name))
            {
                to_remove[num_real_remove++] = decoder_workers[j];
                total_active--;

                // close the gap so the active workers stay contiguous
                for (k = j; k < total_active; k++) {
                    decoder_workers[k] = decoder_workers[k + 1];
                }
                decoder_workers[total_active] = NULL;
                break;
            }
        }
    }

    if (num_src_to_remove && num_real_remove != num_src_to_remove) {
        av_log(NULL, AV_LOG_ERROR, "%s some decoder didn't found for removal!"
               " please check the input file name\n", __func__);
        ret = -1;
        goto end;
    }

    for (i = 0; i < num_real_remove; i++) {
        dec = to_remove[i];

        pthread_mutex_lock(&dec->frame_lock);
        dec->force_exit = 1;
        pthread_mutex_unlock(&dec->frame_lock);

        av_log(NULL, AV_LOG_INFO, "%s decoder idx %d force_exit 1 !\n",
               __func__, dec->index);
    }

    if (num_real_remove > 0) {
        for (index = 0; index < filter_num; index++) {
            f = xstack_workers[index];
            pthread_mutex_lock(&f->filter_lock);
            total_pads = active_decoder_workers;
            for (i = 0; i < num_real_remove; i++) {
                dec = to_remove[i];
                for (j = 0; j < active_decoder_workers; j++) {
                    if (f->src_pads[j] && f->src_pads[j]->worker &&
                        f->src_pads[j]->worker == dec) {
                        // remove it from the src pads and move those behind forward
                        total_pads--;

                        for (k = j; k < total_pads; k++) {
                            f->src_pads[k] = f->src_pads[k + 1];
                        }
                        f->src_pads[total_pads] = NULL;
                        break;
                    }
                }
            }
            f->num_src_pads = total_pads;
            assert(total_active == total_pads);
            pthread_mutex_unlock(&f->filter_lock);
        }
        active_decoder_workers = total_active;
    }

    // add input src
    if (num_src_to_add && total_active + num_src_to_add >NI_MAX_XSTACK_INPUTS) {
        av_log(NULL, AV_LOG_ERROR, "%s existing %d + %d to add exceeding max "
               "allowed: %d, not adding !\n", __func__, total_active,
               num_src_to_add, NI_MAX_XSTACK_INPUTS);
        num_src_to_add = 0;
    }

    // A new worker copies its shared state from decoder_workers[0] and takes
    // its index from decoder_workers[j - 1]; with no worker left both reads
    // would hit the freshly zeroed slot / the element before the array.
    if (num_src_to_add && total_active == 0) {
        av_log(NULL, AV_LOG_ERROR, "%s no active decoder worker left to derive "
               "new workers from, not adding !\n", __func__);
        ret = -1;
        goto end;
    }

    j = total_active; // the first empty slot in the decoder_workers array
    for (i = 0; i < num_src_to_add; i++) {
        decoder_workers[j] = calloc(1, sizeof(decoder_worker));
        if (! decoder_workers[j]) {
            av_log(NULL, AV_LOG_ERROR, "failed to alloc new decoder worker.\n");
            ret = -1;
            goto end;
        }

        decoder_workers[j]->xstack = decoder_workers[0]->xstack;
        decoder_workers[j]->index = decoder_workers[j - 1]->index + 1;
        strcpy(decoder_workers[j]->input_file, src_to_add[i].file_name);
        strcpy(decoder_workers[j]->decoder_name, src_to_add[i].decoder_name);
        strcpy(decoder_workers[j]->decoder_params, src_to_add[i].decoder_params);

        decoder_workers[j]->common = decoder_workers[0]->common;
        decoder_workers[j]->filter_common = decoder_workers[0]->filter_common;
        decoder_workers[j]->devid = decoder_workers[0]->devid;
        decoder_workers[j]->loop = decoder_workers[0]->loop;

        decoder_workers[j]->decoded_frame = av_frame_alloc();
        if (!decoder_workers[j]->decoded_frame) {
            av_log(NULL, AV_LOG_ERROR, "failed to alloc decoded_frame for new "
                   "decoder worker.\n");
            ret = -1;
            goto end;
        }

        ret = init_decoder_worker(decoder_workers[j]);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to init_decoder_worker for new.\n");
            goto end;
        }

        // set file open flag here so that: create_new_src_pad called in
        // open_input_file knows to find the first empty pad slot for it not
        // relying on its index, and the input file won't be opened again
        decoder_workers[j]->input_file_already_opened = 1;
        if (open_hw_input_file(decoder_workers[j],
                            decoder_workers[j]->decoder_name,
                            decoder_workers[j]->input_file) < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "failed to open_input_file for new.\n");
            ret = -1;
            goto end;
        }

        // sync with the current stream (video only) in progress
        // (f is the last filter worker visited by the loops above)
        decoder_workers[j]->last_decoded_pts = f->last_filter_pts;

        av_log(NULL, AV_LOG_INFO, "%s allocated new dec worker at pos: %d idx "
               "%d last_decoded_pts set to: %ld\n",
               __func__, j, decoder_workers[j]->index,
               decoder_workers[j]->last_decoded_pts);

        j++;
    }

    total_pads = j;
    active_decoder_workers = j;

    // re-init xstack so filter graph can be re-generated based on the new
    // filter graph description
    if (num_src_to_add || num_src_to_remove) {
        for (index = 0; index < filter_num; index++) {
            f = xstack_workers[index];
            pthread_mutex_lock(&f->filter_lock);
            f->inputs = active_decoder_workers;
            f->shortest = 0;
            if (strstr(new_xstack_desc[index], "shortest=1")) {
                f->shortest = 1;
            }
            f->num_src_pads = total_pads;
            // now need to regenerate the filter description:
            // "[0:v][1:v]...ni_quadra_xstack=<new desc><suffix>"
            char filter_desc[2048] = {0};
            for (i = 0; i < total_pads; i++) {
                snprintf(filter_desc + strlen(filter_desc), sizeof(filter_desc) - strlen(filter_desc),"[%d:v]",i);
            }
            snprintf(filter_desc + strlen(filter_desc), sizeof(filter_desc) - strlen(filter_desc),"ni_quadra_xstack=");
            snprintf(filter_desc + strlen(filter_desc), sizeof(filter_desc) - strlen(filter_desc),"%s%s", new_xstack_desc[index], f->desc_suffix);
            strcpy(f->filter_desc, filter_desc);
            av_log(NULL, AV_LOG_INFO, "filter_desc: %s.\n", f->filter_desc);

            // clearing init makes the filter side rebuild its graph
            f->init = 0;
            f->input_eos_num = 0;
            pthread_mutex_unlock(&f->filter_lock);

            av_log(NULL, AV_LOG_INFO, "%s filter graph has active dec workers: %d, "
                "num_src_pads: %d\n", __func__, f->inputs, f->num_src_pads);
        }
    }

    pthread_attr_t attr;
    if ((ret = pthread_attr_init(&attr)) ||
        (ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE))) {
        av_log(NULL, AV_LOG_ERROR, "fail to init attr: %s.\n", strerror(ret));
        // pthread calls report failure as a positive errno value; this
        // function signals failure with a negative return
        ret = -1;
    } else {
        // and spawn newly added decoder threads
        for (i = total_active; i < active_decoder_workers; i++) {
            dec = decoder_workers[i];
            // NOTE(review): a failed pthread_create is only logged; the
            // worker stays counted as active without a running thread
            if (pthread_create(&dec->tid, &attr, &dec_worker_thread_run, dec)) {
                av_log(NULL, AV_LOG_ERROR, "failed to create decoder thread %d"
                       "\n", dec->index);
            }
        }
        pthread_attr_destroy(&attr);
    }

end:
    return ret;
}

// copy the len-byte token starting at src into dst (capacity dst_size) as a
// NUL-terminated string; returns 0 on success, -1 when it does not fit
static int reconf_copy_token(char *dst, size_t dst_size,
                             const char *src, size_t len)
{
    if (len >= dst_size) {
        return -1;
    }
    memcpy(dst, src, len);
    dst[len] = '\0';
    return 0;
}

// return the first blank, newline or NUL at or after p
static char *reconf_token_end(char *p)
{
    while (*p && ! isblank((unsigned char)*p) && *p != '\n') {
        p++;
    }
    return p;
}

// return the first backslash, newline or NUL at or after p
static char *reconf_desc_end(char *p)
{
    while (*p && *p != '\\' && *p != '\n') {
        p++;
    }
    return p;
}

// read reconfig info from file
//
// Each line of the file is one of:
//   - <file>                      remove the input whose file name is <file>
//   + <file> <decoder> [<param>]  add an input; <param> defaults to "out=hw"
//   f[<index>] <desc>             new xstack description for filter <index>
//                                 (at most two digits, 0 when omitted); the
//                                 description ends at '\' or end of line
// Empty lines are ignored, anything else is logged and skipped. Only
// HW_VIDEO inputs (per get_input_type) may be added or removed.
// The number of 'f' lines must equal filter_num, and the resulting number of
// active decoders must be > 1 and < NI_MAX_XSTACK_INPUTS; the update is then
// handed to reconfig_xstack_layout().
// Returns 0 on success, -1 on any error.
//
// NOTE(review): a description continued on the following line after '\' is
// not picked up (the original continuation flag was always reset before the
// next line was read); such a line is reported as "not accepted".
static int read_reconfig_file_and_apply_update(const char *reconf_file) {
    ni_src_desc_t src_to_remove[NI_MAX_XSTACK_INPUTS] = {{{0}}};
    int num_src_to_remove = 0;
    ni_src_desc_t src_to_add[NI_MAX_XSTACK_INPUTS] = {{{0}}};
    int num_src_to_add = 0;
    int i, index = 0, modify_num = 0;
    int final_active;
    char new_xstack_desc[NI_MAX_XSTACK_FILTER][2048] = {0};
    FILE *reconf = NULL;
    char one_line[2048] = {0};
    char *curr = NULL;
    char *str_start = NULL;
    size_t used;

    reconf = fopen(reconf_file, "r");
    if (! reconf) {
        av_log(NULL, AV_LOG_ERROR, "ERROR: %s: Cannot open reconfig_file: %s\n",
               __func__, reconf_file);
        return -1;
    }

    while (fgets(one_line, sizeof(one_line), reconf)) {
        curr = one_line;
        skip_blank(curr)

        if (*curr == '-') {
            ni_src_desc_t *src;

            if (num_src_to_remove >= NI_MAX_XSTACK_INPUTS) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s too many inputs to "
                       "remove, max %d\n", __func__, NI_MAX_XSTACK_INPUTS);
                goto fail;
            }
            src = &src_to_remove[num_src_to_remove];

            // get src input file name to remove: terminated by blank or '\n'
            curr++;
            skip_blank(curr)
            str_start = curr;
            curr = reconf_token_end(curr);
            if (curr == str_start ||
                reconf_copy_token(src->file_name, sizeof(src->file_name),
                                  str_start, (size_t)(curr - str_start)) < 0) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s <%s> missing or too long "
                       "file name\n", __func__, one_line);
                goto fail;
            }

            // check the reconfig file name, not support for yuv and image
            if (get_input_type(src->file_name) != HW_VIDEO)  {
                av_log(NULL, AV_LOG_ERROR, "not support to remove image or yuv input\n");
                goto fail;
            }
            num_src_to_remove++;
        } else if (*curr == '+') {
            ni_src_desc_t *src;

            if (num_src_to_add >= NI_MAX_XSTACK_INPUTS) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s too many inputs to "
                       "add, max %d\n", __func__, NI_MAX_XSTACK_INPUTS);
                goto fail;
            }
            src = &src_to_add[num_src_to_add];

            // get src input file
            curr++;
            skip_blank(curr)
            str_start = curr;
            curr = reconf_token_end(curr);
            if (curr == str_start ||
                reconf_copy_token(src->file_name, sizeof(src->file_name),
                                  str_start, (size_t)(curr - str_start)) < 0) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s <%s> missing or too long "
                       "file name\n", __func__, one_line);
                goto fail;
            }

            // check the reconfig file name, not support for yuv and image
            if (get_input_type(src->file_name) != HW_VIDEO)  {
                av_log(NULL, AV_LOG_ERROR, "not support to add image or yuv input\n");
                goto fail;
            }

            // get decoder name; trailing blanks or a missing final newline
            // count as "no decoder name" too
            skip_blank(curr)
            if (*curr == '\n' || *curr == '\0') {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s <%s> missing decoder "
                       "name, line ignored.\n",
                       __func__, one_line);
                continue;
            }
            str_start = curr;
            curr = reconf_token_end(curr);
            if (reconf_copy_token(src->decoder_name, sizeof(src->decoder_name),
                                  str_start, (size_t)(curr - str_start)) < 0) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s <%s> decoder name too "
                       "long\n", __func__, one_line);
                goto fail;
            }

            // get decoder param
            skip_blank(curr)
            if (*curr == '\n' || *curr == '\0') {
                av_log(NULL, AV_LOG_INFO, "%s <%s> missing decoder param, set out=hw.\n",
                       __func__, one_line);
                str_start = "out=hw";
                curr = str_start + strlen(str_start);
            } else {
                str_start = curr;
                curr = reconf_token_end(curr);
            }
            if (reconf_copy_token(src->decoder_params, sizeof(src->decoder_params),
                                  str_start, (size_t)(curr - str_start)) < 0) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s <%s> decoder param too "
                       "long\n", __func__, one_line);
                goto fail;
            }
            num_src_to_add++;
        } else if (*curr == 'f') {
            // get new xstack filter description, stop at \ or \n
            curr++;

            // get filter index: up to two decimal digits, 0 when absent
            index = 0;
            if (isdigit((unsigned char)*curr)) {
                index = *curr++ - '0';
                if (isdigit((unsigned char)*curr)) {
                    index = index * 10 + (*curr++ - '0');
                }
                // one separator character after the index is consumed, as
                // the single-digit form always did
                if (*curr && *curr != '\n') {
                    curr++;
                }
            }
            modify_num++;
            if (index >= NI_MAX_XSTACK_FILTER) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s filter index %d out of "
                       "range, max %d\n", __func__, index,
                       NI_MAX_XSTACK_FILTER - 1);
                goto fail;
            }

            // get filter desc and append it to what this index already has
            skip_blank(curr)
            str_start = curr;
            curr = reconf_desc_end(curr);
            used = strlen(new_xstack_desc[index]);
            if (reconf_copy_token(new_xstack_desc[index] + used,
                                  sizeof(new_xstack_desc[index]) - used,
                                  str_start, (size_t)(curr - str_start)) < 0) {
                av_log(NULL, AV_LOG_ERROR, "ERROR: %s filter %d description "
                       "too long\n", __func__, index);
                goto fail;
            }
        } else if (*curr == '\n') {
            ; // just ignore empty line
        } else {
            av_log(NULL, AV_LOG_ERROR, "ERROR: %s <%s> not accepted.\n",
                   __func__, one_line);
        }
    }

    fclose(reconf);

    if (modify_num != filter_num) {
        av_log(NULL, AV_LOG_ERROR, "%s reconfig filter number %d must equal"
                " to orignal filter number %d\n",
                __func__, modify_num, filter_num);
        return -1;
    }

    for (i = 0; i < num_src_to_remove; i++) {
        av_log(NULL, AV_LOG_INFO, "%s src to remove %d: <%s>\n", __func__, i,
               src_to_remove[i].file_name);
    }
    for (i = 0; i < num_src_to_add; i++ ) {
        av_log(NULL, AV_LOG_INFO, "%s src to add %d: <%s> <%s>\n", __func__, i,
               src_to_add[i].file_name, src_to_add[i].decoder_name);
    }

    final_active = active_decoder_workers - num_src_to_remove + num_src_to_add;
    if ((num_src_to_remove || num_src_to_add) && final_active < NI_MAX_XSTACK_INPUTS &&
        final_active > 1) {
        for (i = 0; i < modify_num; i++) {
            av_log(NULL, AV_LOG_INFO, "%s num_src_to_remove %d num_src_to_add %d\n"
                "New filter desc: <%s>.\n",
                __func__, num_src_to_remove, num_src_to_add, new_xstack_desc[i]);
        }
        if (reconfig_xstack_layout(src_to_remove, num_src_to_remove,
                                    src_to_add, num_src_to_add, new_xstack_desc) < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s reconfig_xstack_layout failed\n", __func__);
            return -1;
        }

    } else {
        av_log(NULL, AV_LOG_ERROR, "%s can not get src to add or remove\n", __func__);
        return -1;
    }
    return 0;

fail:
    // only reached while the reconfig file is still open
    fclose(reconf);
    return -1;
}

// flush the filter at the end of filter
//
//  1. mark the worker as flushed (under ret_lock);
//  2. for every src pad: free the frames still queued in dec_frame_fifo,
//     unref last_frame and, when the graph is initialised, push a NULL frame
//     into its buffer source to signal end of stream;
//  3. for every non-picture src pad: try to recycle wait_to_free_list,
//     giving up after NI_MAX_DEC_CAPACITY + 1 attempts;
//  4. when the graph is initialised: pull frames from the sinks and send
//     them to the encoders until a sink reports EAGAIN/EOF or an error;
//  5. bump the shared flush counter; the worker that brings it up to
//     filter_num drains the wait lists of its non-picture pads and signals
//     list_cond.
// Returns 0 when flushing completed (also when the graph was never
// initialised), otherwise the negative error of the failing call.
static int flush_filter(filter_worker *f) {
    // ret must start at 0: with f->init == 0 no call below assigns it
    int i, ret = 0, flush_num;
    ni_xstack_entry_t *e;
    ni_xstack_exit_t *exit;

    pthread_mutex_lock(&f->ret_lock);
    f->flushed = true;
    pthread_mutex_unlock(&f->ret_lock);
    for (i = 0; i < f->num_src_pads; i++) {
        e = f->src_pads[i];
        pthread_mutex_lock(&e->lock);
        av_log(NULL, AV_LOG_DEBUG, "%s filter %d dec_frame_fifo %d size %d empty %d\n", __func__,
               f->index, e->worker->index,
               get_fifo_size(e->dec_frame_fifo),
               is_fifo_empty(e->dec_frame_fifo));
        // discard whatever the decoder queued but the filter never consumed
        while (!is_fifo_empty(e->dec_frame_fifo)) {
#if IS_FFMPEG_70_AND_ABOVE
            av_fifo_read(e->dec_frame_fifo, &(e->first_frame), 1);
#else
            av_fifo_generic_read(e->dec_frame_fifo, &(e->first_frame), sizeof(AVFrame*), NULL);
#endif
            av_log(NULL, AV_LOG_DEBUG, "%s filter %d index %d stream frame -> pts=%ld,dts=%ld\n",
                   __func__, f->index, e->worker->index, e->first_frame->pts, e->first_frame->pkt_dts);
            av_frame_free(&e->first_frame);
        }
        av_frame_unref(e->last_frame);
        pthread_mutex_unlock(&e->lock);
        if (f->init) {
            // a NULL frame tells the buffer source that the stream ended
            ret = av_buffersrc_add_frame_flags(
                    e->buffersrc_ctx, NULL,
                    AV_BUFFERSRC_FLAG_KEEP_REF | AV_BUFFERSRC_FLAG_PUSH);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_add_frame_flags got error %d\n",
                    __func__, ret);
                goto end;
            }
        }
    }

    for (i = 0; i < f->num_src_pads; i++) {
        e = f->src_pads[i];
        if (e->worker->type == SW_PICTURE) {
            continue;
        }
        int attempt_cnt = 0;
        pthread_mutex_lock(&e->worker->list_lock);
        while (get_fifo_size(e->worker->wait_to_free_list)) {
            if (list_recycle_frames(e->worker->wait_to_free_list, e->name) < NI_MAX_DEC_CAPACITY) {
                av_log(NULL, AV_LOG_DEBUG, "fifo size %d index %d\n",
                        get_fifo_size(e->worker->wait_to_free_list), e->worker->index);
            }
            // bounded retry: remaining frames are left for the final drain
            if (++attempt_cnt > NI_MAX_DEC_CAPACITY) {
                av_log(NULL, AV_LOG_INFO, "fail to recycle the last frame now, recycle later, fifo size %d index %d\n",
                        get_fifo_size(e->worker->wait_to_free_list), e->worker->index);
                break;
            }
        }
        pthread_mutex_unlock(&e->worker->list_lock);
    }

    while (f->init) {
        for (i = 0; i < f->num_dst_pads; i++) {
            exit = f->dst_pads[i];
            ret = av_buffersink_get_frame(exit->buffersink_ctx, exit->filter_frame);

            if (ret < 0 && ret != AVERROR (EAGAIN) && ret != AVERROR_EOF) {
                av_log(NULL, AV_LOG_ERROR, "%s av_buffersink_get_frame got error %d\n",
                       __func__, ret);
                goto end;
            } else if (ret == AVERROR (EAGAIN) || ret == AVERROR_EOF) {
                // nothing more to pull: flushing is complete
                av_log(NULL, AV_LOG_INFO, "%s av_buffersink_get_frame got %s\n",
                        __func__, ret == AVERROR (EAGAIN) ? "EAGAIN" : "EOF");
                ret = 0;
                goto end;
            } else {
                ret = send_encode_frame(exit->enc_worker, exit->filter_frame);

                if (ret < 0)
                {
                    av_log(NULL, AV_LOG_ERROR, "%s: send_encode_frame ERROR !!! num\n",
                           __func__);
                    // do not keep the pulled frame referenced on failure
                    av_frame_unref(exit->filter_frame);
                    goto end;
                }
                av_frame_unref(exit->filter_frame);
                f->filtered_frames++;
            }
        }
    }
end:
    pthread_mutex_lock(&f->filter_common->lock);
    flush_num = ++(f->filter_common->flush_num);
    pthread_mutex_unlock(&f->filter_common->lock);
    if (flush_num < filter_num)
    {
        av_log(NULL, AV_LOG_INFO, "%s %d / %d filters remaining. Shouldn't drain wait list\n",
               __func__, (filter_num - flush_num), filter_num);
        return ret;
    }
    av_log(NULL, AV_LOG_INFO, "%s All %d / %d filters flushed. Drain wait list\n",
           __func__, flush_num, filter_num);
    for (i = 0; i < f->num_src_pads; i++)
    {
        e = f->src_pads[i];
        if (e->worker->type == SW_PICTURE)
        {
            continue;
        }
        pthread_mutex_lock(&e->worker->list_lock);
        drain_fifo(e->worker->wait_to_free_list, __func__, e->name);
        pthread_cond_signal(&e->worker->list_cond);
        pthread_mutex_unlock(&e->worker->list_lock);
    }
    return ret;
}

// tell encoder filter have finish
// For each of the f->outputs dst pads, set filter_flush on its encoder
// worker under that worker's frame_lock and, if its frame fifo is already
// empty, signal consume_cond. Always returns 0.
static int finish_filter(filter_worker *f) {
    for (int pad = 0; pad < f->outputs; pad++) {
        encoder_worker *enc = f->dst_pads[pad]->enc_worker;

        pthread_mutex_lock(&enc->frame_lock);
        enc->filter_flush = 1;
        // signal only when no queued frame is left in the fifo
        if (is_fifo_empty(enc->enc_frame_fifo)) {
            pthread_cond_signal(&enc->consume_cond);
        }
        pthread_mutex_unlock(&enc->frame_lock);
    }

    return 0;
}

// Fetch the next decoded frame of entry e into e->first_frame.
//
// Returns FILT_DEC_FRAME_VALID when e->first_frame is ready to be handed to
// the filter: either a real frame, or NULL when the stream has ended (eos
// flag set with an empty fifo, or a frame with pts == -1 was read).
// Returns FILT_DEC_FRAME_SKIP when no frame is available for this pad right
// now.
//
// If the fifo is empty the call waits once on e->frame_cond, but only while
// the worker's wait_to_free_list holds fewer than NI_MAX_DEC_CAPACITY frames
// and the reconfig state is NO_RECONFIG or DONE_RECONFIG; otherwise it
// returns SKIP immediately. e->lock is held while the fifo and first_frame
// are touched and is released exactly once on every path.
static filt_dec_frame_t filter_dec_frame_read(filter_worker *f, ni_xstack_entry_t *e)
{
    int wait_list_size;

    pthread_mutex_lock(&e->lock);
    if (e->eos_flag && is_fifo_empty(e->dec_frame_fifo))
    {
        av_log(NULL, AV_LOG_DEBUG, "%s dec %s %s eos %d. eos current/total  %d/%d\n",
               __func__, e->name, e->worker->input_file, e->eos_flag,
               f->input_eos_num, f->num_src_pads);
        // a NULL first_frame is what signals EOF to the filter
        av_frame_free(&e->first_frame);
        e->first_frame = NULL;
        pthread_mutex_unlock(&e->lock);
        return FILT_DEC_FRAME_VALID;
    }
    if (is_fifo_empty(e->dec_frame_fifo))
    {
        pthread_mutex_lock(&e->worker->list_lock);
        wait_list_size = get_fifo_size(e->worker->wait_to_free_list);
        pthread_mutex_unlock(&e->worker->list_lock);
        // check for deadlock conditions
        if (wait_list_size < NI_MAX_DEC_CAPACITY &&         // if wait list is full, don't wait
            (DONE_RECONFIG == f->filter_common->reconfig || // if doing reconfig, don't wait
             NO_RECONFIG == f->filter_common->reconfig))
        {
            av_log(NULL, AV_LOG_DEBUG, "%s dec frame wait %s %s\n", __func__, e->name, e->worker->input_file);
            pthread_cond_wait(&e->frame_cond, &e->lock);
            // woken without a frame: report "nothing yet" rather than
            // waiting again
            if (is_fifo_empty(e->dec_frame_fifo))
            {
                av_log(NULL, AV_LOG_DEBUG, "%s dec frame unavailable %s %s. reconfig %d. eos %d/%d\n",
                       __func__, e->name, e->worker->input_file, f->filter_common->reconfig,
                       f->input_eos_num, f->num_src_pads);
                pthread_mutex_unlock(&e->lock);
                return FILT_DEC_FRAME_SKIP;
            }
            else
            {
                av_log(NULL, AV_LOG_DEBUG, "%s dec frame available %s %s\n",
                       __func__, e->name, e->worker->input_file);
            }
        }
        else
        {
            av_log(NULL, AV_LOG_DEBUG, "%s dec frame wait skip %s %s. Wait list is full dec wont generate a frame. "
                                       "Continue to allow frame to be unreferenced and wait spot to be released."
                                       "reconfig %d\n",
                   __func__, e->name, e->worker->input_file, f->filter_common->reconfig);
            pthread_mutex_unlock(&e->lock);
            return FILT_DEC_FRAME_SKIP;
        }
    }
#if IS_FFMPEG_70_AND_ABOVE
    av_fifo_read(e->dec_frame_fifo, &(e->first_frame), 1);
#else
    av_fifo_generic_read(e->dec_frame_fifo, &(e->first_frame), sizeof(AVFrame *), NULL);
#endif

    // a frame with pts == -1 marks the end of this decoder's stream
    if (e->first_frame->pts == -1)
    {
        // av_frame_free() also resets e->first_frame to NULL, which is what
        // signals EOF to the filter
        av_frame_free(&e->first_frame);
        // release the lock exactly once, before the log call below
        pthread_mutex_unlock(&e->lock);
        av_log(NULL, AV_LOG_ERROR, "%s %s decoder finished. eos %d/%d\n",
               __func__, e->name, get_decoder_eos_num_total(f), f->num_src_pads);
        return FILT_DEC_FRAME_VALID;
    }

    e->first_frame->extended_data = e->first_frame->data;
    av_log(NULL, AV_LOG_DEBUG,
           "%s dec frame get %s %s ui16FrameIdx = [%d] ref_count = %d from fifo %p\n",
           __func__, e->name, e->worker->input_file,
           ((niFrameSurface1_t *)(e->first_frame->buf[0]->data))->ui16FrameIdx,
           av_buffer_get_ref_count(e->first_frame->buf[0]), e->dec_frame_fifo);

    // recalculate pts for yuv and image
    if (e->worker->type != HW_VIDEO)
    {
        e->first_frame->pts = e->last_pts + e->worker->pts_step;
        e->last_pts = e->first_frame->pts;
        av_log(NULL, AV_LOG_DEBUG, "pts %ld step %d\n", e->last_pts, e->worker->pts_step);
    }

    // for picture inputs, signal eos_cond once the fifo has room again
    if (e->worker->type == SW_PICTURE && get_fifo_size(e->dec_frame_fifo) < NI_MAX_DEC_CAPACITY)
    {
        av_log(NULL, AV_LOG_DEBUG, "eos fifo size is %d\n", get_fifo_size(e->dec_frame_fifo));
        pthread_cond_signal(&e->eos_cond);
    }
    pthread_mutex_unlock(&e->lock);
    return FILT_DEC_FRAME_VALID;
}

// filter thread routine
// central filtering processing
// a decoded frame getting into filter graph to be processed
//
// thread_data is the filter_worker served by this thread. Every pass of the
// main loop does three things:
//   1. when the graph is not initialised: take one frame from every source
//      pad, (re)build the graph and feed those first frames into it;
//      otherwise: feed one decoded frame per source pad and recycle the
//      decoder frames waiting to be freed;
//   2. pull filtered frames from every destination pad and pass them to the
//      matching encoder worker;
//   3. take part in a pending reconfiguration, if one was requested.
// The thread returns the value of finish_filter() cast to a pointer.
static void *filter_thread_run(void *thread_data)
{
    filter_worker *f = (filter_worker *)thread_data;
    // ret starts at 0 so every path that reaches `end` stores a defined value
    int ret = 0, i, eof_cnt;
    ni_xstack_entry_t *e;
    ni_xstack_exit_t *exit;
    filt_dec_frame_t frame_status;
    int64_t last_pts = 0;
    while (1) {
        pthread_mutex_lock(&f->filter_lock);
        // filter thread exit when all the decoder threads finish
        if (f->input_eos_num == f->num_src_pads && f->init) {
            if (entry_empty(f)) {
                av_log(NULL, AV_LOG_INFO, "%s all %d video decoders flushed, filter %d ready to flush\n",
                       __func__, f->input_eos_num, f->index);
                pthread_mutex_unlock(&f->filter_lock);
                goto end;
            }
            av_log(NULL, AV_LOG_ERROR, "%s need to flush the filter\n", __func__);
        }

        if (!f->init) {
            av_log(NULL, AV_LOG_INFO, "filter init wait\n");
            // only block for the init signal when no reconfig is in flight
            if (f->filter_common->reconfig == NO_RECONFIG){
                pthread_cond_wait(&f->init_cond, &f->filter_lock);
            }

            av_log(NULL, AV_LOG_INFO, "src pad number %d\n", f->num_src_pads);
            uninit_filter_graph(f);
            pthread_mutex_unlock(&f->filter_lock);

            // fetch the first decoded frame of every input
            for (i = 0; i < f->num_src_pads; i++) {
                e = f->src_pads[i];
                pthread_mutex_lock(&e->lock);
                // NOTE(review): a single `if` does not re-check the fifo after
                // the wait returns — confirm the producer always queues a
                // frame before signalling frame_cond.
                if (is_fifo_empty(e->dec_frame_fifo)) {
                    av_log(NULL, AV_LOG_INFO, "%s init dec frame wait %s %s\n", __func__, e->name, e->worker->input_file);
                    pthread_cond_wait(&e->frame_cond, &e->lock);
                    av_log(NULL, AV_LOG_INFO, "%s init dec frame available %s %s\n", __func__, e->name, e->worker->input_file);
                }
#if IS_FFMPEG_70_AND_ABOVE
                av_fifo_read(e->dec_frame_fifo, &(e->first_frame), 1);
#else
                av_fifo_generic_read(e->dec_frame_fifo, &(e->first_frame), sizeof(AVFrame*), NULL);
#endif
                e->first_frame->extended_data = e->first_frame->data;
                av_log(NULL, AV_LOG_INFO,
                       "%s init dec frame get %s %s ui16FrameIdx = [%d] ref_count = %d from fifo %p\n",
                       __func__, e->name, e->worker->input_file, ((niFrameSurface1_t *)(e->first_frame->buf[0]->data))->ui16FrameIdx, av_buffer_get_ref_count(e->first_frame->buf[0]), e->dec_frame_fifo);
                pthread_mutex_unlock(&e->lock);
            }
            // init pts for yuv and image inputs
            pthread_mutex_lock(&f->filter_lock);
            normalize_pts_for_sw_decoder(f);
            for (i = 0; i < f->outputs; i++) {
                create_new_dst_pad(f,i);
            }
            ret = init_filter_graph2(f);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "init filter graph failed\n");
                // release the first frame of EVERY source pad, not only the
                // one `e` was left pointing at by the loop above
                for (i = 0; i < f->num_src_pads; i++) {
                    e = f->src_pads[i];
                    av_frame_free(&e->first_frame);
                }
                pthread_mutex_unlock(&f->filter_lock);
                goto end;
            }
            else {
                av_log(NULL, AV_LOG_INFO, "init filter graph success\n");
            }
            f->init = 1;
            pthread_mutex_unlock(&f->filter_lock);

            // feed the first frames into the freshly built graph
            pthread_mutex_lock(&f->filter_lock);
            for (i = 0; i < f->num_src_pads; i++) {
                e = f->src_pads[i];
                ret = av_buffersrc_add_frame_flags(
                    e->buffersrc_ctx, e->first_frame,
                    AV_BUFFERSRC_FLAG_KEEP_REF | AV_BUFFERSRC_FLAG_PUSH);
                if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR (EAGAIN)) {
                    pthread_mutex_unlock(&f->filter_lock);
                    av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_add_frame_flags"
                           " first frame failed for %s %d\n", __func__, e->name, ret);
                    goto end;
                }

                // record the last frame if decoder not reach eof
                if (!e->eos_flag) {
                    // skip the ref when last_frame already points at the same buffer
                    if (! (e->last_frame->buf[0] && e->first_frame->buf[0] &&
                        e->last_frame->buf[0]->data == e->first_frame->buf[0]->data)) {
                        av_frame_ref(e->last_frame, e->first_frame);
                    }
                }
                av_frame_free(&e->first_frame);

                av_log(NULL, AV_LOG_INFO, "%s av_buffersrc_add_frame_flags"
                       " first frames fed to filter\n", __func__);
            }
            pthread_mutex_unlock(&f->filter_lock);
            av_log(NULL, AV_LOG_INFO, "init_finish\n");
        } else {
            pthread_mutex_unlock(&f->filter_lock);
            // steady state: feed one decoded frame per source pad
            for (i = 0; i < f->num_src_pads; i++) {
                e = f->src_pads[i];
                frame_status = filter_dec_frame_read(f, e);
                if (FILT_DEC_FRAME_SKIP == frame_status)
                {
                    continue;
                }
                else if (FILT_DEC_FRAME_EXIT == frame_status)
                {
                    goto finish;
                }
                ret = av_buffersrc_add_frame_flags(
                    e->buffersrc_ctx, e->first_frame,
                    AV_BUFFERSRC_FLAG_KEEP_REF | AV_BUFFERSRC_FLAG_PUSH);
                if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR (EAGAIN)) {
                    av_log(NULL, AV_LOG_ERROR, "%s av_buffersrc_add_frame_flags"
                           " add frame failed for %s %d\n", __func__, e->name, ret);
                    goto end;
                }

                // record the last frame if decoder not reach eof
                if (!e->eos_flag) {
                    pthread_mutex_lock(&e->lock);
                    last_pts = e->last_frame->pts;
                    av_frame_unref(e->last_frame);
                    // last_frame was just unreferenced, so its buf[0] is NULL
                    // and the && short-circuits before first_frame is read
                    if (! (e->last_frame->buf[0] && e->first_frame->buf[0] &&
                        e->last_frame->buf[0]->data == e->first_frame->buf[0]->data)) {
                        if (last_pts != -1 && e->first_frame != NULL)//decoder signaled exit dont save a reference
                            av_frame_ref(e->last_frame, e->first_frame);
                    }
                    pthread_mutex_unlock(&e->lock);
                }
                av_frame_free(&e->first_frame);
            }

            // recycle and unref the hw frame if it not ref by filter anymore,
            // it will signal to decoder threads
            for (i = 0; i < f->num_src_pads; i++) {
                e = f->src_pads[i];
                if (e->worker->type == SW_PICTURE) {
                    continue;
                }
                pthread_mutex_lock(&e->worker->list_lock);
                if (list_recycle_frames(e->worker->wait_to_free_list, e->name) < NI_MAX_DEC_CAPACITY) {
                    av_log(NULL, AV_LOG_DEBUG, "%s fifo size %d index %d\n", __func__,
                           get_fifo_size(e->worker->wait_to_free_list), e->worker->index);
                    pthread_cond_signal(&e->worker->list_cond);
                }
                pthread_mutex_unlock(&e->worker->list_lock);
            }
        }

        // try to pull filtered frames from every filter exit
        // put all filtered frames to encoder fifo buffer
        eof_cnt = 0;
        for (i = 0; i < f->num_dst_pads; i++) {
            exit = f->dst_pads[i];
            ret = av_buffersink_get_frame(exit->buffersink_ctx, exit->filter_frame);

            if (ret < 0 && ret != AVERROR (EAGAIN) && ret != AVERROR_EOF) {
                av_log(NULL, AV_LOG_ERROR, "%s av_buffersink_get_frame got error %d\n",
                    __func__, ret);
                goto end;
            } else if (ret == AVERROR (EAGAIN) || ret == AVERROR_EOF) {
                av_log(NULL, AV_LOG_INFO, "%s av_buffersink_get_frame got %s\n",
                       __func__,
                       ret == AVERROR(EAGAIN) ? "EAGAIN" : "EOF");
                if (ret == AVERROR_EOF) {
                    eof_cnt++;
                }
                if (! f->src_pads[0]->last_frame) {
                    av_log(NULL, AV_LOG_ERROR, "%s av_buffersink_get_frame got "
                        "%s src frame NULL, return 0 !\n", __func__,
                        ret == AVERROR (EAGAIN) ? "EAGAIN" : "EOF");
                    ret = 0;
                    goto end;
                }
            }
            else {
                av_log(NULL, AV_LOG_DEBUG, "%s after av_buffersink_get_frame f->filter_frame %p data %p extended_data %p\n",
                    __func__, exit->filter_frame, exit->filter_frame->data, exit->filter_frame->extended_data);
                niFrameSurface1_t *p_data3 = NULL;
                if (exit->filter_frame->data[3]) {
                    p_data3 = (niFrameSurface1_t*)(exit->filter_frame->data[3]);
                }
                // only log the surface details when data[3] is present;
                // dereferencing a NULL p_data3 here would crash
                if (p_data3) {
                    av_log(NULL, AV_LOG_DEBUG, "%s av_buffersink_get_frame got "
                        "one frame out: %p ui16FrameIdx = [%d], hdl=%d bitdep=%d, %d x %d\n",
                        __func__, exit->filter_frame->data[3], p_data3->ui16FrameIdx,
                        p_data3->device_handle, p_data3->bit_depth,
                        p_data3->ui16width, p_data3->ui16height);
                }

                // the encoder takes its time base from the sink until it has started
                if (!exit->enc_worker->started) {
                    exit->enc_worker->timebase = av_buffersink_get_time_base(exit->buffersink_ctx);
                    av_log(NULL, AV_LOG_DEBUG, "%s av_buffersink_get_time_base %d/%d\n",
                        __func__, exit->enc_worker->timebase.num,
                        exit->enc_worker->timebase.den);
                }

                ret = send_encode_frame(exit->enc_worker, exit->filter_frame);
                if (ret < 0)
                {
                    av_log(NULL, AV_LOG_ERROR, "%s: send_encode_frame ERROR !!!\n", __func__);
                }
                f->filtered_frames++;
                av_frame_unref(exit->filter_frame);
            }
        }
        if (eof_cnt == f->num_dst_pads) {
            av_log(NULL, AV_LOG_INFO, "%s all %d encoder flushed, filter %d ready to flush\n",
                   __func__, eof_cnt, f->index);
            goto end;
        }

        // now if there is any reconfig need to do, go ahead and make the change !
        // only one filter thread will do reconfig, other filter need to wait
        pthread_mutex_lock(&f->filter_common->lock);
        // reconfig will block the decoder threads, and continue to do filter until list empty
        if (f->filter_common->reconfig && entry_empty(f)) {
            av_log(NULL, AV_LOG_INFO, "reconfig status %d threads waiting for reconfig done %d\n", f->filter_common->reconfig, reconfig_wait_count);
            if (f->filter_common->reconfig == DO_RECONFIG) {
                // this thread performs the reconfig
                f->filter_common->reconfig = WAIT_RECONFIG;
                pthread_mutex_unlock(&f->filter_common->lock);
                //wait for all the decoder threads to pause
                // NOTE(review): reconfig_wait_count is polled without holding
                // a lock — confirm it is safe to read this way.
                while (reconfig_wait_count != f->num_src_pads)
                {
                    for (i = 0; i < f->num_src_pads; i++)
                    {
                        e = f->src_pads[i];
                        /* In case input stream(s) have ended and it is waiting
                         for longest input to finish. Wake the thread up to acknowledge reconfig.
                        */
                        pthread_mutex_lock(&e->lock);
                        pthread_cond_broadcast(&e->eos_cond);
                        pthread_mutex_unlock(&e->lock);
                        if (e->worker->type == SW_PICTURE)
                        {
                            continue;
                        }
                        // Unblock decoder thread in case it is already waiting on wait_to_free_list
                        pthread_mutex_lock(&e->worker->list_lock);
                        pthread_cond_signal(&e->worker->list_cond);
                        pthread_mutex_unlock(&e->worker->list_lock);
                    }
                }
                // need to wait all the filter threads do reconfig at same time
                pthread_mutex_lock(&f->filter_common->lock);
                if (filter_num > 1) {
                    f->filter_common->ready_num++;
                    pthread_cond_wait(&f->filter_common->start_cond, &f->filter_common->lock);
                }
                //All dec and filter threads are now waiting.
                //Safe to set Done here to not block new dec threads
                f->filter_common->reconfig = DONE_RECONFIG;
                ret = read_reconfig_file_and_apply_update(NI_XSTACK_RECONFIG_FILE_NAME);
                // tell filter threads reconfig has finish and need to reinit filter
                if (ret < 0) {
                    // not fatal: the thread keeps running after a failed reconfig
                    av_log(NULL, AV_LOG_ERROR, "%s reconfig failed, work as before.\n", __func__);
                }
                if (filter_num > 1) {
                    pthread_cond_broadcast(&f->filter_common->finish_cond);
                }
                // signal to continue blocked decoder threads
                pthread_cond_broadcast(&f->filter_common->reconfig_cond);
                pthread_mutex_unlock(&f->filter_common->lock);
                av_log(NULL, AV_LOG_INFO, "%s threads waiting for reconfig done %d\n", __func__, reconfig_wait_count);
                // spin until every waiting thread has left the reconfig wait
                while (reconfig_wait_count)
                {
                }
                av_log(NULL, AV_LOG_INFO, "%s all threads waiting for reconfig done released\n", __func__);
            } else if (f->filter_common->reconfig == WAIT_RECONFIG) {
                // another filter thread performs the reconfig: report ready and wait
                f->filter_common->ready_num++;
                av_log(NULL, AV_LOG_INFO, "filter index %d ready num %d\n", f->index, f->filter_common->ready_num);
                if (f->filter_common->ready_num == filter_num) {
                    pthread_cond_signal(&f->filter_common->start_cond);
                }
                pthread_cond_wait(&f->filter_common->finish_cond, &f->filter_common->lock);
                av_log(NULL, AV_LOG_INFO, "filter index %d finish\n", f->index);
            } else {
                f->filter_common->reconfig = NO_RECONFIG;
            }
        }
        pthread_mutex_unlock(&f->filter_common->lock);
    }
end:
    // publish the result, then wake every thread waiting on list_cond
    pthread_mutex_lock(&f->ret_lock);
    f->filter_ret = ret;
    pthread_mutex_unlock(&f->ret_lock);
    for (i = 0; i < f->num_src_pads; i++) {
        e = f->src_pads[i];
        pthread_mutex_lock(&e->worker->list_lock);
        pthread_cond_broadcast(&e->worker->list_cond);
        pthread_mutex_unlock(&e->worker->list_lock);
    }
finish:
    flush_filter(f);
    // wake every thread waiting on eos_cond for an input that reached EOS
    for (i = 0; i < f->num_src_pads; i++) {
        e = f->src_pads[i];
        if (e->eos_flag) {
            pthread_mutex_lock(&e->lock);
            pthread_cond_broadcast(&e->eos_cond);
            pthread_mutex_unlock(&e->lock);
        }
    }
    ret = finish_filter(f);
    av_log(NULL, AV_LOG_INFO, "filter %d filter num %d.\n", f->index, f->filtered_frames);
    av_log(NULL, AV_LOG_ERROR, "filter %d exit: ret=0x%x.\n", f->index, ret);
    return (void *)((long)ret);
}



// ENCODER THREAD FUNCTION
// Prepare an encoder worker before its thread is started.
// Creates the frame mutex, the consume/produce condition variables, the
// one-slot frame fifo and the buffered frame, and resets the counters and
// state flags. Returns 0 on success; on any failure everything created so
// far is released again and -1 is returned.
static int init_encoder_worker(encoder_worker *enc_worker)
{
    if (pthread_mutex_init(&enc_worker->frame_lock, NULL) != 0) {
        return -1;
    }

    if (pthread_cond_init(&enc_worker->consume_cond, NULL) != 0) {
        goto undo_frame_lock;
    }

    if (pthread_cond_init(&enc_worker->produce_cond, NULL) != 0) {
        goto undo_consume_cond;
    }

    // the encoder side caches a single frame only
#if LIBAVCODEC_VERSION_MAJOR >= 61 //7.0
    enc_worker->enc_frame_fifo = av_fifo_alloc2(1, sizeof(AVFrame*), AV_FIFO_FLAG_AUTO_GROW);
#else
    enc_worker->enc_frame_fifo = av_fifo_alloc_array(1, sizeof(AVFrame*));
#endif
    if (enc_worker->enc_frame_fifo == NULL) {
        goto undo_produce_cond;
    }

    enc_worker->encoder_output_frames = 0;
    enc_worker->encoded_frames = 0;
    enc_worker->filter_flush = 0;
    enc_worker->should_exit = THREAD_STATE_RUNNING;
    enc_worker->started = 0;

    enc_worker->buffered_frame = av_frame_alloc();
    if (enc_worker->buffered_frame != NULL) {
        return 0;
    }

    // unwind in reverse order of creation
    free_fifo(enc_worker->enc_frame_fifo);
undo_produce_cond:
    pthread_cond_destroy(&enc_worker->produce_cond);
undo_consume_cond:
    pthread_cond_destroy(&enc_worker->consume_cond);
undo_frame_lock:
    pthread_mutex_destroy(&enc_worker->frame_lock);
    return -1;
}

// Destroy the mutex and the two condition variables of an encoder worker.
// A NULL worker is ignored.
static void cleanup_encoder_worker(encoder_worker *worker)
{
    if (worker == NULL) {
        return;
    }

    pthread_mutex_destroy(&worker->frame_lock);
    pthread_cond_destroy(&worker->consume_cond);
    pthread_cond_destroy(&worker->produce_cond);
}

// open output file (video only for now), return 0 on success, -1 otherwise
//
// Creates the muxer context for enc_worker->output_name, adds one video
// stream, opens the encoder named enc_worker->encoder_name with the geometry
// of `frame` and the time base stored in enc_worker->timebase, opens the
// output I/O when the format needs a file, and writes the container header.
// On success enc_worker->output_stream and enc_worker->ofmt_ctx are set.
// On failure every resource created here (encoder context, opened I/O,
// muxer context, OutputStream) is released and enc_worker is left untouched.
static int open_output_file(encoder_worker *enc_worker, const AVFrame *frame)
{
    const char *codec_name = enc_worker->encoder_name;
    const char *output_file = enc_worker->output_name;

    AVStream *out_stream;
    AVCodecContext *enc_ctx = NULL;
    const AVCodec *encoder;
    int ret;
    AVFormatContext *ofmt_ctx = NULL;
    OutputStream *output_stream;

    av_log(NULL, AV_LOG_INFO, "%s open encoder %s output file %s index %d\n",
           __func__, codec_name, output_file, enc_worker->index);
    output_stream = av_mallocz(sizeof(OutputStream));
    if (! output_stream) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate output stream\n");
        goto fail_init_output_stream;
    }

    if (! strcmp(output_file, "pipe:")) {
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, "m4v", "pipe:");
    } else if (!strcmp(output_file, "null")) {
        // "null" is passed as the format name, not as a file name
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, output_file, NULL);
    } else {
        /* Note: The file extension string should be in output_file here for
                 avformat_alloc_output_context2() to auto-detect output format
        */
        ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL,
                                             output_file);
    }
    if (! ofmt_ctx) {
        av_log(NULL, AV_LOG_ERROR, "%s Could not create output context\n",
            __func__);
        goto fail_init_ofmt_ctx;
    }

    output_stream->last_encoded_pts = AV_NOPTS_VALUE;
    output_stream->last_encoded_dts = AV_NOPTS_VALUE;

    out_stream = avformat_new_stream(ofmt_ctx, NULL);
    if (! out_stream) {
        av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n");
        goto fail_init_out_stream;
    }

    encoder = avcodec_find_encoder_by_name(codec_name);
    if (! encoder) {
        av_log(NULL, AV_LOG_FATAL, "Necessary encoder %s not found\n",
               codec_name);
        goto fail_init_out_stream;
    }

    enc_ctx = avcodec_alloc_context3(encoder);
    if (! enc_ctx) {
        av_log(NULL, AV_LOG_FATAL, "Failed to allocate the encoder context\n");
        goto fail_init_out_stream;
    }

    enc_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
    enc_ctx->width = frame->width;
    enc_ctx->height = frame->height;
    enc_ctx->sample_aspect_ratio = frame->sample_aspect_ratio;
    enc_ctx->time_base = enc_worker->timebase;

    out_stream->codecpar->codec_id = encoder->id;
    enc_ctx->pix_fmt = AV_PIX_FMT_NI_QUAD;
    enc_ctx->sw_pix_fmt = AV_PIX_FMT_YUV420P;

    // encoders whose name contains "quadra" take the device id and the
    // optional xcoder-params string as private options
    if (strstr(encoder->name, "quadra")) {
        char str_devid[4] = {0};

        snprintf(str_devid, sizeof(str_devid), "%d", enc_worker->device_id);
        av_opt_set(enc_ctx->priv_data, "enc", str_devid, 0);

        if (enc_worker->encoder_params) {
            av_opt_set(enc_ctx->priv_data, "xcoder-params",
                       enc_worker->encoder_params, 0);
        }
    }

    // open the codec
    ret = avcodec_open2(enc_ctx, encoder, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "%s avcodec_open2 cannot open video encoder %s\n",
               __func__, codec_name);
        goto fail_init_out_stream;
    }

    ret = avcodec_parameters_from_context(out_stream->codecpar, enc_ctx);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Stream failed to copy encoder parameters\n");
        goto fail_init_out_stream;
    }

    out_stream->time_base = enc_ctx->time_base;

    av_log(NULL, AV_LOG_INFO, "out_stream->time_base = %d/%d\n",
           out_stream->time_base.num, out_stream->time_base.den);

    output_stream->enc_ctx = enc_ctx;

    av_dump_format(ofmt_ctx, 0, output_file, 1);

    if (! (ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ofmt_ctx->pb, output_file, AVIO_FLAG_WRITE);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Could not open output file '%s'\n",
                   output_file);
            goto fail_init_out_stream;
        }
    }

    // init muxer, write output file header
    ret = avformat_write_header(ofmt_ctx, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error occurred when opening output file\n");
        goto fail_init_out_stream;
    }

    enc_worker->output_stream = output_stream;
    enc_worker->ofmt_ctx = ofmt_ctx;

    return 0;


fail_init_out_stream:
    // enc_ctx is still NULL when the failure happened before its allocation;
    // avcodec_free_context() accepts that
    avcodec_free_context(&enc_ctx);
    // close the I/O handle if avio_open() succeeded but a later step failed
    if (ofmt_ctx->pb && ! (ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        avio_closep(&ofmt_ctx->pb);
    }
    avformat_free_context(ofmt_ctx);
fail_init_ofmt_ctx:
    av_free(output_stream);
fail_init_output_stream:

    return -1;
}

// Send one frame to the encoder and mux every packet it hands back.
//   enc_worker  worker owning the encoder and muxer contexts
//   filt_frame  frame to encode; NULL is passed through to
//               avcodec_send_frame() unchanged (used when flushing)
//   got_frame   reset to 0 on entry, set to 1 when a packet carrying data is
//               received, and set back to 0 when the encoder reports
//               EAGAIN or EOF
// Returns 0 once the encoder has nothing more to give, or the negative
// error code of the failing send/receive/write call.
static int encoder_write_frame(encoder_worker *enc_worker, AVFrame *filt_frame,
                               int *got_frame)
{
    AVFormatContext *mux_ctx = enc_worker->ofmt_ctx;
    OutputStream *ost = enc_worker->output_stream;
    AVPacket enc_pkt = {0};
    int ret;

    av_log(NULL, AV_LOG_DEBUG, "%s frame %p data %p extended_data %p\n",
           __func__, filt_frame, filt_frame ? filt_frame->data : NULL, filt_frame ? filt_frame->extended_data : NULL);

    *got_frame = 0;

    // unless source key frames are forced, clear the picture type
    if (filt_frame && ! enc_worker->force_source_keyframe) {
        filt_frame->pict_type = AV_PICTURE_TYPE_NONE;
    }

    /* encode filtered frame */
    ret = avcodec_send_frame(ost->enc_ctx, filt_frame);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "%s avcodec_send_frame fail to send frame %d"
               "\n",  __func__, ret);
        return ret;
    }

    for (;;) {
        ret = avcodec_receive_packet(ost->enc_ctx, &enc_pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            // nothing more available right now: report success, no packet
            av_log(NULL, AV_LOG_DEBUG, "%s: avcodec_receive_packet got %s "
                   " %d\n",  __func__, ret == AVERROR(EAGAIN) ? "AGAIN" : "EOF",
                   ret);
            *got_frame = 0;
            ret = 0;
        } else if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "%s: avcodec_receive_packet fail to "
                   "receive packet %d\n", __func__, ret);
        } else if (enc_pkt.size && enc_pkt.data) {
            *got_frame = 1;
        }

        if (ret < 0 || ! (*got_frame)) {
            break;
        }

        enc_worker->encoder_output_frames++;

        if (filt_frame) {
            av_log(NULL, AV_LOG_DEBUG, "encoder mux -> pts=%ld,dts=%ld\n",
                   enc_pkt.pts, enc_pkt.dts);
        } else {
            av_log(NULL, AV_LOG_DEBUG, "encoder mux -> flush pts=%ld,dts=%ld\n",
                   enc_pkt.pts, enc_pkt.dts);
        }

        // timestamp sanity checks against the previously muxed packet (log only)
        if ((output_stream_pts_known(ost->last_encoded_pts)) &&
            (enc_pkt.pts == ost->last_encoded_pts)) {
            av_log(NULL, AV_LOG_ERROR, "%s same pts!!! pts=%ld,last_pts=%ld\n",
                   __func__, enc_pkt.pts, ost->last_encoded_pts);
        }
        if ((output_stream_pts_known(ost->last_encoded_dts)) &&
            (enc_pkt.dts <= ost->last_encoded_dts)) {
            av_log(NULL, AV_LOG_ERROR, "%s Non-monotonically increasing dts!!! "
                   "dts=%ld,last_dts=%ld\n",
                   __func__, enc_pkt.dts, ost->last_encoded_dts);
        }

        ost->last_encoded_pts = enc_pkt.pts;
        ost->last_encoded_dts = enc_pkt.dts;

        /* prepare packet for muxing */
        av_log(NULL, AV_LOG_DEBUG, "%s encoder stream mux <- pts=%ld,dts=%ld\n",
               __func__, enc_pkt.pts, enc_pkt.dts);
        /* mux encoded frame */
        ret = av_interleaved_write_frame(mux_ctx, &enc_pkt);
        av_packet_unref(&enc_pkt);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "encoder stream interleaved write error\n");
            break;
        }
    }

    return ret;
}

// Drain the encoder by sending it a NULL frame through encoder_write_frame().
// Returns 0 without doing anything when no output stream exists or the codec
// lacks AV_CODEC_CAP_DELAY; otherwise returns 0 when no packet was reported,
// or the value returned by encoder_write_frame().
static int flush_encoder(encoder_worker *enc_worker)
{
    OutputStream *ost = enc_worker->output_stream;
    int got_frame;
    int ret;

    if (!ost) {
        return 0;
    }
    if (! (ost->enc_ctx->codec->capabilities & AV_CODEC_CAP_DELAY)) {
        return 0;
    }

    av_log(NULL, AV_LOG_DEBUG, "Flushing stream encoder\n");
    ret = encoder_write_frame(enc_worker, NULL, &got_frame);
    if (ret >= 0 && ! got_frame) {
        return 0;
    }
    return ret;
}

// encoder thread routine
//
// thread_data is the encoder_worker served by this thread. The loop waits,
// under frame_lock, for a frame in enc_frame_fifo, opens the output on the
// first frame, encodes and muxes the frame, then signals produce_cond.
// The loop ends when the fifo is empty and either filter_flush or
// should_exit is set (the encoder is then flushed), or when opening the
// output or encoding fails (no flush). On exit the trailer is written, the
// encoder and muxer are released and common->exit_enc_num is incremented.
// Returns the last status code cast to a pointer (0 on success, negative on
// error).
static void *encoder_thread_run(void *thread_data)
{
    int ret = 0, got_frame = 0;
    encoder_worker *enc_worker = (encoder_worker *)thread_data;
    while (1) {
        pthread_mutex_lock(&enc_worker->frame_lock);
        while (is_fifo_empty(enc_worker->enc_frame_fifo)) {
            // flush the encoder if filter has flushed
            if (enc_worker->filter_flush) {
                av_log(NULL, AV_LOG_ERROR, "%s filter flushed, encoder %d ready to flush\n",
                       __func__, enc_worker->index);
                pthread_mutex_unlock(&enc_worker->frame_lock);
                goto flush;
            }

            if (! enc_worker->should_exit) {
                pthread_cond_wait(&enc_worker->consume_cond, &enc_worker->frame_lock);
            } else {
                pthread_mutex_unlock(&enc_worker->frame_lock);
                goto flush;
            }
        }

        // read encode frame from encoder fifo buffer
#if IS_FFMPEG_70_AND_ABOVE
        av_fifo_read(enc_worker->enc_frame_fifo, &enc_worker->filtered_frame, 1);
#else
        av_fifo_generic_read(enc_worker->enc_frame_fifo, &enc_worker->filtered_frame, sizeof(AVFrame*), NULL);
#endif
        // make extended_data point at the frame's own data array again
        if (enc_worker->filtered_frame->data != enc_worker->filtered_frame->extended_data) {
            av_log(NULL, AV_LOG_DEBUG, "%s frame %p data %p != extended_data %p\n",
                   __func__, enc_worker->filtered_frame, enc_worker->filtered_frame->data,
                   enc_worker->filtered_frame->extended_data);
            enc_worker->filtered_frame->extended_data = enc_worker->filtered_frame->data;
        }

        // the output is opened lazily, using the first frame's geometry
        if (! enc_worker->started) {
            if (open_output_file(enc_worker, enc_worker->filtered_frame)) {
                // nothing was opened: drop the frame, report the failure and
                // skip the flush, which needs an opened encoder
                av_frame_unref(enc_worker->filtered_frame);
                pthread_mutex_unlock(&enc_worker->frame_lock);
                ret = -1;
                goto end;
            }
            enc_worker->started = 1;
        }

        enc_worker->encoded_frames++;
        av_log(NULL, AV_LOG_DEBUG, "encoder encoding total=%lu, output total="
               "%lu, %dx%d,pts=%lu,dts=%lu\n",
               enc_worker->encoded_frames, enc_worker->encoder_output_frames,
               enc_worker->filtered_frame->width, enc_worker->filtered_frame->height,
               enc_worker->filtered_frame->pts, enc_worker->filtered_frame->pkt_dts);

        ret = encoder_write_frame(enc_worker, enc_worker->filtered_frame, &got_frame);

        av_frame_unref(enc_worker->filtered_frame);
        if (ret < 0) {
            // never leave the loop with frame_lock still held
            pthread_mutex_unlock(&enc_worker->frame_lock);
            goto end;
        }
        pthread_cond_signal(&enc_worker->produce_cond);
        av_log(NULL, AV_LOG_DEBUG, "encode one frame finish\n");
        pthread_mutex_unlock(&enc_worker->frame_lock);
    } // while

flush:
    ret = flush_encoder(enc_worker);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "encoder flushing encoder failed\n");
    }

end:
    // tear down only what open_output_file() set up
    if (enc_worker->started) {
        av_write_trailer(enc_worker->ofmt_ctx);
        if (! (enc_worker->ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
            avio_closep(&enc_worker->ofmt_ctx->pb);
        }

        if (enc_worker->output_stream) {
            avcodec_free_context(&enc_worker->output_stream->enc_ctx);
            av_free(enc_worker->output_stream);
        }

        if (enc_worker->ofmt_ctx) {
            avformat_free_context(enc_worker->ofmt_ctx);
        }
    }
    av_log(NULL, AV_LOG_INFO, "encoder %d enc num %ld.\n", enc_worker->index, enc_worker->encoded_frames);
    av_log(NULL, AV_LOG_ERROR, "encoder %d exit ret=0x%x.\n", enc_worker->index, ret);
    enc_worker->should_exit = THREAD_STATE_EXIT_ISSUED;

    pthread_mutex_lock(&enc_worker->common->lock);
    enc_worker->common->exit_enc_num++;
    av_log(NULL, AV_LOG_INFO, "exit enc %d\n", enc_worker->common->exit_enc_num);
    pthread_mutex_unlock(&enc_worker->common->lock);

    return (void *)((long)ret);
}



// MAIN THREAD FUNCTION
// File signatures ("magic numbers") expressed as 16-bit words whose low byte
// is the first byte of the pair in the file.
// BMP: 'B' 'M'
unsigned short BMP = 0x4D42;
// JPG: FF D8
unsigned short JPG = 0xD8FF;
// PNG: 89 'P' 'N' 'G' 0D 0A 1A 0A
unsigned short PNG[4] = { 0x5089, 0x474E, 0x0A0D, 0x0A1A };
// GIF: 'G' 'I' 'F' '8' '9' 'a'
unsigned short GIF[3] = { 0x4947, 0x3846, 0x6139 };

// Validate an output resolution.
// Returns 0 when both dimensions are even and lie inside
// [MIN_WIDTH, MAX_WIDTH] x [MIN_HEIGHT, MAX_HEIGHT], -1 otherwise.
static inline int check_resolution(int width, int height) {
    const int both_even = (width % 2 == 0) && (height % 2 == 0);
    const int width_in_range = (width >= MIN_WIDTH) && (width <= MAX_WIDTH);
    const int height_in_range = (height >= MIN_HEIGHT) && (height <= MAX_HEIGHT);

    return (both_even && width_in_range && height_in_range) ? 0 : -1;
}

// get resolution from args, format like widthxheight
//
//   args    string of the form "<width>x<height>", decimal digits only
//   width   receives the parsed width
//   height  receives the parsed height
// Returns 0 when the string is well formed and check_resolution() accepts
// the values, -1 otherwise (an error is logged). Compared with a bare
// strtoul() pair this also rejects a missing width or height, trailing
// characters after the height, and values that do not fit in an int.
static int read_resolution(const char *args, int *width, int *height) {
    // largest value representable in an int, computed without <limits.h>
    const unsigned long int_limit = (unsigned long)(~0u >> 1);
    const char *height_str;
    char *end = NULL;
    unsigned long w, h;

    w = strtoul(args, &end, 10);
    // end == args means strtoul() consumed no digits at all
    if (end == args || *end != 'x') {
        av_log(NULL, AV_LOG_ERROR, "invalid resolution format %s\n",
                args);
        return -1;
    }

    height_str = end + 1;
    h = strtoul(height_str, &end, 10);
    if (end == height_str || *end != '\0') {
        av_log(NULL, AV_LOG_ERROR, "invalid resolution format %s\n",
                args);
        return -1;
    }

    // refuse values that would be mangled by the conversion to int
    if (w > int_limit || h > int_limit) {
        av_log(NULL, AV_LOG_ERROR, "invalid resolution value %s\n",
                args);
        return -1;
    }

    *width = (int)w;
    *height = (int)h;
    if (check_resolution(*width, *height) < 0) {
        av_log(NULL, AV_LOG_ERROR, "invalid resolution value %s\n",
                args);
        return -1;
    }
    return 0;
}

// read filter description from file
//
//   filter_desc  either an inline description (recognised by containing both
//                "inputs=" and ":layout=") or the path of a text file whose
//                first line holds the description
//   dest         receives the description; when read from a file the line is
//                copied as fgets() returned it (at most 1535 characters, any
//                trailing newline kept)
// Returns 0 on success, -1 when the file cannot be opened or is empty.
// NOTE(review): dest is written with strcpy(), so the caller must supply a
// buffer large enough for the description — its size is not visible here.
static int read_filter(const char *filter_desc, char *dest) {
    if (strstr(filter_desc, "inputs=") && strstr(filter_desc, ":layout=")) {
        strcpy(dest, filter_desc);
        return 0;
    } else {
        int ret = -1;
        FILE *file = fopen(filter_desc, "r");
        if (! file) {
            av_log(NULL, AV_LOG_ERROR, "ERROR: %s: Cannot open filter file: %s\n",
                __func__, filter_desc);
            return -1;
        }
        char one_line[1536] = {0};
        if (fgets(one_line, sizeof(one_line), file)) {
            strcpy(dest, one_line);
            ret = 0;
        }
        // the stream is no longer needed whether or not a line was read
        fclose(file);
        return ret;
    }
}

// judge the input name of -I is image or not
// only support format: bmp, png, jpg
static int is_image(const char *name)
{
    FILE *file;
    unsigned short pos[5];
    file = fopen(name,"r");
    if(!file) {
        av_log(NULL, AV_LOG_ERROR, "ERROR: %s: Cannot open filter file: %s\n",
                __func__, name);
        return -1;
    }

    fread(pos, 8, 1, file);

    if(pos[0]==BMP)
    {
        return 1;
    }
    else if(pos[0]==JPG)
    {
        return 2;
    }
    else if(PNG[0]==pos[0]&&PNG[1]==pos[1]&&PNG[2]==pos[2]&&PNG[3]==pos[3])
    {
        return 3;
    }
    else if(GIF[0]==pos[0]&&GIF[1]==pos[1]&&GIF[2]==pos[2])
    {
        return 0;
    }
    return -1;
}

// Sum encoder_output_frames over every encoder worker of every filter.
//
// Iterates filter_num filters and, for each, xstack_workers[i]->outputs
// encoders. The counters are read without taking any lock, so the result is
// a snapshot used for progress reporting.
//
// The sum is accumulated in the same unsigned long type that is returned, so
// long runs cannot overflow a narrower signed intermediate.
static inline unsigned long cur_total_frames(void)
{
    unsigned long total_frame = 0;
    int i, j;

    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < xstack_workers[i]->outputs; j++) {
            total_frame += encoder_workers[i][j]->encoder_output_frames;
        }
    }
    return total_frame;
}

// Print the command-line usage summary to stdout.
// The list mirrors the options accepted by getopt_long() in main().
static void help_usage(void)
{
    printf("Usage: \n"
            "-i | --input                    input video file name.\n"
            "-I | --input_image              input image file name.\n"
            "-d | --decoder                  decoder name.\n"
            "-p | --decoder-params           decoder parameters.\n"
            "-S | --input-res                only for yuv input, yuv resolution, must be set for yuv input\n"
            "-P | --scale-res                for sw input like yuv and picture , scaled resolution, if use default, "
                                             "it will not do scale in most situation\n"
            "-l | --loop                     number of input cycles.\n"
            "-e | --encoder                  encoder name.\n"
            "-x | --encoder-params           encoder parameters.\n"
            "-o | --output                   output file name.\n"
            "-s | --resolution               output file resolution, in the format of Width x Height, -s is optional, "
                                             "and if present, is associated with the output file name preceding it.\n"
            "-f | --filter                   ni_quadra_xstack filter description.\n"
            "-t | --text                     ni_quadra_drawtext filter description.\n"
            "-F | --pad                      ni_quadra_pad filter description\n"
            "-n | --devid                    device id.\n"
            "-k | --force-keyframe           force source keyframe.\n"
            "-v | --loglevel                 available debug level: error, warning, info, debug, trace.\n"
            "-h | --help                     print this help information.\n");
}

void setup_loglevel(char *loglevel)
{
    if (loglevel) {
        if (!strcmp(loglevel, "error")) {
            av_log_set_level(AV_LOG_ERROR);
        } else if (!strcmp(loglevel, "warning")) {
            av_log_set_level(AV_LOG_WARNING);
        } else if (!strcmp(loglevel, "info")) {
            av_log_set_level(AV_LOG_INFO);
        } else if (!strcmp(loglevel, "debug")) {
            av_log_set_level(AV_LOG_DEBUG);
        } else if (!strcmp(loglevel, "trace")) {
            av_log_set_level(AV_LOG_TRACE);
        } else {
            av_log_set_level(AV_LOG_INFO);
        }
    } else {
        av_log_set_level(AV_LOG_INFO);
    }
}

// Emit a one-line progress report ("frame=... fps=...").
//
// is_last_report: non-zero for the final report, which is never throttled and
//                 ends with '\n'; intermediate reports end with '\r'.
// timer_start:    start timestamp in microseconds.
// cur_time:       current timestamp in microseconds.
// frame_number:   number of frames to report.
//
// Does nothing unless the global print_stat is set. Intermediate reports are
// limited to one per 500 ms; the very first call only records the time.
static void print_report(int is_last_report,
        int64_t timer_start, int64_t cur_time,
        unsigned long frame_number)
{
    static int64_t last_time = -1;
    char line[1024];
    float elapsed;
    float rate;
    char terminator;

    if (!print_stat)
        return;

    if (!is_last_report) {
        if (last_time == -1) {
            // first invocation: remember the time, report nothing yet
            last_time = cur_time;
            return;
        }
        if ((cur_time - last_time) < 500000)
            return;
        last_time = cur_time;
    }

    elapsed = (cur_time - timer_start) / 1000000.0;

    // no rate is shown during the first second
    if (elapsed > 1)
        rate = frame_number / elapsed;
    else
        rate = 0;

    if (!print_stat)
        return;

    terminator = is_last_report ? '\n' : '\r';

    // one decimal is printed only while the rate is below 9.95
    snprintf(line, sizeof(line), "frame=%5lu fps=%3.*f ",
            frame_number, (rate < 9.95), rate);

    // when the INFO level is filtered out, write to stderr directly
    if (av_log_get_level() < AV_LOG_INFO) {
        fprintf(stderr, "%s   %c", line, terminator);
    } else {
        av_log(NULL, AV_LOG_INFO, "%s   %c", line, terminator);
    }
    fflush(stderr);
}

// React to SIGINT: raise the global stop flag and log the call.
// Called from the signal_handler() thread loop, not installed as an
// asynchronous signal handler in the code shown here.
static void sigint_handler(void)
{
    // the flag is set before logging
    global_stop = 1;
    av_log(NULL, AV_LOG_INFO, "%s().\n", __func__);
}

void *signal_handler(void *arg)
{
    sigset_t *set = arg;
    int ret, sig;
    struct timespec timeout;
    siginfo_t info;
    filter_worker *f;

    timeout.tv_sec = 0;
    timeout.tv_nsec = 100000000; // 100ms timeout

    // loop to wait for signals and handle them
    while (! global_stop) {
        sig = -1;
        ret = sigtimedwait(set, &info, &timeout);
        if (ret > 0) {
            av_log(NULL, AV_LOG_ERROR, "Received signal %d ret %d\n", info.si_signo, ret);
            sig = info.si_signo;
        } else if (ret < 0) {
            if (EAGAIN != errno) {
                av_log(NULL, AV_LOG_ERROR, "Error sigtimedwait < 0, errno not EAGAIN: %d\n", errno);
            } else {
                av_log(NULL, AV_LOG_DEBUG, "sigtimedwait EAGAIN !\n");
                ret = 0;
            }
        } else {
            av_log(NULL, AV_LOG_ERROR, "sigtimedwait ret 0, should not happen ?\n");
        }

        switch (sig) {
        case SIGINT:
            sigint_handler();
            break;
        case SIGUSR1:
            f = xstack_workers[0];
            pthread_mutex_lock(&f->filter_common->lock);
            f->filter_common->reconfig = DO_RECONFIG;
            pthread_mutex_unlock(&f->filter_common->lock);
            sigaddset(set, SIGUSR1);
            break;
        default:
            continue;
        }
    }

    av_log(NULL, AV_LOG_ERROR, "%s end.\n", __func__);
    return (void *)((long)ret);
}


int main(int argc, char **argv)
{
    int ret, i, j;
    int state = EXIT_SUCCESS;
    common *common = NULL;
    filter_common *filter_common = NULL;
    int force_source_keyframe = 0;

    int input_num = 0;
    int picture_num = 0;
    input_info in_info[NI_MAX_XSTACK_INPUTS];
    memset(in_info, 0, sizeof(input_info) * NI_MAX_XSTACK_INPUTS);
    for (i = 0; i < NI_MAX_XSTACK_INPUTS; i++) {
        in_info[i].got_decoder = false;
        in_info[i].got_params = false;
        in_info[i].type = HW_VIDEO;
        in_info[i].input_width = 0;
        in_info[i].input_height = 0;
        in_info[i].scaled_width = 0;
        in_info[i].scaled_height = 0;
    }
    unsigned int dec_loop = 1;

    filter_info f_info[NI_MAX_XSTACK_FILTER];
    memset(f_info, 0, sizeof(filter_info) * NI_MAX_XSTACK_FILTER);
    for (i = 0; i < NI_MAX_XSTACK_FILTER; i++) {
        f_info[i].get_drawtext = false;
        f_info[i].need_pad = false;
    }

    int output_num[NI_MAX_XSTACK_FILTER] = {0};
    output_info out_info[NI_MAX_XSTACK_FILTER][NI_MAX_XSTACK_OUTPUTS];
    memset(out_info, 0, sizeof(output_info) * NI_MAX_XSTACK_FILTER * NI_MAX_XSTACK_OUTPUTS);
    for (i = 0; i < NI_MAX_XSTACK_FILTER; i++) {
        output_num[i] = 0;
        for (j = 0; j < NI_MAX_XSTACK_OUTPUTS; j++) {
            out_info[i][j].width = 0;
            out_info[i][j].height = 0;
            out_info[i][j].specific_res = false;
        }
    }
    int temp_out_num = 0;

    char *encoder_name = NULL;
    char *encoder_params = NULL;

    pthread_attr_t attr;
    void *result;

    int devid = 0;
    pthread_t sighandler_tid;
    sigset_t sig_set;
    char *loglevel = NULL;
    int opt;
    int opt_index;
    const char *opt_string = "i:I:d:p:S:P:l:o:e:x:s:f:t:F:n:v:hk";
    static struct option long_options[] = {
        {"input",                 required_argument, NULL, 'i'},
        {"input_image",           required_argument, NULL, 'I'},
        {"decoder",               required_argument, NULL, 'd'},
        {"decoder-params",        required_argument, NULL, 'p'},
        {"input-res",             required_argument, NULL, 'S'},
        {"scale-res",             required_argument, NULL, 'P'},
        {"loop",                  required_argument, NULL, 'l'},
        {"encoder",               required_argument, NULL, 'e'},
        {"encoder-params",        required_argument, NULL, 'x'},
        {"output",                required_argument, NULL, 'o'},
        {"resolution",            required_argument, NULL, 's'},
        {"filter",                required_argument, NULL, 'f'},
        {"text",                  required_argument, NULL, 't'},
        {"pad",                   required_argument, NULL, 'F'},
        {"devid",                 required_argument, NULL, 'n'},
        {"loglevel",              required_argument, NULL, 'v'},
        {"help",                  no_argument,       NULL, 'h'},
        {"force-keyframe",        no_argument,       NULL, 'k'},
        { NULL,                   0,                 NULL,  0 },
    };

    while ((opt = getopt_long(argc, argv, opt_string, long_options, &opt_index)) != -1) {
        switch (opt) {
            case 'i':
                input_num++;
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(in_info[input_num - 1].input_name, optarg);
                break;
            case 'I':
                if (is_image(optarg) <= 0 || get_input_type(optarg) != SW_PICTURE) {
                    av_log(NULL, AV_LOG_ERROR, "Not support this kind of picture or gif %s\n",
                           optarg);
                    return EXIT_FAILURE;
                }
                input_num++;
                picture_num++;
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(in_info[input_num - 1].input_name, optarg);
                in_info[input_num - 1].type = SW_PICTURE;
                break;
            case 'd':
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d decoders\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(in_info[input_num - 1].decoder_name, optarg);
                in_info[input_num - 1].got_decoder = true;
                break;
            case 'p':
                strcpy(in_info[input_num - 1].decoder_params, optarg);
                in_info[input_num - 1].got_params = true;
                break;
            case 'S':
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (read_resolution(optarg, &in_info[input_num - 1].input_width,
                    &in_info[input_num - 1].input_height) < 0) {
                    return EXIT_FAILURE;
                }
                in_info[input_num - 1].type = SW_VIDEO;
                break;
            case 'P':
                if (input_num < 1 || input_num > NI_MAX_XSTACK_INPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d input files\n",
                           NI_MAX_XSTACK_INPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (read_resolution(optarg, &in_info[input_num - 1].scaled_width,
                    &in_info[input_num - 1].scaled_height) < 0) {
                    return EXIT_FAILURE;
                }
                break;
            case 'f':
                filter_num++;
                if (filter_num < 1 || filter_num > NI_MAX_XSTACK_FILTER) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d filter\n",
                           NI_MAX_XSTACK_FILTER);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (read_filter(optarg, f_info[filter_num - 1].filter_desc) < 0) {
                    av_log(NULL, AV_LOG_ERROR, "fail to read filter description or filter description invalid\n");
                    return EXIT_FAILURE;
                }
                break;
            case 't':
                if (filter_num < 1 || filter_num > NI_MAX_XSTACK_FILTER) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d filter\n",
                           NI_MAX_XSTACK_FILTER);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(f_info[filter_num - 1].drawtext_filter_desc, optarg);
                f_info[filter_num - 1].get_drawtext = true;
                break;
            case 'F':
                if (filter_num < 1 || filter_num > NI_MAX_XSTACK_FILTER) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d filter\n",
                           NI_MAX_XSTACK_FILTER);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(f_info[filter_num - 1].pad_filter_desc, optarg);
                f_info[filter_num - 1].need_pad = true;
                break;
            case 'e':
                encoder_name = optarg;
                break;
            case 'o':
                output_num[filter_num - 1]++;
                temp_out_num = output_num[filter_num - 1] - 1;
                if (output_num[filter_num - 1] > NI_MAX_XSTACK_OUTPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d output files\n",
                           NI_MAX_XSTACK_OUTPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (output_num[filter_num - 1] < 1) {
                    av_log(NULL, AV_LOG_ERROR, "invalid output number %d\n", output_num[filter_num - 1]);
                    help_usage();
                    return EXIT_FAILURE;
                }
                strcpy(out_info[filter_num - 1][temp_out_num].output_name, optarg);
                break;
            case 's':
                if (output_num[filter_num - 1] > NI_MAX_XSTACK_OUTPUTS) {
                    av_log(NULL, AV_LOG_ERROR, "Error, exceeding max %d resolution files\n",
                           NI_MAX_XSTACK_OUTPUTS);
                    help_usage();
                    return EXIT_FAILURE;
                }
                if (output_num[filter_num - 1] < 1) {
                    av_log(NULL, AV_LOG_INFO, "should to set resolution after output file, skip current resolution %s\n",
                           optarg);
                    break;
                }
                if (out_info[filter_num - 1][temp_out_num].specific_res) {
                    av_log(NULL, AV_LOG_INFO, "current output resolution has been set, skip current resolution %s\n",
                           optarg);
                    break;
                }
                if (read_resolution(optarg, &out_info[filter_num - 1][temp_out_num].width,
                                    &out_info[filter_num - 1][temp_out_num].height) < 0) {
                    return EXIT_FAILURE;
                }
                out_info[filter_num - 1][temp_out_num].specific_res = true;
                break;
            case 'x':
                encoder_params = optarg;
                break;
            case 'l':
                dec_loop = strtoul(optarg, NULL, 10);
                break;
            case 'n':
                devid = atoi(optarg);
                break;
            case 'v':
                loglevel = optarg;
                break;
            case 'h':
                help_usage();
                return EXIT_SUCCESS;
            case 'k':
                force_source_keyframe = 1;
                break;
            default:
                av_log(NULL, AV_LOG_ERROR, "can not parse the arg '-%c %s'\n",
                       opt, optarg);
                help_usage();
                return EXIT_FAILURE;
        }
    }

    setup_loglevel(loglevel);

    // check the input parameters
    if (! filter_num) {
        av_log(NULL, AV_LOG_ERROR, "Error missing -f filter description.\n");
        return EXIT_FAILURE;
    }

    if (input_num == picture_num) {
        av_log(NULL, AV_LOG_ERROR,
               "Error, must have a video input.\n");
        return EXIT_FAILURE;
    }

    for (i = 0; i < input_num; i++) {
        if (!in_info[i].got_decoder && in_info[i].type == HW_VIDEO) {
            av_log(NULL, AV_LOG_ERROR,
                "Error, didn't get decoder name for input file %d.\n", i);
            return EXIT_FAILURE;
        }
        if (in_info[i].type == SW_VIDEO && (in_info[i].input_width == 0 || in_info[i].input_height == 0)) {
            av_log(NULL, AV_LOG_ERROR,
                "Error, didn't get width or height for yuv input file %d.\n", i);
            return EXIT_FAILURE;
        }
    }

    if (input_num < 2) {
        av_log(NULL, AV_LOG_ERROR,
               "Error number of input files less than 2\n");
        return EXIT_FAILURE;
    }

    for (i = 0; i < filter_num; i++) {
        if (output_num[i] < 1) {
            av_log(NULL, AV_LOG_ERROR,
                   "Error number of output files\n");
            return EXIT_FAILURE;
        }
        if (output_num[i] > 1 && (f_info[i].get_drawtext || f_info[i].need_pad)) {
            av_log(NULL, AV_LOG_ERROR,
                   "not support drawtext or pad for multiple output\n");
            return EXIT_FAILURE;
        }
    }

    if (!encoder_name) {
        av_log(NULL, AV_LOG_ERROR, "Error no encode name specified.\n");
        return EXIT_FAILURE;
    }

    //alloc common
    common = alloc_common();
    if (common == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate common data.\n");
        state = EXIT_FAILURE;
        goto end;
    }
    common->total_dec_threads = input_num;

    filter_common = alloc_filter_common();
    if (filter_common == NULL) {
        av_log(NULL, AV_LOG_ERROR, "failed to allocate filter_common data.\n");
        state = EXIT_FAILURE;
        goto end;
    }

    //alloc encoder_workers
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            encoder_workers[i][j] = calloc(1, sizeof(encoder_worker));
            if (encoder_workers[i][j] == NULL) {
                av_log(NULL, AV_LOG_ERROR, "Error alloc encoder worker.\n");
                state = EXIT_FAILURE;
                goto end;
            }
            memset(encoder_workers[i][j], 0, sizeof(encoder_worker));
        }
    }

    //alloc decoder_workers
    for (i = 0; i < input_num; i++) {
        decoder_workers[i] = calloc(1, sizeof(decoder_worker));
        if (decoder_workers[i] == NULL) {
            av_log(NULL, AV_LOG_ERROR, "Error alloc decoder worker.\n");
            state = EXIT_FAILURE;
            goto end;
        }
        memset(decoder_workers[i], 0, sizeof(decoder_worker));
    }

    // init filter description, and get sequence of outputs
    for (i = 0; i < filter_num; i++) {
        xstack_workers[i] = calloc(1, sizeof(filter_worker));
        if (xstack_workers[i] == NULL) {
            av_log(NULL, AV_LOG_ERROR, "Error alloc xstack worker.\n");
            state = EXIT_FAILURE;
            goto end;
        }
        memset(xstack_workers[i], 0, sizeof(filter_worker));
    }

    // init thread attr
    ret = pthread_attr_init(&attr);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "fail to initialize attr: %s.\n", strerror(ret));
        state = EXIT_FAILURE;
        goto end;
    }

    ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "fail to set attr detachstate: %s.\n", strerror(ret));
        pthread_attr_destroy(&attr);
        state = EXIT_FAILURE;
        goto end;
    }

    // block SIGINT and SIGUSR1
    sigemptyset(&sig_set);
    sigaddset(&sig_set, SIGINT);
    sigaddset(&sig_set, SIGUSR1);
    ret = pthread_sigmask(SIG_BLOCK, &sig_set, NULL);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "sigmask block failed !\n");
        state = EXIT_FAILURE;
        goto end;
    }

    // create a thread to handle signals
    ret = pthread_create(&sighandler_tid, NULL, signal_handler,
                         (void *)&sig_set);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "create sighandler thread failed !\n");
        state = EXIT_FAILURE;
        goto end;
    }

    //run encode thread
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            encoder_worker *enc_worker = encoder_workers[i][j];
            enc_worker->index = i;
            enc_worker->encoder_name = encoder_name;
            enc_worker->output_name = out_info[i][j].output_name;
            enc_worker->encoder_params = encoder_params;
            enc_worker->device_id = devid;
            enc_worker->nb_decoders = input_num - picture_num;
            enc_worker->force_source_keyframe = force_source_keyframe;
            enc_worker->common = common;
            if (init_encoder_worker(enc_worker)) {
                av_log(NULL, AV_LOG_ERROR, "init_encoder_worker failed !\n");
                state = EXIT_FAILURE;
                goto end;
            }

            ret = pthread_create(&enc_worker->tid, &attr, &encoder_thread_run, enc_worker);
            if (ret) {
                av_log(NULL, AV_LOG_ERROR, "failed to create codec thread %d: %s.\n", i, strerror(ret));
                pthread_attr_destroy(&attr);
                cleanup_encoder_worker(enc_worker);
                state = EXIT_FAILURE;
                goto end;
            }

            active_encoder_workers++;
        }
    }

    //run filter thread
    for (i = 0; i < filter_num; i++) {
        filter_worker *xstack_worker = xstack_workers[i];
        xstack_worker->index = i;
        xstack_worker->filtered_frames = 0;
        xstack_worker->filter_common = filter_common;
        init_desc(xstack_worker, input_num, output_num[i], f_info[i], out_info[i]);

        // init_xstack
        if (init_xstack(xstack_worker, input_num, output_num[i])) {
            av_log(NULL, AV_LOG_ERROR, "init_xstack failed !\n");
            state = EXIT_FAILURE;
            goto end;
        }

        ret = pthread_create(&xstack_worker->tid, &attr, &filter_thread_run, xstack_worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to create xstack thread %d: %s.\n", i, strerror(ret));
            pthread_attr_destroy(&attr);
            cleanup_filter_worker(xstack_worker);
            state = EXIT_FAILURE;
            goto end;
        }
    }

    //run decode thread
    for (i = 0; i < input_num; i++) {
        decoder_worker *dec_worker = decoder_workers[i];

        dec_worker->xstack = xstack_workers;

        dec_worker->index = i;
        strcpy(dec_worker->input_file, in_info[i].input_name);
        if (in_info[i].type == SW_VIDEO) {
            dec_worker->type = SW_VIDEO;
            if (check_resolution(in_info[i].input_width, in_info[i].input_height) < 0) {
                av_log(NULL, AV_LOG_ERROR, "invalid input resolution\n");
                goto end;
            }
            dec_worker->input_width = in_info[i].input_width;
            dec_worker->input_height = in_info[i].input_height;
            dec_worker->width = in_info[i].scaled_width;
            dec_worker->height = in_info[i].scaled_height;
        } else if (in_info[i].type == SW_PICTURE) {
            dec_worker->type = SW_PICTURE;
            dec_worker->width = in_info[i].scaled_width;
            dec_worker->height = in_info[i].scaled_height;
        } else {
            dec_worker->type = HW_VIDEO;
            strcpy(dec_worker->decoder_name, in_info[i].decoder_name);
            if (in_info[i].got_params) {
                strcpy(dec_worker->decoder_params, in_info[i].decoder_params);
            }
        }
        dec_worker->picture_num = picture_num;
        dec_worker->common = common;
        dec_worker->devid  = devid;
        dec_worker->loop = dec_loop;
        dec_worker->decoded_frames = 0;
        dec_worker->filter_common = filter_common;

        dec_worker->decoded_frame = av_frame_alloc();
        if (!dec_worker->decoded_frame) {
            av_log(NULL, AV_LOG_ERROR, "failed to allocate decoded frame for codec worker %d\n", i);
            goto end;
        }

        ret = init_decoder_worker(dec_worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to init_decoder_worker %d.\n", i);
            pthread_attr_destroy(&attr);
            state = EXIT_FAILURE;
            goto end;
        }

        ret = pthread_create(&dec_worker->tid, &attr, &dec_worker_thread_run, dec_worker);
        if (ret) {
            av_log(NULL, AV_LOG_ERROR, "failed to create codec thread %d: %s.\n", i, strerror(ret));
            pthread_attr_destroy(&attr);
            cleanup_decoder_worker(dec_worker);
            state = EXIT_FAILURE;
            goto end;
        }

        active_decoder_workers++;
    }

    ret = pthread_attr_destroy(&attr);
    if (ret) {
        av_log(NULL, AV_LOG_ERROR, "failed to destroy attr: %s.\n", strerror(ret));
        state = EXIT_FAILURE;
        goto end;
    }

    pthread_mutex_lock(&common->lock);
    while (common->ready_num < common->total_dec_threads)
        pthread_cond_wait(&common->ready_cond, &common->lock);
    pthread_mutex_unlock(&common->lock);

    // start calculate and print fps
    int64_t timer_start = av_gettime_relative();
    while (global_stop == 0) {
        if (common->exit_dec_num == active_decoder_workers &&
            common->exit_enc_num == active_encoder_workers) {
            global_stop = 1;
            break;
        }
        sleep(1);

        print_report(0, timer_start, av_gettime_relative(),
                     cur_total_frames());
    }
    print_report(1, timer_start, av_gettime_relative(),
                 cur_total_frames());

    av_log(NULL, AV_LOG_ERROR, "main thread is going to exit.\n");

    if (pthread_join(sighandler_tid, &result) == 0) {
        if ((long)result != 0) {
            av_log(NULL, AV_LOG_DEBUG, "pthread_join sighandler ret %ld\n",
                   (long)result);
            state = EXIT_FAILURE;
        }
    }
end:
    // free active_decoder_workers
    for (i = 0; i < active_decoder_workers; i++) {
        decoder_worker *dec_worker = decoder_workers[i];

        av_log(NULL, AV_LOG_DEBUG, "decoder thread %d ready to exit.\n",
               dec_worker->index);

        pthread_mutex_lock(&dec_worker->frame_lock);
        dec_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
        pthread_mutex_unlock(&dec_worker->frame_lock);
        if (pthread_join(dec_worker->tid, &result) == 0) {
            if ((long)result != 0) {
                av_log(NULL, AV_LOG_DEBUG, "pthread_join dec_worker %d ret %ld"
                       "\n", dec_worker->index, (long)result);
                state = EXIT_FAILURE;
            }
        }

        if (dec_worker->wait_to_free_list) {
            av_log(NULL, AV_LOG_DEBUG, "decoder wait to free list size: %d\n",
                   get_fifo_size(dec_worker->wait_to_free_list));
            free_fifo(dec_worker->wait_to_free_list);
        }
        cleanup_decoder_worker(dec_worker);

        free(dec_worker);
        decoder_workers[i] = NULL;
    }

    // free active_encoder_workers
    for (i = 0; i < filter_num; i++) {
        for (j = 0; j < output_num[i]; j++) {
            encoder_worker *enc_worker = encoder_workers[i][j];

            av_log(NULL, AV_LOG_INFO, "encoder thread %d ready to exit.\n", enc_worker->index);

            pthread_mutex_lock(&enc_worker->frame_lock);
            enc_worker->should_exit = THREAD_STATE_EXIT_ISSUED;
            pthread_cond_signal(&enc_worker->consume_cond);
            pthread_cond_signal(&enc_worker->produce_cond);
            pthread_mutex_unlock(&enc_worker->frame_lock);
            if (pthread_join(enc_worker->tid, &result) == 0) {
                if ((long)result != 0) {
                    av_log(NULL, AV_LOG_INFO, "pthread_join encoder worker ret %ld\n",
                        (long)result);
                    state = EXIT_FAILURE;
                }
            }

            if (enc_worker->enc_frame_fifo) {
                av_log(NULL, AV_LOG_DEBUG, "encoder frame list size: %d\n",
                       get_fifo_size(enc_worker->enc_frame_fifo));
                free_fifo(enc_worker->enc_frame_fifo);
            }
            if (enc_worker->filtered_frame) {
                av_freep(&enc_worker->filtered_frame);
                enc_worker->filtered_frame = NULL;
            }
            cleanup_encoder_worker(enc_worker);

            free(enc_worker);
            encoder_workers[i][j] = NULL;
        }
    }

    // free xstack entry and exit
    ni_xstack_entry_t *entry;
    ni_xstack_exit_t *exit;
    for (i = 0; i < filter_num; i++) {
        filter_worker *xstack_worker = xstack_workers[i];
        av_log(NULL, AV_LOG_INFO, "filter thread %d ready to exit.\n", xstack_worker->index);

        if (pthread_join(xstack_worker->tid, &result) == 0) {
            if ((long)result != 0) {
                av_log(NULL, AV_LOG_INFO, "pthread_join encoder worker ret %ld\n",
                    (long)result);
                state = EXIT_FAILURE;
            }
        }

        for (j = 0; j < xstack_worker->num_src_pads; j++) {
            entry = xstack_worker->src_pads[j];
            if (entry->dec_frame_fifo) {
                av_log(NULL, AV_LOG_DEBUG, "dec_frame_fifo list size %d\n",
                       get_fifo_size(entry->dec_frame_fifo));
                free_fifo(entry->dec_frame_fifo);
            }
            if (entry->first_frame) {
                av_frame_free(&entry->first_frame);
                entry->first_frame = NULL;
            }
            if (entry->last_frame) {
                av_frame_free(&entry->last_frame);
                entry->last_frame = NULL;
            }
            pthread_mutex_destroy(&entry->lock);
            pthread_cond_destroy(&entry->frame_cond);
            pthread_cond_destroy(&entry->eos_cond);
            free(entry);
            xstack_worker->src_pads[j] = NULL;
        }

        for (j = 0; j < xstack_worker->outputs; j++) {
            exit = xstack_worker->dst_pads[j];
            av_frame_free(&exit->filter_frame);
            free(exit);
            xstack_worker->dst_pads[j] = NULL;
        }

        // free filter
        if (xstack_worker->filter_graph) {
            avfilter_graph_free(&xstack_worker->filter_graph);
        }
        cleanup_filter_worker(xstack_worker);
        free(xstack_worker);
    }

    if (state < 0)
        av_log(NULL, AV_LOG_ERROR, "Error occurred: %s\n", av_err2str(ret));

    free_filter_common(filter_common);
    free_common(common);

    av_log(NULL, AV_LOG_INFO, "EXIT.. state=0x%x.\n", state);
    return state ? 1 : 0;
}
