#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>

#include <sys/mman.h>

#include <malloc.h>

#include <errno.h>
#include <getopt.h>

#include <stdint.h>

#include <poll.h>

#include <pthread.h>
#include <assert.h>

#include "netint_ioctl.h"
#include "single_list.h"

/* Round an integer expression up to the next multiple of 128. */
#define ROUND128(value) (((value) + 127) & ~127)

/*
 * PCI address of the target device, as parsed by resolve_blkdev() and passed
 * to the dmabuf export ioctl in probe_device().
 */
struct pci_dev_info {
    unsigned int domain;
    unsigned int bus;
    unsigned int dev;
    /* PCI function number */
    unsigned int fn;
};

/*
 * One transfer buffer. probe_device() fills in every field; the buffer then
 * travels between shared_data.free_list and shared_data.busy_list.
 */
struct disp_buffer {
    /* dmabuf fd returned by NETINT_IOCTL_EXPORT_DMABUF, -1 while unset */
    int fd;
    /* PCI BAR number handed to the export ioctl */
    int bar;
    struct pci_dev_info pdev_info;
    /* page-aligned host buffer of len bytes (posix_memalign) */
    uint8_t *data;
    /* buffer size in bytes, rounded up to a multiple of 4096 */
    unsigned long len;
    /* offset of this buffer handed to the export ioctl (index * len) */
    unsigned long offset;
    /* mmap() of fd, MAP_FAILED while unmapped */
    volatile uint8_t *mmap_data;
    /* linkage for the free/busy lists */
    struct link_node node;
};

/* Handle on the control device used for all NETINT_IOCTL_* calls. */
struct ni_p2p_dev {
    /* fd of /dev/netint, -1 while closed */
    int netint_fd;
};

/* State shared between main(), the producer thread and the consumer thread. */
struct shared_data {
    struct ni_p2p_dev p2p_dev;
    /* array of num_buffers buffers allocated by probe_device() */
    struct disp_buffer *disp;
    int num_buffers;
    /* non-zero when the -e option was given */
    int exclusive;
    /* frame dimensions in pixels */
    int width;
    int height;
    /* input file path */
    const char *file;
    /* output file path; NULL makes the consumer write to /dev/null */
    const char *output;
    /* size of the input file in bytes (st_size stored in 32 bits) */
    uint32_t filesize;

    /* padding; presumably separates the fields above from the free-list
     * synchronisation objects (cache-line isolation?) -- TODO confirm */
    char align0[64];

    /* buffers ready to be filled by the producer */
    pthread_cond_t free_sign;
    pthread_mutex_t free_lock;
    struct link_head free_list;

    /* padding between the free-list and busy-list objects -- see align0 */
    char align1[64];

    /* buffers handed to the consumer */
    pthread_cond_t busy_sign;
    pthread_mutex_t busy_lock;
    struct link_head busy_list;
};

/*
 * Bookkeeping for the producer thread (prod_thread_func).
 *
 * NOTE(review): should_stop and exit are plain ints that are written by one
 * thread and read by another without any synchronisation.
 */
struct prod_thread {
    /* thread id; init_tasks() presets it to (pthread_t)-1 */
    pthread_t tid;
    struct shared_data *shared;
    /* set to 1 to ask the thread to leave its loop */
    int should_stop;
    /* set to 1 by the thread right before it returns; polled by main() */
    int exit;
};
/* single producer instance, allocated in main() */
struct prod_thread *prod_thread;

/*
 * Bookkeeping for the consumer thread (coms_thread_func).
 *
 * NOTE(review): should_stop and exit are plain ints that are written by one
 * thread and read by another without any synchronisation.
 */
struct coms_thread {
    /* thread id; init_tasks() presets it to (pthread_t)-1 */
    pthread_t tid;
    struct shared_data *shared;
    /* set to 1 to ask the thread to leave its loop */
    int should_stop;
    /* set to 1 by the thread right before it returns; polled by main() */
    int exit;
};
/* single consumer instance, allocated in main() */
struct coms_thread *coms_thread;

/*
 * Polled by the wait loop in main(). No code visible in this file ever sets
 * it to a non-zero value -- presumably reserved for a signal handler; TODO
 * confirm.
 */
static int global_stop = 0;

/*
 * Size in bytes of one tightly packed YUV 4:2:0 planar frame, counting one
 * byte per sample: a full-resolution luma plane plus two chroma planes of
 * half the width and half the height.
 *
 * Both width and height must be even (enforced with assert()).
 */
static uint32_t yuv420p2size(int width, int height)
{
    uint32_t luma, chroma;

    assert((width % 2 == 0) && (height % 2 == 0));

    luma = width * height;
    chroma = (width / 2) * (height / 2);

    /* Y plane followed by the equally sized U and V planes */
    return luma + chroma + chroma;
}

/*
 * Size in bytes of one YUV 4:2:0 planar frame when every row of every plane
 * is padded to a multiple of 128 bytes (ROUND128): the luma rows are padded
 * from width, the chroma rows from width / 2.
 *
 * Both width and height must be even (enforced with assert()).
 */
static uint32_t yuv420p2hwsize(int width, int height)
{
    uint32_t luma, chroma;

    assert((width % 2 == 0) && (height % 2 == 0));

    luma = ROUND128(width) * height;
    chroma = ROUND128(width / 2) * (height / 2);

    /* Y plane followed by the equally sized U and V planes */
    return luma + chroma + chroma;
}

/*
 * Producer thread entry point.
 *
 * Reads shared->file one frame (yuv420p2size() bytes) at a time. For every
 * frame it takes a buffer off shared->free_list, pushes the frame towards the
 * device and queues the buffer on shared->busy_list for the consumer:
 *   - exclusive mode: attach a write fence, copy the frame into the mmap'ed
 *     dmabuf, queue the buffer, then signal the write fence;
 *   - otherwise: copy the frame into the buffer's host memory, issue a
 *     NI_DMABUF_WRITE_TO_DEVICE request, then queue the buffer.
 *
 * data points to the struct prod_thread describing this thread. The thread
 * status (negative on error) is returned cast to a pointer, and the exit flag
 * of that struct is set to 1 right before returning.
 *
 * NOTE(review): when this thread fails, nothing here stops the consumer,
 * which keeps waiting on busy_sign for frames that never arrive.
 */
static void *prod_thread_func(void *data)
{
    struct prod_thread *prod = (struct prod_thread *)data;
    struct shared_data *shared = prod->shared;
    struct ni_p2p_dev *p2p_dev = &shared->p2p_dev;
    const char *file = shared->file;
    uint32_t data_len = yuv420p2size(shared->width, shared->height);
    struct disp_buffer *buf;
    struct link_node *node;
    uint8_t *pdata = NULL;
    size_t nread;
    FILE *fp;
    int valid = 0;  /* non-zero while pdata holds a frame not yet sent */
    int ret = 0;

    printf("input file %s, datalen %u\n", file, data_len);
    fp = fopen(file, "rb");
    if (fp == NULL) {
        printf("failed to open file: %s\n", file);
        ret = -1;
        goto out;
    }

    pdata = malloc(data_len);
    if (!pdata) {
        printf("failed to allocate memory\n");
        ret = -1;
        goto out;
    }

    while (!prod->should_stop && !feof(fp)) {
        if (valid == 0) {
            printf("prod: read ....\n");
            /* keep the item count out of ret so it never becomes the
             * thread's exit status */
            nread = fread(pdata, data_len, 1, fp);
            if (nread != 1) {
                if (feof(fp)) {
                    printf("eof\n");
                    break;
                }
                printf("failed to read file: ret %d, %s\n", (int)nread, strerror(errno));
                ret = -1;
                goto out;
            }
            valid = 1;
        }

        pthread_mutex_lock(&shared->free_lock);
        if (link_empty(&shared->free_list)) {
            /* re-test the stop flag now that the lock is held so a stop
             * request that arrived meanwhile is honoured without blocking */
            if (!prod->should_stop)
                pthread_cond_wait(&shared->free_sign, &shared->free_lock);
            pthread_mutex_unlock(&shared->free_lock);
            continue;
        }

        node = peek_link(&shared->free_list);
        buf = link_entry(node, struct disp_buffer, node);
        pick_link(&shared->free_list);
        pthread_mutex_unlock(&shared->free_lock);

        if (shared->exclusive) {
            struct netint_iocmd_attach_wfence uatch;
            struct netint_iocmd_signal_wfence usigl;

            memset(&uatch, 0, sizeof(uatch));
            uatch.fd = buf->fd;
            ret = ioctl(p2p_dev->netint_fd, NETINT_IOCTL_ATTACH_WFENCE, &uatch);
            if (ret < 0) {
                printf("failed to attach wfence\n");
                goto out;
            }

            memcpy((void *)buf->mmap_data, pdata, data_len);

            /* the buffer is handed to the consumer before the write fence
             * is signalled; this order is kept from the original code */
            pthread_mutex_lock(&shared->busy_lock);
            append_link(&shared->busy_list, &buf->node);
            pthread_cond_signal(&shared->busy_sign);
            pthread_mutex_unlock(&shared->busy_lock);

            memset(&usigl, 0, sizeof(usigl));
            usigl.fd = buf->fd;
            ret = ioctl(p2p_dev->netint_fd, NETINT_IOCTL_SIGNAL_WFENCE, &usigl);
            if (ret < 0) {
                printf("failed to signal fence\n");
                goto out;
            }
        } else {
            struct netint_iocmd_issue_request uis;

            memcpy((void *)buf->data, pdata, data_len);

            memset(&uis, 0, sizeof(uis));
            uis.fd = buf->fd;
            uis.data = buf->data;
            uis.len = buf->len;
            uis.dir = NI_DMABUF_WRITE_TO_DEVICE;
            ret = ioctl(p2p_dev->netint_fd, NETINT_IOCTL_ISSUE_REQ, &uis);
            if (ret < 0) {
                printf("failed to send req: %s\n", strerror(errno));
                goto out;
            }

            pthread_mutex_lock(&shared->busy_lock);
            append_link(&shared->busy_list, &buf->node);
            pthread_cond_signal(&shared->busy_sign);
            pthread_mutex_unlock(&shared->busy_lock);
        }

        valid = 0;
    }
out:
    if (fp)
        fclose(fp);
    free(pdata);
    printf("prod thread exits with %d\n", ret);
    /* report completion through the argument this thread was started with */
    prod->exit = 1;
    return (void *)((unsigned long)ret);
}

/*
 * Consumer thread entry point.
 *
 * Takes buffers off shared->busy_list, fetches one frame (yuv420p2size()
 * bytes) from each, returns the buffer to shared->free_list and appends the
 * frame to the output file (shared->output, or /dev/null when that is NULL).
 * The loop ends once shared->filesize bytes were written or a stop was
 * requested.
 *   - exclusive mode: issue a NI_DMABUF_READ_FROM_DEVICE request into the
 *     buffer's host memory, poll() the dmabuf fd, then copy the frame out;
 *   - otherwise: attach a read fence, copy the frame out of the mmap'ed
 *     dmabuf, release the buffer, then signal the read fence.
 *
 * data points to the struct coms_thread describing this thread. The thread
 * status (0 on success, negative on error) is returned cast to a pointer, and
 * the exit flag of that struct is set to 1 right before returning.
 *
 * NOTE(review): when this thread fails, nothing here stops the producer,
 * which may keep waiting on free_sign for buffers that never come back.
 */
static void *coms_thread_func(void *data)
{
    struct coms_thread *coms = (struct coms_thread *)data;
    struct shared_data *shared = coms->shared;
    struct ni_p2p_dev *p2p_dev = &shared->p2p_dev;
    uint32_t data_len = yuv420p2size(shared->width, shared->height);
    uint32_t filesize = shared->filesize;
    uint32_t total_size = 0;
    struct link_node *node;
    struct disp_buffer *buf;
    struct pollfd pfds[1];
    uint8_t *pdata = NULL;
    FILE *fp = NULL;
    int ret = 0;

    fp = fopen(shared->output ? shared->output : "/dev/null", "wb");
    if (!fp) {
        printf("failed to open file to write\n");
        ret = -1;
        goto out;
    }

    pdata = malloc(data_len);
    if (!pdata) {
        printf("failed to allocate data\n");
        ret = -1;
        goto out;
    }

    while (!coms->should_stop && total_size < filesize) {
        pthread_mutex_lock(&shared->busy_lock);
        if (link_empty(&shared->busy_list)) {
            /* re-test the stop flag now that the lock is held so a stop
             * request that arrived meanwhile is honoured without blocking */
            if (!coms->should_stop)
                pthread_cond_wait(&shared->busy_sign, &shared->busy_lock);
            pthread_mutex_unlock(&shared->busy_lock);
            continue;
        }

        node = peek_link(&shared->busy_list);
        buf = link_entry(node, struct disp_buffer, node);
        pick_link(&shared->busy_list);
        pthread_mutex_unlock(&shared->busy_lock);

        if (shared->exclusive) {
            struct netint_iocmd_issue_request uis;

            memset(&uis, 0, sizeof(uis));
            uis.fd = buf->fd;
            uis.data = buf->data;
            uis.len = buf->len;
            uis.dir = NI_DMABUF_READ_FROM_DEVICE;
            memset(buf->data, 0, buf->len);
            ret = ioctl(p2p_dev->netint_fd, NETINT_IOCTL_ISSUE_REQ, &uis);
            if (ret < 0) {
                printf("failed to send req: %s\n", strerror(errno));
                goto out;
            }

            /* wait for POLLOUT on the dmabuf fd -- presumably raised by the
             * driver once the transfer finished; TODO confirm */
            pfds[0].fd = buf->fd;
            pfds[0].events = POLLOUT;
            pfds[0].revents = 0;
            ret = poll(pfds, 1, -1);
            if (ret < 0) {
                printf("failed to poll\n");
                goto out;
            }
            /* poll() returns a positive count; do not let it become the
             * thread's exit status */
            ret = 0;

            printf("data 0x%lx, data_len %u\n", (unsigned long)buf->data, data_len);
            memcpy(pdata, buf->data, data_len);

            pthread_mutex_lock(&shared->free_lock);
            append_link(&shared->free_list, &buf->node);
            pthread_cond_signal(&shared->free_sign);
            pthread_mutex_unlock(&shared->free_lock);

        } else {
            struct netint_iocmd_attach_rfence uatch;
            struct netint_iocmd_signal_rfence usigl;

            memset(&uatch, 0, sizeof(uatch));
            uatch.fd = buf->fd;
            ret = ioctl(p2p_dev->netint_fd, NETINT_IOCTL_ATTACH_RFENCE, &uatch);
            if (ret < 0) {
                printf("failed to attach rfence\n");
                goto out;
            }

            memcpy(pdata, (void *)buf->mmap_data, data_len);

            /* the buffer goes back to the producer before the read fence is
             * signalled; this order is kept from the original code */
            pthread_mutex_lock(&shared->free_lock);
            append_link(&shared->free_list, &buf->node);
            pthread_cond_signal(&shared->free_sign);
            pthread_mutex_unlock(&shared->free_lock);

            memset(&usigl, 0, sizeof(usigl));
            usigl.fd = buf->fd;
            ret = ioctl(p2p_dev->netint_fd, NETINT_IOCTL_SIGNAL_RFENCE, &usigl);
            if (ret < 0) {
                printf("failed to signal fence\n");
                goto out;
            }
        }

        if (1 != fwrite(pdata, data_len, 1, fp)) {
            printf("failed to write file\n");
            ret = -1;
            goto out;
        }

        total_size += data_len;
    }

out:
    /* buffered write errors only surface when the stream is flushed, so the
     * result of fclose() is part of the thread's status */
    if (fp && fclose(fp) != 0 && ret >= 0) {
        printf("failed to close output file: %s\n", strerror(errno));
        ret = -1;
    }
    free(pdata);
    printf("coms thread exits with %d\n", ret);
    /* report completion through the argument this thread was started with */
    coms->exit = 1;
    return (void *)((unsigned long)ret);
}

static void init_tasks(struct shared_data *shared)
{
    prod_thread->tid = (pthread_t)-1;
    prod_thread->should_stop = 0;
    prod_thread->shared = shared;
    prod_thread->exit = 0;

    coms_thread->tid = (pthread_t)-1;
    coms_thread->should_stop = 0;
    coms_thread->shared = shared;
    coms_thread->exit = 0;

    shared->p2p_dev.netint_fd = -1;
}

/*
 * Start the consumer thread and then the producer thread, both joinable.
 *
 * Returns 0 when both threads are running, otherwise the pthread error code.
 * On failure no thread is left running: if the producer cannot be created,
 * the already running consumer is asked to stop and is joined. The tid of
 * every thread that is not running is reset to (pthread_t)-1, because
 * pthread_create() leaves it unspecified on failure.
 */
static int start_tasks(struct shared_data *shared)
{
    pthread_attr_t attr;
    void *result;
    int ret;

    ret = pthread_attr_init(&attr);
    if (ret) {
        printf("failed to initialize pthread attr\n");
        return ret;
    }

    ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    if (ret) {
        printf("failed to set pthread detachstate\n");
        goto destroy_attr;
    }

    ret = pthread_create(&coms_thread->tid, &attr, coms_thread_func, coms_thread);
    if (ret) {
        printf("failed to create coms thread\n");
        coms_thread->tid = (pthread_t)-1;
        /* nothing is running yet, so there is nothing to join */
        goto destroy_attr;
    }

    ret = pthread_create(&prod_thread->tid, &attr, prod_thread_func, prod_thread);
    if (ret) {
        printf("failed to create prod thread\n");
        prod_thread->tid = (pthread_t)-1;

        /* roll back: stop and reap the consumer started above */
        coms_thread->should_stop = 1;
        pthread_mutex_lock(&shared->busy_lock);
        pthread_cond_signal(&shared->busy_sign);
        pthread_mutex_unlock(&shared->busy_lock);

        if (pthread_join(coms_thread->tid, &result))
            printf("failed to join coms thread\n");
        coms_thread->tid = (pthread_t)-1;
    }

destroy_attr:
    pthread_attr_destroy(&attr);
    return ret;
}

/*
 * Ask the producer and the consumer thread to stop and join them.
 *
 * A thread whose tid still holds the (pthread_t)-1 marker written by
 * init_tasks() was never started and is skipped; joining such an id is
 * undefined. After a successful pass both tids hold the marker again, so
 * calling this function twice is harmless.
 */
static void destroy_thread(struct shared_data *shared)
{
    const pthread_t not_started = (pthread_t)-1;
    void *result;

    if (prod_thread->tid != not_started) {
        prod_thread->should_stop = 1;
        /* signal with the list lock held so the wakeup cannot fall between
         * the waiter's list check and its pthread_cond_wait() */
        pthread_mutex_lock(&shared->free_lock);
        pthread_cond_signal(&shared->free_sign);
        pthread_mutex_unlock(&shared->free_lock);
        if (pthread_join(prod_thread->tid, &result))
            printf("failed to join prod thread\n");
        prod_thread->tid = not_started;
    }

    if (coms_thread->tid != not_started) {
        coms_thread->should_stop = 1;
        pthread_mutex_lock(&shared->busy_lock);
        pthread_cond_signal(&shared->busy_sign);
        pthread_mutex_unlock(&shared->busy_lock);
        if (pthread_join(coms_thread->tid, &result))
            printf("failed to join coms thread\n");
        coms_thread->tid = not_started;
    }
}

/*
 * Initialise both buffer lists and their mutex/condition-variable pairs.
 *
 * Returns 0 on success. On failure returns -1 with every synchronisation
 * object that had already been initialised destroyed again.
 */
static int init_shared_data(struct shared_data *shared)
{
    init_link(&shared->free_list);
    init_link(&shared->busy_list);

    if (pthread_mutex_init(&shared->free_lock, NULL))
        return -1;

    if (pthread_cond_init(&shared->free_sign, NULL) == 0) {
        if (pthread_mutex_init(&shared->busy_lock, NULL) == 0) {
            if (pthread_cond_init(&shared->busy_sign, NULL) == 0)
                return 0;

            /* unwind in reverse order of construction */
            pthread_mutex_destroy(&shared->busy_lock);
        }
        pthread_cond_destroy(&shared->free_sign);
    }
    pthread_mutex_destroy(&shared->free_lock);

    return -1;
}

/*
 * Destroy the condition variables and mutexes created by
 * init_shared_data(): the busy-list pair first, then the free-list pair.
 */
static void cleanup_shared_data(struct shared_data *shared)
{
    pthread_cond_t *signs[2];
    pthread_mutex_t *locks[2];
    int i;

    signs[0] = &shared->busy_sign;
    locks[0] = &shared->busy_lock;
    signs[1] = &shared->free_sign;
    locks[1] = &shared->free_lock;

    for (i = 0; i < 2; i++) {
        pthread_cond_destroy(signs[i]);
        pthread_mutex_destroy(locks[i]);
    }
}

/*
 * Open /dev/netint and set up num_buffers transfer buffers.
 *
 * Every buffer gets a dmabuf exported from BAR 4 of the PCI device described
 * by pdev_info (at offset index * len), an mmap() of that dmabuf and a
 * page-aligned host buffer of the same length. len is the padded frame size
 * (yuv420p2hwsize()) rounded up to a multiple of 4096.
 *
 * On success the buffers are queued on shared->free_list in index order,
 * shared->p2p_dev, disp, num_buffers, width and height are filled in and 0 is
 * returned. On any failure everything acquired here is released, shared is
 * left untouched and -1 is returned.
 */
static int probe_device(struct shared_data *shared,
        struct pci_dev_info *pdev_info, int num_buffers,
        int video_width, int video_height)
{
    struct netint_iocmd_export_dmabuf uexport;
    struct disp_buffer *disp;
    void *data;
    uint32_t len;
    int buf_idx;
    int fd;

    if (num_buffers <= 0)
        return -1;

    disp = calloc(num_buffers, sizeof(struct disp_buffer));
    if (!disp)
        return -1;

    /* mark every slot as "nothing acquired" so the cleanup loop is safe */
    for (buf_idx = 0; buf_idx < num_buffers; buf_idx++) {
        disp[buf_idx].fd = -1;
        disp[buf_idx].mmap_data = MAP_FAILED;
    }

    fd = open("/dev/netint", O_RDWR);
    if (fd < 0) {
        printf("failed to open device: %s\n", strerror(errno));
        goto out;
    }

    /* identical for every buffer */
    len = (yuv420p2hwsize(video_width, video_height) + 4095) & ~4095;

    for (buf_idx = 0; buf_idx < num_buffers; buf_idx++) {
        struct disp_buffer *db = &disp[buf_idx];

        db->pdev_info = *pdev_info;
        db->len = len;
        db->offset = (unsigned long)buf_idx * len;
        db->bar = 4;

        /* export dmabuf */
        memset(&uexport, 0, sizeof(uexport));
        uexport.fd = -1;
        uexport.flags = 0;
        uexport.offset = db->offset;
        uexport.length = db->len;
        uexport.domain = pdev_info->domain;
        uexport.bus = pdev_info->bus;
        uexport.dev = pdev_info->dev;
        uexport.fn  = pdev_info->fn;
        uexport.bar = db->bar;
        if (ioctl(fd, NETINT_IOCTL_EXPORT_DMABUF, &uexport) < 0) {
            printf("failed to export dmabuf: %s\n", strerror(errno));
            goto out;
        }

        db->fd = uexport.fd;
        printf("export fd %d\n", db->fd);

        /* mmap */
        db->mmap_data = mmap(0, len, PROT_READ | PROT_WRITE, MAP_SHARED, db->fd, 0);
        if (db->mmap_data == MAP_FAILED) {
            printf("failed to mmap dmabuf: %s\n", strerror(errno));
            goto out;
        }

        /* host-side buffer used for the io requests; posix_memalign()
         * reports failure with a positive error number, which must not be
         * passed on as this function's result */
        data = NULL;
        if (posix_memalign(&data, (size_t)sysconf(_SC_PAGESIZE), db->len)) {
            printf("failed to allocate memory\n");
            goto out;
        }
        db->data = data;
    }

    /* publish only complete sets, so a failure above never leaves nodes of
     * freed buffers on the list */
    for (buf_idx = 0; buf_idx < num_buffers; buf_idx++)
        append_link(&shared->free_list, &disp[buf_idx].node);

    shared->p2p_dev.netint_fd = fd;
    shared->disp = disp;
    shared->num_buffers = num_buffers;
    shared->width = video_width;
    shared->height = video_height;
    return 0;

out:
    for (buf_idx = 0; buf_idx < num_buffers; buf_idx++) {
        if (disp[buf_idx].fd >= 0) {
            if (disp[buf_idx].mmap_data != MAP_FAILED)
                munmap((void *)disp[buf_idx].mmap_data, disp[buf_idx].len);
            close(disp[buf_idx].fd);
        }
        free(disp[buf_idx].data);
    }
    free(disp);
    if (fd >= 0)
        close(fd);
    return -1;
}

/*
 * Release everything probe_device() stored in shared: per buffer the
 * mapping, the dmabuf fd and the host memory, then the buffer array and the
 * control device fd. Safe to call when nothing was set up (disp NULL,
 * netint_fd negative).
 */
static void close_device(struct shared_data *shared)
{
    struct disp_buffer *bufs = shared->disp;
    int i;

    if (bufs != NULL) {
        for (i = 0; i < shared->num_buffers; i++) {
            struct disp_buffer *db = &bufs[i];

            if (db->fd >= 0) {
                /* unmap before closing the fd that backs the mapping */
                if (db->mmap_data != MAP_FAILED)
                    munmap((void *)db->mmap_data, db->len);
                close(db->fd);
            }
            free(db->data);
        }
        free(bufs);
        shared->disp = NULL;
    }

    if (shared->p2p_dev.netint_fd < 0)
        return;

    close(shared->p2p_dev.netint_fd);
    shared->p2p_dev.netint_fd = -1;
}

/* Print the command line options accepted by main() to stdout. */
static void print_help(void)
{
    printf("Options: \n"
           "-b, --block_dev\n"
           "\tspecify block device name\n"
           "-f, --input_file\n"
           "\tspecify input file holding raw YUV420P frames\n"
           "-s, --size\n"
           "\tspecify frame size as [width]x[height]\n"
           "-o, --output\n"
           "\tspecify output file (default: /dev/null)\n"
           "-e, --exclusive\n"
           "\tenable exclusive mode\n"
           "-h\n"
           "\tshow this help\n"
           );
}

/*
 * Look up the PCI address behind a block device.
 *
 * block_dev is the path of a block device node (for example /dev/<name>).
 * The address is read from /sys/block/<name>/device/address, where <name> is
 * the last path component of block_dev, and is expected to consist of four
 * hexadecimal fields (domain, bus, device, function) separated by single
 * characters.
 *
 * Returns 1 and fills *pdev_info on success. Returns 0 on any failure, in
 * which case *pdev_info is left untouched.
 */
int resolve_blkdev(const char *block_dev, struct pci_dev_info *pdev_info)
{
    char line[256];
    char syspath[256];
    unsigned long field[4];
    const char *name;
    char *cursor, *end;
    struct stat bstat;
    int ret;
    int i;
    FILE *fp;

    if (stat(block_dev, &bstat) < 0) {
        printf("failed to get stat of file %s\n", block_dev);
        return 0;
    }

    if ((bstat.st_mode & S_IFMT) != S_IFBLK) {
        printf("not a block device\n");
        return 0;
    }

    /* sysfs is keyed by the bare device name, so strip any directory part
     * instead of assuming a fixed-length "/dev/" prefix */
    name = strrchr(block_dev, '/');
    name = name ? name + 1 : block_dev;

    /* snprintf() terminates the buffer itself; its return value is the
     * untruncated length and must not be used as an index */
    ret = snprintf(syspath, sizeof(syspath), "/sys/block/%s/device/address", name);
    if (ret < 0 || (size_t)ret >= sizeof(syspath)) {
        printf("block device name too long: %s\n", block_dev);
        return 0;
    }

    printf("syspath %s\n", syspath);
    fp = fopen(syspath, "r");
    if (!fp) {
        printf("failed to open %s\n", syspath);
        return 0;
    }

    cursor = fgets(line, sizeof(line), fp);
    fclose(fp);
    if (cursor == NULL) {
        printf("failed to read line from address\n");
        return 0;
    }

    for (i = 0; i < 4; i++) {
        field[i] = strtoul(cursor, &end, 16);
        if (end == cursor)
            goto malformed;     /* no hex digits where a field should be */
        if (i == 3)
            break;
        /* a separator must follow; never step over the end of the line */
        if (*end == '\0' || *end == '\n')
            goto malformed;
        cursor = end + 1;
    }

    pdev_info->domain = (unsigned int)field[0];
    pdev_info->bus = (unsigned int)field[1];
    pdev_info->dev = (unsigned int)field[2];
    pdev_info->fn  = (unsigned int)field[3];

    return 1;

malformed:
    printf("unexpected address format: %s\n", line);
    return 0;
}

/*
 * Entry point.
 *
 * Parses the options (-b block device, -f input file, -s WIDTHxHEIGHT,
 * -o output file, -e exclusive mode, -h help), validates them, resolves the
 * PCI address behind the block device, sets up the device buffers and runs
 * the producer and consumer threads until both have finished.
 *
 * Returns EXIT_SUCCESS when setup succeeded and both threads finished,
 * EXIT_FAILURE on invalid arguments or any setup failure.
 *
 * NOTE(review): the status values returned by the two threads are not
 * collected, so a failure inside a thread does not change the exit code.
 */
int main(int argc, char **argv)
{
    int ret = -1;
    struct pci_dev_info pdev_info;
    const char *block_dev = NULL;
    int num_buffers = 2;
    char *n;
    int opt;
    int video_width = 0, video_height = 0;
    uint64_t pixels;
    uint32_t frame_size;
    struct stat file_stat;
    const char *file = NULL;
    const char *output = NULL;
    int exclusive_mode = 0;
    int sync_ready = 0;     /* init_shared_data() succeeded */
    int tasks_started = 0;  /* start_tasks() succeeded */

    struct shared_data *shared = NULL;

    const char *opt_string = "b:f:s:o:eh";
    static struct option long_options[] = {
        /* every long option maps to the short option the switch handles */
        { "block_dev",  required_argument, NULL, 'b' },
        { "input_file", required_argument, NULL, 'f' },
        { "size",       required_argument, NULL, 's' },
        { "output",     required_argument, NULL, 'o' },
        { "exclusive",  no_argument,       NULL, 'e' },
        { "help",       no_argument,       NULL, 'h' },
        { NULL,         0,                 NULL,  0  },
    };

    while ((opt = getopt_long(argc, argv, opt_string, long_options, NULL)) != -1) {
        switch (opt) {
        case 'b':
            block_dev = optarg;
            break;
        case 'f':
            file = optarg;
            break;
        case 's':
            video_width = (int) strtol(optarg, &n, 10);
            if (*n != 'x') {
                printf("%s: input size need to be [width]x[height]\n", optarg);
                return EXIT_FAILURE;
            }
            video_height = (int) strtol(n + 1, NULL, 10);
            break;
        case 'e':
            exclusive_mode = 1;
            break;
        case 'o':
            output = optarg;
            break;
        case 'h':
            print_help();
            return EXIT_SUCCESS;
        default:
            return EXIT_FAILURE;
        }
    }

    if (block_dev == NULL) {
        printf("invalid block device\n");
        return EXIT_FAILURE;
    }

    if (file == NULL || access(file, R_OK) != 0) {
        printf("invalid file\n");
        return EXIT_FAILURE;
    }

    /* the size helpers assert() on odd dimensions and compute in 32 bits,
     * so reject anything they cannot handle up front */
    if (video_height <= 0 || video_width <= 0 ||
        (video_width % 2) != 0 || (video_height % 2) != 0) {
        printf("invalid size\n");
        return EXIT_FAILURE;
    }
    pixels = (uint64_t)video_width * (uint64_t)video_height;
    if (pixels > INT32_MAX || pixels + pixels / 2 > UINT32_MAX) {
        printf("invalid size\n");
        return EXIT_FAILURE;
    }

    if (stat(file, &file_stat) != 0) {
        printf("cannot get file stat\n");
        return EXIT_FAILURE;
    }

    frame_size = yuv420p2size(video_width, video_height);
    if (file_stat.st_size % frame_size) {
        printf("invalid file?\n");
        return EXIT_FAILURE;
    }

    /* the byte counters shared with the threads are 32 bits wide; a larger
     * file would be truncated and the threads would never finish */
    if ((uint64_t)file_stat.st_size > UINT32_MAX) {
        printf("file too large\n");
        return EXIT_FAILURE;
    }

    if (resolve_blkdev(block_dev, &pdev_info))
        printf("domain %04x, bus %02x, dev %02x, fn %01x\n",
                pdev_info.domain, pdev_info.bus,
                pdev_info.dev, pdev_info.fn);
    else {
        printf("device not found\n");
        return EXIT_FAILURE;
    }

    prod_thread = calloc(1, sizeof(*prod_thread));
    coms_thread = calloc(1, sizeof(*coms_thread));
    shared = calloc(1, sizeof(*shared));
    if (!prod_thread || !coms_thread || !shared) {
        printf("failed to allocate memory\n");
        goto out_free;
    }

    init_tasks(shared);
    ret = init_shared_data(shared);
    if (ret) {
        printf("failed to init shared data\n");
        goto out;
    }
    sync_ready = 1;

    shared->output = output;
    shared->file = file;
    shared->filesize = (uint32_t)file_stat.st_size;
    shared->exclusive = exclusive_mode;
    /* start */
    ret = probe_device(shared, &pdev_info, num_buffers, video_width, video_height);
    if (ret) {
        printf("failed to op dev\n");
        goto out;
    }

    ret = start_tasks(shared);
    if (ret) {
        printf("failed to start tasks\n");
        goto out;
    }
    tasks_started = 1;

    while (!global_stop) {
        if (prod_thread->exit && coms_thread->exit)
            break;
        sleep(1);
    }
out:
    /* tear down only what was actually set up: joining threads that were
     * never created or destroying uninitialised mutexes is undefined */
    if (tasks_started)
        destroy_thread(shared);
    if (sync_ready)
        cleanup_shared_data(shared);
    close_device(shared);
out_free:
    free(shared);
    free(prod_thread);
    prod_thread = NULL;
    free(coms_thread);
    coms_thread = NULL;

    if (ret)
        return EXIT_FAILURE;
    else
        return EXIT_SUCCESS;
}
