/*
 * Copyright (c) 2016 New Electronic Technology GmbH. All rights reserved.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * Licence.
 *
 * It is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY. Without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this file, see the file "COPYING" in the directory
 * containing this file for more details.
 *
 * If not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

// kernel includes
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
#include <linux/version.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/pci.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/dma-mapping.h>
#include <linux/pagemap.h>
#include <linux/poll.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/sched.h>

// includes for userspace interface
#include "lv.pci.driver.if.h"

#define DRV_VERSION "1.03.008 (12.08.2019)"

// modinfo
MODULE_LICENSE("GPL");
MODULE_AUTHOR("NET GmbH");
MODULE_DESCRIPTION("CorSight streaming device driver");
MODULE_VERSION(DRV_VERSION);

// diagnostics (undefine for release version!!!)
//#define DIAG_MODE 1
//#define DIAG_MODE_EXTRA 1
//#define DIAG_MODE_IDXTABLE 1
//#define DIAG_MODE_IRQ 1
//#define DIAG_MODE_IRQ_DMA 1
//#define DIAG_MODE_IRQ_DEBUG 1
#define LVSM_MSG "lvsm: "

// OS64BIT is true when the compiler defines __LP64__ (i.e. a 64-bit build), false otherwise.
// LvSm_init uses it to decide whether a 44-bit DMA mask is attempted before the 32-bit one.
#ifdef __LP64__
#   define OS64BIT true
#else
#   define OS64BIT false
#endif

// defines
#define LVSM_VENDOR_ID        0x1124
#define LVSM_DEVICE_ID_CBCM   0x0064
#define LVSM_DEVICE_ID_CSCM   0x0065
#define LVSM_DEVICE_ID_CS3CM  0x0066

#define LVSM_MEM_SIZE       0x04000000  // 64M
// all needed for regs and page tables is under 1M
// do not map more to protect vmalloc space against waste and fragmentation
// (in case of problems, try to allocate even the smallest fragments, per-page)
// other areas are mapped dynamically if needed
#define LVSM_MAP_SIZE       0x00100000  // 1M

// CorSight regs
#define LVSM_SBCTR_MODINFO  0x00000000
#define LVSM_FPGA_ID        0x00000008
#define LVSM_FPGA_REV       0x00000010
#define LVSM_IRQ_EN         0x00000024
#define LVSM_IRQ_STAT       0x00000028
#define LVSM_IRQ_CLR        0x0000002C
#define LVSM_IRQ_PEND       0x00000030
#define LVSM_IRQ_TIMOUT     0x00000040
#define LVSM_IRQ_COUNT      0x00000044
#define LVSM_OUT_COUNT      0x00000048
#define LVSM_LAT_COUNT      0x0000004C
#define LVSM_IRQ_LOG0       0x00000060  //only for debug
#define LVSM_IRQ_LOG1       0x00000064  //only for debug
#define LVSM_IRQ_LOGFIFO    0x00000068  //only for debug
#define LVSM_PCI_COUNT      0x0000006C  //only for debug
#define LVSM_EOF_COUNT      0x00000070  //only for debug
#define LVSM_TIM_COUNT      0x00000074  //only for debug
#define LVSM_VIP_COUNT      0x00000078  //only for debug
#define LVSM_ACK_COUNT      0x0000007C  //only for debug
#define LVSM_PCIE_DEBUG     0x00000080  //only for debug
#define LVSM_FMB_EN         0x00020004
#define LVSM_FMB_STRMACT    0x00020010
#define LVSM_FMB_STAT       0x00020024
#define LVSM_FMB_LOAD0      0x00020028
#define LVSM_FMB_FRMDIM     0x00030000
#define LVSM_FMB_STRMCFG    0x00030004
#define LVSM_FMB_STARTFRM   0x00030008
#define LVSM_FMB_STARTMSG   0x0003000C
#define LVSM_FMB_RXCURRSTAT 0x00030020
#define LVSM_FMB_TXCURRSTAT 0x00030030
#define LVSM_DMA_EN         0x00080004
#define LVSM_DMA_CHANTXEN   0x00080008
#define LVSM_DMA_CHANTXSTAT 0x0008000C
#define LVSM_DMA_CHANTXERR  0x00080010
#define LVSM_DMA_IDXHALT    0x0008001C
#define LVSM_DMA_CTRLREG    0x00090000
#define LVSM_DMA_FRMINDEX   0x00090004
#define LVSM_DMA_FRMIDXFIFO 0x00090030
#define LVSM_DMA_NEXTFRMIDX    0x10000


#define LVSM_IRQMSK_DMA0  (1<<0)
#define LVSM_IRQMSK_DMA1  (1<<1)
#define LVSM_IRQMSK_TIM0  (1<<8)
#define LVSM_IRQMSK_TIM1  (1<<9)
#define LVSM_IRQMSK_TIM4  (1<<18)
#define LVSM_IRQMSK_TIM5  (1<<19)

// minor numbers of the device nodes, as decoded by GetFileIds:
//   255    generic driver access
//   0..15  one node per streaming device (minor - LVSM_FDMIN_DEV_0 is the index into LvSmDevs)
//   254    service driver access
#define LVSM_FDMIN_DRV     255
#define LVSM_FDMIN_DEV_0     0
#define LVSM_FDMIN_DEV_LAST 15
#define LVSM_FDMIN_SVC     254

// access types reported by GetFileIds for the minor numbers above
#define LVSM_FDTYPE_DRV    255
#define LVSM_FDTYPE_DEV      0
#define LVSM_FDTYPE_SVC    254


// device info
#define MAX_SM_DEVS    16
#define MAX_AUX_DEVS   16

// One dynamically mapped window of the device memory (an entry of sm_info.DynMaps).
// MapDeviceMemArea treats a slot whose Virtual pointer is NULL as free.
typedef struct _memarea_map
{
    uint32_t Offset;   // offset of the area; added to sm_info.Physical when the area is mapped
    uint32_t Size;     // size of the area as requested by user space
    uint8_t *Virtual;  // kernel virtual address returned by ioremap_nocache, NULL when the slot is unused
} memarea_map;

// Per-device state of one streaming module (one entry of LvSmDevs).
typedef struct _sm_info
{
    struct pci_dev* pDev;               // PCI device found by pci_get_device in LvSm_init
    int Enabled;                        // set to 1 when pci_enable_device succeeded
    int Opened;                         // NOTE(review): not touched in the visible part of the file - presumably an open flag/counter
    int Index;                          // position of this entry within LvSmDevs
    struct _sm_info *pSelf;             // points back to this entry; passed as dev_id to request_irq
    uint8_t *Virtual;                   // ioremap of the first LVSM_MAP_SIZE bytes starting at Physical (register access)
    resource_size_t Physical;           // start address of PCI resource 0 of the device
    // interrupt statistics; they are not updated in the visible part of the file
    // (presumably maintained by the interrupt handler - confirm there)
    uint32_t KernelIrqCount;
    uint32_t EmptyIrqCount;
    uint32_t WantedIrqCount;
    uint32_t UnwantedIrqCount;
    uint32_t LossedIrqCount;
    uint32_t IrqLessingCount;
    uint32_t IrqSkipOther;
    uint32_t TimeoutIrqCount;
    uint32_t IdxDummy[2];               // written by UpdateDummyIndex, indexed by the stream id supplied by user space
    uint32_t IdxOld;                    // NOTE(review): usage not visible here
    uint32_t IrqMask;                   // reset to 0 by DeviceCleanup
    sm_irq_info IrqInfos[SM_IRQBUF_SIZE]; // buffer of interrupt records with SM_IRQBUF_SIZE entries
    volatile unsigned IrqBufIdxW;       // presumably the write position within IrqInfos - confirm against the irq handler
    volatile unsigned IrqBufIdxR;       // presumably the read position within IrqInfos - confirm against the reader
    wait_queue_head_t WaitQueue;        // initialised in LvSm_init; waiters/wakers are outside the visible part
    spinlock_t DmaSpinLock;             // taken with interrupts disabled around the DMA index table update
    struct mutex BufLockMutex;          // protects BufLockList
    struct list_head BufLockList;       // list of buflk_info entries, one per buffer locked through LockBuffer
    memarea_map DynMaps[LVSM_MEMAREA_MAXENTRIES]; // device memory areas mapped on request of user space
    char Desc[32];                      // "lvsm<index>"; name passed to request_irq
} sm_info;

// Auxiliary (non-streaming) PCI device; it is only reported to user space by InitDrvIfInfo.
typedef struct _aux_info
{
    struct pci_dev* pDev;   // PCI device whose bus/device/function numbers and config space get reported
} aux_info;

// table of streaming devices; NrSm is advanced by LvSm_init for each device found
static sm_info LvSmDevs[MAX_SM_DEVS];
static int NrSm = 0;
// table of auxiliary devices; NrAux is reset in LvSm_init before the aux device search
static aux_info AuxDevs[MAX_AUX_DEVS];
static int NrAux = 0;
// device list in the form handed to user space, filled by InitDrvIfInfo
static sm_drv_if LvSmDrvInfo;

// Bookkeeping of one locked user buffer; created by LockBuffer and linked into sm_info.BufLockList.
// The address of the structure is handed to user space as sm_page_locks.DrvId.
typedef struct _buflk_info
{
    struct list_head list;  // link within sm_info.BufLockList
    uint64_t *PageAddrs;    // per page: DMA address shifted right by PAGE_SHIFT (kmalloc'ed array)
    struct page **Pages;    // per page: the pinned struct page (kmalloc'ed array)
    uint64_t NrPages;       // number of entries in both arrays
} buflk_info;

// forward declarations
// module entry points
static int          LvSm_init (void);
static void         LvSm_exit (void);
// character device file operations (wired up in LvSm_fops)
static int          LvSm_open    	(struct inode *inode, struct file *file);
static int          LvSm_release 	(struct inode *inode, struct file *file);
static int          LvSm_mmap    	(struct file *file, struct vm_area_struct *vma);
static ssize_t      LvSm_read    	(struct file *file, char *Buffer, size_t Count, loff_t *f_pos);
static ssize_t      LvSm_write   	(struct file *file, const char *Buffer, size_t Count, loff_t *f_pos);
static loff_t       LvSm_llseek  	(struct file *file, loff_t offset, int origin);
// the ioctl entry point exists in two flavours, selected by HAVE_UNLOCKED_IOCTL
#ifdef HAVE_UNLOCKED_IOCTL // true since 2.6.11
static long         LvSm_unlocked_ioctl	(struct file *file, unsigned int cmd, unsigned long arg);
#else
static int          LvSm_ioctl   	(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
#endif
static unsigned int LvSm_poll    	(struct file *file, struct poll_table_struct *poll_table);
// interrupt handler registered with request_irq in LvSm_init
static irqreturn_t  LvSm_irq     	(int irq, void *dev_id);
// /proc support: LvSm_proc_read is declared unconditionally,
// LvSm_proc_open (used by LvSm_proc_fops) only from kernel 3.10 on
static int          LvSm_proc_read 	(char *page, char **start, off_t offset, int count, int *eof, void *data);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))
static int          LvSm_proc_open	(struct inode *inode, struct file *file);
#endif

// module init/exit
module_init (LvSm_init);
module_exit (LvSm_exit);

// supported fops
// file operations of the character device; passed to register_chrdev in LvSm_init,
// so all minors (driver, service and per-device nodes) share this table
static struct file_operations LvSm_fops = {
    .owner   = THIS_MODULE,
    .llseek  = LvSm_llseek,
    .read    = LvSm_read,
    .write   = LvSm_write,
    .mmap    = LvSm_mmap,
    .open    = LvSm_open,
    .poll    = LvSm_poll,
    // the ioctl hook differs by kernel generation, see the matching forward declarations
#ifdef HAVE_UNLOCKED_IOCTL // true since 2.6.11
    .unlocked_ioctl = LvSm_unlocked_ioctl,
#else
    .ioctl   = LvSm_ioctl,
#endif
    .release = LvSm_release,
    };

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))
// fops for replacement of create_proc_read_entry
// only compiled for kernels >= 3.10: seq_file based operations for the /proc entry;
// everything except open is delegated to the generic seq_file/single_release helpers
static const struct file_operations LvSm_proc_fops = {
    .owner   = THIS_MODULE,
    .open    = LvSm_proc_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};
#endif

// other globals
// name used for register_chrdev and as prefix of the per-device description ("lvsm<index>")
static const char* LvSm_String = "lvsm";
// major number assigned by register_chrdev in LvSm_init (requested dynamically with 0)
static int LvSm_DevMajor = 0;


// kernel version dependent defines
// fallback definition of DMA_BIT_MASK for kernels older than 2.6.24:
// yields a mask with the n lowest bits set; n == 64 is special-cased to avoid shifting by the full width
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
#endif

// helper functions
static int GetFileIds (struct file *file, int *LvSmIndex, int *FdType)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19, 0)
    int FdMinor = MINOR(file->f_path.dentry->d_inode->i_rdev);
#else
    int FdMinor = MINOR(file->f_dentry->d_inode->i_rdev);
#endif

    // init with invalid defaults
    *LvSmIndex = 333;
    *FdType = 333;

    if (FdMinor==LVSM_FDMIN_DRV)
    {
        // driver generic access
        *FdType = LVSM_FDTYPE_DRV;
        *LvSmIndex = 0;
    }
    else if (FdMinor>=LVSM_FDMIN_DEV_0 && FdMinor<=LVSM_FDMIN_DEV_LAST)
    {
        // one of the devices
        *FdType = LVSM_FDTYPE_DEV;
        *LvSmIndex = FdMinor - LVSM_FDMIN_DEV_0;
    }
    else if (FdMinor==LVSM_FDMIN_SVC)
    {
        // service driver access
        *FdType = LVSM_FDTYPE_SVC;
        *LvSmIndex = 0;
    }
    else
    {
        // don't know this one
        return -EINVAL;
    }

    return 0;
}

// Copy the first LVSM_PCICFG_REPSIZE bytes of the PCI configuration space of pDev into Cfg.
// Cfg is zeroed first; reading stops at the first byte that cannot be read, so the
// remainder stays zero. Nothing happens when either argument is NULL.
static void ReadPciCfg (uint8_t *Cfg, struct pci_dev *pDev)
{
    int Offset;

    if (Cfg == NULL || pDev == NULL)
    {
        return;
    }

    memset (Cfg, 0, LVSM_PCICFG_REPSIZE);

    // plain byte-wise read of the config space
    for (Offset = 0; Offset < LVSM_PCICFG_REPSIZE; Offset++)
    {
        if (pci_read_config_byte (pDev, Offset, Cfg + Offset) == 0)
        {
            continue;
        }

        // give up on the first failing byte
        printk (KERN_DEBUG LVSM_MSG "reading of pci config for device %.4X:%.4X failed at byte %d\n", pDev->vendor, pDev->device, Offset);
        return;
    }
}

static void InitDrvIfInfo (void)
{
    // temporary helper initing the info passed from the driver to user space
    // let's report lvsm and aux devs through the same list
    int i, j; // i loops through the output list, j through input lists

    memset (&LvSmDrvInfo, 0, sizeof(LvSmDrvInfo));

    strcpy (LvSmDrvInfo.Magic, LVSM_DRV_MAGIC);

    i = 0;
    for (j=0; j<NrSm && i<LVSM_ALLDEVS_MAXENTRIES; ++i,++j)
    {
        sprintf (LvSmDrvInfo.AllDevs.DevInfo[i].DeviceId, "lvsm%d", j);
        LvSmDrvInfo.AllDevs.DevInfo[i].BusNr = LvSmDevs[j].pDev->bus->number;
        LvSmDrvInfo.AllDevs.DevInfo[i].DevNr = LvSmDevs[j].pDev->devfn>>3;
        LvSmDrvInfo.AllDevs.DevInfo[i].FnNr  = LvSmDevs[j].pDev->devfn&7;
        ReadPciCfg (LvSmDrvInfo.AllDevs.DevInfo[i].PciCfg, LvSmDevs[j].pDev);
    }
    for (j=0; j<NrAux && i<LVSM_ALLDEVS_MAXENTRIES; ++i,++j)
    {
        sprintf (LvSmDrvInfo.AllDevs.DevInfo[i].DeviceId, "lvaux%d", j);
        LvSmDrvInfo.AllDevs.DevInfo[i].BusNr = AuxDevs[j].pDev->bus->number;
        LvSmDrvInfo.AllDevs.DevInfo[i].DevNr = AuxDevs[j].pDev->devfn>>3;
        LvSmDrvInfo.AllDevs.DevInfo[i].FnNr  = AuxDevs[j].pDev->devfn&7;
        ReadPciCfg (LvSmDrvInfo.AllDevs.DevInfo[i].PciCfg, AuxDevs[j].pDev);
    }

    LvSmDrvInfo.AllDevs.NrDevs = i;
}

// Pin a user-space buffer in physical memory and map its pages for DMA.
//
// pLockInfoUser: user-space sm_page_locks; Ptr and NrPages describe the buffer,
//                PageAddrs points to a user array receiving one uint64_t per page
//                (DMA address >> PAGE_SHIFT), DrvId receives the handle that has
//                to be passed to UnlockBuffer later
// LvSmIndex:     index into LvSmDevs of the device the pages are mapped for
//
// Returns 0 on success, -ENOMEM when a bookkeeping allocation fails, -EFAULT when
// user memory cannot be read or written and -EINVAL when the page count is out of
// range or pinning/mapping fails. Whatever was acquired before a failure is
// released again, and nothing stays in the list of locked buffers.
static int LockBuffer (sm_page_locks __user *pLockInfoUser, int LvSmIndex)
{
    sm_page_locks *pLockInfo = kmalloc (sizeof(*pLockInfo), GFP_KERNEL);
    buflk_info *pBufLk = kmalloc (sizeof(*pBufLk), GFP_KERNEL);
    struct device *pDmaDev;
    uint64_t *PageAddrs = NULL;
    struct page **Pages = NULL;
    uint64_t NrPages = 0;
    uint64_t NrPinned = 0;  // pages that have to be unpinned on failure
    uint64_t NrMapped = 0;  // pages that have to be unmapped on failure
    uint64_t k;
    dma_addr_t BusAddr;
    int Res;
    int RetVal = 0;

    // copy the structure from user space
    if (!pLockInfo || !pBufLk)
    {
        RetVal = -ENOMEM;
        goto out_free;
    }
    if (copy_from_user (pLockInfo, pLockInfoUser, sizeof(*pLockInfo)))
    {
        printk (KERN_DEBUG LVSM_MSG "dma buffer lock info copy from user failed, no lock done\n");
        RetVal = -EFAULT;
        goto out_free;
    }
    NrPages = pLockInfo->NrPages;
    #ifdef DIAG_MODE
    printk (KERN_DEBUG LVSM_MSG "locking %d pages\n", (int)NrPages);
    #endif

    // the count comes from user space: keep the array sizes below free of overflow
    // and within what the int result of get_user_pages can report
    if (NrPages > INT_MAX / sizeof(uint64_t))
    {
        RetVal = -EINVAL;
        goto out_free;
    }

    // allocate the arrays for the struct page pointers and for the reported addresses
    Pages = kmalloc (sizeof(*Pages) * NrPages, GFP_KERNEL);
    PageAddrs = kmalloc (sizeof(*PageAddrs) * NrPages, GFP_KERNEL);
    if (!Pages || !PageAddrs)
    {
        RetVal = -ENOMEM;
        goto out_free;
    }

    // get_user_pages faults the pages in physical ram and pins them down
    down_read(&current->mm->mmap_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,1)
    Res = get_user_pages (pLockInfo->Ptr & PAGE_MASK, NrPages, 1, 0, Pages, NULL);
#else
    Res = get_user_pages (current, current->mm, pLockInfo->Ptr & PAGE_MASK, NrPages, 1, 0, Pages, NULL);
#endif
    up_read(&current->mm->mmap_sem);
    if (Res > 0)
    {
        NrPinned = Res;
    }
    // a negative result is an error code and must not take part in the unsigned comparison
    if (Res < 0 || NrPinned < NrPages)
    {
        // not all pages could be locked, ie. release those locked and report failure
        printk (KERN_DEBUG LVSM_MSG "locking failed, locked only %d pages, unlocking all again\n", Res);
        RetVal = -EINVAL;
        goto out_unpin;
    }

    // successfully locked all the pages, map them for DMA
    pDmaDev = &LvSmDevs[LvSmIndex].pDev->dev;
    for (NrMapped=0; NrMapped<NrPages; ++NrMapped)
    {
        BusAddr = dma_map_page (pDmaDev, Pages[NrMapped], 0, PAGE_SIZE, DMA_FROM_DEVICE);
        // the check has to see the address exactly as returned, not the shifted value
        #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
        if (dma_mapping_error (pDmaDev, BusAddr))
        #else
        if (dma_mapping_error (BusAddr))
        #endif
        {
            printk (KERN_DEBUG LVSM_MSG "dma page mapping failed, unmapping/unlocking all again\n");
            RetVal = -EINVAL;
            goto out_unmap;
        }
        PageAddrs[NrMapped] = (uint64_t)BusAddr >> PAGE_SHIFT;
    }

    // report the addresses to user space
    if (copy_to_user ((void __user *)(ptrdiff_t)pLockInfo->PageAddrs, PageAddrs, NrPages*sizeof(uint64_t)))
    {
        printk (KERN_DEBUG LVSM_MSG "dma page map copy to user failed, unmapping/unlocking all again\n");
        RetVal = -EFAULT;
        goto out_unmap;
    }

    // add the info to the list of locked buffers and hand the handle to user space
    pBufLk->Pages = Pages;
    pBufLk->PageAddrs = PageAddrs;
    pBufLk->NrPages = NrPages;
    pLockInfo->DrvId = (uint64_t)(ptrdiff_t)pBufLk;
    mutex_lock (&LvSmDevs[LvSmIndex].BufLockMutex);
    list_add (&pBufLk->list, &LvSmDevs[LvSmIndex].BufLockList);
    if (copy_to_user (pLockInfoUser, pLockInfo, sizeof(*pLockInfo)))
    {
        // user space never learned the handle, so take the entry back;
        // otherwise the list would keep pointing to the arrays freed below
        list_del (&pBufLk->list);
        RetVal = -EFAULT;
    }
    mutex_unlock (&LvSmDevs[LvSmIndex].BufLockMutex);
    if (RetVal != 0)
    {
        printk (KERN_DEBUG LVSM_MSG "dma buffer lock info copy to user failed, unmapping/unlocking all again\n");
        goto out_unmap;
    }

    // the lock info structure is not needed any more
    kfree (pLockInfo);
    return 0;

out_unmap:
    // unmap successfully mapped pages
    for (k=0; k<NrMapped; ++k)
    {
        dma_unmap_page (pDmaDev, PageAddrs[k]<<PAGE_SHIFT, PAGE_SIZE, DMA_FROM_DEVICE);
    }
out_unpin:
    // unpin the pages pinned by get_user_pages
    for (k=0; k<NrPinned; ++k)
    {
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,1)
        page_cache_release(Pages[k]);
#else
        put_page(Pages[k]);
#endif
    }
out_free:
    // kfree accepts NULL, so no distinction is needed for partially allocated state
    kfree (PageAddrs);
    kfree (Pages);
    kfree (pBufLk);
    kfree (pLockInfo);
    return RetVal;
}

// Undo the DMA mapping and the pinning of all pages of one locked buffer and free
// the two per-page arrays. The buflk_info structure itself is neither unlinked nor freed.
//
// pBufLk:    bookkeeping of the buffer as set up by LockBuffer
// LvSmIndex: index into LvSmDevs of the device the pages were mapped for
//
// Returns 0, or -EINVAL when one of the arrays is missing (the other one is still
// processed). The array pointers are reset after freeing, so a repeated call on
// the same entry reports -EINVAL instead of freeing the arrays twice.
static int UnlockBufferPages (buflk_info *pBufLk, int LvSmIndex)
{
    int RetVal = 0;
    uint64_t i;     // same width as NrPages

    // unmap the DMA pages
    if (pBufLk->PageAddrs)
    {
        for (i=0; i<pBufLk->NrPages; ++i)
        {
            dma_unmap_page (&LvSmDevs[LvSmIndex].pDev->dev, pBufLk->PageAddrs[i]<<PAGE_SHIFT, PAGE_SIZE, DMA_FROM_DEVICE);
        }
        kfree (pBufLk->PageAddrs);
        pBufLk->PageAddrs = NULL;
    }
    else
    {
        printk (KERN_DEBUG LVSM_MSG "could not unmap pages\n");
        RetVal = -EINVAL;
    }

    // now unlock the pages
    if (pBufLk->Pages)
    {
        for (i=0; i<pBufLk->NrPages; ++i)
        {
            // flag the page dirty, to make clear it was probably modified by DMA and should not be forgotten
            SetPageDirty (pBufLk->Pages[i]);
            // unpin the page
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,1)
            page_cache_release (pBufLk->Pages[i]);
#else
            put_page (pBufLk->Pages[i]);
#endif
        }
        // the struct page pointers array should be also discarded, will not be needed any more
        kfree (pBufLk->Pages);
        pBufLk->Pages = NULL;
    }
    else
    {
        // failed
        printk (KERN_DEBUG LVSM_MSG "could not unlock pages\n");
        RetVal = -EINVAL;
    }

    return RetVal;
}

// Unlock one buffer previously locked through LockBuffer.
//
// pLockInfoUser: user-space sm_page_locks whose DrvId is the handle returned by LockBuffer
// LvSmIndex:     index into LvSmDevs of the device the buffer was locked for
//
// The handle comes from user space, so it is never dereferenced directly: it is
// only accepted when it matches an entry of the device's list of locked buffers.
//
// Returns 0 on success, -ENOMEM when the temporary copy cannot be allocated,
// -EFAULT when the structure cannot be read from user space, -EINVAL when the
// handle is unknown or the pages could not be released completely.
static int UnlockBuffer (sm_page_locks __user *pLockInfoUser, int LvSmIndex)
{
    sm_page_locks *pLockInfo = kmalloc (sizeof(*pLockInfo), GFP_KERNEL);
    buflk_info *pBufLk = NULL;
    buflk_info *pIter;
    int RetVal = 0;

    // copy the structure from user space
    if (!pLockInfo)
    {
        return -ENOMEM;
    }
    if (copy_from_user (pLockInfo, pLockInfoUser, sizeof(*pLockInfo)))
    {
        kfree (pLockInfo);
        return -EFAULT;
    }
    #ifdef DIAG_MODE
    printk (KERN_DEBUG LVSM_MSG "unlocking %d pages\n", (int)pLockInfo->NrPages);
    #endif

    // look the handle up and take the entry out of the list in one step,
    // so that nobody else can find (and release) it concurrently
    mutex_lock (&LvSmDevs[LvSmIndex].BufLockMutex);
    list_for_each_entry (pIter, &LvSmDevs[LvSmIndex].BufLockList, list)
    {
        // same conversion as used by LockBuffer when it creates the handle
        if ((uint64_t)(ptrdiff_t)pIter == pLockInfo->DrvId)
        {
            pBufLk = pIter;
            list_del (&pBufLk->list);
            break;
        }
    }
    mutex_unlock (&LvSmDevs[LvSmIndex].BufLockMutex);

    if (pBufLk)
    {
        // unmap/unlock the pages, the Pages/PageAddrs arrays get discarded there
        RetVal = UnlockBufferPages (pBufLk, LvSmIndex);
        // finally, let's free the buflk_info structure
        kfree (pBufLk);
    }
    else
    {
        printk (KERN_DEBUG LVSM_MSG "could not unmap/unlock buffer\n");
        RetVal = -EINVAL;
    }

    // the lock info structure is not needed any more
    kfree (pLockInfo);

    return RetVal;
}

// Unlock all buffers still locked for the given device and empty its list of
// locked buffers. Always returns 0.
//
// The whole walk happens under BufLockMutex and every entry is unlinked before
// its pages are released, so no other user of the list can see an entry whose
// arrays are already freed.
static int UnlockAllBuffers (int LvSmIndex)
{
    buflk_info *pBufLk = NULL;
    buflk_info *pBufLkNext = NULL;

    mutex_lock (&LvSmDevs[LvSmIndex].BufLockMutex);
    list_for_each_entry_safe (pBufLk, pBufLkNext, &LvSmDevs[LvSmIndex].BufLockList, list)
    {
        list_del (&pBufLk->list);
        // failures are reported by UnlockBufferPages itself; keep going with the rest
        UnlockBufferPages (pBufLk, LvSmIndex);
        kfree (pBufLk);
    }
    mutex_unlock (&LvSmDevs[LvSmIndex].BufLockMutex);

    return 0;
}

// Update entries of the DMA frame index table located in a dynamically mapped
// device memory area.
//
// pIndexTableUser: user-space sm_dma_idx_tbl; Offset selects the mapped area
//                  (it has to match an area mapped through MapDeviceMemArea),
//                  StreamId selects the bits written to LVSM_DMA_IDXHALT, Entries
//                  holds index/value pairs terminated by LVSM_IDXTBL_NOENTRY
// LvSmIndex:       index into LvSmDevs
//
// Returns 0 on success, -EFAULT when the structure cannot be read or no mapped
// area matches the offset, -EINVAL when the stream id or one of the entry indices
// is out of range. All input is validated before the hardware is touched.
static int UpdateDmaIndexTable (sm_dma_idx_tbl __user *pIndexTableUser, int LvSmIndex)
{
    sm_dma_idx_tbl IndexTable;
    memarea_map *pMap = NULL;
    unsigned long sl_flags;
    uint32_t *TableBase;
    uint32_t StreamBit;
    uint64_t MaxIndex;
    int NrEntries;
    int i;

    // copy the structure from user space
    if (copy_from_user (&IndexTable, pIndexTableUser, sizeof(sm_dma_idx_tbl)))
    {
        return -EFAULT;
    }

    // the stream id is used as shift count for a bit pair 16 bits apart in a
    // 32-bit register, so only 0..15 can be represented
    if ((uint64_t)IndexTable.StreamId >= 16)
    {
        return -EINVAL;
    }
    StreamBit = (uint32_t)1 << IndexTable.StreamId;

    // find the corresponding virtual address
    // (assume the Offset is directly one of those mapped before;
    // free slots also carry Offset 0, so only slots that are really mapped qualify)
    for (i=0; i<LVSM_MEMAREA_MAXENTRIES; ++i)
    {
        memarea_map *pCand = &LvSmDevs[LvSmIndex].DynMaps[i];
        if (pCand->Virtual!=NULL && pCand->Offset==IndexTable.Offset)
        {
            pMap = pCand;
            break;
        }
    }
    if (!pMap) // slot not found
    {
#ifdef DIAG_MODE_IDXTABLE
        printk (KERN_DEBUG LVSM_MSG "did not find the device memory mapping at %X\n", IndexTable.Offset);
#endif
        return -EFAULT;
    }
    // note that the address is stored as uint32_t* since it will be 32-bit indexed
    TableBase = (uint32_t*)pMap->Virtual;
    MaxIndex = pMap->Size / sizeof(uint32_t);

    // count the entries and make sure none of them points outside the mapped area
    for (NrEntries=0; NrEntries<LVSM_IDXTBL_MAXENTRIES && IndexTable.Entries[NrEntries].Index!=LVSM_IDXTBL_NOENTRY; ++NrEntries)
    {
        if ((uint64_t)IndexTable.Entries[NrEntries].Index >= MaxIndex)
        {
            return -EINVAL;
        }
    }

#ifdef DIAG_MODE_IDXTABLE
    printk (KERN_DEBUG LVSM_MSG "frame index table: %d=%d %d=%d\n",
        IndexTable.Entries[0].Index, IndexTable.Entries[0].Value,
        IndexTable.Entries[1].Index, IndexTable.Entries[1].Value);
#endif

    // all DMA access guarded by the spinlock
    spin_lock_irqsave (&LvSmDevs[LvSmIndex].DmaSpinLock, sl_flags);

    // now the actual DMA table update, framed by two writes to the index halt register
    // (same values as 0x10001<<StreamId and 0x10000<<StreamId, computed unsigned)
    iowrite32 ((StreamBit<<16)|StreamBit, LvSmDevs[LvSmIndex].Virtual+LVSM_DMA_IDXHALT);
    for (i=0; i<NrEntries; ++i)
    {
         iowrite32 (IndexTable.Entries[i].Value, TableBase + IndexTable.Entries[i].Index);
    }
    iowrite32 (StreamBit<<16, LvSmDevs[LvSmIndex].Virtual+LVSM_DMA_IDXHALT);

    // finished with the DMA access, unlock
    spin_unlock_irqrestore (&LvSmDevs[LvSmIndex].DmaSpinLock, sl_flags);

#ifdef DIAG_MODE_IDXTABLE
    {
        // dump the first table entries, but never read beyond the mapped area
        char str[256]="frame index table:", idx[16];
        for (i=0; i<=10 && (uint64_t)i<MaxIndex; i++) {
            snprintf(idx, sizeof(idx), " %2d", ioread32 (TableBase + i));
            strcat(str, idx);
        }
        printk (KERN_DEBUG LVSM_MSG "%s\n",str);
    }
#endif

    return 0;
}

// Store the dummy index value for one stream in the per-device state.
//
// pIndex:    user-space sm_dma_dummy_idx holding StreamId and Value
// LvSmIndex: index into LvSmDevs
//
// Returns 0 on success, -EFAULT when the structure cannot be read from user
// space, -EINVAL when StreamId does not denote an element of IdxDummy.
static int UpdateDummyIndex (sm_dma_dummy_idx __user *pIndex, int LvSmIndex)
{
    sm_dma_dummy_idx DummyIndex;

    // copy the structure from user space
    if (copy_from_user (&DummyIndex, pIndex, sizeof(sm_dma_dummy_idx)))
    {
        return -EFAULT;
    }

    // the stream id comes from user space and is used as array index below;
    // the widening cast turns a negative id into a huge value that is rejected as well
    if ((uint64_t)DummyIndex.StreamId >=
        sizeof(LvSmDevs[LvSmIndex].IdxDummy) / sizeof(LvSmDevs[LvSmIndex].IdxDummy[0]))
    {
        return -EINVAL;
    }

    LvSmDevs[LvSmIndex].IdxDummy[DummyIndex.StreamId] = DummyIndex.Value;

    return 0;
}

// Map an area of the device memory (PCI resource 0) into kernel virtual address
// space and remember it in a free slot of DynMaps.
//
// pMemAreaUser: user-space sm_mem_area; Offset is relative to the start of the
//               device memory, Size is the length of the area
// LvSmIndex:    index into LvSmDevs
//
// Returns 0 on success, -EFAULT when the structure cannot be read, no slot is
// free or the mapping fails, -ENODEV when the table entry has no PCI device,
// -EINVAL when the area is empty or does not lie within PCI resource 0.
static int MapDeviceMemArea (sm_mem_area __user *pMemAreaUser, int LvSmIndex)
{
    sm_mem_area MemArea;
    uint64_t BarLen;
    int i;

    // copy the structure from user space
    if (copy_from_user (&MemArea, pMemAreaUser, sizeof(sm_mem_area)))
    {
        return -EFAULT;
    }

    #ifdef DIAG_MODE
    printk (KERN_DEBUG LVSM_MSG "mapping device memory area %X/%X\n", MemArea.Offset, MemArea.Size);
    #endif

    // offset and size come from user space: only areas completely inside the
    // device memory may be mapped (written so that the sum cannot overflow)
    if (!LvSmDevs[LvSmIndex].pDev)
    {
        return -ENODEV;
    }
    BarLen = pci_resource_len (LvSmDevs[LvSmIndex].pDev, 0);
    if (MemArea.Size == 0 || MemArea.Offset > BarLen || MemArea.Size > BarLen - MemArea.Offset)
    {
        return -EINVAL;
    }

    // find a free slot
    for (i=0; i<LVSM_MEMAREA_MAXENTRIES && LvSmDevs[LvSmIndex].DynMaps[i].Virtual!=NULL; ++i) {};
    if (i==LVSM_MEMAREA_MAXENTRIES) // no free slot found
    {
        return -EFAULT;
    }

    // map it and store the info in the slot
    LvSmDevs[LvSmIndex].DynMaps[i].Virtual = (uint8_t*)ioremap_nocache(LvSmDevs[LvSmIndex].Physical+MemArea.Offset, MemArea.Size);
    if (!LvSmDevs[LvSmIndex].DynMaps[i].Virtual)
    {
        printk (KERN_DEBUG LVSM_MSG "failed to map device memory area at %X\n", MemArea.Offset);
        return -EFAULT;
    }
    LvSmDevs[LvSmIndex].DynMaps[i].Offset = MemArea.Offset;
    LvSmDevs[LvSmIndex].DynMaps[i].Size = MemArea.Size;

    return 0;
}

// Unmap a device memory area previously mapped through MapDeviceMemArea and
// release its slot in DynMaps.
//
// pMemAreaUser: user-space sm_mem_area; only Offset is used to identify the area
// LvSmIndex:    index into LvSmDevs
//
// Returns 0 on success, -EFAULT when the structure cannot be read from user
// space or no mapped area with the given offset exists.
static int UnmapDeviceMemArea (sm_mem_area __user *pMemAreaUser, int LvSmIndex)
{
    sm_mem_area MemArea;
    int i;

    // copy the structure from user space
    if (copy_from_user (&MemArea, pMemAreaUser, sizeof(sm_mem_area)))
    {
        return -EFAULT;
    }

    #ifdef DIAG_MODE
    printk (KERN_DEBUG LVSM_MSG "unmapping device memory area %X/%X\n", MemArea.Offset, MemArea.Size);
    #endif

    // find the corresponding slot
    // free slots are zeroed and would match offset 0, so only mapped slots qualify
    for (i=0; i<LVSM_MEMAREA_MAXENTRIES; ++i)
    {
        if (LvSmDevs[LvSmIndex].DynMaps[i].Virtual!=NULL && LvSmDevs[LvSmIndex].DynMaps[i].Offset==MemArea.Offset)
        {
            break;
        }
    }
    if (i==LVSM_MEMAREA_MAXENTRIES) // slot not found
    {
        printk (KERN_DEBUG LVSM_MSG "did not recognize the device memory area to be unmapped at %X\n", MemArea.Offset);
        return -EFAULT;
    }

    // unmap and clean it up, which also marks the slot as free again
    iounmap (LvSmDevs[LvSmIndex].DynMaps[i].Virtual);
    memset (&LvSmDevs[LvSmIndex].DynMaps[i], 0, sizeof(LvSmDevs[LvSmIndex].DynMaps[i]));

    return 0;
}

// advance a buffer index by Incr (used from irq handler)
// the sum is reduced by SM_IRQBUF_SIZE once when it reaches or exceeds it;
// a compiler barrier separates reading the old index from storing the new one
static inline void IncrBufIdx (volatile unsigned *Idx, unsigned Incr)
{
    unsigned Advanced = *Idx + Incr;

    barrier ();

    if (Advanced >= SM_IRQBUF_SIZE)
    {
        Advanced -= SM_IRQBUF_SIZE;
    }
    *Idx = Advanced;
}

// device cleanup
// called upon releasing the device from user space
// performs safety cleanup to shut everything down properly even in case of kill/crash
// IMPORTANT: expects that user space keeps the device open as long as it needs it
static void DeviceCleanup (int LvSmIndex)
{
    int i;
#if 1
    if (LvSmDevs[LvSmIndex].Virtual) {
        uint32_t ModInfo  = ioread32 (LvSmDevs[LvSmIndex].Virtual+LVSM_SBCTR_MODINFO);
        // system bus not available
        if (ModInfo==0xFFFFFFFF) {
            // don't write to FPGA register
            printk (KERN_DEBUG LVSM_MSG "system bus not available!!!\n");
        } else {
            // clean IRQ mask and stop DMA
            iowrite32 (0x10000, LvSmDevs[LvSmIndex].Virtual+LVSM_DMA_EN);
            iowrite32 (0x00000, LvSmDevs[LvSmIndex].Virtual+LVSM_IRQ_EN);
        }
    }
#endif
    LvSmDevs[LvSmIndex].IrqMask = 0;

    // release locked buffers if any
    UnlockAllBuffers (LvSmIndex);

    // unmap dynamically mapped device memory areas if any
    for (i=0; i<LVSM_MEMAREA_MAXENTRIES; ++i)
    {
        if (LvSmDevs[LvSmIndex].DynMaps[i].Virtual)
        {
            iounmap (LvSmDevs[LvSmIndex].DynMaps[i].Virtual);
        }
    }
    memset (&LvSmDevs[LvSmIndex].DynMaps, 0, sizeof(LvSmDevs[LvSmIndex].DynMaps));
}

// service driver cleanup
// counterpart of DeviceCleanup for the service node: called upon releasing it from
// user space, meant for safety cleanup even in case of kill/crash
// IMPORTANT: expects that user space keeps the device open as long as it needs it
static void ServiceCleanup (void)
{
    // currently there is no state to clean up
    return;
}


// init
static int LvSm_init (void)
{
    int Result = 0;
    int i,j;
    int GetAux = 1;	// allways look for aux devices
    u16 Val16;

    struct pci_dev *NextDev = NULL;

    printk (KERN_DEBUG LVSM_MSG "module init - %s\n", DRV_VERSION);

    // preparation
    memset (LvSmDevs, 0, sizeof(LvSmDevs));

    // register
    Result = register_chrdev(0, LvSm_String, &LvSm_fops);
    if (Result<0)
    {
        printk (KERN_DEBUG LVSM_MSG "register_chrdev failed\n");
        return Result;
    }
    else
    {
        LvSm_DevMajor = Result;
        printk (KERN_DEBUG LVSM_MSG "device registered with major id %d\n", LvSm_DevMajor);
    }

    // look up the streaming devices
    // for now just a few, so keep it simple
    NrSm = 0;
    printk (KERN_DEBUG LVSM_MSG "look for streaming devices:\n");

    //
    // pci_get_device - begin or continue searching for a PCI device by vendor/device id
    //
    // Iterates through the list of known PCI devices.
    // If a PCI device is found with a matching vendor and device, the reference count to the device is incremented and a pointer to its device structure is returned.
    // Otherwise, NULL is returned.
    // A new search is initiated by passing NULL as the from argument.
    // Otherwise if from is not NULL, searches continue from next device on the global list.
    // The reference count for from is always decremented if it is not NULL.
    while ((NextDev=pci_get_device(LVSM_VENDOR_ID, PCI_ANY_ID, NextDev)))
    {
        printk (KERN_DEBUG LVSM_MSG "  (%2.2x:%2.2x.%x) %.4X:%.4X refcount=%d\n",
                NextDev->bus->number, NextDev->devfn>>3, NextDev->devfn&7,
                NextDev->vendor, NextDev->device,
                NextDev->dev.kobj.kref.refcount);

 
        if (   NextDev->device!=LVSM_DEVICE_ID_CBCM
            && NextDev->device!=LVSM_DEVICE_ID_CSCM
            && NextDev->device!=LVSM_DEVICE_ID_CS3CM )
        {
            continue;
        }
        // there is a CorSight device
        pci_dev_get (NextDev);  // increment reference count
        LvSmDevs[NrSm].pDev  = NextDev;
        LvSmDevs[NrSm].Index = NrSm;
        LvSmDevs[NrSm].pSelf = &LvSmDevs[NrSm];               
        init_waitqueue_head (&LvSmDevs[NrSm].WaitQueue);
        spin_lock_init      (&LvSmDevs[NrSm].DmaSpinLock);
        sprintf (LvSmDevs[NrSm].Desc, "%s%d", LvSm_String, NrSm);
        if (pci_enable_device(LvSmDevs[NrSm].pDev))
        {
            printk (KERN_DEBUG LVSM_MSG "  could not enable device[%d]\n", NrSm);
            // keep it listed to stay in sync with user space
        }
        else
        {
            LvSmDevs[NrSm].Enabled = 1;
            printk (KERN_DEBUG LVSM_MSG "  enabled device[%d]\n", NrSm);
        }

        //mw Corsight II can handle 44 bit bus addresses
        //mw with 32 it does not even work!
        if (OS64BIT && !pci_set_dma_mask(LvSmDevs[NrSm].pDev, DMA_BIT_MASK(44)))
        {
            printk (KERN_DEBUG LVSM_MSG "  DMA mask set to 44 bit for device[%d]\n", NrSm);
        }
        // Corsight I can handle only 32 bit bus addresses
        else if (!pci_set_dma_mask(LvSmDevs[NrSm].pDev, DMA_BIT_MASK(32)))
        {
            printk (KERN_DEBUG LVSM_MSG "  DMA mask set to 32 bit for device[%d]\n", NrSm);
        }
        else
        {
            printk (KERN_DEBUG LVSM_MSG "  could not set DMA mask for device[%d]\n", NrSm);
            // should it stay enabled?
        }

        // remember addresses
        LvSmDevs[NrSm].Physical = LvSmDevs[NrSm].pDev->resource[0].start;
        LvSmDevs[NrSm].Virtual  = (uint8_t*)ioremap_nocache(LvSmDevs[NrSm].Physical, LVSM_MAP_SIZE);
        printk (KERN_DEBUG LVSM_MSG "  physical address of device[%d] %08lx\n", NrSm, (uintptr_t)LvSmDevs[NrSm].Physical);

        if (!LvSmDevs[NrSm].Virtual)
        {
            printk (KERN_DEBUG LVSM_MSG "  failed to obtain virtual address of device[%d]\n", NrSm);
            // should it stay enabled?
            // TBD: check how and if this should be reported to user space
        }
        else
        {
            printk (KERN_DEBUG LVSM_MSG "  virtual  address of device[%d] %p\n", NrSm, LvSmDevs[NrSm].Virtual);
        }


        // pci config space
        pci_read_config_word(LvSmDevs[NrSm].pDev, PCI_COMMAND, &Val16);
        printk (KERN_DEBUG LVSM_MSG "  pci config space: command @04 %04x\n", Val16);
        pci_read_config_word(LvSmDevs[NrSm].pDev, PCI_STATUS, &Val16);
        printk (KERN_DEBUG LVSM_MSG "  pci config space: status  @06 %04x\n", Val16);

        // clear pci config status
        pci_write_config_word(LvSmDevs[NrSm].pDev, PCI_STATUS, 0xffff);
        // enable Legacy Interrupt
        pci_write_config_word(LvSmDevs[NrSm].pDev, PCI_COMMAND, 0x0002);
        // disable Legacy Interrupt to simulate VMI hangup
        //pci_read_config_word (LvSmDevs[NrSm].pDev, PCI_COMMAND, &Val16);
        //pci_write_config_word(LvSmDevs[NrSm].pDev, PCI_COMMAND,  Val16 | 0x0400);

        pci_read_config_word(LvSmDevs[NrSm].pDev, PCI_COMMAND, &Val16);
        printk (KERN_DEBUG LVSM_MSG "  pci config space: command @04 %04x\n", Val16);
        pci_read_config_word(LvSmDevs[NrSm].pDev, PCI_STATUS, &Val16);
        printk (KERN_DEBUG LVSM_MSG "  pci config space: status  @06 %04x\n", Val16);

        // irq
        // TBD: check usage and deprecation of IRQF_DISABLED
        #if !defined(IRQF_DISABLED)
        #define IRQF_DISABLED 0x00
        #endif
        Result = request_irq (LvSmDevs[NrSm].pDev->irq, LvSm_irq, IRQF_SHARED|IRQF_DISABLED, LvSmDevs[NrSm].Desc, (void*)LvSmDevs[NrSm].pSelf);
        switch (Result)
        {
        case -EINVAL:
            printk (KERN_DEBUG LVSM_MSG "  request_irq: bad irq number or handler for device[%d]\n", NrSm);
            break;
        case -EBUSY:
            printk (KERN_DEBUG LVSM_MSG "  request_irq: irq busy for device[%d] irq=%d\n", NrSm, LvSmDevs[NrSm].pDev->irq);
            break;
        default:
            if (Result<0)
            {
                printk (KERN_DEBUG LVSM_MSG "  request_irq: general irq error for device[%d]\n", NrSm);
            }
            else
            {
                printk (KERN_DEBUG LVSM_MSG "  request_irq: success for device[%d] irq=%d\n", NrSm, LvSmDevs[NrSm].pDev->irq);
            }
            break;
        }

        // set bus master
        pci_set_master(LvSmDevs[NrSm].pDev);
        pci_read_config_word(LvSmDevs[NrSm].pDev, PCI_COMMAND, &Val16);
        printk (KERN_DEBUG LVSM_MSG "  pci_set_master: command @04 %04x\n", Val16);

        // init helper structures
        mutex_init     (&LvSmDevs[NrSm].BufLockMutex);
        INIT_LIST_HEAD (&LvSmDevs[NrSm].BufLockList);

        // init the device
        // (no actions needed for now)

        // adjust flags used by latter init phase(s)
        if (NextDev->device==LVSM_DEVICE_ID_CBCM || NextDev->device==LVSM_DEVICE_ID_CSCM || NextDev->device==LVSM_DEVICE_ID_CS3CM)
        {
            GetAux = 1;
        }

        // increment the counter and look for more devices
        if (NrSm < MAX_SM_DEVS-1) NrSm++;
    }

    // no device sorting should be needed, all in plain list, looks the same in user space

    // look up the auxiliary devices if needed
    // for now just a few, so keep it simple
    // still reporting just the essential needed devices
    NrAux = 0;
    printk (KERN_DEBUG LVSM_MSG "look for aux devices:\n");
    if (GetAux)
    {
        // find the required Intel devices
        NextDev = NULL;
        while ((NextDev=pci_get_device(0x8086, PCI_ANY_ID, NextDev)))
        {

            printk (KERN_DEBUG LVSM_MSG "  (%2.2x:%2.2x.%x) %.4X:%.4X refcount=%d\n",
                NextDev->bus->number, NextDev->devfn>>3, NextDev->devfn&7,
                NextDev->vendor, NextDev->device,
                NextDev->dev.kobj.kref.refcount);

            if (// corsight1
                   NextDev->device!=0x8800  // Main
                && NextDev->device!=0x8801  // ROM
                && NextDev->device!=0x8816  // SPI
                && NextDev->device!=0x8803  // GPIO
                && NextDev->device!=0x8186  // LPC

                // corsight2
                && NextDev->device!=0x0F00  // SOC
                && NextDev->device!=0x0F12  // PCU SMBUS
                && NextDev->device!=0x0F1C  // PCU LPC
                && NextDev->device!=0x0F48  // PCIe Root Port 1
                && NextDev->device!=0x0F4C  // PCIe Root Port 3
                && NextDev->device!=0x0F4E  // PCIe Root Port 4
                
                // corsight3
                && NextDev->device!=0x5AF0  // SOC
                && NextDev->device!=0x5AD4  // SMBUS
                && NextDev->device!=0x5A96  // SPI
                && NextDev->device!=0x5AE8  // LPC
                && NextDev->device!=0x5AD8  // PCIe Root Port 1
                && NextDev->device!=0x5AD9  // PCIe Root Port 2
                && NextDev->device!=0x5ADA  // PCIe Root Port 3
                && NextDev->device!=0x5ADB) // PCIe Root Port 4
            {
                continue;
            }

            pci_dev_get (NextDev);  // increment reference count
            AuxDevs[NrAux].pDev = NextDev;


            // that should suffice for now, just report, no init-like stuff

            // BR 180712 - the topcliff ROM boots with memory mapping disabled.
            // Under Win we can enable it on the fly by directly accessing its PCI
            // configuration space. This is not possible under Linux therefore
            // we need to do it here, we are now pretty sure we have a CS2 in our hands....
            if (NextDev->device==0x8801) {
                u32 Val;
                pci_read_config_dword(NextDev, PCI_ROM_ADDRESS, &Val);
                if ((Val & 0xfffffff0) && ((Val & 0x1)==0)) {
                    Val|=1;
                    pci_write_config_dword(NextDev, PCI_ROM_ADDRESS, Val);
                }
            }
            // increment the counter and look for more devs
            if (NrAux < MAX_AUX_DEVS-1) NrAux++;
        }
    }

    // ok, report the results
    printk (KERN_INFO LVSM_MSG "number of detected streaming devices: %d\n", NrSm);
    for (i=0; i<NrSm; i++)
    {
        printk (KERN_DEBUG LVSM_MSG "lvsm device[%d]: (%2.2x:%2.2x.%x) %.4X:%.4X refcount=%d\n", i,
            LvSmDevs[i].pDev->bus->number, LvSmDevs[i].pDev->devfn>>3, LvSmDevs[i].pDev->devfn&7,
            LvSmDevs[i].pDev->vendor, LvSmDevs[i].pDev->device,
            LvSmDevs[i].pDev->dev.kobj.kref.refcount);
    }
    printk (KERN_INFO LVSM_MSG "number of detected aux devices: %d\n", NrAux);
    for (j=0; j<NrAux; j++)
    {
        printk (KERN_DEBUG LVSM_MSG " aux device[%d]: (%2.2x:%2.2x.%x) %.4X:%.4X refcount=%d\n", j,
            AuxDevs[j].pDev->bus->number, AuxDevs[j].pDev->devfn>>3, AuxDevs[j].pDev->devfn&7,
            AuxDevs[j].pDev->vendor, AuxDevs[j].pDev->device,
            AuxDevs[j].pDev->dev.kobj.kref.refcount);
    }

    // create the proc entry for diagnostics
#  if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))
    //mw replacement for newer kernels
    proc_create(LvSm_String, 0, NULL, &LvSm_proc_fops);
#  else
    create_proc_read_entry(LvSm_String, 0, NULL, LvSm_proc_read, NULL);
#  endif

    printk (KERN_DEBUG LVSM_MSG "module loaded\n");

    // report success?
    return 0;
}

// exit
static void LvSm_exit (void)
{
    int i,j;
    struct pci_dev *NextDev = NULL;

    printk (KERN_DEBUG LVSM_MSG "module exit\n");

    remove_proc_entry (LvSm_String, NULL);      // all kernel versions

    // cleanup streaming devices
    for (i=0; i<NrSm; ++i)
    {
        NextDev=LvSmDevs[i].pDev;

        // dev spec cleanup?
        // (no actions needed for now)
        DeviceCleanup(i);

        // cleanup helper structures
        mutex_destroy (&LvSmDevs[i].BufLockMutex);

        // clear bus master
        pci_clear_master(LvSmDevs[i].pDev);

        // irq
        free_irq (LvSmDevs[i].pDev->irq, LvSmDevs[i].pSelf);

        // unmap
        if (LvSmDevs[i].Virtual) {
            iounmap (LvSmDevs[i].Virtual);
            LvSmDevs[i].Virtual = NULL;
        }

        // disable
        pci_disable_device (LvSmDevs[i].pDev);
        LvSmDevs[i].Enabled = 0;

        // forget
        printk (KERN_DEBUG LVSM_MSG "lvsm device[%d]: (%2.2x:%2.2x.%x) %.4X:%.4X refcount=%d\n", i,
            LvSmDevs[i].pDev->bus->number, LvSmDevs[i].pDev->devfn>>3, LvSmDevs[i].pDev->devfn&7,
            LvSmDevs[i].pDev->vendor, LvSmDevs[i].pDev->device,
            LvSmDevs[i].pDev->dev.kobj.kref.refcount);

        pci_dev_put (LvSmDevs[i].pDev);  // decrement reference count
        LvSmDevs[i].pDev = NULL;
    }

    // cleanup aux devices
    for (j=0; j<NrAux; j++)
    {
        printk (KERN_DEBUG LVSM_MSG " aux device[%d]: (%2.2x:%2.2x.%x) %.4X:%.4X refcount=%d\n", j,
            AuxDevs[j].pDev->bus->number, AuxDevs[j].pDev->devfn>>3, AuxDevs[j].pDev->devfn&7,
            AuxDevs[j].pDev->vendor, AuxDevs[j].pDev->device,
            AuxDevs[j].pDev->dev.kobj.kref.refcount);

        pci_dev_put (AuxDevs[j].pDev);  // decrement reference count
        AuxDevs[j].pDev = NULL;
    }

    // unregister
    unregister_chrdev (LvSm_DevMajor, LvSm_String);

    printk (KERN_DEBUG LVSM_MSG "module unloaded\n");
}

// open
static int LvSm_open(struct inode *inode, struct file *file)
{
    int LvSmIndex, FdType;
    int Result = GetFileIds (file, &LvSmIndex, &FdType);

    if (Result) {
        printk (KERN_DEBUG LVSM_MSG "module open failed!\n");
        return -EINVAL;
    }

    switch (FdType)
    {
        default:
            printk (KERN_DEBUG LVSM_MSG "open file not recognized\n");
            return -EINVAL;

        case LVSM_FDTYPE_DEV:
            // refuse re-opening of already opened device (see notes about the release method)
            if (LvSmDevs[LvSmIndex].Opened) {
                printk (KERN_DEBUG LVSM_MSG "device %d already opened\n", LvSmIndex);
                return -EBUSY;
            }
            // otherwise proceed silently
            printk (KERN_DEBUG LVSM_MSG "open device %d\n", LvSmIndex);
            LvSmDevs[LvSmIndex].Opened = 1;
            try_module_get(THIS_MODULE);
            break;

        case LVSM_FDTYPE_DRV:
            // init the drv interface info
            printk (KERN_DEBUG LVSM_MSG "open driver\n");
            InitDrvIfInfo ();
            break;

        case LVSM_FDTYPE_SVC:
            printk (KERN_DEBUG LVSM_MSG "open service\n");
            break;
    }

    // nothing special needed for now

    return 0;
}

/*
 * release() handler.
 *
 *  - driver file:  nothing to clean up.
 *  - device file:  runs DeviceCleanup() as a safety net so nothing is left
 *                  behind even if the application was killed or crashed,
 *                  clears the Opened flag and drops the module reference.
 *                  IMPORTANT: this relies on userspace keeping the device file
 *                  open for as long as the device is in use, and on multiple
 *                  opens being refused; otherwise the cleanup has to be solved
 *                  in a different way.
 *  - service file: runs ServiceCleanup().
 *
 * Returns 0 on success, -EINVAL if the file cannot be identified.
 */
static int LvSm_release(struct inode *inode, struct file *file)
{
    int LvSmIndex, FdType;
    int Status = GetFileIds (file, &LvSmIndex, &FdType);

    if (Status != 0) {
        printk (KERN_DEBUG LVSM_MSG "module release failed!\n");
        return -EINVAL;
    }

    if (FdType == LVSM_FDTYPE_DRV)
    {
        printk (KERN_DEBUG LVSM_MSG "release driver\n");
    }
    else if (FdType == LVSM_FDTYPE_DEV)
    {
        if (!LvSmDevs[LvSmIndex].Opened) {
            printk (KERN_DEBUG LVSM_MSG "device %d already released\n", LvSmIndex);
        } else {
            printk (KERN_DEBUG LVSM_MSG "release device %d\n", LvSmIndex);
            DeviceCleanup (LvSmIndex);
        }
        // in both cases the device ends up closed and the module reference is returned
        LvSmDevs[LvSmIndex].Opened = 0;
        module_put(THIS_MODULE);
    }
    else if (FdType == LVSM_FDTYPE_SVC)
    {
        printk (KERN_DEBUG LVSM_MSG "release service\n");
        ServiceCleanup ();
    }
    else
    {
        printk (KERN_DEBUG LVSM_MSG "release file not recognized\n");
        return -EINVAL;
    }

    return 0;
}

/*
 * mmap() handler.
 *
 *  - device file:  maps the board memory (starting at the device's physical
 *                  base address) into the calling process. The page offset of
 *                  the request is ignored. At most LVSM_MEM_SIZE bytes are
 *                  mapped and never more than the requested area covers.
 *  - service file: maps the physical range selected by the mmap offset and
 *                  length as given by the caller.
 *                  NOTE(review): this exposes arbitrary physical addresses to
 *                  whoever may open the service file - confirm access is restricted.
 *
 * Returns 0 on success, -EINVAL for an unknown file or a device without a
 * valid board mapping, -EAGAIN if the remapping itself fails.
 */
static int LvSm_mmap(struct file *file, struct vm_area_struct *vma)
{
    int LvSmIndex, FdType;
    int Result = GetFileIds (file, &LvSmIndex, &FdType);
    unsigned long Size = vma->vm_end - vma->vm_start;
    unsigned long Offset = vma->vm_pgoff << PAGE_SHIFT;
    unsigned long MapSize;

    #ifdef DIAG_MODE
    printk (KERN_DEBUG LVSM_MSG "mmap    (type %3d idx %d)\n", FdType, LvSmIndex);
    #endif
    if (Result) return -EINVAL;

    // map the memory
    // currently only the board memory is recognized for mapping
    switch (FdType)
    {
        default:
            printk (KERN_DEBUG LVSM_MSG "mmap file not recognized\n");
            return -EINVAL;

        case LVSM_FDTYPE_DEV:
            if (LvSmDevs[LvSmIndex].Virtual==NULL)
            {
                printk (KERN_DEBUG LVSM_MSG "mmap failed (invalid board virtual addr)\n");
                return -EINVAL;
            }

            // never populate page tables past the end of the requested area:
            // a request shorter than the board memory only gets its own length mapped,
            // a longer request still gets just the LVSM_MEM_SIZE bytes of board memory
            MapSize = (Size < LVSM_MEM_SIZE) ? Size : LVSM_MEM_SIZE;

            if (io_remap_pfn_range (vma, vma->vm_start, LvSmDevs[LvSmIndex].Physical >> PAGE_SHIFT, MapSize, PAGE_SHARED))
            {
                printk (KERN_DEBUG LVSM_MSG "mmap failed (io_remap_pfn_range)\n");
                return -EAGAIN;
            }
            break;

        case LVSM_FDTYPE_SVC:
            if (io_remap_pfn_range (vma, vma->vm_start, Offset >> PAGE_SHIFT, Size, PAGE_SHARED))
            {
                printk (KERN_DEBUG LVSM_MSG "mmap failed (io_remap_pfn_range)\n");
                return -EAGAIN;
            }
            break;
    }

    return 0;
}

/*
 * read() handler.
 *
 *  - driver file:  delivers the driver info structure (LvSmDrvInfo) as a plain
 *                  byte stream, honouring and advancing the file position.
 *  - device file:  blocks until at least one irq info entry is queued, then
 *                  copies queued sm_irq_info entries to the caller and advances
 *                  the read index of the irq ring buffer.
 *  - service file: reads 1, 2 or 4 bytes from the io port whose address is the
 *                  current file position (set through llseek).
 *
 * Returns the number of bytes delivered, 0 at the end of the driver info,
 * -EINVAL for an unknown file or a negative position on the driver file,
 * -EFAULT if the user buffer cannot be written or the service read length
 * is not 1, 2 or 4.
 */
static ssize_t LvSm_read (struct file *file, char *Buffer, size_t Count, loff_t *f_pos)
{
    int LvSmIndex, FdType;
    int Result = GetFileIds (file, &LvSmIndex, &FdType);

    #ifdef DIAG_MODE_EXTRA
    printk (KERN_DEBUG LVSM_MSG "read    (type %3d idx %d)\n", FdType, LvSmIndex);
    #endif
    if (Result) return -EINVAL;

    switch (FdType)
    {
    default:
        printk (KERN_DEBUG LVSM_MSG "read file not recognized\n");
        return -EINVAL;

    case LVSM_FDTYPE_DRV:
        {
            // delivers the info about detected devices
            const char *pInfo = (const char*)&LvSmDrvInfo;
            const loff_t Size = sizeof (LvSmDrvInfo);

            if (*f_pos < 0) return -EINVAL;
            if (*f_pos >= Size) return 0;       // end of info reached

            // clamp to what is left; comparing against the remainder cannot overflow
            if (Count > (size_t)(Size - *f_pos)) Count = (size_t)(Size - *f_pos);

            // advance the source pointer to the requested position
            // (the info structure itself must not be modified here)
            pInfo += *f_pos;

            if (copy_to_user (Buffer, pInfo, Count))
            {
                return -EFAULT;
            }

            *f_pos += Count;
            return Count;
        }

    case LVSM_FDTYPE_DEV:
        {
            // reads info about delivered irqs
            // TBD: once deeply tested, add security through semaphores if needed
            // NOTE(review): Count is compared against a number of entries below while
            // the copy is done in bytes (entries * sizeof(sm_irq_info)); presumably
            // userspace passes Count as a number of entries - confirm against the caller
            size_t NrAvailable = 0;
            size_t NrRead = 0;
            int NrToEnd;
            int IdxR, IdxW;
            // wait for the event (uninterruptible: the reader cannot be woken by a signal)
            while (LvSmDevs[LvSmIndex].IrqBufIdxW==LvSmDevs[LvSmIndex].IrqBufIdxR)      // nothing to read
            {
                wait_event (LvSmDevs[LvSmIndex].WaitQueue, LvSmDevs[LvSmIndex].IrqBufIdxW!=LvSmDevs[LvSmIndex].IrqBufIdxR);
            }
            // seems we have real data
            IdxR = LvSmDevs[LvSmIndex].IrqBufIdxR;
            IdxW = LvSmDevs[LvSmIndex].IrqBufIdxW;
            NrAvailable = (IdxW > IdxR) ? IdxW-IdxR : IdxW-IdxR+SM_IRQBUF_SIZE;	        //mw use full modulo operation {1,..,SM_IRQBUF_SIZE}
            NrRead = min (NrAvailable, Count);

            // increment the read index
            IncrBufIdx (&LvSmDevs[LvSmIndex].IrqBufIdxR, NrRead);
            // and now copy the data using the remembered old indexes
            // TBD: consider non-zero f_pos, but should not be needed

            //mw info is in one consecutive memory space
            NrToEnd = SM_IRQBUF_SIZE-IdxR;
            if (NrRead <= NrToEnd) {
                int NrBytes = NrRead*sizeof(sm_irq_info);
                if (copy_to_user(Buffer, &LvSmDevs[LvSmIndex].IrqInfos[IdxR], NrBytes)) {
                    return -EFAULT;
                }
            }

            //mw info is split into two memory parts (wraps around the end of the ring buffer)
            else {
                int NrBytes1stPart = (NrToEnd) * sizeof(sm_irq_info);
                int NrBytes2ndPart = (NrRead-NrToEnd) * sizeof(sm_irq_info);
                if (copy_to_user(Buffer               , &LvSmDevs[LvSmIndex].IrqInfos[IdxR], NrBytes1stPart)) {
                    return -EFAULT;
                }
                if (copy_to_user(Buffer+NrBytes1stPart, &LvSmDevs[LvSmIndex].IrqInfos[   0], NrBytes2ndPart)) {
                    return -EFAULT;
                }
            }

            //// reset IdxOld with each read (security: allow some dummy irqs)
            //LvSmDevs[LvSmIndex].IdxOld = -1;  // results in many dummy buffers

            *f_pos += NrRead*sizeof(sm_irq_info);
            return NrRead*sizeof(sm_irq_info);
        }

    case LVSM_FDTYPE_SVC:
        {
            // reads from an io port, the port address is the current file position
            char IoBuf[4];
            unsigned Address = *f_pos;

            // the requested length selects the access width
            switch (Count)
            {
            case 1:
                *(uint8_t*)IoBuf = inb (Address);
                break;
            case 2:
                *(uint16_t*)IoBuf = inw (Address);
                break;
            case 4:
                *(uint32_t*)IoBuf = inl (Address);
                break;
            default:
                return -EFAULT;
            }

            if (copy_to_user (Buffer, IoBuf, Count))
            {
                return -EFAULT;
            }

            // no need to readjust f_pos, this always relies on an explicit lseek
            return Count;
        }
    }
}

/*
 * write() handler.
 *
 * Only the service file accepts writes: 1, 2 or 4 bytes taken from the user
 * buffer are written to the io port whose address is the current file
 * position (set through llseek). The file position is not advanced.
 *
 * Returns the number of bytes written, -EINVAL for any other file,
 * -EFAULT if the user buffer cannot be read or the length is not 1, 2 or 4.
 */
static ssize_t LvSm_write (struct file *file, const char *Buffer, size_t Count, loff_t *f_pos)
{
    int LvSmIndex, FdType;
    int Status = GetFileIds (file, &LvSmIndex, &FdType);
    char PortData[4];
    unsigned Port;

    #ifdef DIAG_MODE_EXTRA
    printk (KERN_DEBUG LVSM_MSG "write   (type %3d idx %d)\n", FdType, LvSmIndex);
    #endif
    if (Status != 0) return -EINVAL;

    // everything but the service file is rejected
    if (FdType != LVSM_FDTYPE_SVC)
    {
        printk (KERN_DEBUG LVSM_MSG "write file not recognized\n");
        return -EINVAL;
    }

    // the port address comes from the file position
    Port = *f_pos;

    // fetch at most one io word from user space
    if (copy_from_user (PortData, Buffer, min(sizeof(PortData),Count)))
    {
        return -EFAULT;
    }

    // the requested length selects the access width
    if (Count == 1)
    {
        outb (*(uint8_t*)PortData, Port);
    }
    else if (Count == 2)
    {
        outw (*(uint16_t*)PortData, Port);
    }
    else if (Count == 4)
    {
        outl (*(uint32_t*)PortData, Port);
    }
    else
    {
        return -EFAULT;
    }

    // no need to readjust f_pos, this always relies on an explicit lseek
    return Count;
}

/*
 * llseek() handler.
 *
 * Seeking is supported on the service file only. There the given offset is
 * stored as the new file position as-is; the origin argument is not
 * evaluated.
 *
 * Returns the new position, -EINVAL if the file cannot be identified,
 * -ESPIPE for any other file type.
 */
static loff_t LvSm_llseek(struct file *file, loff_t offset, int origin)
{
    int LvSmIndex, FdType;
    int Status = GetFileIds (file, &LvSmIndex, &FdType);

    #ifdef DIAG_MODE_EXTRA
    printk (KERN_DEBUG LVSM_MSG "llseek  (type %3d idx %d)\n", FdType, LvSmIndex);
    #endif
    if (Status != 0) return -EINVAL;

    if (FdType == LVSM_FDTYPE_SVC)
    {
        // the position is taken over unchanged
        file->f_pos = offset;
        return offset;
    }

    printk (KERN_DEBUG LVSM_MSG "llseek file not recognized\n");
    return -ESPIPE;
}

/*
 * ioctl() handler (unlocked variant where the kernel provides it).
 *
 * Commands must carry the LVSM_IOC_MAGIC type.
 *  - driver file: no commands are defined, every command yields -ENOTTY.
 *  - device file: buffer lock/unlock, irq mask, DMA index table, device memory
 *                 area map/unmap and dummy index update; each is forwarded to
 *                 its helper together with the device index.
 *
 * Returns 0 on success, -EINVAL if the file or magic is not recognized or the
 * helper reports a failure, -ENOTTY for an unknown command.
 */
#ifdef HAVE_UNLOCKED_IOCTL // true since 2.6.11
static long LvSm_unlocked_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
#else
static int LvSm_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
#endif
{
    int LvSmIndex, FdType;
    int Status = GetFileIds (file, &LvSmIndex, &FdType);
    int Failed = 0;

    #ifdef DIAG_MODE
    printk (KERN_DEBUG LVSM_MSG "ioctl   (type %3d idx %d)\n", FdType, LvSmIndex);
    #endif
    if (Status != 0) return -EINVAL;
    if (_IOC_TYPE(cmd)!=LVSM_IOC_MAGIC) return -EINVAL;

    // the driver file knows no commands at all
    if (FdType == LVSM_FDTYPE_DRV)
    {
        return -ENOTTY;
    }

    // anything but the device file is rejected
    if (FdType != LVSM_FDTYPE_DEV)
    {
        printk (KERN_DEBUG LVSM_MSG "ioctl file not recognized\n");
        return -EINVAL;
    }

    // device commands: remember only whether the helper failed
    switch (cmd)
    {
        case LVSM_IOC_LOCK:
            Failed = (0!=LockBuffer ((sm_page_locks __user *)arg, LvSmIndex));
            break;

        case LVSM_IOC_UNLOCK:
            Failed = (0!=UnlockBuffer ((sm_page_locks __user *)arg, LvSmIndex));
            break;

        case LVSM_IOC_IRQMASK:
            // the mask is passed by value in arg
            LvSmDevs[LvSmIndex].IrqMask = (uint32_t) arg;
#ifdef DIAG_MODE_IRQ
            printk (KERN_DEBUG LVSM_MSG "ioctl IrqMask %04X\n", LvSmDevs[LvSmIndex].IrqMask);
#endif
            break;

        case LVSM_IOC_DMAIDXTBL:
            Failed = (0!=UpdateDmaIndexTable ((sm_dma_idx_tbl __user *)arg, LvSmIndex));
            break;

        case LVSM_IOC_MAPAREA:
            Failed = (0!=MapDeviceMemArea ((sm_mem_area __user *)arg, LvSmIndex));
            break;

        case LVSM_IOC_UNMAPAREA:
            Failed = (0!=UnmapDeviceMemArea ((sm_mem_area __user *)arg, LvSmIndex));
            break;

        case LVSM_IOC_DUMMYIDX:
            Failed = (0!=UpdateDummyIndex ((sm_dma_dummy_idx __user *)arg, LvSmIndex));
            break;

        default:
            return -ENOTTY;
    }

    if (Failed)
    {
        return -EINVAL;
    }

    return 0;
}

/*
 * poll() handler.
 *
 *  - driver file: always reported as readable and writeable.
 *  - device file: registers on the device wait queue and reports readable as
 *                 soon as the irq ring buffer holds at least one entry
 *                 (write index differs from read index). The WaitKilled
 *                 attitude is currently ignored, so no end-of-file status is
 *                 reported.
 *
 * Returns the poll event mask, 0 if nothing is ready or the file is not
 * recognized.
 */
static unsigned int LvSm_poll (struct file *file, struct poll_table_struct *poll_table)
{
    int LvSmIndex, FdType;
    int Status = GetFileIds (file, &LvSmIndex, &FdType);

    #ifdef DIAG_MODE_EXTRA
    printk (KERN_DEBUG LVSM_MSG "poll    (type %3d idx %d)\n", FdType, LvSmIndex);
    #endif
    if (Status != 0) return 0;

    if (FdType == LVSM_FDTYPE_DRV)
    {
        return POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM;
    }

    if (FdType == LVSM_FDTYPE_DEV)
    {
        poll_wait (file, &LvSmDevs[LvSmIndex].WaitQueue, poll_table);

        // readable once the irq queue is not empty
        return (LvSmDevs[LvSmIndex].IrqBufIdxW!=LvSmDevs[LvSmIndex].IrqBufIdxR)
                   ? (POLLIN | POLLRDNORM)
                   : 0;
    }

    printk (KERN_DEBUG LVSM_MSG "poll file not recognized\n");
    return 0;
}

// irq
static irqreturn_t LvSm_irq (int irq, void *dev_id)
{
    int Handled = 0;
    int i;
    static uint32_t DmaCount[2] = {0,0};
    sm_info *pLvSmDev = (sm_info*)dev_id;

#ifdef DIAG_MODE_IRQ
    int j;
#endif
#ifdef DIAG_MODE_IRQ_DMA
    static int DebugCount = 1000000;
#endif

    // process the irq if ours
    if (pLvSmDev && pLvSmDev->Virtual)
    {
         int16_t DmaIdx[16];
        uint16_t DmaIdxVal;

        //   Irq0
        //   +----+------------------------------
        //   |Bit | Description
        //   +----+------------------------------
        //   | 0  | DMA EOF Channel 0
        //   | 1  | DMA EOF Channel 1
        //   | 2  | DMA Err Channel 0
        //   | 3  | DMA Err Channel 1
        //   +----+------------------------------
        //   | 4  | FMB Rx Channel 0, Threshold 1
        //   | 5  | FMB Rx Channel 1, Threshold 1 (unused)
        //   | 6  | FMB Rx Channel 0, Threshold 2
        //   | 7  | Interrupt TimeOut
        //   +----+------------------------------
        //   | 8  | Timer 0, ChunkEndOfFrame channel 0
        //   | 9  | Timer 1, FrameRateTimer  channel 0
        //   |10  | Timer 2
        //   |11  | Timer 3
        //   +----+------------------------------
        //   |12  | PCIe Byte Enable
        //   |13  | LVSB Tx Error
        //   +----+------------------------------
        //   |14  | RS232 TxFifo empty
        //   |15  | RS232 RxFifo not empty
        //   |16  | RS232 TxFIFO overflow
        //   |17  | RS232 RxFIFO overflow
        //   +----+-------------------------------
        //   |18  | Timer 4, ChunkEndOfFrame channel 1
        //   |19  | Timer 5, FrameRateTimer  channel 1
        //   |20  | Timer 6
        //   |21  | Timer 7
        //   +----+------------------------------
        //   |22  | EvtLog
        //   |23  | QuadError
        //   |24  | QuadRevolution
        //   |25  | Custom0
        //   |26  | Custom1
        //   |27  | FrameMsgError0
        //   |28  | FrameMsgError1
        //   +----+------------------------------
        //   |29  | XADC alarm
        //   |30  | Geo Correction channel 0
        //   |31  | Geo Correction channel 1
        //   +----+------------------------------

        uint32_t ModInfo  = ioread32 (pLvSmDev->Virtual+LVSM_SBCTR_MODINFO);
        uint32_t IrqStat  = ioread32 (pLvSmDev->Virtual+LVSM_IRQ_STAT);
        uint32_t IrqLoss  = ioread32 (pLvSmDev->Virtual+LVSM_IRQ_CLR);
#ifdef DIAG_MODE_IRQ_DEBUG
        uint32_t IrqEn    = ioread32 (pLvSmDev->Virtual+LVSM_IRQ_EN);
        uint32_t IrqPend  = ioread32 (pLvSmDev->Virtual+LVSM_IRQ_PEND);
        uint32_t IrqCount = ioread32 (LvSmDevs->Virtual+LVSM_IRQ_COUNT);
        uint32_t OutCount = ioread32 (LvSmDevs->Virtual+LVSM_OUT_COUNT);
        uint32_t LatCount = ioread32 (LvSmDevs->Virtual+LVSM_LAT_COUNT);
#endif

        pLvSmDev->KernelIrqCount++;

        // not our irq
        if (IrqStat==0)  pLvSmDev->EmptyIrqCount++;

        // system bus not available
        if (ModInfo==0xFFFFFFFF) IrqStat=0;

        // timeout irq
        if (IrqStat & 0x0080) pLvSmDev->TimeoutIrqCount++;

#ifdef DIAG_MODE_IRQ_DEBUG
        printk (KERN_DEBUG LVSM_MSG "irq%d kernel=%4d empty=%4d timeout=%4d, pend<%X> loss<%4X> stat<%4X> en<%4X> irqcnt=%4d outcnt=%4d latcnt=%dus\n",
                                     irq,
                                     pLvSmDev->KernelIrqCount, pLvSmDev->EmptyIrqCount, pLvSmDev->TimeoutIrqCount,
                                     IrqPend, IrqLoss, IrqStat, IrqEn, IrqCount, OutCount, LatCount*100);
#endif

        if (IrqStat)
        {
#ifdef DIAG_MODE_IRQ_DEBUG
            // ignore all irq until timeout counter reaches threshold
            //if (OutCount<20) IrqStat &= ~0x3181;

            // ignore timer irq
            //IrqStat &= ~0x0100;

            // ignore dma irq
            //IrqStat &= ~0x0001;
#endif

            // timeout irq
            if (IrqStat & 0x0080) {
                // acknowledge all irq
                iowrite32 (0xffff, pLvSmDev->Virtual+LVSM_IRQ_CLR);
                IrqStat = 0x0000; //don't fill irq queue !!!
            } else {
                // acknowledge our irq
                iowrite32 (IrqStat, pLvSmDev->Virtual+LVSM_IRQ_CLR);
            }

            // remember all irq losses
            if (IrqLoss) pLvSmDev->LossedIrqCount++;

            // dma irq
            if (IrqStat & 0x0003) {

                // all DMA access guarded by the spinlock
                unsigned long sl_flags;
                spin_lock_irqsave (&pLvSmDev->DmaSpinLock, sl_flags);

                // get the indexes and progress frame index fifo if valid
                for (i=0; i<2; i++) {
                    if (IrqStat & (1<<i)) {
                        // DMA EOF 0/1
                        DmaIdxVal = (ioread32 (pLvSmDev->Virtual+LVSM_DMA_CHANTXERR)>>(16+i)) & 0x0001;
                        DmaIdx[i] =  ioread32 (pLvSmDev->Virtual+LVSM_DMA_FRMIDXFIFO+0x100*i) & 0xffff;

                        if (DmaIdxVal) {
                            iowrite32 (LVSM_DMA_NEXTFRMIDX, pLvSmDev->Virtual+LVSM_DMA_FRMIDXFIFO+0x100*i);
                            DmaCount[i]++;  // count kernel dma interrupts
                        } else {
                            IrqStat &= ~(1<<i);    //skip irq
                        }
#ifdef DIAG_MODE_IRQ
                        if (i==0) {
                            for (j=0; j<4; j++) { //read complete frame index fifo for debugging
                                DmaIdx[2+j] = ioread32 (pLvSmDev->Virtual+LVSM_DMA_FRMIDXFIFO+0x100*i) & 0xffff;
                                iowrite32 (LVSM_DMA_NEXTFRMIDX, pLvSmDev->Virtual+LVSM_DMA_FRMIDXFIFO+0x100*i);
                            }
                        }
#endif
                    } else {
                        DmaIdx[i] = 0;
                    }
                }
                // finished the DMA access, unlock
                spin_unlock_irqrestore (&pLvSmDev->DmaSpinLock, sl_flags);
            }


#ifdef DIAG_MODE_IRQ
#  if 0
            if (IrqStat & 0x0101) {
                uint32_t IrqPend   = ioread32 (pLvSmDev->Virtual+LVSM_IRQ_PEND);
                uint32_t RxStat    = ioread32 (pLvSmDev->Virtual+LVSM_FMB_RXCURRSTAT);
                uint32_t TxStat    = ioread32 (pLvSmDev->Virtual+LVSM_FMB_TXCURRSTAT);
                uint32_t FmbLoad   = ioread32 (pLvSmDev->Virtual+LVSM_FMB_LOAD0);
                uint32_t NumFrames = ioread32 (pLvSmDev->Virtual+LVSM_FMB_STRMCFG);
                if (!(IrqStat&0x0001)) DmaIdx[0] = 0xff;
                DmaIdx[10] = IrqLoss & 0xffff;
                DmaIdx[11] = IrqLoss>>16;
                DmaIdx[12] = IrqPend & 0xffff;
                DmaIdx[13] = (RxStat>>12) & 0xffff;
                DmaIdx[14] = (TxStat>>12) & 0xffff;
                //DmaIdx[15] = FmbLoad>>16;
                DmaIdx[15] = NumFrames & 0xffff;
            }
#  else
            if (IrqStat & 0x0101) {
                if (!(IrqStat&0x0001)) DmaIdx[0] = 0xff;
                DmaIdx[10] = IrqLoss & 0xffff;
                DmaIdx[11] = IrqLoss>>16;
                DmaIdx[12] = pLvSmDev->EmptyIrqCount & 0xffff;
                DmaIdx[13] = pLvSmDev->LossedIrqCount & 0xffff;
                DmaIdx[14] = pLvSmDev->IrqBufIdxW;
                DmaIdx[15] = pLvSmDev->IrqBufIdxR;
            }
#  endif
#endif

#define IRQ_LESSING
#ifdef IRQ_LESSING
            // reduce irq pressure if FIFO is going to get full
            // IdxDummy[0] must be defined for each start (see: LVSM_IOC_DUMMYIDX)
            if (pLvSmDev->IdxDummy[0]!=0)
            {
                if (IrqStat & 0x0101)
                {
                    uint32_t IdxR, IdxW, NrAvailable;
                    IdxR = pLvSmDev->IrqBufIdxR;
                    IdxW = pLvSmDev->IrqBufIdxW;
                    NrAvailable = (IdxW >= IdxR) ? IdxW-IdxR : IdxW-IdxR+SM_IRQBUF_SIZE;        //  {0,..,SM_IRQBUF_SIZE-1}
                    if (IrqStat & 0x0001) {
                        if (NrAvailable>0) {                                    // safety feature: with every read, give the dummy buffer a chance
                            // skip dummy buffer irq (but don't skip first or single occurences)
                            if ( (DmaIdx[0]>=pLvSmDev->IdxDummy[0]) && (pLvSmDev->IdxOld==pLvSmDev->IdxDummy[0]) ) {    // if curr idx== dummy idx and old index == dummy idx
                                pLvSmDev->IrqLessingCount++;                    // #irqs not transferred to user space due to interrupt lessing
                                pLvSmDev->IrqSkipOther=1;                       // flag for skipping corresponding timer interrupt
                                DmaCount[0]--;                                  // redo the counting
                                IrqStat &= ~0x0001;                             // skip this irq
                            }
                        }
                        pLvSmDev->IdxOld=DmaIdx[0];                             // remember this index
                    }
                    if (IrqStat & 0x0100) {
                        if (pLvSmDev->IrqSkipOther==1)  {
                            IrqStat &= ~0x0100;                                 // skip this irq
                            pLvSmDev->IrqSkipOther=0;
                        }
                    }
                    DmaIdx[ 8] = pLvSmDev->IdxDummy[0];
                    DmaIdx[ 9] = pLvSmDev->IrqLessingCount;
                }
            }
#endif

            // count our irq
            if (IrqStat)
            {
                // fasync is not currently used in user space, instead we use blocking read
                // TBD: verify carefully all the concurrency issues related
                pLvSmDev->IrqInfos[pLvSmDev->IrqBufIdxW].IrqStat = IrqStat;
                pLvSmDev->IrqInfos[pLvSmDev->IrqBufIdxW].IrqLoss = IrqLoss;
                memcpy(pLvSmDev->IrqInfos[pLvSmDev->IrqBufIdxW].DmaIdx,   DmaIdx,   sizeof(DmaIdx));
                pLvSmDev->IrqInfos[pLvSmDev->IrqBufIdxW].Reserved[0] = DmaCount[0];
                pLvSmDev->IrqInfos[pLvSmDev->IrqBufIdxW].Reserved[1] = pLvSmDev->KernelIrqCount;

                IncrBufIdx (&pLvSmDev->IrqBufIdxW, 1);
                // would be nice to move the read index as well to gradually throw away entries, other than wrap around and loose SM_IRQBUF_SIZE entries
                // but beware of data races!
                wake_up (&pLvSmDev->WaitQueue); // TBD: consider the interruptible version

                pLvSmDev->WantedIrqCount++;

            } else {
                pLvSmDev->UnwantedIrqCount++;
            }

            Handled = 1;

#ifdef DIAG_MODE_IRQ_DMA
            if (IrqStat) {
                if (pLvSmDev->IrqMask & IrqLoss) {
                    iowrite32 (~(pLvSmDev->IrqMask & IrqLoss), pLvSmDev->Virtual+LVSM_IRQ_EN);  //reset IrqCnt
                    iowrite32 (  pLvSmDev->IrqMask,            pLvSmDev->Virtual+LVSM_IRQ_EN);
                    DebugCount=200;
                }
                if ((IrqStat /*& LVSM_IRQMSK_TIM1*/) && DebugCount) {
                    if (DebugCount) DebugCount--;
                    if (DebugCount>1) {
                        printk (KERN_DEBUG LVSM_MSG "irq%d %6d(%6d,%3d), IdxW %2d, IdxR %2d, loss %4X, stat %4X, mask %4X\n",
                                                     irq, pLvSmDev->KernelIrqCount, pLvSmDev->WantedIrqCount, pLvSmDev->UnwantedIrqCount, pLvSmDev->IrqBufIdxW, pLvSmDev->IrqBufIdxR, IrqLoss, IrqStat, pLvSmDev->IrqMask);
                        printk (KERN_DEBUG LVSM_MSG "irq%d DmaIdx: %2d %2d %2d %2d %2d %2d %2d %2d %2d %2d %2d %2d %2d %2d %2d %2d\n",
                                                     irq,
                                                     DmaIdx[0],DmaIdx[1],DmaIdx[2],DmaIdx[3],DmaIdx[4],DmaIdx[5],DmaIdx[6],DmaIdx[7],
                                                     DmaIdx[8],DmaIdx[9],DmaIdx[10],DmaIdx[11],DmaIdx[12],DmaIdx[13],DmaIdx[14],DmaIdx[15]);
                    } else {
                        printk (KERN_DEBUG LVSM_MSG "...\n");
                    }
                }
            }
#endif
        }
    }
    return IRQ_RETVAL(Handled);
}

// fasync
/*static int LvSm_fasync(int fd, struct file *file, int on)
{
    int LvSmIndex, FdType;
    int Result = GetFileIds (file, &LvSmIndex, &FdType);

    // check the FdType actually used in user space, if enabled
    if (Result==0 && FdType==LVSM_FDTYPE_DEV)
    {
        // ignored for now, not needed in user space
        //fasync_helper (fd, file, on, &LvSmDevs[LvSmIndex].pFasync);
    }

    // when enabling fasync, don't forget the return value
    return 0;
}*/


// proc_read new
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))

static int LvSm_proc_show(struct seq_file *m, void *v) {
#define LVPROCBUFLEN (2048)
    char *buf = kmalloc (LVPROCBUFLEN, GFP_KERNEL);
    int eof;
    int data;
    int len;
    memset (buf, 0, LVPROCBUFLEN);
    len = LvSm_proc_read (buf, NULL, 0, LVPROCBUFLEN-1, &eof, (void *)&data);
    buf[LVPROCBUFLEN-1]=0;
    if (len<LVPROCBUFLEN-1) buf[len]=0;
    seq_printf(m, "%s", buf);
    kfree (buf);
    return 0;
}

// open handler of the seq_file based interface:
// attaches LvSm_proc_show as the one and only show routine (no private data)
static int LvSm_proc_open(struct inode *inode, struct file *file)
{
    int Result;

    Result = single_open(file, LvSm_proc_show, NULL);
    return Result;
}
#endif

// proc_read
//   Formats a human readable status report (driver version, number of detected devices and,
//   per lvsm device, PCI identification, irq counters and register contents) into 'page'.
//   The signature follows the legacy procfs read callback (create_proc_read_entry);
//   on newer kernels the function is called from LvSm_proc_show.
//   Arguments:
//     page:    destination buffer for the report
//     start:   not used by this function
//     offset:  a value >0 is treated as a follow-up read: *eof is set to 1 and 0 is returned
//     count:   size of 'page' in bytes; the output never exceeds it and is always 0-terminated
//     eof:     only written (set to 1) when offset>0
//     data:    not used by this function
//   Returns the number of characters stored in 'page' (terminating 0 not counted).

int LvSm_proc_read (char *page, char **start, off_t offset, int count, int *eof, void *data)
{
    // TBD: currently just a debug helper, no write access for now (not needed so far)
    int i;
    int len = 0;
    if (offset>0)
    {
        *eof = 1;
        return 0;
    }
    // an empty buffer cannot even hold the terminating 0
    if (count<=0) return 0;

    // All output goes through scnprintf with the remaining space (count-len): it never writes
    // beyond the buffer, always terminates the string and returns the number of characters
    // actually stored, so 0 <= len <= count-1 holds at any time.
    len += scnprintf (page+len, count-len, "Driver Version: %s\n", DRV_VERSION);
    len += scnprintf (page+len, count-len, "Nr detected lvsm devices: %d\n", NrSm);
    len += scnprintf (page+len, count-len, "Nr detected  aux devices: %d\n", NrAux);
    for (i=0; i<NrSm; ++i)
    {
        // registers are only read if the device memory is mapped, otherwise 0 is reported
        uint32_t FpgaId      = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FPGA_ID)        : 0;
        uint32_t FpgaRev     = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FPGA_REV)       : 0;
        uint32_t IrqEn       = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_EN)         : 0;
        uint32_t IrqStat     = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_STAT)       : 0;
        uint32_t IrqLoss     = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_CLR)        : 0;
        uint32_t IrqPend     = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_PEND)       : 0;
        uint32_t IrqTimOut   = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_TIMOUT)     : 0;
        uint32_t IrqCount    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_COUNT)      : 0;
        uint32_t OutCount    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_OUT_COUNT)      : 0;
#ifdef DIAG_MODE_IRQ_DEBUG
        uint32_t PCICount    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_PCI_COUNT)      : 0;  //only for debug
        uint32_t EOFCount    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_EOF_COUNT)      : 0;  //only for debug
        uint32_t TIMCount    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_TIM_COUNT)      : 0;  //only for debug
        uint32_t VIPCount    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_VIP_COUNT)      : 0;  //only for debug
        uint32_t ACKCount    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_ACK_COUNT)      : 0;  //only for debug
        uint32_t PCIeDebug   = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_PCIE_DEBUG)     : 0;  //only for debug

        // one log entry consists of three consecutive reads of the same fifo register
        uint32_t LogCtrl0    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_LOG0)       : 0;  //only for debug
        uint32_t LogData0    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_LOGFIFO)    : 0;  //only for debug
        uint32_t LogData1    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_LOGFIFO)    : 0;  //only for debug
        uint32_t LogData2    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_LOGFIFO)    : 0;  //only for debug

        bool IrqFifoVal = LogData0 & 0x80000000;
#endif

#ifdef DIAG_MODE
        uint32_t DmaEn       = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_DMA_EN)         : 0;
        uint32_t DmaTxEn     = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_DMA_CHANTXEN)   : 0;
        uint32_t DmaTxAct    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_DMA_CHANTXSTAT) : 0;
        uint32_t DmaTxErr    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_DMA_CHANTXERR)  : 0;
        uint32_t DmaIdxHalt  = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_DMA_IDXHALT)    : 0;
        uint32_t FmbEn       = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_EN)         : 0;
        uint32_t FmbAct      = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_STRMACT)    : 0;
        uint32_t FmbStat     = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_STAT)       : 0;
        uint32_t FmbLoad0    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_LOAD0)      : 0;
        uint32_t FmbFrmDim   = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_FRMDIM)     : 0;
        uint32_t FmbStrmCfg  = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_STRMCFG)    : 0;
        uint32_t FmbStartFrm = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_STARTFRM)   : 0;
        uint32_t FmbStartMsg = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_STARTMSG)   : 0;
        uint32_t FmbRx       = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_RXCURRSTAT) : 0;
        uint32_t FmbTx       = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_FMB_TXCURRSTAT) : 0;
#endif

        // NOTE(review): kref.refcount is handed to %d by value; if it is a struct type
        // (atomic_t/refcount_t) on the targeted kernels, an accessor should be used - confirm
        len += scnprintf(page+len, count-len, "device[%d]: (%2.2x:%2.2x.%x) %.4X:%.4X refcount=%d  %s\n  physical %08lx, virtual %p, irq(%d), fpga(%d) %d.%d-%3.3d\n", i,
                       LvSmDevs[i].pDev->bus->number,
                       LvSmDevs[i].pDev->devfn>>3,
                       LvSmDevs[i].pDev->devfn&7,
                       LvSmDevs[i].pDev->vendor,
                       LvSmDevs[i].pDev->device,
                       LvSmDevs[i].pDev->dev.kobj.kref.refcount,
                      (LvSmDevs[i].pDev->device==LVSM_DEVICE_ID_CBCM)  ? "CorSight1" :
                     ((LvSmDevs[i].pDev->device==LVSM_DEVICE_ID_CSCM)  ? "CorSight2" :
                     ((LvSmDevs[i].pDev->device==LVSM_DEVICE_ID_CS3CM) ? "CorSight3" : "????")),
            (uintptr_t)LvSmDevs[i].Physical,
                       LvSmDevs[i].Virtual,
                       LvSmDevs[i].pDev->irq,
                      (FpgaId>>21)&0x7FF, (FpgaId>>10)&0x7FF, FpgaRev&0xFF, FpgaId&0x3FF );

        // early-out: stop the report when less than 100 bytes are left
        // (this only limits the output, the memory safety comes from scnprintf)
        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  irq kernel: all=%d empty=%d skipped=%d lossed=%d timeout=%d\n",
                      LvSmDevs[i].KernelIrqCount,
                      LvSmDevs[i].EmptyIrqCount,
                      LvSmDevs[i].IrqLessingCount,
                      LvSmDevs[i].LossedIrqCount,
                      LvSmDevs[i].TimeoutIrqCount);


        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  irq reg:    en=%04X stat=%04X loss=%04X pend=%X timout=%08X (%dus)\n",
                      IrqEn, IrqStat, IrqLoss, IrqPend, IrqTimOut, ((IrqTimOut>>8)+1)*100);

        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  irq count:  all=%d timeout=%d\n",
                      IrqCount, OutCount);

#ifdef DIAG_MODE_IRQ_DEBUG
        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  irq count:  all=%d pci=%d eof=%d tim=%d vip=%d ack=%d out=%d\n",
            IrqCount, PCICount, EOFCount, TIMCount, VIPCount, ACKCount, OutCount);

        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  irq debug:  LogCtrl0=%08X, PCIeDebug=%08X\n",
            LogCtrl0, PCIeDebug);

        if (len>count-100) break;
        // dump log fifo entries as long as bit 31 of the first word is set
        while (IrqFifoVal) {
            len += scnprintf(page+len, count-len, "  %8dus:  dbg=%X msi=%X dis=%X wr=%X en=%2X  pend=%2X  loss=%X  clr=%2X  src=%X  vip=%X  sw=%X  rd=%X  pci=%X  irq=%X\n",
                (LogData2 & 0x0fffffff),        // Timestamp
                (LogData1 & 0x00200000)>>21,    // PCIeDebug
                (LogData1 & 0x00100000)>>20,    // MsiEnable
                (LogData1 & 0x00080000)>>19,    // IrqDisable
                (LogData1 & 0x00002000)>>13,    // IrqEnWr
                (LogData1 & 0x0007c000)>>14,    // IrqEn
               ((LogData1 & 0x00001fff)<<2) + ((LogData0 & 0x0c000000)>>26),    // IrqPend
                (LogData0 & 0x03e00000)>>21,    // IrqLoss
                (LogData0 & 0x001f0000)>>16,    // IrqClr
                (LogData0 & 0x0000f800)>>11,    // IrqSrc
                (LogData0 & 0x00000400)>>10,    // VipSOF
                (LogData0 & 0x000003c0)>>6,     // SwSync
                (LogData0 & 0x00000030)>>4,     // Rd
                (LogData0 & 0x0000000e)>>1,     // Pcie
                (LogData0 & 0x00000001)         // IrqGlobal
                );
            if (len>count-100) break;           // leaves the fifo loop only

            LogData0    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_LOGFIFO)    : 0;
            LogData1    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_LOGFIFO)    : 0;
            LogData2    = LvSmDevs[i].Virtual ? ioread32(LvSmDevs[i].Virtual+LVSM_IRQ_LOGFIFO)    : 0;

            IrqFifoVal = LogData0 & 0x80000000;
        }
#endif

#ifdef DIAG_MODE
        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  dma %X/%X/%05X/%X/%X\n",
                      DmaEn, DmaTxEn, DmaTxAct, DmaTxErr&3, DmaIdxHalt);

        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  fmb %X/%X/%X/%X/%06X/%06X\n",
                      FmbEn, FmbAct, FmbStat, FmbLoad0, FmbRx, FmbTx );

        if (len>count-100) break;
        len += scnprintf(page+len, count-len, "  fmb LnLength=%d FrmHeight=%d NumFrames=%d StartFrm=%X StartMsg=%X\n",
                      FmbFrmDim&0x3fff, FmbFrmDim>>16, FmbStrmCfg&0x3ff, FmbStartFrm, FmbStartMsg );
#endif
    }
    // count>0 is guaranteed here, so this index is inside the buffer
    page[count-1]=0;
    return len;
}


