From: popcornmix
Date: Mon, 19 Nov 2012 18:27:05 +0000
Subject: [PATCH] Add Simon Hall's dma helper module, useful in future for X
 acceleration

---
 arch/arm/mach-bcm2708/Kconfig                   |   7 +
 arch/arm/mach-bcm2708/Makefile                  |   3 +
 arch/arm/mach-bcm2708/dmaer.c                   | 887 ++++++++++++++++++++++++
 arch/arm/mach-bcm2708/include/mach/vc_support.h |  69 ++
 arch/arm/mach-bcm2708/vc_support.c              | 319 +++++++++
 5 files changed, 1285 insertions(+)
 create mode 100755 arch/arm/mach-bcm2708/dmaer.c
 create mode 100755 arch/arm/mach-bcm2708/include/mach/vc_support.h
 create mode 100755 arch/arm/mach-bcm2708/vc_support.c

diff --git a/arch/arm/mach-bcm2708/Kconfig b/arch/arm/mach-bcm2708/Kconfig
index 63bb76c..2a24db6 100644
--- a/arch/arm/mach-bcm2708/Kconfig
+++ b/arch/arm/mach-bcm2708/Kconfig
@@ -31,4 +31,11 @@ config BCM2708_NOL2CACHE
 	help
 	  Do not allow ARM to use GPU's L2 cache. Requires disable_l2cache in config.txt.
 
+config BCM2708_DMAER
+	tristate "BCM2708 DMA helper"
+	depends on MACH_BCM2708
+	default n
+	help
+	  Enable DMA helper for accelerating X composition
+
 endmenu
diff --git a/arch/arm/mach-bcm2708/Makefile b/arch/arm/mach-bcm2708/Makefile
index 164ecb2..0da162c 100644
--- a/arch/arm/mach-bcm2708/Makefile
+++ b/arch/arm/mach-bcm2708/Makefile
@@ -6,3 +6,6 @@ obj-$(CONFIG_MACH_BCM2708) 	+= clock.o bcm2708.o armctrl.o vcio.o power.o dma.o
 obj-$(CONFIG_BCM2708_GPIO)	+= bcm2708_gpio.o
 obj-$(CONFIG_BCM2708_VCMEM)	+= vc_mem.o
 
+obj-$(CONFIG_BCM2708_DMAER)	+= dmaer_master.o
+dmaer_master-objs		:= dmaer.o vc_support.o
+
diff --git a/arch/arm/mach-bcm2708/dmaer.c b/arch/arm/mach-bcm2708/dmaer.c
new file mode 100755
index 0000000..d1bc0fa
--- /dev/null
+++ b/arch/arm/mach-bcm2708/dmaer.c
@@ -0,0 +1,887 @@
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/cdev.h>
+#include <linux/pagemap.h>
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/dma-mapping.h>
+#include <linux/ioctl.h>
+
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+#include <mach/dma.h>
+#include <mach/vc_support.h>
+
+#ifdef ECLIPSE_IGNORE
+
+#define __user
+#define __init
+#define __exit
+#define __iomem
+#define KERN_DEBUG
+#define KERN_ERR
+#define KERN_WARNING
+#define KERN_INFO
+#define _IOWR(a, b, c) b
+#define _IOW(a, b, c) b
+#define _IO(a, b) b
+
+#endif
+
+//#define inline
+
+#define PRINTK(args...) printk(args)
+//#define PRINTK_VERBOSE(args...) printk(args)
+//#define PRINTK(args...)
+#define PRINTK_VERBOSE(args...)
+
+/***** TYPES ****/
+#define PAGES_PER_LIST 500
+struct PageList
+{
+	struct page *m_pPages[PAGES_PER_LIST];
+	unsigned int m_used;
+	struct PageList *m_pNext;
+};
+
+struct VmaPageList
+{
+	//each vma has a linked list of pages associated with it
+	struct PageList *m_pPageHead;
+	struct PageList *m_pPageTail;
+	unsigned int m_refCount;
+};
+
+struct DmaControlBlock
+{
+	unsigned int m_transferInfo;
+	void __user *m_pSourceAddr;
+	void __user *m_pDestAddr;
+	unsigned int m_xferLen;
+	unsigned int m_tdStride;
+	struct DmaControlBlock *m_pNext;
+	unsigned int m_blank1, m_blank2;
+};
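+//NB the layout above mirrors the BCM2835 DMA engine's 256-bit control block:
+//transfer info, source, dest, length, 2D stride, next-CB pointer and two
+//reserved words, in that order; the engine fetches CBs straight from SDRAM,
+//which is why the pointer fields must hold bus addresses (and the block must
+//be 32-byte aligned) by the time the chain is kicked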
+
+/***** DEFINES ******/
+//magic number defining the module
+#define DMA_MAGIC		0xdd
+
+//do user virtual to physical translation of the CB chain
+#define DMA_PREPARE		_IOWR(DMA_MAGIC, 0, struct DmaControlBlock *)
+
+//kick the pre-prepared CB chain
+#define DMA_KICK		_IOW(DMA_MAGIC, 1, struct DmaControlBlock *)
+
+//prepare it, kick it, wait for it
+#define DMA_PREPARE_KICK_WAIT	_IOWR(DMA_MAGIC, 2, struct DmaControlBlock *)
+
+//prepare it, kick it, don't wait for it
+#define DMA_PREPARE_KICK	_IOWR(DMA_MAGIC, 3, struct DmaControlBlock *)
+
+//not currently implemented
+#define DMA_WAIT_ONE		_IOW(DMA_MAGIC, 4, struct DmaControlBlock *)
+
+//wait on all kicked CB chains
+#define DMA_WAIT_ALL		_IO(DMA_MAGIC, 5)
+
+//in order to discover the largest AXI burst that should be programmed into the transfer params
+#define DMA_MAX_BURST		_IO(DMA_MAGIC, 6)
+
+//set the address range through which the user address is assumed to already be a physical address
+#define DMA_SET_MIN_PHYS	_IOW(DMA_MAGIC, 7, unsigned long)
+#define DMA_SET_MAX_PHYS	_IOW(DMA_MAGIC, 8, unsigned long)
+#define DMA_SET_PHYS_OFFSET	_IOW(DMA_MAGIC, 9, unsigned long)
+
+//used to define the size of the CMA-based allocation *in pages*; can only be done once per open of the file
+#define DMA_CMA_SET_SIZE	_IOW(DMA_MAGIC, 10, unsigned long)
+
+//used to get the version of the module, to test for a capability
+#define DMA_GET_VERSION		_IO(DMA_MAGIC, 99)
+
+#define VERSION_NUMBER 1
+
+#define VIRT_TO_BUS_CACHE_SIZE 8
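+//illustrative userspace flow (a documentation sketch only -- the device node
+//name and the transfer-info bits are assumptions, not defined by this module):
+//
+//	int fd = open("/dev/dmaer", O_RDWR);		//node made by hand with mknod
+//	struct DmaControlBlock cb;
+//	cb.m_transferInfo = MY_TI_BITS;			//engine-specific TI bits
+//	cb.m_pSourceAddr = pSrc;			//plain user virtual addresses;
+//	cb.m_pDestAddr = pDest;				//DMA_PREPARE rewrites them in
+//	cb.m_xferLen = 4096;				//place to bus addresses
+//	cb.m_tdStride = 0;
+//	cb.m_pNext = 0;					//end of chain
+//	ioctl(fd, DMA_PREPARE_KICK_WAIT, &cb);		//translate, kick, wait for idle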
+
+/***** FILE OPS *****/
+static int Open(struct inode *pInode, struct file *pFile);
+static int Release(struct inode *pInode, struct file *pFile);
+static long Ioctl(struct file *pFile, unsigned int cmd, unsigned long arg);
+static ssize_t Read(struct file *pFile, char __user *pUser, size_t count, loff_t *offp);
+static int Mmap(struct file *pFile, struct vm_area_struct *pVma);
+
+/***** VMA OPS ****/
+static void VmaOpen4k(struct vm_area_struct *pVma);
+static void VmaClose4k(struct vm_area_struct *pVma);
+static int VmaFault4k(struct vm_area_struct *pVma, struct vm_fault *pVmf);
+
+/**** DMA PROTOTYPES */
+static struct DmaControlBlock __user *DmaPrepare(struct DmaControlBlock __user *pUserCB, int *pError);
+static int DmaKick(struct DmaControlBlock __user *pUserCB);
+static void DmaWaitAll(void);
+
+/**** GENERIC ****/
+static int __init dmaer_init(void);
+static void __exit dmaer_exit(void);
+
+/*** OPS ***/
+static struct vm_operations_struct g_vmOps4k = {
+	.open = VmaOpen4k,
+	.close = VmaClose4k,
+	.fault = VmaFault4k,
+};
+
+static struct file_operations g_fOps = {
+	.owner = THIS_MODULE,
+	.llseek = 0,
+	.read = Read,
+	.write = 0,
+	.unlocked_ioctl = Ioctl,
+	.open = Open,
+	.release = Release,
+	.mmap = Mmap,
+};
+
+/***** GLOBALS ******/
+static dev_t g_majorMinor;
+
+//tracking usage of the two files
+static atomic_t g_oneLock4k = ATOMIC_INIT(1);
+
+//device operations
+static struct cdev g_cDev;
+static int g_trackedPages = 0;
+
+//dma control
+static unsigned int *g_pDmaChanBase;
+static int g_dmaIrq;
+static int g_dmaChan;
+
+//cma allocation
+static int g_cmaHandle;
+
+//user virtual to bus address translation acceleration
+static unsigned long g_virtAddr[VIRT_TO_BUS_CACHE_SIZE];
+static unsigned long g_busAddr[VIRT_TO_BUS_CACHE_SIZE];
+static unsigned long g_cbVirtAddr;
+static unsigned long g_cbBusAddr;
+static int g_cacheInsertAt;
+static int g_cacheHit, g_cacheMiss;
+
+//off by default
+static void __user *g_pMinPhys;
+static void __user *g_pMaxPhys;
+static unsigned long g_physOffset;
+
+/****** CACHE OPERATIONS ********/
+static inline void FlushAddrCache(void)
+{
+	int count = 0;
+	for (count = 0; count < VIRT_TO_BUS_CACHE_SIZE; count++)
+		g_virtAddr[count] = 0xffffffff;		//never going to match as we always chop the bottom bits anyway
+
+	g_cbVirtAddr = 0xffffffff;
+
+	g_cacheInsertAt = 0;
+}
+
+//translate from a user virtual address to a bus address by mapping the page
+//NB this won't lock the page in memory, so to avoid potential paging issues use kernel logical addresses
+static inline void __iomem *UserVirtualToBus(void __user *pUser)
+{
+	int mapped;
+	struct page *pPage;
+	void *phys;
+
+	//map it (requiring that the pointer points to something that does not hang off the page boundary)
+	mapped = get_user_pages(current, current->mm,
+		(unsigned long)pUser, 1,
+		1, 0,
+		&pPage,
+		0);
+
+	if (mapped <= 0)		//error
+		return 0;
+
+	PRINTK_VERBOSE(KERN_DEBUG "user virtual %p arm phys %p bus %p\n",
+		pUser, page_address(pPage), (void __iomem *)__virt_to_bus(page_address(pPage)));
+
+	//get the arm physical address
+	phys = page_address(pPage) + offset_in_page(pUser);
+	page_cache_release(pPage);
+
+	//and now the bus address
+	return (void __iomem *)__virt_to_bus(phys);
+}
+
+static inline void __iomem *UserVirtualToBusViaCbCache(void __user *pUser)
+{
+	unsigned long virtual_page = (unsigned long)pUser & ~4095;
+	unsigned long page_offset = (unsigned long)pUser & 4095;
+	unsigned long bus_addr;
+
+	if (g_cbVirtAddr == virtual_page)
+	{
+		bus_addr = g_cbBusAddr + page_offset;
+		g_cacheHit++;
+		return (void __iomem *)bus_addr;
+	}
+	else
+	{
+		bus_addr = (unsigned long)UserVirtualToBus(pUser);
+
+		if (!bus_addr)
+			return 0;
+
+		g_cbVirtAddr = virtual_page;
+		g_cbBusAddr = bus_addr & ~4095;
+		g_cacheMiss++;
+
+		return (void __iomem *)bus_addr;
+	}
+}
+
+//do the same as above, by querying our virt->bus cache
+static inline void __iomem *UserVirtualToBusViaCache(void __user *pUser)
+{
+	int count;
+	//get the page and its offset
+	unsigned long virtual_page = (unsigned long)pUser & ~4095;
+	unsigned long page_offset = (unsigned long)pUser & 4095;
+	unsigned long bus_addr;
+
+	if (pUser >= g_pMinPhys && pUser < g_pMaxPhys)
+	{
+		PRINTK_VERBOSE(KERN_DEBUG "user->phys passthrough on %p\n", pUser);
+		return (void __iomem *)((unsigned long)pUser + g_physOffset);
+	}
+
+	//check the cache for our entry
+	for (count = 0; count < VIRT_TO_BUS_CACHE_SIZE; count++)
+		if (g_virtAddr[count] == virtual_page)
+		{
+			bus_addr = g_busAddr[count] + page_offset;
+			g_cacheHit++;
+			return (void __iomem *)bus_addr;
+		}
+
+	//not found, look up manually and then insert its page address
+	bus_addr = (unsigned long)UserVirtualToBus(pUser);
+
+	if (!bus_addr)
+		return 0;
+
+	g_virtAddr[g_cacheInsertAt] = virtual_page;
+	g_busAddr[g_cacheInsertAt] = bus_addr & ~4095;
+
+	//round robin
+	g_cacheInsertAt++;
+	if (g_cacheInsertAt == VIRT_TO_BUS_CACHE_SIZE)
+		g_cacheInsertAt = 0;
+
+	g_cacheMiss++;
+
+	return (void __iomem *)bus_addr;
+}
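+//NB translation is cached per 4k page: on a hit the bus address is simply the
+//cached page's bus base plus (user address & 4095); the single-entry cb cache
+//suits chains of control blocks packed into one page, while the 8-entry
+//round-robin cache serves the transfer source/dest data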
+
+/***** FILE OPERATIONS ****/
+static int Open(struct inode *pInode, struct file *pFile)
+{
+	PRINTK(KERN_DEBUG "file opening: %d/%d\n", imajor(pInode), iminor(pInode));
+
+	//check which device we are
+	if (iminor(pInode) == 0)		//4k
+	{
+		//only one at a time
+		if (!atomic_dec_and_test(&g_oneLock4k))
+		{
+			atomic_inc(&g_oneLock4k);
+			return -EBUSY;
+		}
+	}
+	else
+		return -EINVAL;
+
+	//todo there will be trouble if two different processes open the files
+
+	//reset after any file is opened
+	g_pMinPhys = (void __user *)-1;
+	g_pMaxPhys = (void __user *)0;
+	g_physOffset = 0;
+	g_cmaHandle = 0;
+
+	return 0;
+}
+
+static int Release(struct inode *pInode, struct file *pFile)
+{
+	PRINTK(KERN_DEBUG "file closing, %d pages tracked\n", g_trackedPages);
+	if (g_trackedPages)
+		PRINTK(KERN_ERR "we're leaking memory!\n");
+
+	//wait for any dmas to finish
+	DmaWaitAll();
+
+	//free this memory on the application closing the file or it crashing (implicitly closing the file)
+	if (g_cmaHandle)
+	{
+		PRINTK(KERN_DEBUG "unlocking vc memory\n");
+		if (UnlockVcMemory(g_cmaHandle))
+			PRINTK(KERN_ERR "uh-oh, unable to unlock vc memory!\n");
+		PRINTK(KERN_DEBUG "releasing vc memory\n");
+		if (ReleaseVcMemory(g_cmaHandle))
+			PRINTK(KERN_ERR "uh-oh, unable to release vc memory!\n");
+	}
+
+	if (iminor(pInode) == 0)
+		atomic_inc(&g_oneLock4k);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct DmaControlBlock __user *DmaPrepare(struct DmaControlBlock __user *pUserCB, int *pError)
+{
+	struct DmaControlBlock kernCB;
+	struct DmaControlBlock __user *pUNext;
+	void __iomem *pSourceBus, __iomem *pDestBus;
+
+	//get the control block into kernel memory so we can work on it
+	if (copy_from_user(&kernCB, pUserCB, sizeof(struct DmaControlBlock)) != 0)
+	{
+		PRINTK(KERN_ERR "copy_from_user failed for user cb %p\n", pUserCB);
+		*pError = 1;
+		return 0;
+	}
+
+	if (kernCB.m_pSourceAddr == 0 || kernCB.m_pDestAddr == 0)
+	{
+		PRINTK(KERN_ERR "faulty source (%p) dest (%p) addresses for user cb %p\n",
+			kernCB.m_pSourceAddr, kernCB.m_pDestAddr, pUserCB);
+		*pError = 1;
+		return 0;
+	}
+
+	pSourceBus = UserVirtualToBusViaCache(kernCB.m_pSourceAddr);
+	pDestBus = UserVirtualToBusViaCache(kernCB.m_pDestAddr);
+
+	if (!pSourceBus || !pDestBus)
+	{
+		PRINTK(KERN_ERR "virtual to bus translation failure for source/dest %p/%p->%p/%p\n",
+			kernCB.m_pSourceAddr, kernCB.m_pDestAddr,
+			pSourceBus, pDestBus);
+		*pError = 1;
+		return 0;
+	}
+
+	//update the user structure with the new bus addresses
+	kernCB.m_pSourceAddr = pSourceBus;
+	kernCB.m_pDestAddr = pDestBus;
+
+	PRINTK_VERBOSE(KERN_DEBUG "final source %p dest %p\n", kernCB.m_pSourceAddr, kernCB.m_pDestAddr);
+
+	//sort out the bus address for the next block
+	pUNext = kernCB.m_pNext;
+
+	if (kernCB.m_pNext)
+	{
+		void __iomem *pNextBus;
+		pNextBus = UserVirtualToBusViaCbCache(kernCB.m_pNext);
+
+		if (!pNextBus)
+		{
+			PRINTK(KERN_ERR "virtual to bus translation failure for m_pNext\n");
+			*pError = 1;
+			return 0;
+		}
+
+		//update the pointer with the bus address
+		kernCB.m_pNext = pNextBus;
+	}
+
+	//write it back to user space
+	if (copy_to_user(pUserCB, &kernCB, sizeof(struct DmaControlBlock)) != 0)
+	{
+		PRINTK(KERN_ERR "copy_to_user failed for cb %p\n", pUserCB);
+		*pError = 1;
+		return 0;
+	}
+
+	__cpuc_flush_dcache_area(pUserCB, 32);
+
+	*pError = 0;
+	return pUNext;
+}
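+//NB DmaPrepare rewrites each CB in place in user memory and then cleans its
+//32 bytes (sizeof(struct DmaControlBlock)) out of the data cache, since the
+//DMA engine reads CBs straight from SDRAM and does not snoop the ARM cache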
+
+static int DmaKick(struct DmaControlBlock __user *pUserCB)
+{
+	void __iomem *pBusCB;
+
+	pBusCB = UserVirtualToBusViaCbCache(pUserCB);
+	if (!pBusCB)
+	{
+		PRINTK(KERN_ERR "virtual to bus translation failure for cb\n");
+		return 1;
+	}
+
+	//flush_cache_all();
+
+	bcm_dma_start(g_pDmaChanBase, (dma_addr_t)pBusCB);
+
+	return 0;
+}
+
+static void DmaWaitAll(void)
+{
+	int counter = 0;
+	volatile int inner_count;
+	volatile unsigned int cs;
+	unsigned long time_before, time_after;
+
+	time_before = jiffies;
+	//bcm_dma_wait_idle(g_pDmaChanBase);
+	dsb();
+
+	cs = readl(g_pDmaChanBase);
+
+	while ((cs & 1) == 1)
+	{
+		cs = readl(g_pDmaChanBase);
+		counter++;
+
+		for (inner_count = 0; inner_count < 32; inner_count++);
+
+		asm volatile ("MCR p15,0,r0,c7,c0,4 \n");
+		//cpu_do_idle();
+		if (counter >= 1000000)
+		{
+			PRINTK(KERN_WARNING "DMA failed to finish in a timely fashion\n");
+			break;
+		}
+	}
+	time_after = jiffies;
+	PRINTK_VERBOSE(KERN_DEBUG "done, counter %d, cs %08x\n", counter, cs);
+	PRINTK_VERBOSE(KERN_DEBUG "took %ld jiffies, %d HZ\n", time_after - time_before, HZ);
+}
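+//NB the loop above polls bit 0 (ACTIVE) of the channel's CS register; the
+//MCR p15,0,r0,c7,c0,4 is the ARM1176 wait-for-interrupt encoding and is only
+//there to throttle the polling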
+
+static long Ioctl(struct file *pFile, unsigned int cmd, unsigned long arg)
+{
+	int error = 0;
+	PRINTK_VERBOSE(KERN_DEBUG "ioctl cmd %x arg %lx\n", cmd, arg);
+
+	switch (cmd)
+	{
+	case DMA_PREPARE:
+	case DMA_PREPARE_KICK:
+	case DMA_PREPARE_KICK_WAIT:
+		{
+			struct DmaControlBlock __user *pUCB = (struct DmaControlBlock *)arg;
+			int steps = 0;
+			unsigned long start_time = jiffies;
+			(void)start_time;
+
+			//flush our address cache
+			FlushAddrCache();
+
+			PRINTK_VERBOSE(KERN_DEBUG "dma prepare\n");
+
+			//do virtual to bus translation for each entry
+			do
+			{
+				pUCB = DmaPrepare(pUCB, &error);
+			} while (error == 0 && ++steps && pUCB);
+			PRINTK_VERBOSE(KERN_DEBUG "prepare done in %d steps, %ld\n", steps, jiffies - start_time);
+
+			//carry straight on if we want to kick too
+			if (cmd == DMA_PREPARE || error)
+			{
+				PRINTK_VERBOSE(KERN_DEBUG "falling out\n");
+				return error ? -EINVAL : 0;
+			}
+		}
+	case DMA_KICK:
+		PRINTK_VERBOSE(KERN_DEBUG "dma begin\n");
+
+		if (cmd == DMA_KICK)
+			FlushAddrCache();
+
+		DmaKick((struct DmaControlBlock __user *)arg);
+
+		if (cmd != DMA_PREPARE_KICK_WAIT)
+			break;
+/*	case DMA_WAIT_ONE:
+		//PRINTK(KERN_DEBUG "dma wait one\n");
+		break;*/
+	case DMA_WAIT_ALL:
+		//PRINTK(KERN_DEBUG "dma wait all\n");
+		DmaWaitAll();
+		break;
+	case DMA_MAX_BURST:
+		if (g_dmaChan == 0)
+			return 10;
+		else
+			return 5;
+	case DMA_SET_MIN_PHYS:
+		g_pMinPhys = (void __user *)arg;
+		PRINTK(KERN_DEBUG "min/max user/phys bypass set to %p %p\n", g_pMinPhys, g_pMaxPhys);
+		break;
+	case DMA_SET_MAX_PHYS:
+		g_pMaxPhys = (void __user *)arg;
+		PRINTK(KERN_DEBUG "min/max user/phys bypass set to %p %p\n", g_pMinPhys, g_pMaxPhys);
+		break;
+	case DMA_SET_PHYS_OFFSET:
+		g_physOffset = arg;
+		PRINTK(KERN_DEBUG "user/phys bypass offset set to %ld\n", g_physOffset);
+		break;
+	case DMA_CMA_SET_SIZE:
+		{
+			unsigned int pBusAddr;
+
+			if (g_cmaHandle)
+			{
+				PRINTK(KERN_ERR "memory has already been allocated (handle %d)\n", g_cmaHandle);
+				return -EINVAL;
+			}
+
+			PRINTK(KERN_INFO "allocating %lu bytes of VC memory\n", arg * 4096);
+
+			//get the memory
+			if (AllocateVcMemory(&g_cmaHandle, arg * 4096, 4096, MEM_FLAG_L1_NONALLOCATING | MEM_FLAG_NO_INIT | MEM_FLAG_HINT_PERMALOCK))
+			{
+				PRINTK(KERN_ERR "failed to allocate %lu bytes of VC memory\n", arg * 4096);
+				g_cmaHandle = 0;
+				return -EINVAL;
+			}
+
+			//get an address for it
+			PRINTK(KERN_INFO "trying to map VC memory\n");
+
+			if (LockVcMemory(&pBusAddr, g_cmaHandle))
+			{
+				PRINTK(KERN_ERR "failed to map CMA handle %d, releasing memory\n", g_cmaHandle);
+				ReleaseVcMemory(g_cmaHandle);
+				g_cmaHandle = 0;
+				return -EINVAL;
+			}
+
+			PRINTK(KERN_INFO "bus address for CMA memory is %x\n", pBusAddr);
+			return pBusAddr;
+		}
+	case DMA_GET_VERSION:
+		PRINTK(KERN_DEBUG "returning version number, %d\n", VERSION_NUMBER);
+		return VERSION_NUMBER;
+	default:
+		PRINTK(KERN_DEBUG "unknown ioctl: %d\n", cmd);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static ssize_t Read(struct file *pFile, char __user *pUser, size_t count, loff_t *offp)
+{
+	return -EIO;
+}
+
+static int Mmap(struct file *pFile, struct vm_area_struct *pVma)
+{
+	struct PageList *pPages;
+	struct VmaPageList *pVmaList;
+
+	PRINTK_VERBOSE(KERN_DEBUG "MMAP vma %p, length %ld (%s %d)\n",
+		pVma, pVma->vm_end - pVma->vm_start,
+		current->comm, current->pid);
+	PRINTK_VERBOSE(KERN_DEBUG "MMAP %p %d (tracked %d)\n", pVma, current->pid, g_trackedPages);
+
+	//make a new page list
+	pPages = (struct PageList *)kmalloc(sizeof(struct PageList), GFP_KERNEL);
+	if (!pPages)
+	{
+		PRINTK(KERN_ERR "couldn't allocate a new page list (%s %d)\n",
+			current->comm, current->pid);
+		return -ENOMEM;
+	}
+
+	//clear the page list
+	pPages->m_used = 0;
+	pPages->m_pNext = 0;
+
+	//insert our vma and new page list somewhere
+	if (!pVma->vm_private_data)
+	{
+		struct VmaPageList *pList;
+
+		PRINTK_VERBOSE(KERN_DEBUG "new vma list, making new one (%s %d)\n",
+			current->comm, current->pid);
+
+		//make a new vma list
+		pList = (struct VmaPageList *)kmalloc(sizeof(struct VmaPageList), GFP_KERNEL);
+		if (!pList)
+		{
+			PRINTK(KERN_ERR "couldn't allocate vma page list (%s %d)\n",
+				current->comm, current->pid);
+			kfree(pPages);
+			return -ENOMEM;
+		}
+
+		//clear this list
+		pVma->vm_private_data = (void *)pList;
+		pList->m_refCount = 0;
+	}
+
+	pVmaList = (struct VmaPageList *)pVma->vm_private_data;
+
+	//add it to the vma list
+	pVmaList->m_pPageHead = pPages;
+	pVmaList->m_pPageTail = pPages;
+
+	pVma->vm_ops = &g_vmOps4k;
+	pVma->vm_flags |= VM_IO;
+
+	VmaOpen4k(pVma);
+
+	return 0;
+}
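+//the mapping set up above is demand-faulted; userspace would do e.g.
+//(illustrative only)
+//
+//	void *pMem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+//
+//no page exists until first touch, at which point VmaFault4k allocates and
+//tracks it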
+
+/****** VMA OPERATIONS ******/
+
+static void VmaOpen4k(struct vm_area_struct *pVma)
+{
+	struct VmaPageList *pVmaList;
+
+	PRINTK_VERBOSE(KERN_DEBUG "vma open %p private %p (%s %d), %d live pages\n", pVma, pVma->vm_private_data, current->comm, current->pid, g_trackedPages);
+	PRINTK_VERBOSE(KERN_DEBUG "OPEN %p %d %ld pages (tracked pages %d)\n",
+		pVma, current->pid, (pVma->vm_end - pVma->vm_start) >> 12,
+		g_trackedPages);
+
+	pVmaList = (struct VmaPageList *)pVma->vm_private_data;
+
+	if (pVmaList)
+	{
+		pVmaList->m_refCount++;
+		PRINTK_VERBOSE(KERN_DEBUG "ref count is now %d\n", pVmaList->m_refCount);
+	}
+	else
+	{
+		PRINTK_VERBOSE(KERN_DEBUG "err, open but no vma page list\n");
+	}
+}
+
+static void VmaClose4k(struct vm_area_struct *pVma)
+{
+	struct VmaPageList *pVmaList;
+	int freed = 0;
+
+	PRINTK_VERBOSE(KERN_DEBUG "vma close %p private %p (%s %d)\n", pVma, pVma->vm_private_data, current->comm, current->pid);
+
+	//wait for any dmas to finish
+	DmaWaitAll();
+
+	//find our vma in the list
+	pVmaList = (struct VmaPageList *)pVma->vm_private_data;
+
+	//may be a fork
+	if (pVmaList)
+	{
+		struct PageList *pPages;
+
+		pVmaList->m_refCount--;
+
+		if (pVmaList->m_refCount == 0)
+		{
+			PRINTK_VERBOSE(KERN_DEBUG "found vma, freeing pages (%s %d)\n",
+				current->comm, current->pid);
+
+			pPages = pVmaList->m_pPageHead;
+
+			if (!pPages)
+			{
+				PRINTK(KERN_ERR "no page list (%s %d)!\n",
+					current->comm, current->pid);
+				return;
+			}
+
+			while (pPages)
+			{
+				struct PageList *next;
+				int count;
+
+				PRINTK_VERBOSE(KERN_DEBUG "page list (%s %d)\n",
+					current->comm, current->pid);
+
+				next = pPages->m_pNext;
+				for (count = 0; count < pPages->m_used; count++)
+				{
+					PRINTK_VERBOSE(KERN_DEBUG "freeing page %p (%s %d)\n",
+						pPages->m_pPages[count],
+						current->comm, current->pid);
+					__free_pages(pPages->m_pPages[count], 0);
+					g_trackedPages--;
+					freed++;
+				}
+
+				PRINTK_VERBOSE(KERN_DEBUG "freeing page list (%s %d)\n",
+					current->comm, current->pid);
+				kfree(pPages);
+				pPages = next;
+			}
+
+			//remove our vma from the list
+			kfree(pVmaList);
+			pVma->vm_private_data = 0;
+		}
+		else
+		{
+			PRINTK_VERBOSE(KERN_DEBUG "ref count is %d, not closing\n", pVmaList->m_refCount);
+		}
+	}
+	else
+	{
+		PRINTK_VERBOSE(KERN_ERR "uh-oh, vma %p not found (%s %d)!\n", pVma, current->comm, current->pid);
+		PRINTK_VERBOSE(KERN_ERR "CLOSE ERR\n");
+	}
+
+	PRINTK_VERBOSE(KERN_DEBUG "CLOSE %p %d %d pages (tracked pages %d)\n",
+		pVma, current->pid, freed, g_trackedPages);
+
+	PRINTK_VERBOSE(KERN_DEBUG "%d pages open\n", g_trackedPages);
+}
+
+static int VmaFault4k(struct vm_area_struct *pVma, struct vm_fault *pVmf)
+{
+	PRINTK_VERBOSE(KERN_DEBUG "vma fault for vma %p private %p at offset %ld (%s %d)\n", pVma, pVma->vm_private_data, pVmf->pgoff,
+		current->comm, current->pid);
+	PRINTK_VERBOSE(KERN_DEBUG "FAULT\n");
+	pVmf->page = alloc_page(GFP_KERNEL);
+
+	if (pVmf->page)
+	{
+		PRINTK_VERBOSE(KERN_DEBUG "alloc page virtual %p\n", page_address(pVmf->page));
+	}
+
+	if (!pVmf->page)
+	{
+		PRINTK(KERN_ERR "vma fault oom (%s %d)\n", current->comm, current->pid);
+		return VM_FAULT_OOM;
+	}
+	else
+	{
+		struct VmaPageList *pVmaList;
+
+		get_page(pVmf->page);
+		g_trackedPages++;
+
+		//find our vma in the list
+		pVmaList = (struct VmaPageList *)pVma->vm_private_data;
+
+		if (pVmaList)
+		{
+			PRINTK_VERBOSE(KERN_DEBUG "vma found (%s %d)\n", current->comm, current->pid);
+
+			if (pVmaList->m_pPageTail->m_used == PAGES_PER_LIST)
+			{
+				PRINTK_VERBOSE(KERN_DEBUG "making new page list (%s %d)\n",
+					current->comm, current->pid);
+				//making a new page list
+				pVmaList->m_pPageTail->m_pNext = (struct PageList *)kmalloc(sizeof(struct PageList), GFP_KERNEL);
+				if (!pVmaList->m_pPageTail->m_pNext)
+					return VM_FAULT_OOM;
+
+				//update the tail pointer
+				pVmaList->m_pPageTail = pVmaList->m_pPageTail->m_pNext;
+				pVmaList->m_pPageTail->m_used = 0;
+				pVmaList->m_pPageTail->m_pNext = 0;
+			}
+
+			PRINTK_VERBOSE(KERN_DEBUG "adding page to list (%s %d)\n", current->comm, current->pid);
+
+			pVmaList->m_pPageTail->m_pPages[pVmaList->m_pPageTail->m_used] = pVmf->page;
+			pVmaList->m_pPageTail->m_used++;
+		}
+		else
+			PRINTK(KERN_ERR "returned page for vma we don't know %p (%s %d)\n", pVma, current->comm, current->pid);
+
+		return 0;
+	}
+}
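+//page lifetime: VmaOpen4k takes a reference each time the vma is duplicated
+//(e.g. across fork) and VmaClose4k only frees the tracked pages once the
+//count returns to zero, so a child exiting does not pull pages out from
+//under the parent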
"making new page list (%s %d)\n", current->comm, current->pid); + //making a new page list + pVmaList->m_pPageTail->m_pNext = (struct PageList *)kmalloc(sizeof(struct PageList), GFP_KERNEL); + if (!pVmaList->m_pPageTail->m_pNext) + return -ENOMEM; + + //update the tail pointer + pVmaList->m_pPageTail = pVmaList->m_pPageTail->m_pNext; + pVmaList->m_pPageTail->m_used = 0; + pVmaList->m_pPageTail->m_pNext = 0; + } + + PRINTK_VERBOSE(KERN_DEBUG "adding page to list (%s %d)\n", current->comm, current->pid); + + pVmaList->m_pPageTail->m_pPages[pVmaList->m_pPageTail->m_used] = pVmf->page; + pVmaList->m_pPageTail->m_used++; + } + else + PRINTK(KERN_ERR "returned page for vma we don\'t know %p (%s %d)\n", pVma, current->comm, current->pid); + + return 0; + } +} + +/****** GENERIC FUNCTIONS ******/ +static int __init dmaer_init(void) +{ + int result = alloc_chrdev_region(&g_majorMinor, 0, 1, "dmaer"); + if (result < 0) + { + PRINTK(KERN_ERR "unable to get major device number\n"); + return result; + } + else + PRINTK(KERN_DEBUG "major device number %d\n", MAJOR(g_majorMinor)); + + PRINTK(KERN_DEBUG "vma list size %d, page list size %d, page size %ld\n", + sizeof(struct VmaPageList), sizeof(struct PageList), PAGE_SIZE); + + //get a dma channel to work with + result = bcm_dma_chan_alloc(BCM_DMA_FEATURE_FAST, (void **)&g_pDmaChanBase, &g_dmaIrq); + + //uncomment to force to channel 0 + //result = 0; + //g_pDmaChanBase = 0xce808000; + + if (result < 0) + { + PRINTK(KERN_ERR "failed to allocate dma channel\n"); + cdev_del(&g_cDev); + unregister_chrdev_region(g_majorMinor, 1); + } + + //reset the channel + PRINTK(KERN_DEBUG "allocated dma channel %d (%p), initial state %08x\n", result, g_pDmaChanBase, *g_pDmaChanBase); + *g_pDmaChanBase = 1 << 31; + PRINTK(KERN_DEBUG "post-reset %08x\n", *g_pDmaChanBase); + + g_dmaChan = result; + + //clear the cache stats + g_cacheHit = 0; + g_cacheMiss = 0; + + //register our device - after this we are go go go + cdev_init(&g_cDev, &g_fOps); + g_cDev.owner = THIS_MODULE; + g_cDev.ops = &g_fOps; + + result = cdev_add(&g_cDev, g_majorMinor, 1); + if (result < 0) + { + PRINTK(KERN_ERR "failed to add character device\n"); + unregister_chrdev_region(g_majorMinor, 1); + bcm_dma_chan_free(g_dmaChan); + return result; + } + + return 0; +} + +static void __exit dmaer_exit(void) +{ + PRINTK(KERN_INFO "closing dmaer device, cache stats: %d hits %d misses\n", g_cacheHit, g_cacheMiss); + //unregister the device + cdev_del(&g_cDev); + unregister_chrdev_region(g_majorMinor, 1); + //free the dma channel + bcm_dma_chan_free(g_dmaChan); +} + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Simon Hall"); +module_init(dmaer_init); +module_exit(dmaer_exit); + diff --git a/arch/arm/mach-bcm2708/include/mach/vc_support.h b/arch/arm/mach-bcm2708/include/mach/vc_support.h new file mode 100755 index 0000000..70e809f --- /dev/null +++ b/arch/arm/mach-bcm2708/include/mach/vc_support.h @@ -0,0 +1,69 @@ +#ifndef _VC_SUPPORT_H_ +#define _VC_SUPPORT_H_ + +/* + * vc_support.h + * + * Created on: 25 Nov 2012 + * Author: Simon + */ + +enum { +/* + If a MEM_HANDLE_T is discardable, the memory manager may resize it to size + 0 at any time when it is not locked or retained. + */ + MEM_FLAG_DISCARDABLE = 1 << 0, + + /* + If a MEM_HANDLE_T is allocating (or normal), its block of memory will be + accessed in an allocating fashion through the cache. 
+enum {
+	/*
+	   If a MEM_HANDLE_T is discardable, the memory manager may resize it to size
+	   0 at any time when it is not locked or retained.
+	 */
+	MEM_FLAG_DISCARDABLE = 1 << 0,
+
+	/*
+	   If a MEM_HANDLE_T is allocating (or normal), its block of memory will be
+	   accessed in an allocating fashion through the cache.
+	 */
+	MEM_FLAG_NORMAL = 0 << 2,
+	MEM_FLAG_ALLOCATING = MEM_FLAG_NORMAL,
+
+	/*
+	   If a MEM_HANDLE_T is direct, its block of memory will be accessed
+	   directly, bypassing the cache.
+	 */
+	MEM_FLAG_DIRECT = 1 << 2,
+
+	/*
+	   If a MEM_HANDLE_T is coherent, its block of memory will be accessed in a
+	   non-allocating fashion through the cache.
+	 */
+	MEM_FLAG_COHERENT = 2 << 2,
+
+	/*
+	   If a MEM_HANDLE_T is L1-nonallocating, its block of memory will be accessed by
+	   the VPU in a fashion which is allocating in L2, but only coherent in L1.
+	 */
+	MEM_FLAG_L1_NONALLOCATING = (MEM_FLAG_DIRECT | MEM_FLAG_COHERENT),
+
+	/*
+	   If a MEM_HANDLE_T is zero'd, its contents are set to 0 rather than
+	   MEM_HANDLE_INVALID on allocation and resize up.
+	 */
+	MEM_FLAG_ZERO = 1 << 4,
+
+	/*
+	   If a MEM_HANDLE_T is uninitialised, it will not be reset to a defined value
+	   (either zero, or all 1's) on allocation.
+	 */
+	MEM_FLAG_NO_INIT = 1 << 5,
+
+	/*
+	   Hints.
+	 */
+	MEM_FLAG_HINT_PERMALOCK = 1 << 6,	/* Likely to be locked for long periods of time. */
+};
+
+unsigned int AllocateVcMemory(unsigned int *pHandle, unsigned int size, unsigned int alignment, unsigned int flags);
+unsigned int ReleaseVcMemory(unsigned int handle);
+unsigned int LockVcMemory(unsigned int *pBusAddress, unsigned int handle);
+unsigned int UnlockVcMemory(unsigned int handle);
+
+unsigned int ExecuteVcCode(unsigned int code,
+	unsigned int r0, unsigned int r1, unsigned int r2, unsigned int r3, unsigned int r4, unsigned int r5);
+
+#endif
diff --git a/arch/arm/mach-bcm2708/vc_support.c b/arch/arm/mach-bcm2708/vc_support.c
new file mode 100755
index 0000000..5cb1335
--- /dev/null
+++ b/arch/arm/mach-bcm2708/vc_support.c
@@ -0,0 +1,319 @@
+/*
+ * vc_support.c
+ *
+ *  Created on: 25 Nov 2012
+ *      Author: Simon
+ */
+
+#include <linux/kernel.h>
+#include <mach/vcio.h>
+
+#ifdef ECLIPSE_IGNORE
+
+#define __user
+#define __init
+#define __exit
+#define __iomem
+#define KERN_DEBUG
+#define KERN_ERR
+#define KERN_WARNING
+#define KERN_INFO
+#define _IOWR(a, b, c) b
+#define _IOW(a, b, c) b
+#define _IO(a, b) b
+
+#endif
+
+/****** VC MAILBOX FUNCTIONALITY ******/
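+/*
+   All five helpers below use the mailbox property channel with the same
+   message shape:
+
+     u32 total message size in bytes
+     u32 request/response code (0 on request; firmware writes 0x80000000 on success)
+     tag: u32 tag id
+          u32 value buffer size in bytes
+          u32 request/response data size (firmware sets bit 31 in the response)
+          value buffer
+     u32 end tag (0)
+
+   hence the checks for m_response == 0x80000000 and for bit 31 in
+   m_recvDataSize after each call.
+*/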
+unsigned int AllocateVcMemory(unsigned int *pHandle, unsigned int size, unsigned int alignment, unsigned int flags)
+{
+	struct vc_msg
+	{
+		unsigned int m_msgSize;
+		unsigned int m_response;
+
+		struct vc_tag
+		{
+			unsigned int m_tagId;
+			unsigned int m_sendBufferSize;
+			union {
+				unsigned int m_sendDataSize;
+				unsigned int m_recvDataSize;
+			};
+
+			struct args
+			{
+				union {
+					unsigned int m_size;
+					unsigned int m_handle;
+				};
+				unsigned int m_alignment;
+				unsigned int m_flags;
+			} m_args;
+		} m_tag;
+
+		unsigned int m_endTag;
+	} msg;
+	int s;
+
+	msg.m_msgSize = sizeof(msg);
+	msg.m_response = 0;
+	msg.m_endTag = 0;
+
+	//fill in the tag for the allocation command
+	msg.m_tag.m_tagId = 0x3000c;
+	msg.m_tag.m_sendBufferSize = 12;
+	msg.m_tag.m_sendDataSize = 12;
+
+	//fill in our args
+	msg.m_tag.m_args.m_size = size;
+	msg.m_tag.m_args.m_alignment = alignment;
+	msg.m_tag.m_args.m_flags = flags;
+
+	//run the command
+	s = bcm_mailbox_property(&msg, sizeof(msg));
+
+	if (s == 0 && msg.m_response == 0x80000000 && msg.m_tag.m_recvDataSize == 0x80000004)
+	{
+		*pHandle = msg.m_tag.m_args.m_handle;
+		return 0;
+	}
+	else
+	{
+		printk(KERN_ERR "failed to allocate vc memory: s=%d response=%08x recv data size=%08x\n",
+			s, msg.m_response, msg.m_tag.m_recvDataSize);
+		return 1;
+	}
+}
+
+unsigned int ReleaseVcMemory(unsigned int handle)
+{
+	struct vc_msg
+	{
+		unsigned int m_msgSize;
+		unsigned int m_response;
+
+		struct vc_tag
+		{
+			unsigned int m_tagId;
+			unsigned int m_sendBufferSize;
+			union {
+				unsigned int m_sendDataSize;
+				unsigned int m_recvDataSize;
+			};
+
+			struct args
+			{
+				union {
+					unsigned int m_handle;
+					unsigned int m_error;
+				};
+			} m_args;
+		} m_tag;
+
+		unsigned int m_endTag;
+	} msg;
+	int s;
+
+	msg.m_msgSize = sizeof(msg);
+	msg.m_response = 0;
+	msg.m_endTag = 0;
+
+	//fill in the tag for the release command
+	msg.m_tag.m_tagId = 0x3000f;
+	msg.m_tag.m_sendBufferSize = 4;
+	msg.m_tag.m_sendDataSize = 4;
+
+	//pass across the handle
+	msg.m_tag.m_args.m_handle = handle;
+
+	s = bcm_mailbox_property(&msg, sizeof(msg));
+
+	if (s == 0 && msg.m_response == 0x80000000 && msg.m_tag.m_recvDataSize == 0x80000004 && msg.m_tag.m_args.m_error == 0)
+		return 0;
+	else
+	{
+		printk(KERN_ERR "failed to release vc memory: s=%d response=%08x recv data size=%08x error=%08x\n",
+			s, msg.m_response, msg.m_tag.m_recvDataSize, msg.m_tag.m_args.m_error);
+		return 1;
+	}
+}
+
+unsigned int LockVcMemory(unsigned int *pBusAddress, unsigned int handle)
+{
+	struct vc_msg
+	{
+		unsigned int m_msgSize;
+		unsigned int m_response;
+
+		struct vc_tag
+		{
+			unsigned int m_tagId;
+			unsigned int m_sendBufferSize;
+			union {
+				unsigned int m_sendDataSize;
+				unsigned int m_recvDataSize;
+			};
+
+			struct args
+			{
+				union {
+					unsigned int m_handle;
+					unsigned int m_busAddress;
+				};
+			} m_args;
+		} m_tag;
+
+		unsigned int m_endTag;
+	} msg;
+	int s;
+
+	msg.m_msgSize = sizeof(msg);
+	msg.m_response = 0;
+	msg.m_endTag = 0;
+
+	//fill in the tag for the lock command
+	msg.m_tag.m_tagId = 0x3000d;
+	msg.m_tag.m_sendBufferSize = 4;
+	msg.m_tag.m_sendDataSize = 4;
+
+	//pass across the handle
+	msg.m_tag.m_args.m_handle = handle;
+
+	s = bcm_mailbox_property(&msg, sizeof(msg));
+
+	if (s == 0 && msg.m_response == 0x80000000 && msg.m_tag.m_recvDataSize == 0x80000004)
+	{
+		//pick out the bus address
+		*pBusAddress = msg.m_tag.m_args.m_busAddress;
+		return 0;
+	}
+	else
+	{
+		printk(KERN_ERR "failed to lock vc memory: s=%d response=%08x recv data size=%08x\n",
+			s, msg.m_response, msg.m_tag.m_recvDataSize);
+		return 1;
+	}
+}
+
+unsigned int UnlockVcMemory(unsigned int handle)
+{
+	struct vc_msg
+	{
+		unsigned int m_msgSize;
+		unsigned int m_response;
+
+		struct vc_tag
+		{
+			unsigned int m_tagId;
+			unsigned int m_sendBufferSize;
+			union {
+				unsigned int m_sendDataSize;
+				unsigned int m_recvDataSize;
+			};
+
+			struct args
+			{
+				union {
+					unsigned int m_handle;
+					unsigned int m_error;
+				};
+			} m_args;
+		} m_tag;
+
+		unsigned int m_endTag;
+	} msg;
+	int s;
+
+	msg.m_msgSize = sizeof(msg);
+	msg.m_response = 0;
+	msg.m_endTag = 0;
+
+	//fill in the tag for the unlock command
+	msg.m_tag.m_tagId = 0x3000e;
+	msg.m_tag.m_sendBufferSize = 4;
+	msg.m_tag.m_sendDataSize = 4;
+
+	//pass across the handle
+	msg.m_tag.m_args.m_handle = handle;
+
+	s = bcm_mailbox_property(&msg, sizeof(msg));
+
+	//check the error code too
+	if (s == 0 && msg.m_response == 0x80000000 && msg.m_tag.m_recvDataSize == 0x80000004 && msg.m_tag.m_args.m_error == 0)
+		return 0;
+	else
+	{
+		printk(KERN_ERR "failed to unlock vc memory: s=%d response=%08x recv data size=%08x error=%08x\n",
+			s, msg.m_response, msg.m_tag.m_recvDataSize, msg.m_tag.m_args.m_error);
+		return 1;
+	}
+}
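+//tag 0x30010 asks the firmware to call a routine at a VideoCore bus address
+//with up to six word-sized arguments; the code-address slot of the request
+//doubles as the r0 return value in the response, hence the union below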
+
+unsigned int ExecuteVcCode(unsigned int code,
+	unsigned int r0, unsigned int r1, unsigned int r2, unsigned int r3, unsigned int r4, unsigned int r5)
+{
+	struct vc_msg
+	{
+		unsigned int m_msgSize;
+		unsigned int m_response;
+
+		struct vc_tag
+		{
+			unsigned int m_tagId;
+			unsigned int m_sendBufferSize;
+			union {
+				unsigned int m_sendDataSize;
+				unsigned int m_recvDataSize;
+			};
+
+			struct args
+			{
+				union {
+					unsigned int m_pCode;
+					unsigned int m_return;
+				};
+				unsigned int m_r0;
+				unsigned int m_r1;
+				unsigned int m_r2;
+				unsigned int m_r3;
+				unsigned int m_r4;
+				unsigned int m_r5;
+			} m_args;
+		} m_tag;
+
+		unsigned int m_endTag;
+	} msg;
+	int s;
+
+	msg.m_msgSize = sizeof(msg);
+	msg.m_response = 0;
+	msg.m_endTag = 0;
+
+	//fill in the tag for the execute command
+	msg.m_tag.m_tagId = 0x30010;
+	msg.m_tag.m_sendBufferSize = 28;
+	msg.m_tag.m_sendDataSize = 28;
+
+	//pass across the code address and the arguments
+	msg.m_tag.m_args.m_pCode = code;
+	msg.m_tag.m_args.m_r0 = r0;
+	msg.m_tag.m_args.m_r1 = r1;
+	msg.m_tag.m_args.m_r2 = r2;
+	msg.m_tag.m_args.m_r3 = r3;
+	msg.m_tag.m_args.m_r4 = r4;
+	msg.m_tag.m_args.m_r5 = r5;
+
+	s = bcm_mailbox_property(&msg, sizeof(msg));
+
+	//check the error code too
+	if (s == 0 && msg.m_response == 0x80000000 && msg.m_tag.m_recvDataSize == 0x80000004)
+		return msg.m_tag.m_args.m_return;
+	else
+	{
+		printk(KERN_ERR "failed to execute: s=%d response=%08x recv data size=%08x\n",
+			s, msg.m_response, msg.m_tag.m_recvDataSize);
+		return 1;
+	}
+}