diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt deleted file mode 100644 index ecad88d..0000000 --- a/Documentation/DMA-mapping.txt +++ /dev/null @@ -1,766 +0,0 @@ - Dynamic DMA mapping - =================== - - David S. Miller - Richard Henderson - Jakub Jelinek - -This document describes the DMA mapping system in terms of the pci_ -API. For a similar API that works for generic devices, see -DMA-API.txt. - -Most of the 64bit platforms have special hardware that translates bus -addresses (DMA addresses) into physical addresses. This is similar to -how page tables and/or a TLB translates virtual addresses to physical -addresses on a CPU. This is needed so that e.g. PCI devices can -access with a Single Address Cycle (32bit DMA address) any page in the -64bit physical address space. Previously in Linux those 64bit -platforms had to set artificial limits on the maximum RAM size in the -system, so that the virt_to_bus() static scheme works (the DMA address -translation tables were simply filled on bootup to map each bus -address to the physical page __pa(bus_to_virt())). - -So that Linux can use the dynamic DMA mapping, it needs some help from the -drivers, namely it has to take into account that DMA addresses should be -mapped only for the time they are actually used and unmapped after the DMA -transfer. - -The following API will work of course even on platforms where no such -hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on -top of the virt_to_bus interface. - -First of all, you should make sure - -#include <linux/pci.h> - -is in your driver. This file will obtain for you the definition of the -dma_addr_t (which can hold any valid DMA address for the platform) -type which should be used everywhere you hold a DMA (bus) address -returned from the DMA mapping functions. - - What memory is DMA'able? - -The first piece of information you must know is what kernel memory can -be used with the DMA mapping facilities. 
There has been an unwritten -set of rules regarding this, and this text is an attempt to finally -write them down. - -If you acquired your memory via the page allocator -(i.e. __get_free_page*()) or the generic memory allocators -(i.e. kmalloc() or kmem_cache_alloc()) then you may DMA to/from -that memory using the addresses returned from those routines. - -This means specifically that you may _not_ use the memory/addresses -returned from vmalloc() for DMA. It is possible to DMA to the -_underlying_ memory mapped into a vmalloc() area, but this requires -walking page tables to get the physical addresses, and then -translating each of those pages back to a kernel address using -something like __va(). [ EDIT: Update this when we integrate -Gerd Knorr's generic code which does this. ] - -This rule also means that you may use neither kernel image addresses -(items in data/text/bss segments), nor module image addresses, nor -stack addresses for DMA. These could all be mapped somewhere entirely -different than the rest of physical memory. Even if those classes of -memory could physically work with DMA, you'd need to ensure the I/O -buffers were cacheline-aligned. Without that, you'd see cacheline -sharing problems (data corruption) on CPUs with DMA-incoherent caches. -(The CPU could write to one word, DMA would write to a different one -in the same cache line, and one of them could be overwritten.) - -Also, this means that you cannot take the return of a kmap() -call and DMA to/from that. This is similar to vmalloc(). - -What about block I/O and networking buffers? The block I/O and -networking subsystems make sure that the buffers they use are valid -for you to DMA from/to. - - DMA addressing limitations - -Does your device have any DMA addressing limitations? For example, is -your device only capable of driving the low order 24-bits of address -on the PCI bus for SAC DMA transfers? If so, you need to inform the -PCI layer of this fact. 
- -By default, the kernel assumes that your device can address the full -32-bits in a SAC cycle. For a 64-bit DAC capable device, this needs -to be increased. And for a device with limitations, as discussed in -the previous paragraph, it needs to be decreased. - -pci_alloc_consistent() by default will return 32-bit DMA addresses. -PCI-X specification requires PCI-X devices to support 64-bit -addressing (DAC) for all transactions. And at least one platform (SGI -SN2) requires 64-bit consistent allocations to operate correctly when -the IO bus is in PCI-X mode. Therefore, like with pci_set_dma_mask(), -it's good practice to call pci_set_consistent_dma_mask() to set the -appropriate mask even if your device only supports 32-bit DMA -(default) and especially if it's a PCI-X device. - -For correct operation, you must interrogate the PCI layer in your -device probe routine to see if the PCI controller on the machine can -properly support the DMA addressing limitation your device has. It is -good style to do this even if your device holds the default setting, -because this shows that you did think about these issues wrt. your -device. - -The query is performed via a call to pci_set_dma_mask(): - - int pci_set_dma_mask(struct pci_dev *pdev, u64 device_mask); - -The query for consistent allocations is performed via a call to -pci_set_consistent_dma_mask(): - - int pci_set_consistent_dma_mask(struct pci_dev *pdev, u64 device_mask); - -Here, pdev is a pointer to the PCI device struct of your device, and -device_mask is a bit mask describing which bits of a PCI address your -device supports. It returns zero if your card can perform DMA -properly on the machine given the address mask you provided. - -If it returns non-zero, your device cannot perform DMA properly on -this platform, and attempting to do so will result in undefined -behavior. You must either use a different mask, or not use DMA. 
- -This means that in the failure case, you have three options: - -1) Use another DMA mask, if possible (see below). -2) Use some non-DMA mode for data transfer, if possible. -3) Ignore this device and do not initialize it. - -It is recommended that your driver print a kernel KERN_WARNING message -when you end up performing either #2 or #3. In this manner, if a user -of your driver reports that performance is bad or that the device is not -even detected, you can ask them for the kernel messages to find out -exactly why. - -The standard 32-bit addressing PCI device would do something like -this: - - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { - printk(KERN_WARNING - "mydev: No suitable DMA available.\n"); - goto ignore_this_device; - } - -Another common scenario is a 64-bit capable device. The approach -here is to try for 64-bit DAC addressing, but back down to a -32-bit mask should that fail. The PCI platform code may fail the -64-bit mask not because the platform is not capable of 64-bit -addressing. Rather, it may fail in this case simply because -32-bit SAC addressing is done more efficiently than DAC addressing. -Sparc64 is one platform which behaves in this way. 
- -Here is how you would handle a 64-bit capable device which can drive -all 64-bits when accessing streaming DMA: - - int using_dac; - - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - using_dac = 1; - } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { - using_dac = 0; - } else { - printk(KERN_WARNING - "mydev: No suitable DMA available.\n"); - goto ignore_this_device; - } - -If a card is capable of using 64-bit consistent allocations as well, -the case would look like this: - - int using_dac, consistent_using_dac; - - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - using_dac = 1; - consistent_using_dac = 1; - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { - using_dac = 0; - consistent_using_dac = 0; - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } else { - printk(KERN_WARNING - "mydev: No suitable DMA available.\n"); - goto ignore_this_device; - } - -pci_set_consistent_dma_mask() will always be able to set the same or a -smaller mask as pci_set_dma_mask(). However for the rare case that a -device driver only uses consistent allocations, one would have to -check the return value from pci_set_consistent_dma_mask(). - -Finally, if your device can only drive the low 24-bits of -address during PCI bus mastering you might do something like: - - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(24))) { - printk(KERN_WARNING - "mydev: 24-bit DMA addressing not available.\n"); - goto ignore_this_device; - } - -When pci_set_dma_mask() is successful, and returns zero, the PCI layer -saves away this mask you have provided. The PCI layer will use this -information later when you make DMA mappings. - -There is a case which we are aware of at this time, which is worth -mentioning in this documentation. 
If your device supports multiple -functions (for example a sound card provides playback and record -functions) and the various different functions have _different_ -DMA addressing limitations, you may wish to probe each mask and -only provide the functionality which the machine can handle. It -is important that the last call to pci_set_dma_mask() be for the -most specific mask. - -Here is pseudo-code showing how this might be done: - - #define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32) - #define RECORD_ADDRESS_BITS DMA_BIT_MASK(24) - - struct my_sound_card *card; - struct pci_dev *pdev; - - ... - if (!pci_set_dma_mask(pdev, PLAYBACK_ADDRESS_BITS)) { - card->playback_enabled = 1; - } else { - card->playback_enabled = 0; - printk(KERN_WARNING "%s: Playback disabled due to DMA limitations.\n", - card->name); - } - if (!pci_set_dma_mask(pdev, RECORD_ADDRESS_BITS)) { - card->record_enabled = 1; - } else { - card->record_enabled = 0; - printk(KERN_WARNING "%s: Record disabled due to DMA limitations.\n", - card->name); - } - -A sound card was used as an example here because this genre of PCI -devices seems to be littered with ISA chips given a PCI front end, -and thus retaining the 16MB DMA addressing limitations of ISA. - - Types of DMA mappings - -There are two types of DMA mappings: - -- Consistent DMA mappings which are usually mapped at driver - initialization, unmapped at the end and for which the hardware should - guarantee that the device and the CPU can access the data - in parallel and will see updates made by each other without any - explicit software flushing. - - Think of "consistent" as "synchronous" or "coherent". - - The current default is to return consistent memory in the low 32 - bits of the PCI bus space. However, for future compatibility you - should set the consistent mask even if this default is fine for your - driver. - - Good examples of what to use consistent mappings for are: - - - Network card DMA ring descriptors. 
- - SCSI adapter mailbox command data structures. - - Device firmware microcode executed out of - main memory. - - The invariant these examples all require is that any CPU store - to memory is immediately visible to the device, and vice - versa. Consistent mappings guarantee this. - - IMPORTANT: Consistent DMA memory does not preclude the usage of - proper memory barriers. The CPU may reorder stores to - consistent memory just as it may normal memory. Example: - if it is important for the device to see the first word - of a descriptor updated before the second, you must do - something like: - - desc->word0 = address; - wmb(); - desc->word1 = DESC_VALID; - - in order to get correct behavior on all platforms. - - Also, on some platforms your driver may need to flush CPU write - buffers in much the same way as it needs to flush write buffers - found in PCI bridges (such as by reading a register's value - after writing it). - -- Streaming DMA mappings which are usually mapped for one DMA transfer, - unmapped right after it (unless you use pci_dma_sync_* below) and for which - hardware can optimize for sequential accesses. - - This of "streaming" as "asynchronous" or "outside the coherency - domain". - - Good examples of what to use streaming mappings for are: - - - Networking buffers transmitted/received by a device. - - Filesystem buffers written/read by a SCSI device. - - The interfaces for using this type of mapping were designed in - such a way that an implementation can make whatever performance - optimizations the hardware allows. To this end, when using - such mappings you must be explicit about what you want to happen. - -Neither type of DMA mapping has alignment restrictions that come -from PCI, although some devices may have such restrictions. -Also, systems with caches that aren't DMA-coherent will work better -when the underlying buffers don't share cache lines with other data. - - - Using Consistent DMA mappings. 
- -To allocate and map large (PAGE_SIZE or so) consistent DMA regions, -you should do: - - dma_addr_t dma_handle; - - cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle); - -where pdev is a struct pci_dev *. This may be called in interrupt context. -You should use dma_alloc_coherent (see DMA-API.txt) for buses -where devices don't have struct pci_dev (like ISA, EISA). - -This argument is needed because the DMA translations may be bus -specific (and often is private to the bus which the device is attached -to). - -Size is the length of the region you want to allocate, in bytes. - -This routine will allocate RAM for that region, so it acts similarly to -__get_free_pages (but takes size instead of a page order). If your -driver needs regions sized smaller than a page, you may prefer using -the pci_pool interface, described below. - -The consistent DMA mapping interfaces, for non-NULL pdev, will by -default return a DMA address which is SAC (Single Address Cycle) -addressable. Even if the device indicates (via PCI dma mask) that it -may address the upper 32-bits and thus perform DAC cycles, consistent -allocation will only return > 32-bit PCI addresses for DMA if the -consistent dma mask has been explicitly changed via -pci_set_consistent_dma_mask(). This is true of the pci_pool interface -as well. - -pci_alloc_consistent returns two values: the virtual address which you -can use to access it from the CPU and dma_handle which you pass to the -card. - -The cpu return address and the DMA bus master address are both -guaranteed to be aligned to the smallest PAGE_SIZE order which -is greater than or equal to the requested size. This invariant -exists (for example) to guarantee that if you allocate a chunk -which is smaller than or equal to 64 kilobytes, the extent of the -buffer you receive will not cross a 64K boundary. 
- -To unmap and free such a DMA region, you call: - - pci_free_consistent(pdev, size, cpu_addr, dma_handle); - -where pdev, size are the same as in the above call and cpu_addr and -dma_handle are the values pci_alloc_consistent returned to you. -This function may not be called in interrupt context. - -If your driver needs lots of smaller memory regions, you can write -custom code to subdivide pages returned by pci_alloc_consistent, -or you can use the pci_pool API to do that. A pci_pool is like -a kmem_cache, but it uses pci_alloc_consistent not __get_free_pages. -Also, it understands common hardware constraints for alignment, -like queue heads needing to be aligned on N byte boundaries. - -Create a pci_pool like this: - - struct pci_pool *pool; - - pool = pci_pool_create(name, pdev, size, align, alloc); - -The "name" is for diagnostics (like a kmem_cache name); pdev and size -are as above. The device's hardware alignment requirement for this -type of data is "align" (which is expressed in bytes, and must be a -power of two). If your device has no boundary crossing restrictions, -pass 0 for alloc; passing 4096 says memory allocated from this pool -must not cross 4KByte boundaries (but at that time it may be better to -go for pci_alloc_consistent directly instead). - -Allocate memory from a pci pool like this: - - cpu_addr = pci_pool_alloc(pool, flags, &dma_handle); - -flags are SLAB_KERNEL if blocking is permitted (not in_interrupt nor -holding SMP locks), SLAB_ATOMIC otherwise. Like pci_alloc_consistent, -this returns two values, cpu_addr and dma_handle. - -Free memory that was allocated from a pci_pool like this: - - pci_pool_free(pool, cpu_addr, dma_handle); - -where pool is what you passed to pci_pool_alloc, and cpu_addr and -dma_handle are the values pci_pool_alloc returned. This function -may be called in interrupt context. 
- -Destroy a pci_pool by calling: - - pci_pool_destroy(pool); - -Make sure you've called pci_pool_free for all memory allocated -from a pool before you destroy the pool. This function may not -be called in interrupt context. - - DMA Direction - -The interfaces described in subsequent portions of this document -take a DMA direction argument, which is an integer and takes on -one of the following values: - - PCI_DMA_BIDIRECTIONAL - PCI_DMA_TODEVICE - PCI_DMA_FROMDEVICE - PCI_DMA_NONE - -One should provide the exact DMA direction if you know it. - -PCI_DMA_TODEVICE means "from main memory to the PCI device" -PCI_DMA_FROMDEVICE means "from the PCI device to main memory" -It is the direction in which the data moves during the DMA -transfer. - -You are _strongly_ encouraged to specify this as precisely -as you possibly can. - -If you absolutely cannot know the direction of the DMA transfer, -specify PCI_DMA_BIDIRECTIONAL. It means that the DMA can go in -either direction. The platform guarantees that you may legally -specify this, and that it will work, but this may be at the -cost of performance for example. - -The value PCI_DMA_NONE is to be used for debugging. One can -hold this in a data structure before you come to know the -precise direction, and this will help catch cases where your -direction tracking logic has failed to set things up properly. - -Another advantage of specifying this value precisely (outside of -potential platform-specific optimizations of such) is for debugging. -Some platforms actually have a write permission boolean which DMA -mappings can be marked with, much like page protections in the user -program address space. Such platforms can and do report errors in the -kernel logs when the PCI controller hardware detects violation of the -permission setting. - -Only streaming mappings specify a direction, consistent mappings -implicitly have a direction attribute setting of -PCI_DMA_BIDIRECTIONAL. 
- -The SCSI subsystem tells you the direction to use in the -'sc_data_direction' member of the SCSI command your driver is -working on. - -For Networking drivers, it's a rather simple affair. For transmit -packets, map/unmap them with the PCI_DMA_TODEVICE direction -specifier. For receive packets, just the opposite, map/unmap them -with the PCI_DMA_FROMDEVICE direction specifier. - - Using Streaming DMA mappings - -The streaming DMA mapping routines can be called from interrupt -context. There are two versions of each map/unmap, one which will -map/unmap a single memory region, and one which will map/unmap a -scatterlist. - -To map a single region, you do: - - struct pci_dev *pdev = mydev->pdev; - dma_addr_t dma_handle; - void *addr = buffer->ptr; - size_t size = buffer->len; - - dma_handle = pci_map_single(pdev, addr, size, direction); - -and to unmap it: - - pci_unmap_single(pdev, dma_handle, size, direction); - -You should call pci_unmap_single when the DMA activity is finished, e.g. -from the interrupt which told you that the DMA transfer is done. - -Using cpu pointers like this for single mappings has a disadvantage, -you cannot reference HIGHMEM memory in this way. Thus, there is a -map/unmap interface pair akin to pci_{map,unmap}_single. These -interfaces deal with page/offset pairs instead of cpu pointers. -Specifically: - - struct pci_dev *pdev = mydev->pdev; - dma_addr_t dma_handle; - struct page *page = buffer->page; - unsigned long offset = buffer->offset; - size_t size = buffer->len; - - dma_handle = pci_map_page(pdev, page, offset, size, direction); - - ... - - pci_unmap_page(pdev, dma_handle, size, direction); - -Here, "offset" means byte offset within the given page. 
- -With scatterlists, you map a region gathered from several regions by: - - int i, count = pci_map_sg(pdev, sglist, nents, direction); - struct scatterlist *sg; - - for_each_sg(sglist, sg, count, i) { - hw_address[i] = sg_dma_address(sg); - hw_len[i] = sg_dma_len(sg); - } - -where nents is the number of entries in the sglist. - -The implementation is free to merge several consecutive sglist entries -into one (e.g. if DMA mapping is done with PAGE_SIZE granularity, any -consecutive sglist entries can be merged into one provided the first one -ends and the second one starts on a page boundary - in fact this is a huge -advantage for cards which either cannot do scatter-gather or have very -limited number of scatter-gather entries) and returns the actual number -of sg entries it mapped them to. On failure 0 is returned. - -Then you should loop count times (note: this can be less than nents times) -and use sg_dma_address() and sg_dma_len() macros where you previously -accessed sg->address and sg->length as shown above. - -To unmap a scatterlist, just call: - - pci_unmap_sg(pdev, sglist, nents, direction); - -Again, make sure DMA activity has already finished. - -PLEASE NOTE: The 'nents' argument to the pci_unmap_sg call must be - the _same_ one you passed into the pci_map_sg call, - it should _NOT_ be the 'count' value _returned_ from the - pci_map_sg call. - -Every pci_map_{single,sg} call should have its pci_unmap_{single,sg} -counterpart, because the bus address space is a shared resource (although -in some ports the mapping is per each BUS so less devices contend for the -same bus address space) and you could render the machine unusable by eating -all bus addresses. - -If you need to use the same streaming DMA region multiple times and touch -the data in between the DMA transfers, the buffer needs to be synced -properly in order for the cpu and device to see the most uptodate and -correct copy of the DMA buffer. 
- -So, firstly, just map it with pci_map_{single,sg}, and after each DMA -transfer call either: - - pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction); - -or: - - pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction); - -as appropriate. - -Then, if you wish to let the device get at the DMA area again, -finish accessing the data with the cpu, and then before actually -giving the buffer to the hardware call either: - - pci_dma_sync_single_for_device(pdev, dma_handle, size, direction); - -or: - - pci_dma_sync_sg_for_device(dev, sglist, nents, direction); - -as appropriate. - -After the last DMA transfer call one of the DMA unmap routines -pci_unmap_{single,sg}. If you don't touch the data from the first pci_map_* -call till pci_unmap_*, then you don't have to call the pci_dma_sync_* -routines at all. - -Here is pseudo code which shows a situation in which you would need -to use the pci_dma_sync_*() interfaces. - - my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len) - { - dma_addr_t mapping; - - mapping = pci_map_single(cp->pdev, buffer, len, PCI_DMA_FROMDEVICE); - - cp->rx_buf = buffer; - cp->rx_len = len; - cp->rx_dma = mapping; - - give_rx_buf_to_card(cp); - } - - ... - - my_card_interrupt_handler(int irq, void *devid, struct pt_regs *regs) - { - struct my_card *cp = devid; - - ... - if (read_card_status(cp) == RX_BUF_TRANSFERRED) { - struct my_card_header *hp; - - /* Examine the header to see if we wish - * to accept the data. But synchronize - * the DMA transfer with the CPU first - * so that we see updated contents. - */ - pci_dma_sync_single_for_cpu(cp->pdev, cp->rx_dma, - cp->rx_len, - PCI_DMA_FROMDEVICE); - - /* Now it is safe to examine the buffer. 
*/ - hp = (struct my_card_header *) cp->rx_buf; - if (header_is_ok(hp)) { - pci_unmap_single(cp->pdev, cp->rx_dma, cp->rx_len, - PCI_DMA_FROMDEVICE); - pass_to_upper_layers(cp->rx_buf); - make_and_setup_new_rx_buf(cp); - } else { - /* Just sync the buffer and give it back - * to the card. - */ - pci_dma_sync_single_for_device(cp->pdev, - cp->rx_dma, - cp->rx_len, - PCI_DMA_FROMDEVICE); - give_rx_buf_to_card(cp); - } - } - } - -Drivers converted fully to this interface should not use virt_to_bus any -longer, nor should they use bus_to_virt. Some drivers have to be changed a -little bit, because there is no longer an equivalent to bus_to_virt in the -dynamic DMA mapping scheme - you have to always store the DMA addresses -returned by the pci_alloc_consistent, pci_pool_alloc, and pci_map_single -calls (pci_map_sg stores them in the scatterlist itself if the platform -supports dynamic DMA mapping in hardware) in your driver structures and/or -in the card registers. - -All PCI drivers should be using these interfaces with no exceptions. -It is planned to completely remove virt_to_bus() and bus_to_virt() as -they are entirely deprecated. Some ports already do not provide these -as it is impossible to correctly support them. - - Optimizing Unmap State Space Consumption - -On many platforms, pci_unmap_{single,page}() is simply a nop. -Therefore, keeping track of the mapping address and length is a waste -of space. Instead of filling your drivers up with ifdefs and the like -to "work around" this (which would defeat the whole purpose of a -portable API) the following facilities are provided. - -Actually, instead of describing the macros one by one, we'll -transform some example code. - -1) Use DECLARE_PCI_UNMAP_{ADDR,LEN} in state saving structures. 
- Example, before: - - struct ring_state { - struct sk_buff *skb; - dma_addr_t mapping; - __u32 len; - }; - - after: - - struct ring_state { - struct sk_buff *skb; - DECLARE_PCI_UNMAP_ADDR(mapping) - DECLARE_PCI_UNMAP_LEN(len) - }; - - NOTE: DO NOT put a semicolon at the end of the DECLARE_*() - macro. - -2) Use pci_unmap_{addr,len}_set to set these values. - Example, before: - - ringp->mapping = FOO; - ringp->len = BAR; - - after: - - pci_unmap_addr_set(ringp, mapping, FOO); - pci_unmap_len_set(ringp, len, BAR); - -3) Use pci_unmap_{addr,len} to access these values. - Example, before: - - pci_unmap_single(pdev, ringp->mapping, ringp->len, - PCI_DMA_FROMDEVICE); - - after: - - pci_unmap_single(pdev, - pci_unmap_addr(ringp, mapping), - pci_unmap_len(ringp, len), - PCI_DMA_FROMDEVICE); - -It really should be self-explanatory. We treat the ADDR and LEN -separately, because it is possible for an implementation to only -need the address in order to perform the unmap operation. - - Platform Issues - -If you are just writing drivers for Linux and do not maintain -an architecture port for the kernel, you can safely skip down -to "Closing". - -1) Struct scatterlist requirements. - - Struct scatterlist must contain, at a minimum, the following - members: - - struct page *page; - unsigned int offset; - unsigned int length; - - The base address is specified by a "page+offset" pair. - - Previous versions of struct scatterlist contained a "void *address" - field that was sometimes used instead of page+offset. As of Linux - 2.5., page+offset is always used, and the "address" field has been - deleted. - -2) More to come... 
- - Handling Errors - -DMA address space is limited on some architectures and an allocation -failure can be determined by: - -- checking if pci_alloc_consistent returns NULL or pci_map_sg returns 0 - -- checking the returned dma_addr_t of pci_map_single and pci_map_page - by using pci_dma_mapping_error(): - - dma_addr_t dma_handle; - - dma_handle = pci_map_single(pdev, addr, size, direction); - if (pci_dma_mapping_error(pdev, dma_handle)) { - /* - * reduce current DMA mapping usage, - * delay and try again later or - * reset driver. - */ - } - - Closing - -This document, and the API itself, would not be in it's current -form without the feedback and suggestions from numerous individuals. -We would like to specifically mention, in no particular order, the -following people: - - Russell King - Leo Dagum - Ralf Baechle - Grant Grundler - Jay Estabrook - Thomas Sailer - Andrea Arcangeli - Jens Axboe - David Mosberger-Tang diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index f508a8a..5e7d84b 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -174,7 +174,7 @@ static struct mtd_info *board_mtd; -static unsigned long baseaddr; +static void __iomem *baseaddr; Static example @@ -182,7 +182,7 @@ static unsigned long baseaddr; static struct mtd_info board_mtd; static struct nand_chip board_chip; -static unsigned long baseaddr; +static void __iomem *baseaddr; @@ -283,8 +283,8 @@ int __init board_init (void) } /* map physical address */ - baseaddr = (unsigned long)ioremap(CHIP_PHYSICAL_ADDRESS, 1024); - if(!baseaddr){ + baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024); + if (!baseaddr) { printk("Ioremap to access NAND chip failed\n"); err = -EIO; goto out_mtd; @@ -316,7 +316,7 @@ int __init board_init (void) goto out; out_ior: - iounmap((void *)baseaddr); + iounmap(baseaddr); out_mtd: kfree (board_mtd); out: @@ -341,7 +341,7 @@ static void __exit board_cleanup (void) nand_release (board_mtd); /* unmap 
physical address */ - iounmap((void *)baseaddr); + iounmap(baseaddr); /* Free the MTD device structure */ kfree (board_mtd); diff --git a/Documentation/IO-mapping.txt b/Documentation/IO-mapping.txt index 78a4406..1b5aa10 100644 --- a/Documentation/IO-mapping.txt +++ b/Documentation/IO-mapping.txt @@ -157,7 +157,7 @@ For such memory, you can do things like * access only the 640k-1MB area, so anything else * has to be remapped. */ - char * baseptr = ioremap(0xFC000000, 1024*1024); + void __iomem *baseptr = ioremap(0xFC000000, 1024*1024); /* write a 'A' to the offset 10 of the area */ writeb('A',baseptr+10); diff --git a/Documentation/PCI/PCI-DMA-mapping.txt b/Documentation/PCI/PCI-DMA-mapping.txt new file mode 100644 index 0000000..ecad88d --- /dev/null +++ b/Documentation/PCI/PCI-DMA-mapping.txt @@ -0,0 +1,766 @@ + Dynamic DMA mapping + =================== + + David S. Miller + Richard Henderson + Jakub Jelinek + +This document describes the DMA mapping system in terms of the pci_ +API. For a similar API that works for generic devices, see +DMA-API.txt. + +Most of the 64bit platforms have special hardware that translates bus +addresses (DMA addresses) into physical addresses. This is similar to +how page tables and/or a TLB translates virtual addresses to physical +addresses on a CPU. This is needed so that e.g. PCI devices can +access with a Single Address Cycle (32bit DMA address) any page in the +64bit physical address space. Previously in Linux those 64bit +platforms had to set artificial limits on the maximum RAM size in the +system, so that the virt_to_bus() static scheme works (the DMA address +translation tables were simply filled on bootup to map each bus +address to the physical page __pa(bus_to_virt())). + +So that Linux can use the dynamic DMA mapping, it needs some help from the +drivers, namely it has to take into account that DMA addresses should be +mapped only for the time they are actually used and unmapped after the DMA +transfer. 
+ +The following API will work of course even on platforms where no such +hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on +top of the virt_to_bus interface. + +First of all, you should make sure + +#include <linux/pci.h> + +is in your driver. This file will obtain for you the definition of the +dma_addr_t (which can hold any valid DMA address for the platform) +type which should be used everywhere you hold a DMA (bus) address +returned from the DMA mapping functions. + + What memory is DMA'able? + +The first piece of information you must know is what kernel memory can +be used with the DMA mapping facilities. There has been an unwritten +set of rules regarding this, and this text is an attempt to finally +write them down. + +If you acquired your memory via the page allocator +(i.e. __get_free_page*()) or the generic memory allocators +(i.e. kmalloc() or kmem_cache_alloc()) then you may DMA to/from +that memory using the addresses returned from those routines. + +This means specifically that you may _not_ use the memory/addresses +returned from vmalloc() for DMA. It is possible to DMA to the +_underlying_ memory mapped into a vmalloc() area, but this requires +walking page tables to get the physical addresses, and then +translating each of those pages back to a kernel address using +something like __va(). [ EDIT: Update this when we integrate +Gerd Knorr's generic code which does this. ] + +This rule also means that you may use neither kernel image addresses +(items in data/text/bss segments), nor module image addresses, nor +stack addresses for DMA. These could all be mapped somewhere entirely +different than the rest of physical memory. Even if those classes of +memory could physically work with DMA, you'd need to ensure the I/O +buffers were cacheline-aligned. Without that, you'd see cacheline +sharing problems (data corruption) on CPUs with DMA-incoherent caches. 
+(The CPU could write to one word, DMA would write to a different one +in the same cache line, and one of them could be overwritten.) + +Also, this means that you cannot take the return of a kmap() +call and DMA to/from that. This is similar to vmalloc(). + +What about block I/O and networking buffers? The block I/O and +networking subsystems make sure that the buffers they use are valid +for you to DMA from/to. + + DMA addressing limitations + +Does your device have any DMA addressing limitations? For example, is +your device only capable of driving the low order 24-bits of address +on the PCI bus for SAC DMA transfers? If so, you need to inform the +PCI layer of this fact. + +By default, the kernel assumes that your device can address the full +32-bits in a SAC cycle. For a 64-bit DAC capable device, this needs +to be increased. And for a device with limitations, as discussed in +the previous paragraph, it needs to be decreased. + +pci_alloc_consistent() by default will return 32-bit DMA addresses. +PCI-X specification requires PCI-X devices to support 64-bit +addressing (DAC) for all transactions. And at least one platform (SGI +SN2) requires 64-bit consistent allocations to operate correctly when +the IO bus is in PCI-X mode. Therefore, like with pci_set_dma_mask(), +it's good practice to call pci_set_consistent_dma_mask() to set the +appropriate mask even if your device only supports 32-bit DMA +(default) and especially if it's a PCI-X device. + +For correct operation, you must interrogate the PCI layer in your +device probe routine to see if the PCI controller on the machine can +properly support the DMA addressing limitation your device has. It is +good style to do this even if your device holds the default setting, +because this shows that you did think about these issues wrt. your +device. 
+ +The query is performed via a call to pci_set_dma_mask(): + + int pci_set_dma_mask(struct pci_dev *pdev, u64 device_mask); + +The query for consistent allocations is performed via a call to +pci_set_consistent_dma_mask(): + + int pci_set_consistent_dma_mask(struct pci_dev *pdev, u64 device_mask); + +Here, pdev is a pointer to the PCI device struct of your device, and +device_mask is a bit mask describing which bits of a PCI address your +device supports. It returns zero if your card can perform DMA +properly on the machine given the address mask you provided. + +If it returns non-zero, your device cannot perform DMA properly on +this platform, and attempting to do so will result in undefined +behavior. You must either use a different mask, or not use DMA. + +This means that in the failure case, you have three options: + +1) Use another DMA mask, if possible (see below). +2) Use some non-DMA mode for data transfer, if possible. +3) Ignore this device and do not initialize it. + +It is recommended that your driver print a kernel KERN_WARNING message +when you end up performing either #2 or #3. In this manner, if a user +of your driver reports that performance is bad or that the device is not +even detected, you can ask them for the kernel messages to find out +exactly why. + +The standard 32-bit addressing PCI device would do something like +this: + + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + printk(KERN_WARNING + "mydev: No suitable DMA available.\n"); + goto ignore_this_device; + } + +Another common scenario is a 64-bit capable device. The approach +here is to try for 64-bit DAC addressing, but back down to a +32-bit mask should that fail. The PCI platform code may fail the +64-bit mask not because the platform is not capable of 64-bit +addressing. Rather, it may fail in this case simply because +32-bit SAC addressing is done more efficiently than DAC addressing. +Sparc64 is one platform which behaves in this way. 
+ +Here is how you would handle a 64-bit capable device which can drive +all 64-bits when accessing streaming DMA: + + int using_dac; + + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + using_dac = 1; + } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + using_dac = 0; + } else { + printk(KERN_WARNING + "mydev: No suitable DMA available.\n"); + goto ignore_this_device; + } + +If a card is capable of using 64-bit consistent allocations as well, +the case would look like this: + + int using_dac, consistent_using_dac; + + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + using_dac = 1; + consistent_using_dac = 1; + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + using_dac = 0; + consistent_using_dac = 0; + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + } else { + printk(KERN_WARNING + "mydev: No suitable DMA available.\n"); + goto ignore_this_device; + } + +pci_set_consistent_dma_mask() will always be able to set the same or a +smaller mask as pci_set_dma_mask(). However for the rare case that a +device driver only uses consistent allocations, one would have to +check the return value from pci_set_consistent_dma_mask(). + +Finally, if your device can only drive the low 24-bits of +address during PCI bus mastering you might do something like: + + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(24))) { + printk(KERN_WARNING + "mydev: 24-bit DMA addressing not available.\n"); + goto ignore_this_device; + } + +When pci_set_dma_mask() is successful, and returns zero, the PCI layer +saves away this mask you have provided. The PCI layer will use this +information later when you make DMA mappings. + +There is a case which we are aware of at this time, which is worth +mentioning in this documentation. 
If your device supports multiple +functions (for example a sound card provides playback and record +functions) and the various different functions have _different_ +DMA addressing limitations, you may wish to probe each mask and +only provide the functionality which the machine can handle. It +is important that the last call to pci_set_dma_mask() be for the +most specific mask. + +Here is pseudo-code showing how this might be done: + + #define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32) + #define RECORD_ADDRESS_BITS DMA_BIT_MASK(24) + + struct my_sound_card *card; + struct pci_dev *pdev; + + ... + if (!pci_set_dma_mask(pdev, PLAYBACK_ADDRESS_BITS)) { + card->playback_enabled = 1; + } else { + card->playback_enabled = 0; + printk(KERN_WARNING "%s: Playback disabled due to DMA limitations.\n", + card->name); + } + if (!pci_set_dma_mask(pdev, RECORD_ADDRESS_BITS)) { + card->record_enabled = 1; + } else { + card->record_enabled = 0; + printk(KERN_WARNING "%s: Record disabled due to DMA limitations.\n", + card->name); + } + +A sound card was used as an example here because this genre of PCI +devices seems to be littered with ISA chips given a PCI front end, +and thus retaining the 16MB DMA addressing limitations of ISA. + + Types of DMA mappings + +There are two types of DMA mappings: + +- Consistent DMA mappings which are usually mapped at driver + initialization, unmapped at the end and for which the hardware should + guarantee that the device and the CPU can access the data + in parallel and will see updates made by each other without any + explicit software flushing. + + Think of "consistent" as "synchronous" or "coherent". + + The current default is to return consistent memory in the low 32 + bits of the PCI bus space. However, for future compatibility you + should set the consistent mask even if this default is fine for your + driver. + + Good examples of what to use consistent mappings for are: + + - Network card DMA ring descriptors. 
+ - SCSI adapter mailbox command data structures. + - Device firmware microcode executed out of + main memory. + + The invariant these examples all require is that any CPU store + to memory is immediately visible to the device, and vice + versa. Consistent mappings guarantee this. + + IMPORTANT: Consistent DMA memory does not preclude the usage of + proper memory barriers. The CPU may reorder stores to + consistent memory just as it may normal memory. Example: + if it is important for the device to see the first word + of a descriptor updated before the second, you must do + something like: + + desc->word0 = address; + wmb(); + desc->word1 = DESC_VALID; + + in order to get correct behavior on all platforms. + + Also, on some platforms your driver may need to flush CPU write + buffers in much the same way as it needs to flush write buffers + found in PCI bridges (such as by reading a register's value + after writing it). + +- Streaming DMA mappings which are usually mapped for one DMA transfer, + unmapped right after it (unless you use pci_dma_sync_* below) and for which + hardware can optimize for sequential accesses. + + Think of "streaming" as "asynchronous" or "outside the coherency + domain". + + Good examples of what to use streaming mappings for are: + + - Networking buffers transmitted/received by a device. + - Filesystem buffers written/read by a SCSI device. + + The interfaces for using this type of mapping were designed in + such a way that an implementation can make whatever performance + optimizations the hardware allows. To this end, when using + such mappings you must be explicit about what you want to happen. + +Neither type of DMA mapping has alignment restrictions that come +from PCI, although some devices may have such restrictions. +Also, systems with caches that aren't DMA-coherent will work better +when the underlying buffers don't share cache lines with other data. + + + Using Consistent DMA mappings.
+ +To allocate and map large (PAGE_SIZE or so) consistent DMA regions, +you should do: + + dma_addr_t dma_handle; + + cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle); + +where pdev is a struct pci_dev *. This may be called in interrupt context. +You should use dma_alloc_coherent (see DMA-API.txt) for buses +where devices don't have struct pci_dev (like ISA, EISA). + +This argument is needed because the DMA translations may be bus +specific (and often is private to the bus which the device is attached +to). + +Size is the length of the region you want to allocate, in bytes. + +This routine will allocate RAM for that region, so it acts similarly to +__get_free_pages (but takes size instead of a page order). If your +driver needs regions sized smaller than a page, you may prefer using +the pci_pool interface, described below. + +The consistent DMA mapping interfaces, for non-NULL pdev, will by +default return a DMA address which is SAC (Single Address Cycle) +addressable. Even if the device indicates (via PCI dma mask) that it +may address the upper 32-bits and thus perform DAC cycles, consistent +allocation will only return > 32-bit PCI addresses for DMA if the +consistent dma mask has been explicitly changed via +pci_set_consistent_dma_mask(). This is true of the pci_pool interface +as well. + +pci_alloc_consistent returns two values: the virtual address which you +can use to access it from the CPU and dma_handle which you pass to the +card. + +The cpu return address and the DMA bus master address are both +guaranteed to be aligned to the smallest PAGE_SIZE order which +is greater than or equal to the requested size. This invariant +exists (for example) to guarantee that if you allocate a chunk +which is smaller than or equal to 64 kilobytes, the extent of the +buffer you receive will not cross a 64K boundary. 
+ +To unmap and free such a DMA region, you call: + + pci_free_consistent(pdev, size, cpu_addr, dma_handle); + +where pdev, size are the same as in the above call and cpu_addr and +dma_handle are the values pci_alloc_consistent returned to you. +This function may not be called in interrupt context. + +If your driver needs lots of smaller memory regions, you can write +custom code to subdivide pages returned by pci_alloc_consistent, +or you can use the pci_pool API to do that. A pci_pool is like +a kmem_cache, but it uses pci_alloc_consistent not __get_free_pages. +Also, it understands common hardware constraints for alignment, +like queue heads needing to be aligned on N byte boundaries. + +Create a pci_pool like this: + + struct pci_pool *pool; + + pool = pci_pool_create(name, pdev, size, align, alloc); + +The "name" is for diagnostics (like a kmem_cache name); pdev and size +are as above. The device's hardware alignment requirement for this +type of data is "align" (which is expressed in bytes, and must be a +power of two). If your device has no boundary crossing restrictions, +pass 0 for alloc; passing 4096 says memory allocated from this pool +must not cross 4KByte boundaries (but at that time it may be better to +go for pci_alloc_consistent directly instead). + +Allocate memory from a pci pool like this: + + cpu_addr = pci_pool_alloc(pool, flags, &dma_handle); + +flags are SLAB_KERNEL if blocking is permitted (not in_interrupt nor +holding SMP locks), SLAB_ATOMIC otherwise. Like pci_alloc_consistent, +this returns two values, cpu_addr and dma_handle. + +Free memory that was allocated from a pci_pool like this: + + pci_pool_free(pool, cpu_addr, dma_handle); + +where pool is what you passed to pci_pool_alloc, and cpu_addr and +dma_handle are the values pci_pool_alloc returned. This function +may be called in interrupt context. 
+ +Destroy a pci_pool by calling: + + pci_pool_destroy(pool); + +Make sure you've called pci_pool_free for all memory allocated +from a pool before you destroy the pool. This function may not +be called in interrupt context. + + DMA Direction + +The interfaces described in subsequent portions of this document +take a DMA direction argument, which is an integer and takes on +one of the following values: + + PCI_DMA_BIDIRECTIONAL + PCI_DMA_TODEVICE + PCI_DMA_FROMDEVICE + PCI_DMA_NONE + +One should provide the exact DMA direction if you know it. + +PCI_DMA_TODEVICE means "from main memory to the PCI device" +PCI_DMA_FROMDEVICE means "from the PCI device to main memory" +It is the direction in which the data moves during the DMA +transfer. + +You are _strongly_ encouraged to specify this as precisely +as you possibly can. + +If you absolutely cannot know the direction of the DMA transfer, +specify PCI_DMA_BIDIRECTIONAL. It means that the DMA can go in +either direction. The platform guarantees that you may legally +specify this, and that it will work, but this may be at the +cost of performance for example. + +The value PCI_DMA_NONE is to be used for debugging. One can +hold this in a data structure before you come to know the +precise direction, and this will help catch cases where your +direction tracking logic has failed to set things up properly. + +Another advantage of specifying this value precisely (outside of +potential platform-specific optimizations of such) is for debugging. +Some platforms actually have a write permission boolean which DMA +mappings can be marked with, much like page protections in the user +program address space. Such platforms can and do report errors in the +kernel logs when the PCI controller hardware detects violation of the +permission setting. + +Only streaming mappings specify a direction, consistent mappings +implicitly have a direction attribute setting of +PCI_DMA_BIDIRECTIONAL. 
+ +The SCSI subsystem tells you the direction to use in the +'sc_data_direction' member of the SCSI command your driver is +working on. + +For Networking drivers, it's a rather simple affair. For transmit +packets, map/unmap them with the PCI_DMA_TODEVICE direction +specifier. For receive packets, just the opposite, map/unmap them +with the PCI_DMA_FROMDEVICE direction specifier. + + Using Streaming DMA mappings + +The streaming DMA mapping routines can be called from interrupt +context. There are two versions of each map/unmap, one which will +map/unmap a single memory region, and one which will map/unmap a +scatterlist. + +To map a single region, you do: + + struct pci_dev *pdev = mydev->pdev; + dma_addr_t dma_handle; + void *addr = buffer->ptr; + size_t size = buffer->len; + + dma_handle = pci_map_single(pdev, addr, size, direction); + +and to unmap it: + + pci_unmap_single(pdev, dma_handle, size, direction); + +You should call pci_unmap_single when the DMA activity is finished, e.g. +from the interrupt which told you that the DMA transfer is done. + +Using cpu pointers like this for single mappings has a disadvantage, +you cannot reference HIGHMEM memory in this way. Thus, there is a +map/unmap interface pair akin to pci_{map,unmap}_single. These +interfaces deal with page/offset pairs instead of cpu pointers. +Specifically: + + struct pci_dev *pdev = mydev->pdev; + dma_addr_t dma_handle; + struct page *page = buffer->page; + unsigned long offset = buffer->offset; + size_t size = buffer->len; + + dma_handle = pci_map_page(pdev, page, offset, size, direction); + + ... + + pci_unmap_page(pdev, dma_handle, size, direction); + +Here, "offset" means byte offset within the given page. 
+ +With scatterlists, you map a region gathered from several regions by: + + int i, count = pci_map_sg(pdev, sglist, nents, direction); + struct scatterlist *sg; + + for_each_sg(sglist, sg, count, i) { + hw_address[i] = sg_dma_address(sg); + hw_len[i] = sg_dma_len(sg); + } + +where nents is the number of entries in the sglist. + +The implementation is free to merge several consecutive sglist entries +into one (e.g. if DMA mapping is done with PAGE_SIZE granularity, any +consecutive sglist entries can be merged into one provided the first one +ends and the second one starts on a page boundary - in fact this is a huge +advantage for cards which either cannot do scatter-gather or have very +limited number of scatter-gather entries) and returns the actual number +of sg entries it mapped them to. On failure 0 is returned. + +Then you should loop count times (note: this can be less than nents times) +and use sg_dma_address() and sg_dma_len() macros where you previously +accessed sg->address and sg->length as shown above. + +To unmap a scatterlist, just call: + + pci_unmap_sg(pdev, sglist, nents, direction); + +Again, make sure DMA activity has already finished. + +PLEASE NOTE: The 'nents' argument to the pci_unmap_sg call must be + the _same_ one you passed into the pci_map_sg call, + it should _NOT_ be the 'count' value _returned_ from the + pci_map_sg call. + +Every pci_map_{single,sg} call should have its pci_unmap_{single,sg} +counterpart, because the bus address space is a shared resource (although +in some ports the mapping is per each BUS so fewer devices contend for the +same bus address space) and you could render the machine unusable by eating +all bus addresses. + +If you need to use the same streaming DMA region multiple times and touch +the data in between the DMA transfers, the buffer needs to be synced +properly in order for the cpu and device to see the most up-to-date and +correct copy of the DMA buffer.
+ +So, firstly, just map it with pci_map_{single,sg}, and after each DMA +transfer call either: + + pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction); + +or: + + pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction); + +as appropriate. + +Then, if you wish to let the device get at the DMA area again, +finish accessing the data with the cpu, and then before actually +giving the buffer to the hardware call either: + + pci_dma_sync_single_for_device(pdev, dma_handle, size, direction); + +or: + + pci_dma_sync_sg_for_device(dev, sglist, nents, direction); + +as appropriate. + +After the last DMA transfer call one of the DMA unmap routines +pci_unmap_{single,sg}. If you don't touch the data from the first pci_map_* +call till pci_unmap_*, then you don't have to call the pci_dma_sync_* +routines at all. + +Here is pseudo code which shows a situation in which you would need +to use the pci_dma_sync_*() interfaces. + + my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len) + { + dma_addr_t mapping; + + mapping = pci_map_single(cp->pdev, buffer, len, PCI_DMA_FROMDEVICE); + + cp->rx_buf = buffer; + cp->rx_len = len; + cp->rx_dma = mapping; + + give_rx_buf_to_card(cp); + } + + ... + + my_card_interrupt_handler(int irq, void *devid, struct pt_regs *regs) + { + struct my_card *cp = devid; + + ... + if (read_card_status(cp) == RX_BUF_TRANSFERRED) { + struct my_card_header *hp; + + /* Examine the header to see if we wish + * to accept the data. But synchronize + * the DMA transfer with the CPU first + * so that we see updated contents. + */ + pci_dma_sync_single_for_cpu(cp->pdev, cp->rx_dma, + cp->rx_len, + PCI_DMA_FROMDEVICE); + + /* Now it is safe to examine the buffer. 
*/ + hp = (struct my_card_header *) cp->rx_buf; + if (header_is_ok(hp)) { + pci_unmap_single(cp->pdev, cp->rx_dma, cp->rx_len, + PCI_DMA_FROMDEVICE); + pass_to_upper_layers(cp->rx_buf); + make_and_setup_new_rx_buf(cp); + } else { + /* Just sync the buffer and give it back + * to the card. + */ + pci_dma_sync_single_for_device(cp->pdev, + cp->rx_dma, + cp->rx_len, + PCI_DMA_FROMDEVICE); + give_rx_buf_to_card(cp); + } + } + } + +Drivers converted fully to this interface should not use virt_to_bus any +longer, nor should they use bus_to_virt. Some drivers have to be changed a +little bit, because there is no longer an equivalent to bus_to_virt in the +dynamic DMA mapping scheme - you have to always store the DMA addresses +returned by the pci_alloc_consistent, pci_pool_alloc, and pci_map_single +calls (pci_map_sg stores them in the scatterlist itself if the platform +supports dynamic DMA mapping in hardware) in your driver structures and/or +in the card registers. + +All PCI drivers should be using these interfaces with no exceptions. +It is planned to completely remove virt_to_bus() and bus_to_virt() as +they are entirely deprecated. Some ports already do not provide these +as it is impossible to correctly support them. + + Optimizing Unmap State Space Consumption + +On many platforms, pci_unmap_{single,page}() is simply a nop. +Therefore, keeping track of the mapping address and length is a waste +of space. Instead of filling your drivers up with ifdefs and the like +to "work around" this (which would defeat the whole purpose of a +portable API) the following facilities are provided. + +Actually, instead of describing the macros one by one, we'll +transform some example code. + +1) Use DECLARE_PCI_UNMAP_{ADDR,LEN} in state saving structures. 
+ Example, before: + + struct ring_state { + struct sk_buff *skb; + dma_addr_t mapping; + __u32 len; + }; + + after: + + struct ring_state { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(mapping) + DECLARE_PCI_UNMAP_LEN(len) + }; + + NOTE: DO NOT put a semicolon at the end of the DECLARE_*() + macro. + +2) Use pci_unmap_{addr,len}_set to set these values. + Example, before: + + ringp->mapping = FOO; + ringp->len = BAR; + + after: + + pci_unmap_addr_set(ringp, mapping, FOO); + pci_unmap_len_set(ringp, len, BAR); + +3) Use pci_unmap_{addr,len} to access these values. + Example, before: + + pci_unmap_single(pdev, ringp->mapping, ringp->len, + PCI_DMA_FROMDEVICE); + + after: + + pci_unmap_single(pdev, + pci_unmap_addr(ringp, mapping), + pci_unmap_len(ringp, len), + PCI_DMA_FROMDEVICE); + +It really should be self-explanatory. We treat the ADDR and LEN +separately, because it is possible for an implementation to only +need the address in order to perform the unmap operation. + + Platform Issues + +If you are just writing drivers for Linux and do not maintain +an architecture port for the kernel, you can safely skip down +to "Closing". + +1) Struct scatterlist requirements. + + Struct scatterlist must contain, at a minimum, the following + members: + + struct page *page; + unsigned int offset; + unsigned int length; + + The base address is specified by a "page+offset" pair. + + Previous versions of struct scatterlist contained a "void *address" + field that was sometimes used instead of page+offset. As of Linux + 2.5., page+offset is always used, and the "address" field has been + deleted. + +2) More to come... 
+ + Handling Errors + +DMA address space is limited on some architectures and an allocation +failure can be determined by: + +- checking if pci_alloc_consistent returns NULL or pci_map_sg returns 0 + +- checking the returned dma_addr_t of pci_map_single and pci_map_page + by using pci_dma_mapping_error(): + + dma_addr_t dma_handle; + + dma_handle = pci_map_single(pdev, addr, size, direction); + if (pci_dma_mapping_error(pdev, dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + } + + Closing + +This document, and the API itself, would not be in its current +form without the feedback and suggestions from numerous individuals. +We would like to specifically mention, in no particular order, the +following people: + + Russell King + Leo Dagum + Ralf Baechle + Grant Grundler + Jay Estabrook + Thomas Sailer + Andrea Arcangeli + Jens Axboe + David Mosberger-Tang diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index 961a051..a406286 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -1,7 +1,5 @@ 00-INDEX - This file -as-iosched.txt - - Anticipatory IO scheduler barrier.txt - I/O Barriers biodoc.txt diff --git a/Documentation/block/as-iosched.txt b/Documentation/block/as-iosched.txt deleted file mode 100644 index 738b72b..0000000 --- a/Documentation/block/as-iosched.txt +++ /dev/null @@ -1,172 +0,0 @@ -Anticipatory IO scheduler -------------------------- -Nick Piggin 13 Sep 2003 - -Attention! Database servers, especially those using "TCQ" disks should -investigate performance with the 'deadline' IO scheduler. Any system with high -disk performance requirements should do so, in fact. - -If you see unusual performance characteristics of your disk systems, or you -see big performance regressions versus the deadline scheduler, please email -me. Database users don't bother unless you're willing to test a lot of patches -from me ;) its a known issue.
- -Also, users with hardware RAID controllers, doing striping, may find -highly variable performance results with using the as-iosched. The -as-iosched anticipatory implementation is based on the notion that a disk -device has only one physical seeking head. A striped RAID controller -actually has a head for each physical device in the logical RAID device. - -However, setting the antic_expire (see tunable parameters below) produces -very similar behavior to the deadline IO scheduler. - -Selecting IO schedulers ------------------------ -Refer to Documentation/block/switching-sched.txt for information on -selecting an io scheduler on a per-device basis. - -Anticipatory IO scheduler Policies ----------------------------------- -The as-iosched implementation implements several layers of policies -to determine when an IO request is dispatched to the disk controller. -Here are the policies outlined, in order of application. - -1. one-way Elevator algorithm. - -The elevator algorithm is similar to that used in deadline scheduler, with -the addition that it allows limited backward movement of the elevator -(i.e. seeks backwards). A seek backwards can occur when choosing between -two IO requests where one is behind the elevator's current position, and -the other is in front of the elevator's position. If the seek distance to -the request in back of the elevator is less than half the seek distance to -the request in front of the elevator, then the request in back can be chosen. -Backward seeks are also limited to a maximum of MAXBACK (1024*1024) sectors. -This favors forward movement of the elevator, while allowing opportunistic -"short" backward seeks. - -2. FIFO expiration times for reads and for writes. - -This is again very similar to the deadline IO scheduler. The expiration -times for requests on these lists is tunable using the parameters read_expire -and write_expire discussed below. 
When a read or a write expires in this way, -the IO scheduler will interrupt its current elevator sweep or read anticipation -to service the expired request. - -3. Read and write request batching - -A batch is a collection of read requests or a collection of write -requests. The as scheduler alternates dispatching read and write batches -to the driver. In the case a read batch, the scheduler submits read -requests to the driver as long as there are read requests to submit, and -the read batch time limit has not been exceeded (read_batch_expire). -The read batch time limit begins counting down only when there are -competing write requests pending. - -In the case of a write batch, the scheduler submits write requests to -the driver as long as there are write requests available, and the -write batch time limit has not been exceeded (write_batch_expire). -However, the length of write batches will be gradually shortened -when read batches frequently exceed their time limit. - -When changing between batch types, the scheduler waits for all requests -from the previous batch to complete before scheduling requests for the -next batch. - -The read and write fifo expiration times described in policy 2 above -are checked only when in scheduling IO of a batch for the corresponding -(read/write) type. So for example, the read FIFO timeout values are -tested only during read batches. Likewise, the write FIFO timeout -values are tested only during write batches. For this reason, -it is generally not recommended for the read batch time -to be longer than the write expiration time, nor for the write batch -time to exceed the read expiration time (see tunable parameters below). - -When the IO scheduler changes from a read to a write batch, -it begins the elevator from the request that is on the head of the -write expiration FIFO. Likewise, when changing from a write batch to -a read batch, scheduler begins the elevator from the first entry -on the read expiration FIFO. - -4. 
Read anticipation. - -Read anticipation occurs only when scheduling a read batch. -This implementation of read anticipation allows only one read request -to be dispatched to the disk controller at a time. In -contrast, many write requests may be dispatched to the disk controller -at a time during a write batch. It is this characteristic that can make -the anticipatory scheduler perform anomalously with controllers supporting -TCQ, or with hardware striped RAID devices. Setting the antic_expire -queue parameter (see below) to zero disables this behavior, and the -anticipatory scheduler behaves essentially like the deadline scheduler. - -When read anticipation is enabled (antic_expire is not zero), reads -are dispatched to the disk controller one at a time. -At the end of each read request, the IO scheduler examines its next -candidate read request from its sorted read list. If that next request -is from the same process as the request that just completed, -or if the next request in the queue is "very close" to the -just completed request, it is dispatched immediately. Otherwise, -statistics (average think time, average seek distance) on the process -that submitted the just completed request are examined. If it seems -likely that that process will submit another request soon, and that -request is likely to be near the just completed request, then the IO -scheduler will stop dispatching more read requests for up to (antic_expire) -milliseconds, hoping that process will submit a new request near the one -that just completed. If such a request is made, then it is dispatched -immediately. If the antic_expire wait time expires, then the IO scheduler -will dispatch the next read request from the sorted read queue. - -To decide whether an anticipatory wait is worthwhile, the scheduler -maintains statistics for each process that can be used to compute -mean "think time" (the time between read requests), and mean seek -distance for that process. 
One observation is that these statistics -are associated with each process, but those statistics are not associated -with a specific IO device. So for example, if a process is doing IO -on several file systems on separate devices, the statistics will be -a combination of IO behavior from all those devices. - - -Tuning the anticipatory IO scheduler ------------------------------------- -When using 'as', the anticipatory IO scheduler there are 5 parameters under -/sys/block/*/queue/iosched/. All are units of milliseconds. - -The parameters are: -* read_expire - Controls how long until a read request becomes "expired". It also controls the - interval between which expired requests are served, so set to 50, a request - might take anywhere < 100ms to be serviced _if_ it is the next on the - expired list. Obviously request expiration strategies won't make the disk - go faster. The result basically equates to the timeslice a single reader - gets in the presence of other IO. 100*((seek time / read_expire) + 1) is - very roughly the % streaming read efficiency your disk should get with - multiple readers. - -* read_batch_expire - Controls how much time a batch of reads is given before pending writes are - served. A higher value is more efficient. This might be set below read_expire - if writes are to be given higher priority than reads, but reads are to be - as efficient as possible when there are no writes. Generally though, it - should be some multiple of read_expire. - -* write_expire, and -* write_batch_expire are equivalent to the above, for writes. - -* antic_expire - Controls the maximum amount of time we can anticipate a good read (one - with a short seek distance from the most recently completed request) before - giving up. Many other factors may cause anticipation to be stopped early, - or some processes will not be "anticipated" at all. 
Should be a bit higher - for big seek time devices though not a linear correspondence - most - processes have only a few ms thinktime. - -In addition to the tunables above there is a read-only file named est_time -which, when read, will show: - - - The probability of a task exiting without a cooperating task - submitting an anticipated IO. - - - The current mean think time. - - - The seek distance used to determine if an incoming IO is better. - diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 8d2158a..6fab97e 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address do not have a corresponding kernel virtual address space mapping) and low-memory pages. -Note: Please refer to Documentation/DMA-mapping.txt for a discussion +Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion on PCI high mem DMA aspects and mapping of scatter gather lists, and support for 64 bit PCI. diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index af6885c..e1def17 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -196,7 +196,7 @@ nobarrier This also requires an IO stack which can support also be used to enable or disable barriers, for consistency with other ext4 mount options. -inode_readahead=n This tuning parameter controls the maximum +inode_readahead_blks=n This tuning parameter controls the maximum number of inode table blocks that ext4's inode table readahead algorithm will pre-read into the buffer cache. The default value is 32 blocks. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5ba4d9d..736d456 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -240,7 +240,7 @@ and is between 256 and 4096 characters. 
It is defined in the file acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs } + old_ordering, s4_nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file of _PTS is used by default). s4_nonvs prevents the kernel from saving/restoring the ACPI NVS memory during hibernation. + sci_force_enable causes the kernel to set SCI_EN directly + on resume from S1/S3 (which is against the ACPI spec, + but some broken systems don't work without it). acpi_use_timer_override [HW,ACPI] Use timer override. For some broken Nvidia NF5 boards diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index e1a1141..2811e45 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -685,7 +685,7 @@ struct kvm_vcpu_events { __u8 pad; } nmi; __u32 sipi_vector; - __u32 flags; /* must be zero */ + __u32 flags; }; 4.30 KVM_SET_VCPU_EVENTS @@ -701,6 +701,14 @@ vcpu. See KVM_GET_VCPU_EVENTS for the data structure. +Fields that may be modified asynchronously by running VCPUs can be excluded +from the update. These fields are nmi.pending and sipi_vector. Keep the +corresponding bits in the flags field cleared to suppress overwriting the +current in-kernel state. The bits are: + +KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel +KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector + 5. The kvm_run structure diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 169091f..75afa12 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1092,8 +1092,8 @@ WARNING: its level up and down at every change. 
-Volume control --------------- +Volume control (Console Audio control) +-------------------------------------- procfs: /proc/acpi/ibm/volume ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC" @@ -1110,9 +1110,53 @@ the desktop environment to just provide on-screen-display feedback. Software volume control should be done only in the main AC97/HDA mixer. -This feature allows volume control on ThinkPad models with a digital -volume knob (when available, not all models have it), as well as -mute/unmute control. The available commands are: + +About the ThinkPad Console Audio control: + +ThinkPads have a built-in amplifier and muting circuit that drives the +console headphone and speakers. This circuit is after the main AC97 +or HDA mixer in the audio path, and under exclusive control of the +firmware. + +ThinkPads have three special hotkeys to interact with the console +audio control: volume up, volume down and mute. + +It is worth noting that the normal way the mute function works (on +ThinkPads that do not have a "mute LED") is: + +1. Press mute to mute. It will *always* mute, you can press it as + many times as you want, and the sound will remain mute. + +2. Press either volume key to unmute the ThinkPad (it will _not_ + change the volume, it will just unmute). + +This is a very superior design when compared to the cheap software-only +mute-toggle solution found on normal consumer laptops: you can be +absolutely sure the ThinkPad will not make noise if you press the mute +button, no matter the previous state. + +The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain +amplifiers driving the speakers and headphone output, and the firmware +also handles volume control for the headphone and speakers on these +ThinkPads without any help from the operating system (this volume +control stage exists after the main AC97 or HDA mixer in the audio +path). 
+ +The newer Lenovo models only have firmware mute control, and depend on +the main HDA mixer to do volume control (which is done by the operating +system). In this case, the volume keys are filtered out for unmute +key press (there are some firmware bugs in this area) and delivered as +normal key presses to the operating system (thinkpad-acpi is not +involved). + + +The ThinkPad-ACPI volume control: + +The preferred way to interact with the Console Audio control is the +ALSA interface. + +The legacy procfs interface allows one to read the current state, +and if volume control is enabled, accepts the following commands: echo up >/proc/acpi/ibm/volume echo down >/proc/acpi/ibm/volume @@ -1121,12 +1165,10 @@ mute/unmute control. The available commands are: echo 'level ' >/proc/acpi/ibm/volume The number range is 0 to 14 although not all of them may be -distinct. The unmute the volume after the mute command, use either the +distinct. To unmute the volume after the mute command, use either the up or down command (the level command will not unmute the volume), or the unmute command. -The current volume level and mute state is shown in the file. 
- You can use the volume_capabilities parameter to tell the driver whether your thinkpad has volume control or mute-only control: volume_capabilities=1 for mixers with mute and volume control, diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt index 719a819..07301de 100644 --- a/Documentation/sound/alsa/Procfile.txt +++ b/Documentation/sound/alsa/Procfile.txt @@ -95,7 +95,7 @@ card*/pcm*/xrun_debug It takes an integer value, can be changed by writing to this file, such as - # cat 5 > /proc/asound/card0/pcm0p/xrun_debug + # echo 5 > /proc/asound/card0/pcm0p/xrun_debug The value consists of the following bit flags: bit 0 = Enable XRUN/jiffies debug messages diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 641a1ef..239f14b 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -53,14 +53,14 @@ size of the mcount call that is embedded in the function). For example, if the function foo() calls bar(), when the bar() function calls mcount(), the arguments mcount() will pass to the tracer are: "frompc" - the address bar() will use to return to foo() - "selfpc" - the address bar() (with _mcount() size adjustment) + "selfpc" - the address bar() (with mcount() size adjustment) Also keep in mind that this mcount function will be called *a lot*, so optimizing for the default case of no tracer will help the smooth running of your system when tracing is disabled. So the start of the mcount function is -typically the bare min with checking things before returning. That also means -the code flow should usually kept linear (i.e. no branching in the nop case). -This is of course an optimization and not a hard requirement. +typically the bare minimum with checking things before returning. That also +means the code flow should usually be kept linear (i.e. no branching in the nop +case). This is of course an optimization and not a hard requirement. 
Here is some pseudo code that should help (these functions should actually be implemented in assembly): @@ -131,10 +131,10 @@ some functions to save (hijack) and restore the return address. The mcount function should check the function pointers ftrace_graph_return (compare to ftrace_stub) and ftrace_graph_entry (compare to -ftrace_graph_entry_stub). If either of those are not set to the relevant stub +ftrace_graph_entry_stub). If either of those is not set to the relevant stub function, call the arch-specific function ftrace_graph_caller which in turn calls the arch-specific function prepare_ftrace_return. Neither of these -function names are strictly required, but you should use them anyways to stay +function names is strictly required, but you should use them anyway to stay consistent across the architecture ports -- easier to compare & contrast things. @@ -144,7 +144,7 @@ but the first argument should be a pointer to the "frompc". Typically this is located on the stack. This allows the function to hijack the return address temporarily to have it point to the arch-specific function return_to_handler. That function will simply call the common ftrace_return_to_handler function and -that will return the original return address with which, you can return to the +that will return the original return address with which you can return to the original call site. Here is the updated mcount pseudo code: diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.txt index 162effb..664e738 100644 --- a/Documentation/trace/mmiotrace.txt +++ b/Documentation/trace/mmiotrace.txt @@ -44,7 +44,8 @@ Check for lost events. Usage ----- -Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges) +Make sure debugfs is mounted to /sys/kernel/debug. +If not (requires root privileges): $ mount -t debugfs debugfs /sys/kernel/debug Check that the driver you are about to trace is not loaded. 
@@ -91,7 +92,7 @@ $ dmesg > dmesg.txt $ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt and then send the .tar.gz file. The trace compresses considerably. Replace "pciid" and "nick" with the PCI ID or model name of your piece of hardware -under investigation and your nick name. +under investigation and your nickname. How Mmiotrace Works @@ -100,7 +101,7 @@ How Mmiotrace Works Access to hardware IO-memory is gained by mapping addresses from PCI bus by calling one of the ioremap_*() functions. Mmiotrace is hooked into the __ioremap() function and gets called whenever a mapping is created. Mapping is -an event that is recorded into the trace log. Note, that ISA range mappings +an event that is recorded into the trace log. Note that ISA range mappings are not caught, since the mapping always exists and is returned directly. MMIO accesses are recorded via page faults. Just before __ioremap() returns, @@ -122,11 +123,11 @@ Trace Log Format ---------------- The raw log is text and easily filtered with e.g. grep and awk. One record is -one line in the log. A record starts with a keyword, followed by keyword -dependant arguments. Arguments are separated by a space, or continue until the +one line in the log. A record starts with a keyword, followed by keyword- +dependent arguments. Arguments are separated by a space, or continue until the end of line. The format for version 20070824 is as follows: -Explanation Keyword Space separated arguments +Explanation Keyword Space-separated arguments --------------------------------------------------------------------------- read event R width, timestamp, map id, physical, value, PC, PID @@ -136,7 +137,7 @@ iounmap event UNMAP timestamp, map id, PC, PID marker MARK timestamp, text version VERSION the string "20070824" info for reader LSPCI one line from lspci -v -PCI address map PCIDEV space separated /proc/bus/pci/devices data +PCI address map PCIDEV space-separated /proc/bus/pci/devices data unk. 
opcode UNKNOWN timestamp, map id, physical, data, PC, PID Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt index 5eb4e48..87bee3c 100644 --- a/Documentation/trace/tracepoint-analysis.txt +++ b/Documentation/trace/tracepoint-analysis.txt @@ -10,8 +10,8 @@ Tracepoints (see Documentation/trace/tracepoints.txt) can be used without creating custom kernel modules to register probe functions using the event tracing infrastructure. -Simplistically, tracepoints will represent an important event that when can -be taken in conjunction with other tracepoints to build a "Big Picture" of +Simplistically, tracepoints represent important events that can be +taken in conjunction with other tracepoints to build a "Big Picture" of what is going on within the system. There are a large number of methods for gathering and interpreting these events. Lacking any current Best Practises, this document describes some of the methods that can be used. @@ -33,12 +33,12 @@ calling will give a fair indication of the number of events available. -2.2 PCL +2.2 PCL (Performance Counters for Linux) ------- -Discovery and enumeration of all counters and events, including tracepoints +Discovery and enumeration of all counters and events, including tracepoints, are available with the perf tool. Getting a list of available events is a -simple case of +simple case of: $ perf list 2>&1 | grep Tracepoint ext4:ext4_free_inode [Tracepoint event] @@ -49,19 +49,19 @@ simple case of [ .... remaining output snipped .... ] -2. Enabling Events +3. Enabling Events ================== -2.1 System-Wide Event Enabling +3.1 System-Wide Event Enabling ------------------------------ See Documentation/trace/events.txt for a proper description on how events can be enabled system-wide. 
A short example of enabling all events related -to page allocation would look something like +to page allocation would look something like: $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done -2.2 System-Wide Event Enabling with SystemTap +3.2 System-Wide Event Enabling with SystemTap --------------------------------------------- In SystemTap, tracepoints are accessible using the kernel.trace() function @@ -86,7 +86,7 @@ were allocating the pages. print_count() } -2.3 System-Wide Event Enabling with PCL +3.3 System-Wide Event Enabling with PCL --------------------------------------- By specifying the -a switch and analysing sleep, the system-wide events @@ -107,16 +107,16 @@ for a duration of time can be examined. Similarly, one could execute a shell and exit it as desired to get a report at that point. -2.4 Local Event Enabling +3.4 Local Event Enabling ------------------------ Documentation/trace/ftrace.txt describes how to enable events on a per-thread basis using set_ftrace_pid. -2.5 Local Event Enablement with PCL +3.5 Local Event Enablement with PCL ----------------------------------- -Events can be activate and tracked for the duration of a process on a local +Events can be activated and tracked for the duration of a process on a local basis using PCL such as follows. $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ @@ -131,18 +131,18 @@ basis using PCL such as follows. 0.973913387 seconds time elapsed -3. Event Filtering +4. Event Filtering ================== Documentation/trace/ftrace.txt covers in-depth how to filter events in ftrace. Obviously using grep and awk of trace_pipe is an option as well as any script reading trace_pipe. -4. Analysing Event Variances with PCL +5. Analysing Event Variances with PCL ===================================== Any workload can exhibit variances between runs and it can be important -to know what the standard deviation in. 
By and large, this is left to the +to know what the standard deviation is. By and large, this is left to the performance analyst to do it by hand. In the event that the discrete event occurrences are useful to the performance analyst, then perf can be used. @@ -166,7 +166,7 @@ In the event that some higher-level event is required that depends on some aggregation of discrete events, then a script would need to be developed. Using --repeat, it is also possible to view how events are fluctuating over -time on a system wide basis using -a and sleep. +time on a system-wide basis using -a and sleep. $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ -e kmem:mm_pagevec_free \ @@ -180,7 +180,7 @@ time on a system wide basis using -a and sleep. 1.002251757 seconds time elapsed ( +- 0.005% ) -5. Higher-Level Analysis with Helper Scripts +6. Higher-Level Analysis with Helper Scripts ============================================ When events are enabled the events that are triggering can be read from @@ -190,11 +190,11 @@ be gathered on-line as appropriate. Examples of post-processing might include o Reading information from /proc for the PID that triggered the event o Deriving a higher-level event from a series of lower-level events. - o Calculate latencies between two events + o Calculating latencies between two events Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example script that can read trace_pipe from STDIN or a copy of a trace. When used -on-line, it can be interrupted once to generate a report without existing +on-line, it can be interrupted once to generate a report without exiting and twice to exit. Simplistically, the script just reads STDIN and counts up events but it @@ -212,12 +212,12 @@ also can do more such as processes, the parent process responsible for creating all the helpers can be identified -6. Lower-Level Analysis with PCL +7. 
Lower-Level Analysis with PCL ================================ -There may also be a requirement to identify what functions with a program +There may also be a requirement to identify what functions within a program were generating events within the kernel. To begin this sort of analysis, the -data must be recorded. At the time of writing, this required root +data must be recorded. At the time of writing, this required root: $ perf record -c 1 \ -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ @@ -253,11 +253,11 @@ perf report. # (For more details, try: perf report --sort comm,dso,symbol) # -According to this, the vast majority of events occured triggered on events -within the VDSO. With simple binaries, this will often be the case so lets +According to this, the vast majority of events triggered on events +within the VDSO. With simple binaries, this will often be the case so let's take a slightly different example. In the course of writing this, it was -noticed that X was generating an insane amount of page allocations so lets look -at it +noticed that X was generating an insane amount of page allocations so let's look +at it: $ perf record -c 1 -f \ -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ @@ -280,8 +280,8 @@ This was interrupted after a few seconds and # (For more details, try: perf report --sort comm,dso,symbol) # -So, almost half of the events are occuring in a library. To get an idea which -symbol. +So, almost half of the events are occurring in a library. To get an idea which +symbol: $ perf report --sort comm,dso,symbol # Samples: 27666 @@ -297,7 +297,7 @@ symbol. 0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path 0.00% Xorg [kernel] [k] ftrace_trace_userstack -To see where within the function pixmanFillsse2 things are going wrong +To see where within the function pixmanFillsse2 things are going wrong: $ perf annotate pixmanFillsse2 [ ... 
] diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt index 987f9b0..43a9b06 100644 --- a/Documentation/vgaarbiter.txt +++ b/Documentation/vgaarbiter.txt @@ -103,7 +103,7 @@ I.2 libpciaccess ---------------- To use the vga arbiter char device it was implemented an API inside the -libpciaccess library. One fieldd was added to struct pci_device (each device +libpciaccess library. One field was added to struct pci_device (each device on the system): /* the type of resource decoded by the device */ diff --git a/MAINTAINERS b/MAINTAINERS index 66f5f7d..c22d597 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2169,10 +2169,9 @@ F: drivers/hwmon/f75375s.c F: include/linux/f75375s.h FIREWIRE SUBSYSTEM -M: Kristian Hoegsberg M: Stefan Richter L: linux1394-devel@lists.sourceforge.net -W: http://www.linux1394.org/ +W: http://ieee1394.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6.git S: Maintained F: drivers/firewire/ @@ -2705,22 +2704,14 @@ S: Supported F: drivers/idle/i7300_idle.c IEEE 1394 SUBSYSTEM -M: Ben Collins M: Stefan Richter L: linux1394-devel@lists.sourceforge.net -W: http://www.linux1394.org/ +W: http://ieee1394.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6.git -S: Maintained +S: Obsolete F: Documentation/debugging-via-ohci1394.txt F: drivers/ieee1394/ -IEEE 1394 RAW I/O DRIVER -M: Dan Dennedy -M: Stefan Richter -L: linux1394-devel@lists.sourceforge.net -S: Maintained -F: drivers/ieee1394/raw1394* - IEEE 802.15.4 SUBSYSTEM M: Dmitry Eremin-Solenikov M: Sergey Lapin diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h index 360724d..988911b 100644 --- a/arch/ia64/kvm/vcpu.h +++ b/arch/ia64/kvm/vcpu.h @@ -388,6 +388,9 @@ static inline u64 __gpfn_is_io(u64 gpfn) #define _vmm_raw_spin_lock(x) do {}while(0) #define _vmm_raw_spin_unlock(x) do {}while(0) #else +typedef struct { + volatile unsigned int lock; +} vmm_spinlock_t; #define _vmm_raw_spin_lock(x) 
\ do { \ __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ @@ -405,12 +408,12 @@ static inline u64 __gpfn_is_io(u64 gpfn) #define _vmm_raw_spin_unlock(x) \ do { barrier(); \ - ((spinlock_t *)x)->raw_lock.lock = 0; } \ + ((vmm_spinlock_t *)x)->lock = 0; } \ while (0) #endif -void vmm_spin_lock(spinlock_t *lock); -void vmm_spin_unlock(spinlock_t *lock); +void vmm_spin_lock(vmm_spinlock_t *lock); +void vmm_spin_unlock(vmm_spinlock_t *lock); enum { I_TLB = 1, D_TLB = 2 diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c index f4b4c89..7a62f75 100644 --- a/arch/ia64/kvm/vmm.c +++ b/arch/ia64/kvm/vmm.c @@ -60,12 +60,12 @@ static void __exit kvm_vmm_exit(void) return ; } -void vmm_spin_lock(spinlock_t *lock) +void vmm_spin_lock(vmm_spinlock_t *lock) { _vmm_raw_spin_lock(lock); } -void vmm_spin_unlock(spinlock_t *lock) +void vmm_spin_unlock(vmm_spinlock_t *lock) { _vmm_raw_spin_unlock(lock); } diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index 20b3852..4332f7e 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -182,7 +182,7 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) { u64 i, dirty_pages = 1; u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; - spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); + vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; dirty_pages <<= ps <= PAGE_SHIFT ? 
0 : ps - PAGE_SHIFT; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e8dfdbd..cadbed6 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1107,6 +1107,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { struct dev_archdata *sd = &dev->dev.archdata; + /* Cardbus can call us to add new devices to a bus, so ignore + * those who are already fully discovered + */ + if (dev->is_added) + continue; + /* Setup OF node pointer in archdata */ sd->of_node = pci_device_to_OF_node(dev); @@ -1147,6 +1153,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); +void __devinit pci_fixup_cardbus(struct pci_bus *bus) +{ + /* Now fixup devices on that bus */ + pcibios_setup_bus_devices(bus); +} + + static int skip_isa_ioresource_align(struct pci_dev *dev) { if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) && diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 5598f88..e4beeb3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -390,6 +390,26 @@ static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, { u64 rb = 0, rs = 0; + /* + * According to Book3 2.01 mtsrin is implemented as: + * + * The SLB entry specified by (RB)32:35 is loaded from register + * RS, as follows. 
+ * + * SLBE Bit Source SLB Field + * + * 0:31 0x0000_0000 ESID-0:31 + * 32:35 (RB)32:35 ESID-32:35 + * 36 0b1 V + * 37:61 0x00_0000|| 0b0 VSID-0:24 + * 62:88 (RS)37:63 VSID-25:51 + * 89:91 (RS)33:35 Ks Kp N + * 92 (RS)36 L ((RS)36 must be 0b0) + * 93 0b0 C + */ + + dprintk("KVM MMU: mtsrin(0x%x, 0x%lx)\n", srnum, value); + /* ESID = srnum */ rb |= (srnum & 0xf) << 28; /* Set the valid bit */ @@ -400,7 +420,7 @@ static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, /* VSID = VSID */ rs |= (value & 0xfffffff) << 12; /* flags = flags */ - rs |= ((value >> 27) & 0xf) << 9; + rs |= ((value >> 28) & 0x7) << 9; kvmppc_mmu_book3s_64_slbmte(vcpu, rs, rb); } diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f8ed065..f25bbd3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -9,6 +9,7 @@ targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinu KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 950df43..f46b79f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -254,6 +254,10 @@ struct kvm_reinject_control { __u8 reserved[31]; }; +/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ +#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 +#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 811bfab..bc54fa9 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -31,20 +31,20 @@ * contiguous (although 
various IO spaces may punch holes in * it).. * - * N - Number of bits in the node portion of a socket physical - * address. + * N - Number of bits in the node portion of a socket physical + * address. * - * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of - * routers always have low bit of 1, C/MBricks have low bit - * equal to 0. Most addressing macros that target UV hub chips - * right shift the NASID by 1 to exclude the always-zero bit. - * NASIDs contain up to 15 bits. + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. * * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead * of nasids. * - * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant - * of the nasid for socket usage. + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. * * * NumaLink Global Physical Address Format: @@ -71,12 +71,12 @@ * * * APICID format - * NOTE!!!!!! This is the current format of the APICID. However, code - * should assume that this will change in the future. Use functions - * in this file for all APICID bit manipulations and conversion. + * NOTE!!!!!! This is the current format of the APICID. However, code + * should assume that this will change in the future. Use functions + * in this file for all APICID bit manipulations and conversion. * - * 1111110000000000 - * 5432109876543210 + * 1111110000000000 + * 5432109876543210 * pppppppppplc0cch * sssssssssss * @@ -89,9 +89,9 @@ * Note: Processor only supports 12 bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. 
* - * Unless otherwise specified, all references to APICID refer to - * the FULL value contained in ACPI tables, not the subset in the - * processor APICID register. + * Unless otherwise specified, all references to APICID refer to + * the FULL value contained in ACPI tables, not the subset in the + * processor APICID register. */ @@ -151,16 +151,16 @@ struct uv_hub_info_s { }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* * Local & Global MMR space macros. - * Note: macros are intended to be used ONLY by inline functions - * in this file - not by other kernel code. - * n - NASID (full 15-bit global nasid) - * g - GNODE (full 15-bit global nasid, right shifted 1) - * p - PNODE (local part of nsids, right shifted 1) + * Note: macros are intended to be used ONLY by inline functions + * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) */ #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) @@ -215,8 +215,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); /* * Macros for converting between kernel virtual addresses, socket local physical * addresses, and UV global physical addresses. - * Note: use the standard __pa() & __va() macros for converting - * between socket virtual and socket physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. */ /* socket phys RAM --> UV global physical address */ @@ -287,21 +287,18 @@ static inline int uv_apicid_to_pnode(int apicid) * Access global MMRs using the low memory MMR32 space. 
This region supports * faster MMR access but not all MMRs are accessible in this space. */ -static inline unsigned long *uv_global_mmr32_address(int pnode, - unsigned long offset) +static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR32_BASE | UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr32(int pnode, unsigned long offset, - unsigned long val) +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) { writeq(val, uv_global_mmr32_address(pnode, offset)); } -static inline unsigned long uv_read_global_mmr32(int pnode, - unsigned long offset) +static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) { return readq(uv_global_mmr32_address(pnode, offset)); } @@ -310,21 +307,18 @@ static inline unsigned long uv_read_global_mmr32(int pnode, * Access Global MMR space using the MMR space located at the top of physical * memory. */ -static inline unsigned long *uv_global_mmr64_address(int pnode, - unsigned long offset) +static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR64_BASE | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr64(int pnode, unsigned long offset, - unsigned long val) +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) { writeq(val, uv_global_mmr64_address(pnode, offset)); } -static inline unsigned long uv_read_global_mmr64(int pnode, - unsigned long offset) +static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) { return readq(uv_global_mmr64_address(pnode, offset)); } @@ -338,6 +332,16 @@ static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long o return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); } +static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) 
+{ + writeb(val, uv_global_mmr64_address(pnode, offset)); +} + +static inline unsigned char uv_read_global_mmr8(int pnode, unsigned long offset) +{ + return readb(uv_global_mmr64_address(pnode, offset)); +} + /* * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. @@ -457,11 +461,17 @@ static inline void uv_set_scir_bits(unsigned char value) } } +static inline unsigned long uv_scir_offset(int apicid) +{ + return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f); +} + static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) { if (uv_cpu_hub_info(cpu)->scir.state != value) { + uv_write_global_mmr8(uv_cpu_to_pnode(cpu), + uv_cpu_hub_info(cpu)->scir.offset, value); uv_cpu_hub_info(cpu)->scir.state = value; - uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); } } diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e5086..f996103 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "sci_force_enable", 16) == 0) + acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1dca9c3..fb490ce 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -138,6 +138,11 @@ int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; /* + * Set to true if ACPI table parsing and hardware initialization went properly + */ +static bool amd_iommu_initialized; + +/* * List of protection domains - used during resume */ LIST_HEAD(amd_iommu_pd_list); @@ -929,6 +934,8 @@ static int __init init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); + amd_iommu_initialized = true; + return 0; } @@ -1263,6 +1270,9 @@ static int __init amd_iommu_init(void) if
(acpi_table_parse("IVRS", init_iommu_all) != 0) goto free; + if (!amd_iommu_initialized) + goto free; + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d56b0ef..5f92494 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -629,8 +629,10 @@ void __init uv_system_init(void) uv_rtc_init(); for_each_present_cpu(cpu) { + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + nid = cpu_to_node(cpu); - pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; @@ -651,15 +653,13 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); - printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " - "lcpu %d, blade %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, - lcpu, blade); + printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", + cpu, apicid, pnode, nid, lcpu, blade); } /* Add blade/pnode info for nodes without cpus */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c223b7e..d616c06 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2347,7 +2347,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, PERF_CONTEXT_KERNEL); callchain_store(entry, regs->ip); - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); + dump_trace(NULL, 
regs, NULL, regs->bp, &backtrace_ops, entry); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 98c2cde..c6ee241 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -103,8 +103,8 @@ void show_regs_common(void) if (!product) product = ""; - printk("\n"); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", + printk(KERN_CONT "\n"); + printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9c517b5..37ad1e0 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -139,16 +139,16 @@ void __show_regs(struct pt_regs *regs, int all) show_regs_common(); - printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, smp_processor_id()); print_symbol("EIP is at %s\n", regs->ip); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", + printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); - printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); if (!all) @@ -158,19 +158,19 @@ void __show_regs(struct pt_regs *regs, int all) cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); get_debugreg(d3, 3); - printk("DR0: %08lx DR1: %08lx DR2: %08lx 
DR3: %08lx\n", + printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", + printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", d6, d7); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 52fbd0c..f9e0331 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -161,19 +161,19 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int ds, cs, es; show_regs_common(); - printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); - printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, + printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, regs->flags); - printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->ax, regs->bx, regs->cx); - printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", regs->dx, regs->si, regs->di); - printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); - printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", + printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); - printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", + printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); asm("movl %%ds,%0" : "=r" (ds)); @@ -194,21 +194,21 @@ void __show_regs(struct pt_regs *regs, int all) cr3 = read_cr3(); cr4 = read_cr4(); - printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", fs, fsindex, gs, gsindex, shadowgs); - printk(KERN_INFO "CS: %04x DS: 
%04x ES: %04x CR0: %016lx\n", cs, ds, + printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); - printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, + printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cd60c0b..3063a0c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1150,6 +1150,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) hrtimer_cancel(&apic->lapic_timer.timer); update_divide_count(apic); start_apic_timer(apic); + apic->irr_pending = true; } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a601713..58a0f1e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -455,8 +455,6 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; - pt_element_t gpte; - gpa_t pte_gpa = -1; int level; u64 *sptep; int need_flush = 0; @@ -470,10 +468,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - pte_gpa = (sp->gfn << PAGE_SHIFT); - pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); @@ -492,18 +486,6 @@ static void 
FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); spin_unlock(&vcpu->kvm->mmu_lock); - - if (pte_gpa == -1) - return; - if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, - sizeof(pt_element_t))) - return; - if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { - if (mmu_topup_memory_caches(vcpu)) - return; - kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, - sizeof(pt_element_t), 0); - } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9d06896..6651dbf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1913,7 +1913,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = vcpu->arch.sipi_vector; - events->flags = 0; + events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR); vcpu_put(vcpu); } @@ -1921,7 +1922,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { - if (events->flags) + if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) return -EINVAL; vcpu_load(vcpu); @@ -1938,10 +1940,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_pic_clear_isr_ack(vcpu->kvm); vcpu->arch.nmi_injected = events->nmi.injected; - vcpu->arch.nmi_pending = events->nmi.pending; + if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) + vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) + vcpu->arch.sipi_vector = events->sipi_vector; vcpu_put(vcpu); diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 4901d0d..af3b6c8 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -106,26 
+106,25 @@ void kmemcheck_error_recall(void) switch (e->type) { case KMEMCHECK_ERROR_INVALID_ACCESS: - printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " - "from %s memory (%p)\n", + printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", 8 * e->size, e->state < ARRAY_SIZE(desc) ? desc[e->state] : "(invalid shadow state)", (void *) e->address); - printk(KERN_INFO); + printk(KERN_WARNING); for (i = 0; i < SHADOW_COPY_SIZE; ++i) - printk("%02x", e->memory_copy[i]); - printk("\n"); + printk(KERN_CONT "%02x", e->memory_copy[i]); + printk(KERN_CONT "\n"); - printk(KERN_INFO); + printk(KERN_WARNING); for (i = 0; i < SHADOW_COPY_SIZE; ++i) { if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) - printk(" %c", short_desc[e->shadow_copy[i]]); + printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); else - printk(" ?"); + printk(KERN_CONT " ?"); } - printk("\n"); - printk(KERN_INFO "%*c\n", 2 + 2 + printk(KERN_CONT "\n"); + printk(KERN_WARNING "%*c\n", 2 + 2 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); break; case KMEMCHECK_ERROR_BUG: diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 145df00..f939d60 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -51,7 +51,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) } } -void __init update_res(struct pci_root_info *info, size_t start, +void __devinit update_res(struct pci_root_info *info, size_t start, size_t end, unsigned long flags, int merge) { int i; diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index 5bbb5a3..fd1ab80 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -8,14 +8,24 @@ BEGIN { od_sver = 19; } -/^GNU/ { - split($3, ver, "."); +/^GNU objdump/ { + verstr = "" + for (i = 3; i <= NF; i++) + if (match($(i), "^[0-9]")) { + verstr = $(i); + break; + } + if (verstr == "") { + printf("Warning: Failed to find objdump version number.\n"); + exit 0; + } + split(verstr, ver, "."); if 
(ver[1] > od_ver || (ver[1] == od_ver && ver[2] >= od_sver)) { exit 1; } else { printf("Warning: objdump version %s is older than %d.%d\n", - $4, od_ver, od_sver); + verstr, od_ver, od_sver); print("Warning: Skipping posttest."); # Logic is inverted, because we just skip test without error. exit 0; diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 8873b9b..8618d89 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -402,7 +402,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, * our current implementations need. If we'll ever need * more the interface will need revisiting. */ - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + page = alloc_page(gfp_mask | __GFP_ZERO); if (!page) goto out_free_bio; if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) diff --git a/block/blk-settings.c b/block/blk-settings.c index 6ae118d..d52d4ad 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -505,21 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b) /** * blk_stack_limits - adjust queue_limits for stacked devices - * @t: the stacking driver limits (top) - * @b: the underlying queue limits (bottom) + * @t: the stacking driver limits (top device) + * @b: the underlying queue limits (bottom, component device) * @offset: offset to beginning of data within component device * * Description: - * Merges two queue_limit structs. Returns 0 if alignment didn't - * change. Returns -1 if adding the bottom device caused - * misalignment. + * This function is used by stacking drivers like MD and DM to ensure + * that all component devices have compatible block sizes and + * alignments. The stacking driver must provide a queue_limits + * struct (top) and then iteratively call the stacking function for + * all component (bottom) devices. The stacking function will + * attempt to combine the values and ensure proper alignment. + * + * Returns 0 if the top and bottom queue_limits are compatible. 
The + * top device's block sizes and alignment offsets may be adjusted to + * ensure alignment with the bottom device. If no compatible sizes + * and alignments exist, -1 is returned and the resulting top + * queue_limits will have the misaligned flag set to indicate that + * the alignment_offset is undefined. */ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset) { - int ret; - - ret = 0; + sector_t alignment; + unsigned int top, bottom; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); @@ -537,6 +546,22 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); + alignment = queue_limit_alignment_offset(b, offset); + + /* Bottom device has different alignment. Check that it is + * compatible with the current top alignment. + */ + if (t->alignment_offset != alignment) { + + top = max(t->physical_block_size, t->io_min) + + t->alignment_offset; + bottom = max(b->physical_block_size, b->io_min) + alignment; + + /* Verify that top and bottom intervals line up */ + if (max(top, bottom) & (min(top, bottom) - 1)) + t->misaligned = 1; + } + t->logical_block_size = max(t->logical_block_size, b->logical_block_size); @@ -544,54 +569,64 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->physical_block_size); t->io_min = max(t->io_min, b->io_min); + t->io_opt = lcm(t->io_opt, b->io_opt); + t->no_cluster |= b->no_cluster; t->discard_zeroes_data &= b->discard_zeroes_data; - /* Bottom device offset aligned? */ - if (offset && - (offset & (b->physical_block_size - 1)) != b->alignment_offset) { + /* Physical block size a multiple of the logical block size? */ + if (t->physical_block_size & (t->logical_block_size - 1)) { + t->physical_block_size = t->logical_block_size; t->misaligned = 1; - ret = -1; } - /* - * Temporarily disable discard granularity. 
It's currently buggy - * since we default to 0 for discard_granularity, hence this - * "failure" will always trigger for non-zero offsets. - */ -#if 0 - if (offset && - (offset & (b->discard_granularity - 1)) != b->discard_alignment) { - t->discard_misaligned = 1; - ret = -1; + /* Minimum I/O a multiple of the physical block size? */ + if (t->io_min & (t->physical_block_size - 1)) { + t->io_min = t->physical_block_size; + t->misaligned = 1; } -#endif - - /* If top has no alignment offset, inherit from bottom */ - if (!t->alignment_offset) - t->alignment_offset = - b->alignment_offset & (b->physical_block_size - 1); - if (!t->discard_alignment) - t->discard_alignment = - b->discard_alignment & (b->discard_granularity - 1); - - /* Top device aligned on logical block boundary? */ - if (t->alignment_offset & (t->logical_block_size - 1)) { + /* Optimal I/O a multiple of the physical block size? */ + if (t->io_opt & (t->physical_block_size - 1)) { + t->io_opt = 0; t->misaligned = 1; - ret = -1; } - /* Find lcm() of optimal I/O size and granularity */ - t->io_opt = lcm(t->io_opt, b->io_opt); - t->discard_granularity = lcm(t->discard_granularity, - b->discard_granularity); + /* Find lowest common alignment_offset */ + t->alignment_offset = lcm(t->alignment_offset, alignment) + & (max(t->physical_block_size, t->io_min) - 1); - /* Verify that optimal I/O size is a multiple of io_min */ - if (t->io_min && t->io_opt % t->io_min) - ret = -1; + /* Verify that new alignment_offset is on a logical block boundary */ + if (t->alignment_offset & (t->logical_block_size - 1)) + t->misaligned = 1; + + /* Discard alignment and granularity */ + if (b->discard_granularity) { + unsigned int granularity = b->discard_granularity; + offset &= granularity - 1; + + alignment = (granularity + b->discard_alignment - offset) + & (granularity - 1); + + if (t->discard_granularity != 0 && + t->discard_alignment != alignment) { + top = t->discard_granularity + t->discard_alignment; + bottom = 
b->discard_granularity + alignment; + + /* Verify that top and bottom intervals line up */ + if (max(top, bottom) & (min(top, bottom) - 1)) + t->discard_misaligned = 1; + } + + t->max_discard_sectors = min_not_zero(t->max_discard_sectors, + b->max_discard_sectors); + t->discard_granularity = max(t->discard_granularity, + b->discard_granularity); + t->discard_alignment = lcm(t->discard_alignment, alignment) & + (t->discard_granularity - 1); + } - return ret; + return t->misaligned ? -1 : 0; } EXPORT_SYMBOL(blk_stack_limits); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e2f8046..918c7fd 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -208,8 +208,6 @@ struct cfq_data { /* Root service tree for cfq_groups */ struct cfq_rb_root grp_service_tree; struct cfq_group root_group; - /* Number of active cfq groups on group service tree */ - int nr_groups; /* * The priority currently being served @@ -294,8 +292,7 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, enum wl_prio_t prio, - enum wl_type_t type, - struct cfq_data *cfqd) + enum wl_type_t type) { if (!cfqg) return NULL; @@ -842,7 +839,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) __cfq_group_service_tree_add(st, cfqg); cfqg->on_st = true; - cfqd->nr_groups++; st->total_weight += cfqg->weight; } @@ -863,7 +859,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); cfqg->on_st = false; - cfqd->nr_groups--; st->total_weight -= cfqg->weight; if (!RB_EMPTY_NODE(&cfqg->rb_node)) cfq_rb_erase(&cfqg->rb_node, st); @@ -1150,7 +1145,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, #endif service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), - cfqq_type(cfqq), cfqd); + cfqq_type(cfqq)); if (cfq_class_idle(cfqq)) { rb_key = CFQ_IDLE_DELAY; parent = 
rb_last(&service_tree->rb); @@ -1513,9 +1508,6 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, struct cfq_io_context *cic; struct cfq_queue *cfqq; - /* Deny merge if bio and rq don't belong to same cfq group */ - if ((RQ_CFQQ(rq))->cfqg != cfq_get_cfqg(cfqd, 0)) - return false; /* * Disallow merge of a sync bio into an async request. */ @@ -1616,7 +1608,7 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) { struct cfq_rb_root *service_tree = service_tree_for(cfqd->serving_group, cfqd->serving_prio, - cfqd->serving_type, cfqd); + cfqd->serving_type); if (!cfqd->rq_queued) return NULL; @@ -1675,13 +1667,17 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, #define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR) static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct request *rq) + struct request *rq, bool for_preempt) { sector_t sdist = cfqq->seek_mean; if (!sample_valid(cfqq->seek_samples)) sdist = CFQQ_SEEK_THR; + /* if seek_mean is big, using it as close criteria is meaningless */ + if (sdist > CFQQ_SEEK_THR && !for_preempt) + sdist = CFQQ_SEEK_THR; + return cfq_dist_from_last(cfqd, rq) <= sdist; } @@ -1709,7 +1705,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, * will contain the closest sector. 
*/ __cfqq = rb_entry(parent, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) return __cfqq; if (blk_rq_pos(__cfqq->next_rq) < sector) @@ -1720,7 +1716,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, return NULL; __cfqq = rb_entry(node, struct cfq_queue, p_node); - if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq)) + if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false)) return __cfqq; return NULL; @@ -1963,8 +1959,7 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) } static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, - struct cfq_group *cfqg, enum wl_prio_t prio, - bool prio_changed) + struct cfq_group *cfqg, enum wl_prio_t prio) { struct cfq_queue *queue; int i; @@ -1972,24 +1967,9 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, unsigned long lowest_key = 0; enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; - if (prio_changed) { - /* - * When priorities switched, we prefer starting - * from SYNC_NOIDLE (first choice), or just SYNC - * over ASYNC - */ - if (service_tree_for(cfqg, prio, cur_best, cfqd)->count) - return cur_best; - cur_best = SYNC_WORKLOAD; - if (service_tree_for(cfqg, prio, cur_best, cfqd)->count) - return cur_best; - - return ASYNC_WORKLOAD; - } - - for (i = 0; i < 3; ++i) { - /* otherwise, select the one with lowest rb_key */ - queue = cfq_rb_first(service_tree_for(cfqg, prio, i, cfqd)); + for (i = 0; i <= SYNC_WORKLOAD; ++i) { + /* select the one with lowest rb_key */ + queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); if (queue && (!key_valid || time_before(queue->rb_key, lowest_key))) { lowest_key = queue->rb_key; @@ -2003,8 +1983,6 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) { - enum wl_prio_t previous_prio = cfqd->serving_prio; - bool prio_changed; unsigned slice; unsigned count; 
struct cfq_rb_root *st; @@ -2032,24 +2010,19 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload * expiration time */ - prio_changed = (cfqd->serving_prio != previous_prio); - st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type, - cfqd); + st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); count = st->count; /* - * If priority didn't change, check workload expiration, - * and that we still have other queues ready + * check workload expiration, and that we still have other queues ready */ - if (!prio_changed && count && - !time_after(jiffies, cfqd->workload_expires)) + if (count && !time_after(jiffies, cfqd->workload_expires)) return; /* otherwise select new workload type */ cfqd->serving_type = - cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio, prio_changed); - st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type, - cfqd); + cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); + st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); count = st->count; /* @@ -3143,7 +3116,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * if this request is as-good as one we would expect from the * current cfqq, let it preempt */ - if (cfq_rq_close(cfqd, cfqq, rq)) + if (cfq_rq_close(cfqd, cfqq, rq, true)) return true; return false; diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 3ec27c7..f84f6b4 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -214,6 +214,13 @@ static int raid6_test(void) err += test(4, &tests); if (NDISKS > 5) err += test(5, &tests); + /* the 11 and 12 disk cases are special for ioatdma (p-disabled + * q-continuation without extended descriptor) + */ + if (NDISKS > 12) { + err += test(11, &tests); + err += test(12, &tests); + } err += test(NDISKS, &tests); pr("\n"); diff --git a/drivers/Kconfig b/drivers/Kconfig index 8a07363..368ae6d 100644 --- 
a/drivers/Kconfig +++ b/drivers/Kconfig @@ -28,7 +28,7 @@ source "drivers/md/Kconfig" source "drivers/message/fusion/Kconfig" -source "drivers/ieee1394/Kconfig" +source "drivers/firewire/Kconfig" source "drivers/message/i2o/Kconfig" diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5f2c379..79d33d9 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -81,6 +81,23 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; /* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + +void __init acpi_set_sci_en_on_resume(void) +{ + set_sci_en_on_resume = true; +} + +/* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' * kernel command line option that causes the following variable to be set. @@ -170,18 +187,6 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. 
Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; - extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 05dff63..72e76b4 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -999,8 +999,10 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) sprintf(name, "acpi_video%d", count++); device->backlight = backlight_device_register(name, NULL, device, &acpi_backlight_ops); - device->backlight->props.max_brightness = device->brightness->count-3; kfree(name); + if (IS_ERR(device->backlight)) + return; + device->backlight->props.max_brightness = device->brightness->count-3; result = sysfs_create_link(&device->backlight->dev.kobj, &device->dev->dev.kobj, "device"); @@ -1979,6 +1981,10 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event) unsigned long long level_current, level_next; int result = -EINVAL; + /* no warning message if acpi_backlight=vendor is used */ + if (!acpi_video_backlight_support()) + return 0; + if (!device->brightness) goto out; diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index eb4fa19..ce1fa92 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -7101,7 +7101,7 @@ static struct DAC960_privdata DAC960_BA_privdata = { static struct DAC960_privdata DAC960_LP_privdata = { .HardwareType = DAC960_LP_Controller, - .FirmwareType = DAC960_LP_Controller, + .FirmwareType = DAC960_V2_Controller, .InterruptHandler = DAC960_LP_InterruptHandler, .MemoryWindowSize = DAC960_LP_RegisterWindowSize, }; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 13bb69d..64a223b 100644 --- a/drivers/block/aoe/aoecmd.c 
+++ b/drivers/block/aoe/aoecmd.c @@ -735,21 +735,6 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector part_stat_unlock(); } -/* - * Ensure we don't create aliases in VI caches - */ -static inline void -killalias(struct bio *bio) -{ - struct bio_vec *bv; - int i; - - if (bio_data_dir(bio) == READ) - __bio_for_each_segment(bv, bio, i, 0) { - flush_dcache_page(bv->bv_page); - } -} - void aoecmd_ata_rsp(struct sk_buff *skb) { @@ -871,7 +856,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) if (buf->flags & BUFFL_FAIL) bio_endio(buf->bio, -EIO); else { - killalias(buf->bio); + bio_flush_dcache_pages(buf->bio); bio_endio(buf->bio, 0); } mempool_free(buf, d->bufpool); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2312d78..c975587 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1490,7 +1490,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; -extern struct file_operations drbd_proc_fops; +extern const struct file_operations drbd_proc_fops; extern const char *drbd_conn_str(enum drbd_conns s); extern const char *drbd_role_str(enum drbd_role s); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 157d1e4..9348f33 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -27,7 +27,6 @@ */ #include -#include #include #include #include @@ -151,7 +150,7 @@ wait_queue_head_t drbd_pp_wait; DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); -static struct block_device_operations drbd_ops = { +static const struct block_device_operations drbd_ops = { .owner = THIS_MODULE, .open = drbd_open, .release = drbd_release, @@ -3623,7 +3622,7 @@ _drbd_fault_random(struct fault_random_state *rsp) { long refresh; - if (--rsp->count < 0) { + if (!rsp->count--) { get_random_bytes(&refresh, sizeof(refresh)); rsp->state += refresh; rsp->count = 
FAULT_RANDOM_REFRESH; diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index bdd0b49..df8ad96 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -38,7 +38,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file); struct proc_dir_entry *drbd_proc; -struct file_operations drbd_proc_fops = { +const struct file_operations drbd_proc_fops = { .owner = THIS_MODULE, .open = drbd_proc_open, .read = seq_read, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c548f24..259c135 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -28,7 +28,6 @@ #include #include -#include #include #include #include diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ed8796f..b453c2b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -24,7 +24,6 @@ */ #include -#include #include #include #include @@ -34,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index e0339aa..02b2583 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -860,7 +860,7 @@ static int mg_probe(struct platform_device *plat_dev) err = -EINVAL; goto probe_err_2; } - host->dev_base = ioremap(rsc->start , rsc->end + 1); + host->dev_base = ioremap(rsc->start, resource_size(rsc)); if (!host->dev_base) { printk(KERN_ERR "%s:%d ioremap fail\n", __func__, __LINE__); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 2fb2e6c..5aa7a58 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -725,9 +725,14 @@ static struct pci_driver agp_amd64_pci_driver = { int __init agp_amd64_init(void) { int err = 0; + static int done = 0; if (agp_off) return -EINVAL; + + if (done++) + return agp_bridges_found ? 
0 : -ENODEV; + err = pci_register_driver(&agp_amd64_pci_driver); if (err < 0) return err; @@ -771,12 +776,8 @@ static void __exit agp_amd64_cleanup(void) pci_unregister_driver(&agp_amd64_pci_driver); } -/* On AMD64 the PCI driver needs to initialize this driver early - for the IOMMU, so it has to be called via a backdoor. */ -#ifndef CONFIG_GART_IOMMU module_init(agp_amd64_init); module_exit(agp_amd64_cleanup); -#endif MODULE_AUTHOR("Dave Jones , Andi Kleen"); module_param(agp_try_unsupported, bool, 0); diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index e989f67..3d9c61e 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -158,10 +158,11 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, goto out; } } -out_unlock: - mutex_unlock(&rng_mutex); out: return ret ? : err; +out_unlock: + mutex_unlock(&rng_mutex); + goto out; } diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 679cd08..176f175 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -3204,7 +3204,7 @@ static __devinit int init_ipmi_si(void) #ifdef CONFIG_ACPI spmi_find_bmc(); #endif -#ifdef CONFIG_PNP +#ifdef CONFIG_ACPI pnp_register_driver(&ipmi_pnp_driver); #endif @@ -3330,7 +3330,7 @@ static __exit void cleanup_ipmi_si(void) #ifdef CONFIG_PCI pci_unregister_driver(&ipmi_pci_driver); #endif -#ifdef CONFIG_PNP +#ifdef CONFIG_ACPI pnp_unregister_driver(&ipmi_pnp_driver); #endif diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index f151125..efc1a61 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -815,7 +815,7 @@ atc_is_tx_complete(struct dma_chan *chan, dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", cookie, done ? *done : 0, used ? 
*used : 0); - spin_lock_bh(atchan->lock); + spin_lock_bh(&atchan->lock); last_complete = atchan->completed_cookie; last_used = chan->cookie; @@ -830,7 +830,7 @@ atc_is_tx_complete(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - spin_unlock_bh(atchan->lock); + spin_unlock_bh(&atchan->lock); if (done) *done = last_complete; diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 4a99cd9..b5f2ee0 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1294,8 +1294,8 @@ static int __exit coh901318_remove(struct platform_device *pdev) dma_async_device_unregister(&base->dma_slave); coh901318_pool_destroy(&base->pool); free_irq(platform_get_irq(pdev, 0), base); - kfree(base); iounmap(base->virtbase); + kfree(base); release_mem_region(pdev->resource->start, resource_size(pdev->resource)); return 0; diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 285bed0..d28369f 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1270,8 +1270,6 @@ static int __init dw_probe(struct platform_device *pdev) goto err_kfree; } - memset(dw, 0, sizeof *dw); - dw->regs = ioremap(io->start, DW_REGLEN); if (!dw->regs) { err = -ENOMEM; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index c524d36..dcc4ab7 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1032,7 +1032,7 @@ int __devinit ioat_probe(struct ioatdma_device *device) dma->dev = &pdev->dev; if (!dma->chancnt) { - dev_err(dev, "zero channels detected\n"); + dev_err(dev, "channel enumeration error\n"); goto err_setup_interrupts; } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 45edde9..bbc3e78 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -60,6 +60,7 @@ * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) * @enumerate_channels: hw version specific channel enumeration + * @reset_hw: hw version specific channel (re)initialization * 
@cleanup_tasklet: select between the v2 and v3 cleanup routines * @timer_fn: select between the v2 and v3 timer watchdog routines * @self_test: hardware version specific self test for each supported op type @@ -78,6 +79,7 @@ struct ioatdma_device { struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); + int (*reset_hw)(struct ioat_chan_common *chan); void (*cleanup_tasklet)(unsigned long data); void (*timer_fn)(unsigned long data); int (*self_test)(struct ioatdma_device *device); @@ -264,6 +266,22 @@ static inline void ioat_suspend(struct ioat_chan_common *chan) writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } +static inline void ioat_reset(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u8 cmd; + + cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; +} + static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) { struct ioat_chan_common *chan = &ioat->base; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 8f1f7f0..5f7a500 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -239,20 +239,50 @@ void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) __ioat2_start_null_desc(ioat); } -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) { - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; + unsigned long end = jiffies + tmo; + int err = 0; u32 status; status = ioat_chansts(chan); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(chan); while (is_ioat_active(status) || is_ioat_idle(status)) { + if (end && 
time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } status = ioat_chansts(chan); cpu_relax(); } + return err; +} + +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(chan); + while (ioat_reset_pending(chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + ioat2_quiesce(chan, 0); if (ioat_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); @@ -318,6 +348,19 @@ void ioat2_timer_event(unsigned long data) spin_unlock_bh(&chan->cleanup_lock); } +static int ioat2_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it initialized */ + u32 chanerr; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + /** * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated @@ -360,6 +403,10 @@ int ioat2_enumerate_channels(struct ioatdma_device *device) (unsigned long) ioat); ioat->xfercap_log = xfercap_log; spin_lock_init(&ioat->ring_lock); + if (device->reset_hw(&ioat->base)) { + i = 0; + break; + } } dma->chancnt = i; return i; @@ -467,7 +514,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; - u32 chanerr; int order; /* have we already been set up? 
*/ @@ -477,12 +523,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) /* Setup register to interrupt and write completion status on error */ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - } - /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ chan->completion = pci_pool_alloc(chan->device->completion_pool, @@ -746,13 +786,7 @@ void ioat2_free_chan_resources(struct dma_chan *c) tasklet_disable(&chan->cleanup_task); del_timer_sync(&chan->timer); device->cleanup_tasklet((unsigned long) ioat); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); + device->reset_hw(chan); spin_lock_bh(&ioat->ring_lock); descs = ioat2_ring_space(ioat); @@ -839,6 +873,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) int err; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat2_reset_hw; device->cleanup_tasklet = ioat2_cleanup_tasklet; device->timer_fn = ioat2_timer_event; device->self_test = ioat_dma_self_test; diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 1d849ef..3afad8d 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -185,6 +185,8 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order); void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); void ioat2_cleanup_tasklet(unsigned long data); void ioat2_timer_event(unsigned long data); +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); extern struct kobj_type ioat2_ktype; extern struct 
kmem_cache *ioat2_cache; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 42f6f10..9908c9e 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -650,9 +650,11 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, num_descs = ioat2_xferlen_to_descs(ioat, len); /* we need 2x the number of descriptors to cover greater than 3 - * sources + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. */ - if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { with_ext = 1; num_descs *= 2; } else @@ -1128,6 +1130,45 @@ static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) return 0; } +static int ioat3_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *device = chan->device; + struct pci_dev *pdev = device->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3, and clear any + * pending errors + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, 
IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1137,10 +1178,10 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) struct ioat_chan_common *chan; bool is_raid_device = false; int err; - u16 dev_id; u32 cap; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat3_reset_hw; device->self_test = ioat3_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; @@ -1216,19 +1257,6 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = NULL; #endif - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - err = ioat_probe(device); if (err) return err; diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index f015ec1..e8ae63b 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -27,6 +27,7 @@ #define IOAT_PCI_DEVICE_ID_OFFSET 0x02 #define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 /* MMIO Device Registers */ diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 2e4a54c..d10cc89 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -23,16 +23,19 @@ #include 
#include #include -#include #include #include #include #include "shdma.h" /* DMA descriptor control */ -#define DESC_LAST (-1) -#define DESC_COMP (1) -#define DESC_NCOMP (0) +enum sh_dmae_desc_status { + DESC_IDLE, + DESC_PREPARED, + DESC_SUBMITTED, + DESC_COMPLETED, /* completed, have to call callback */ + DESC_WAITING, /* callback called, waiting for ack / re-submit */ +}; #define NR_DESCS_PER_CHANNEL 32 /* @@ -45,6 +48,8 @@ */ #define RS_DEFAULT (RS_DUAL) +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all); + #define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) { @@ -106,11 +111,11 @@ static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; } -static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw) { - sh_dmae_writel(sh_chan, hw.sar, SAR); - sh_dmae_writel(sh_chan, hw.dar, DAR); - sh_dmae_writel(sh_chan, hw.tcr >> calc_xmit_shift(sh_chan), TCR); + sh_dmae_writel(sh_chan, hw->sar, SAR); + sh_dmae_writel(sh_chan, hw->dar, DAR); + sh_dmae_writel(sh_chan, hw->tcr >> calc_xmit_shift(sh_chan), TCR); } static void dmae_start(struct sh_dmae_chan *sh_chan) @@ -184,8 +189,9 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) { - struct sh_desc *desc = tx_to_sh_desc(tx); + struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + dma_async_tx_callback callback = tx->callback; dma_cookie_t cookie; spin_lock_bh(&sh_chan->desc_lock); @@ -195,45 +201,53 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) if (cookie < 0) cookie = 1; - /* If desc only in the case of 1 */ - if (desc->async_tx.cookie != -EBUSY) - desc->async_tx.cookie = 
cookie; - sh_chan->common.cookie = desc->async_tx.cookie; + sh_chan->common.cookie = cookie; + tx->cookie = cookie; + + /* Mark all chunks of this descriptor as submitted, move to the queue */ + list_for_each_entry_safe(chunk, c, desc->node.prev, node) { + /* + * All chunks are on the global ld_free, so, we have to find + * the end of the chain ourselves + */ + if (chunk != desc && (chunk->mark == DESC_IDLE || + chunk->async_tx.cookie > 0 || + chunk->async_tx.cookie == -EBUSY || + &chunk->node == &sh_chan->ld_free)) + break; + chunk->mark = DESC_SUBMITTED; + /* Callback goes to the last chunk */ + chunk->async_tx.callback = NULL; + chunk->cookie = cookie; + list_move_tail(&chunk->node, &sh_chan->ld_queue); + last = chunk; + } + + last->async_tx.callback = callback; + last->async_tx.callback_param = tx->callback_param; - list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); + dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n", + tx->cookie, &last->async_tx, sh_chan->id, + desc->hw.sar, desc->hw.tcr, desc->hw.dar); spin_unlock_bh(&sh_chan->desc_lock); return cookie; } +/* Called with desc_lock held */ static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) { - struct sh_desc *desc, *_desc, *ret = NULL; + struct sh_desc *desc; - spin_lock_bh(&sh_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { - if (async_tx_test_ack(&desc->async_tx)) { + list_for_each_entry(desc, &sh_chan->ld_free, node) + if (desc->mark != DESC_PREPARED) { + BUG_ON(desc->mark != DESC_IDLE); list_del(&desc->node); - ret = desc; - break; + return desc; } - } - spin_unlock_bh(&sh_chan->desc_lock); - - return ret; -} - -static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) -{ - if (desc) { - spin_lock_bh(&sh_chan->desc_lock); - - list_splice_init(&desc->tx_list, &sh_chan->ld_free); - list_add(&desc->node, &sh_chan->ld_free); - spin_unlock_bh(&sh_chan->desc_lock); - } + return NULL; } static int 
sh_dmae_alloc_chan_resources(struct dma_chan *chan) @@ -252,11 +266,10 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&desc->async_tx, &sh_chan->common); desc->async_tx.tx_submit = sh_dmae_tx_submit; - desc->async_tx.flags = DMA_CTRL_ACK; - INIT_LIST_HEAD(&desc->tx_list); - sh_dmae_put_desc(sh_chan, desc); + desc->mark = DESC_IDLE; spin_lock_bh(&sh_chan->desc_lock); + list_add(&desc->node, &sh_chan->ld_free); sh_chan->descs_allocated++; } spin_unlock_bh(&sh_chan->desc_lock); @@ -273,7 +286,10 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) struct sh_desc *desc, *_desc; LIST_HEAD(list); - BUG_ON(!list_empty(&sh_chan->ld_queue)); + /* Prepared and not submitted descriptors can still be on the queue */ + if (!list_empty(&sh_chan->ld_queue)) + sh_dmae_chan_ld_cleanup(sh_chan, true); + spin_lock_bh(&sh_chan->desc_lock); list_splice_init(&sh_chan->ld_free, &list); @@ -292,6 +308,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( struct sh_dmae_chan *sh_chan; struct sh_desc *first = NULL, *prev = NULL, *new; size_t copy_size; + LIST_HEAD(tx_list); + int chunks = (len + SH_DMA_TCR_MAX) / (SH_DMA_TCR_MAX + 1); if (!chan) return NULL; @@ -301,108 +319,189 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( sh_chan = to_sh_chan(chan); + /* Have to lock the whole loop to protect against concurrent release */ + spin_lock_bh(&sh_chan->desc_lock); + + /* + * Chaining: + * first descriptor is what user is dealing with in all API calls, its + * cookie is at first set to -EBUSY, at tx-submit to a positive + * number + * if more than one chunk is needed further chunks have cookie = -EINVAL + * the last chunk, if not equal to the first, has cookie = -ENOSPC + * all chunks are linked onto the tx_list head with their .node heads + * only during this function, then they are immediately spliced + * back onto the free list in form of a chain + */ do { - /* Allocate the link descriptor from DMA pool 
*/ + /* Allocate the link descriptor from the free list */ new = sh_dmae_get_desc(sh_chan); if (!new) { dev_err(sh_chan->dev, "No free memory for link descriptor\n"); - goto err_get_desc; + list_for_each_entry(new, &tx_list, node) + new->mark = DESC_IDLE; + list_splice(&tx_list, &sh_chan->ld_free); + spin_unlock_bh(&sh_chan->desc_lock); + return NULL; } - copy_size = min(len, (size_t)SH_DMA_TCR_MAX); + copy_size = min(len, (size_t)SH_DMA_TCR_MAX + 1); new->hw.sar = dma_src; new->hw.dar = dma_dest; new->hw.tcr = copy_size; - if (!first) + if (!first) { + /* First desc */ + new->async_tx.cookie = -EBUSY; first = new; + } else { + /* Other desc - invisible to the user */ + new->async_tx.cookie = -EINVAL; + } - new->mark = DESC_NCOMP; - async_tx_ack(&new->async_tx); + dev_dbg(sh_chan->dev, + "chaining %u of %u with %p, dst %x, cookie %d\n", + copy_size, len, &new->async_tx, dma_dest, + new->async_tx.cookie); + + new->mark = DESC_PREPARED; + new->async_tx.flags = flags; + new->chunks = chunks--; prev = new; len -= copy_size; dma_src += copy_size; dma_dest += copy_size; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); + list_add_tail(&new->node, &tx_list); } while (len); - new->async_tx.flags = flags; /* client is in control of this ack */ - new->async_tx.cookie = -EBUSY; /* Last desc */ + if (new != first) + new->async_tx.cookie = -ENOSPC; - return &first->async_tx; + /* Put them back on the free list, so, they don't get lost */ + list_splice_tail(&tx_list, &sh_chan->ld_free); -err_get_desc: - sh_dmae_put_desc(sh_chan, first); - return NULL; + spin_unlock_bh(&sh_chan->desc_lock); + return &first->async_tx; } -/* - * sh_chan_ld_cleanup - Clean up link descriptors - * - * This function clean up the ld_queue of DMA channel. 
- */ -static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) +static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) { struct sh_desc *desc, *_desc; + /* Is the "exposed" head of a chain acked? */ + bool head_acked = false; + dma_cookie_t cookie = 0; + dma_async_tx_callback callback = NULL; + void *param = NULL; spin_lock_bh(&sh_chan->desc_lock); list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { - dma_async_tx_callback callback; - void *callback_param; - - /* non send data */ - if (desc->mark == DESC_NCOMP) + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie); + BUG_ON(desc->mark != DESC_SUBMITTED && + desc->mark != DESC_COMPLETED && + desc->mark != DESC_WAITING); + + /* + * queue is ordered, and we use this loop to (1) clean up all + * completed descriptors, and to (2) update descriptor flags of + * any chunks in a (partially) completed chain + */ + if (!all && desc->mark == DESC_SUBMITTED && + desc->cookie != cookie) break; - /* send data sesc */ - callback = desc->async_tx.callback; - callback_param = desc->async_tx.callback_param; + if (tx->cookie > 0) + cookie = tx->cookie; - /* Remove from ld_queue list */ - list_splice_init(&desc->tx_list, &sh_chan->ld_free); + if (desc->mark == DESC_COMPLETED && desc->chunks == 1) { + BUG_ON(sh_chan->completed_cookie != desc->cookie - 1); + sh_chan->completed_cookie = desc->cookie; + } - dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", - desc); + /* Call callback on the last chunk */ + if (desc->mark == DESC_COMPLETED && tx->callback) { + desc->mark = DESC_WAITING; + callback = tx->callback; + param = tx->callback_param; + dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n", + tx->cookie, tx, sh_chan->id); + BUG_ON(desc->chunks != 1); + break; + } - list_move(&desc->node, &sh_chan->ld_free); - /* Run the link descriptor callback function */ - if (callback) { - 
spin_unlock_bh(&sh_chan->desc_lock); - dev_dbg(sh_chan->dev, "link descriptor %p callback\n", - desc); - callback(callback_param); - spin_lock_bh(&sh_chan->desc_lock); + if (tx->cookie > 0 || tx->cookie == -EBUSY) { + if (desc->mark == DESC_COMPLETED) { + BUG_ON(tx->cookie < 0); + desc->mark = DESC_WAITING; + } + head_acked = async_tx_test_ack(tx); + } else { + switch (desc->mark) { + case DESC_COMPLETED: + desc->mark = DESC_WAITING; + /* Fall through */ + case DESC_WAITING: + if (head_acked) + async_tx_ack(&desc->async_tx); + } + } + + dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n", + tx, tx->cookie); + + if (((desc->mark == DESC_COMPLETED || + desc->mark == DESC_WAITING) && + async_tx_test_ack(&desc->async_tx)) || all) { + /* Remove from ld_queue list */ + desc->mark = DESC_IDLE; + list_move(&desc->node, &sh_chan->ld_free); } } spin_unlock_bh(&sh_chan->desc_lock); + + if (callback) + callback(param); + + return callback; +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function cleans up the ld_queue of DMA channel. 
+ */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) +{ + while (__ld_cleanup(sh_chan, all)) + ; } static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) { - struct list_head *ld_node; - struct sh_dmae_regs hw; + struct sh_desc *sd; + spin_lock_bh(&sh_chan->desc_lock); /* DMA work check */ - if (dmae_is_busy(sh_chan)) + if (dmae_is_busy(sh_chan)) { + spin_unlock_bh(&sh_chan->desc_lock); return; + } /* Find the first un-transfer desciptor */ - for (ld_node = sh_chan->ld_queue.next; - (ld_node != &sh_chan->ld_queue) - && (to_sh_desc(ld_node)->mark == DESC_COMP); - ld_node = ld_node->next) - cpu_relax(); - - if (ld_node != &sh_chan->ld_queue) { - /* Get the ld start address from ld_queue */ - hw = to_sh_desc(ld_node)->hw; - dmae_set_reg(sh_chan, hw); - dmae_start(sh_chan); - } + list_for_each_entry(sd, &sh_chan->ld_queue, node) + if (sd->mark == DESC_SUBMITTED) { + /* Get the ld start address from ld_queue */ + dmae_set_reg(sh_chan, &sd->hw); + dmae_start(sh_chan); + break; + } + + spin_unlock_bh(&sh_chan->desc_lock); } static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) @@ -420,12 +519,11 @@ static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, dma_cookie_t last_used; dma_cookie_t last_complete; - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); last_used = chan->cookie; last_complete = sh_chan->completed_cookie; - if (last_complete == -EBUSY) - last_complete = last_used; + BUG_ON(last_complete < 0); if (done) *done = last_complete; @@ -480,11 +578,13 @@ static irqreturn_t sh_dmae_err(int irq, void *data) err = sh_dmae_rst(0); if (err) return err; +#ifdef SH_DMAC_BASE1 if (shdev->pdata.mode & SHDMA_DMAOR1) { err = sh_dmae_rst(1); if (err) return err; } +#endif disable_irq(irq); return IRQ_HANDLED; } @@ -494,35 +594,25 @@ static irqreturn_t sh_dmae_err(int irq, void *data) static void dmae_do_tasklet(unsigned long data) { struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan 
*)data; - struct sh_desc *desc, *_desc, *cur_desc = NULL; + struct sh_desc *desc; u32 sar_buf = sh_dmae_readl(sh_chan, SAR); - list_for_each_entry_safe(desc, _desc, - &sh_chan->ld_queue, node) { - if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { - cur_desc = desc; + spin_lock(&sh_chan->desc_lock); + list_for_each_entry(desc, &sh_chan->ld_queue, node) { + if ((desc->hw.sar + desc->hw.tcr) == sar_buf && + desc->mark == DESC_SUBMITTED) { + dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n", + desc->async_tx.cookie, &desc->async_tx, + desc->hw.dar); + desc->mark = DESC_COMPLETED; break; } } + spin_unlock(&sh_chan->desc_lock); - if (cur_desc) { - switch (cur_desc->async_tx.cookie) { - case 0: /* other desc data */ - break; - case -EBUSY: /* last desc */ - sh_chan->completed_cookie = - cur_desc->async_tx.cookie; - break; - default: /* first desc ( 0 < )*/ - sh_chan->completed_cookie = - cur_desc->async_tx.cookie - 1; - break; - } - cur_desc->mark = DESC_COMP; - } /* Next desc */ sh_chan_xfer_ld_queue(sh_chan); - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); } static unsigned int get_dmae_irq(unsigned int id) diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h index 60b81e5..108f1cf 100644 --- a/drivers/dma/shdma.h +++ b/drivers/dma/shdma.h @@ -13,9 +13,9 @@ #ifndef __DMA_SHDMA_H #define __DMA_SHDMA_H -#include -#include #include +#include +#include #define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ @@ -26,13 +26,16 @@ struct sh_dmae_regs { }; struct sh_desc { - struct list_head tx_list; struct sh_dmae_regs hw; struct list_head node; struct dma_async_tx_descriptor async_tx; + dma_cookie_t cookie; + int chunks; int mark; }; +struct device; + struct sh_dmae_chan { dma_cookie_t completed_cookie; /* The maximum cookie completed */ spinlock_t desc_lock; /* Descriptor operation lock */ diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig index 13efcd3..a9371b3 100644 --- a/drivers/firewire/Kconfig +++ b/drivers/firewire/Kconfig @@ -1,5 +1,10 @@ 
+menu "IEEE 1394 (FireWire) support" + depends on PCI || BROKEN + # firewire-core does not depend on PCI but is + # not useful without PCI controller driver + comment "You can enable one or both FireWire driver stacks." -comment "See the help texts for more information." +comment "The newer stack is recommended." config FIREWIRE tristate "FireWire driver stack" @@ -15,16 +20,6 @@ config FIREWIRE To compile this driver as a module, say M here: the module will be called firewire-core. - This module functionally replaces ieee1394, raw1394, and video1394. - To access it from application programs, you generally need at least - libraw1394 v2. IIDC/DCAM applications need libdc1394 v2. - No libraries are required to access storage devices through the - firewire-sbp2 driver. - - NOTE: - FireWire audio devices currently require the old drivers (ieee1394, - ohci1394, raw1394). - config FIREWIRE_OHCI tristate "OHCI-1394 controllers" depends on PCI && FIREWIRE @@ -34,22 +29,7 @@ config FIREWIRE_OHCI is the only chipset in use, so say Y here. To compile this driver as a module, say M here: The module will be - called firewire-ohci. It replaces ohci1394 of the classic IEEE 1394 - stack. - - NOTE: - If you want to install firewire-ohci and ohci1394 together, you - should configure them only as modules and blacklist the driver(s) - which you don't want to have auto-loaded. Add either - - blacklist firewire-ohci - or - blacklist ohci1394 - blacklist video1394 - blacklist dv1394 - - to /etc/modprobe.conf or /etc/modprobe.d/* and update modprobe.conf - depending on your distribution. + called firewire-ohci. config FIREWIRE_OHCI_DEBUG bool @@ -66,8 +46,7 @@ config FIREWIRE_SBP2 like scanners. To compile this driver as a module, say M here: The module will be - called firewire-sbp2. It replaces sbp2 of the classic IEEE 1394 - stack. + called firewire-sbp2. You should also enable support for disks, CD-ROMs, etc. in the SCSI configuration section. 
@@ -83,5 +62,8 @@ config FIREWIRE_NET NOTE, this driver is not stable yet! To compile this driver as a module, say M here: The module will be - called firewire-net. It replaces eth1394 of the classic IEEE 1394 - stack. + called firewire-net. + +source "drivers/ieee1394/Kconfig" + +endmenu diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 231e6ee..e6d6384 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -601,8 +601,9 @@ static void release_request(struct client *client, struct inbound_transaction_resource *r = container_of(resource, struct inbound_transaction_resource, resource); - fw_send_response(client->device->card, r->request, - RCODE_CONFLICT_ERROR); + if (r->request) + fw_send_response(client->device->card, r->request, + RCODE_CONFLICT_ERROR); kfree(r); } @@ -645,7 +646,8 @@ static void handle_request(struct fw_card *card, struct fw_request *request, failed: kfree(r); kfree(e); - fw_send_response(card, request, RCODE_CONFLICT_ERROR); + if (request) + fw_send_response(card, request, RCODE_CONFLICT_ERROR); } static void release_address_handler(struct client *client, @@ -715,15 +717,18 @@ static int ioctl_send_response(struct client *client, void *buffer) r = container_of(resource, struct inbound_transaction_resource, resource); - if (request->length < r->length) - r->length = request->length; - - if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) { - ret = -EFAULT; - goto out; + if (r->request) { + if (request->length < r->length) + r->length = request->length; + if (copy_from_user(r->data, u64_to_uptr(request->data), + r->length)) { + ret = -EFAULT; + kfree(r->request); + goto out; + } + fw_send_response(client->device->card, r->request, + request->rcode); } - - fw_send_response(client->device->card, r->request, request->rcode); out: kfree(r); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 842739d..495849e 100644 --- 
a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -432,14 +432,20 @@ static struct fw_address_handler *lookup_overlapping_address_handler( return NULL; } +static bool is_enclosing_handler(struct fw_address_handler *handler, + unsigned long long offset, size_t length) +{ + return handler->offset <= offset && + offset + length <= handler->offset + handler->length; +} + static struct fw_address_handler *lookup_enclosing_address_handler( struct list_head *list, unsigned long long offset, size_t length) { struct fw_address_handler *handler; list_for_each_entry(handler, list, link) { - if (handler->offset <= offset && - offset + length <= handler->offset + handler->length) + if (is_enclosing_handler(handler, offset, length)) return handler; } @@ -465,6 +471,12 @@ const struct fw_address_region fw_unit_space_region = { .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, }; #endif /* 0 */ +static bool is_in_fcp_region(u64 offset, size_t length) +{ + return offset >= (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset + length <= (CSR_REGISTER_BASE | CSR_FCP_END); +} + /** * fw_core_add_address_handler - register for incoming requests * @handler: callback @@ -477,8 +489,11 @@ const struct fw_address_region fw_unit_space_region = * give the details of the particular request. * * Return value: 0 on success, non-zero otherwise. + * * The start offset of the handler's address region is determined by * fw_core_add_address_handler() and is returned in handler->offset. + * + * Address allocations are exclusive, except for the FCP registers. 
*/ int fw_core_add_address_handler(struct fw_address_handler *handler, const struct fw_address_region *region) @@ -498,10 +513,12 @@ int fw_core_add_address_handler(struct fw_address_handler *handler, handler->offset = region->start; while (handler->offset + handler->length <= region->end) { - other = - lookup_overlapping_address_handler(&address_handler_list, - handler->offset, - handler->length); + if (is_in_fcp_region(handler->offset, handler->length)) + other = NULL; + else + other = lookup_overlapping_address_handler + (&address_handler_list, + handler->offset, handler->length); if (other != NULL) { handler->offset += other->length; } else { @@ -668,6 +685,9 @@ static struct fw_request *allocate_request(struct fw_packet *p) void fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) { + if (WARN_ONCE(!request, "invalid for FCP address handlers")) + return; + /* unified transaction or broadcast transaction: don't respond */ if (request->ack != ACK_PENDING || HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) { @@ -686,26 +706,15 @@ void fw_send_response(struct fw_card *card, } EXPORT_SYMBOL(fw_send_response); -void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +static void handle_exclusive_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) { struct fw_address_handler *handler; - struct fw_request *request; - unsigned long long offset; unsigned long flags; int tcode, destination, source; - if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) - return; - - request = allocate_request(p); - if (request == NULL) { - /* FIXME: send statically allocated busy packet. 
*/ - return; - } - - offset = - ((unsigned long long) - HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2]; tcode = HEADER_GET_TCODE(p->header[0]); destination = HEADER_GET_DESTINATION(p->header[0]); source = HEADER_GET_SOURCE(p->header[1]); @@ -732,6 +741,73 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) request->data, request->length, handler->callback_data); } + +static void handle_fcp_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) +{ + struct fw_address_handler *handler; + unsigned long flags; + int tcode, destination, source; + + if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || + request->length > 0x200) { + fw_send_response(card, request, RCODE_ADDRESS_ERROR); + + return; + } + + tcode = HEADER_GET_TCODE(p->header[0]); + destination = HEADER_GET_DESTINATION(p->header[0]); + source = HEADER_GET_SOURCE(p->header[1]); + + if (tcode != TCODE_WRITE_QUADLET_REQUEST && + tcode != TCODE_WRITE_BLOCK_REQUEST) { + fw_send_response(card, request, RCODE_TYPE_ERROR); + + return; + } + + spin_lock_irqsave(&address_handler_lock, flags); + list_for_each_entry(handler, &address_handler_list, link) { + if (is_enclosing_handler(handler, offset, request->length)) + handler->address_callback(card, NULL, tcode, + destination, source, + p->generation, p->speed, + offset, request->data, + request->length, + handler->callback_data); + } + spin_unlock_irqrestore(&address_handler_lock, flags); + + fw_send_response(card, request, RCODE_COMPLETE); +} + +void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +{ + struct fw_request *request; + unsigned long long offset; + + if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) + return; + + request = allocate_request(p); + if (request == NULL) { + /* FIXME: send statically allocated busy packet. 
*/ + return; + } + + offset = ((u64)HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | + p->header[2]; + + if (!is_in_fcp_region(offset, request->length)) + handle_exclusive_region_request(card, p, request, offset); + else + handle_fcp_region_request(card, p, request, offset); + +} EXPORT_SYMBOL(fw_core_handle_request); void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 96768e1..a61571c 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2226,7 +2226,6 @@ static int ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base, if (rest == 0) return -EINVAL; - /* FIXME: make packet-per-buffer/dual-buffer a context option */ while (rest > 0) { d = context_get_descriptors(&ctx->context, z + header_z, &d_bus); @@ -2470,7 +2469,10 @@ static int __devinit pci_probe(struct pci_dev *dev, } version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; +#if 0 + /* FIXME: make it a context option or remove dual-buffer mode */ ohci->use_dualbuffer = version >= OHCI_VERSION_1_1; +#endif /* dual-buffer mode is broken if more than one IR context is active */ if (dev->vendor == PCI_VENDOR_ID_AGERE && diff --git a/drivers/ieee1394/Kconfig b/drivers/ieee1394/Kconfig index f102fcc..e02096c 100644 --- a/drivers/ieee1394/Kconfig +++ b/drivers/ieee1394/Kconfig @@ -1,8 +1,3 @@ -menu "IEEE 1394 (FireWire) support" - depends on PCI || BROKEN - -source "drivers/firewire/Kconfig" - config IEEE1394 tristate "Legacy alternative FireWire driver stack" depends on PCI || BROKEN @@ -16,8 +11,13 @@ config IEEE1394 is the core support only, you will also need to select a driver for your IEEE 1394 adapter. - To compile this driver as a module, say M here: the - module will be called ieee1394. + To compile this driver as a module, say M here: the module will be + called ieee1394. + + NOTE: + ieee1394 is superseded by the newer firewire-core driver. 
See + http://ieee1394.wiki.kernel.org/index.php/Juju_Migration for + further information on how to switch to the new FireWire drivers. config IEEE1394_OHCI1394 tristate "OHCI-1394 controllers" @@ -29,19 +29,23 @@ config IEEE1394_OHCI1394 use one of these chipsets. It should work with any OHCI-1394 compliant card, however. - To compile this driver as a module, say M here: the - module will be called ohci1394. + To compile this driver as a module, say M here: the module will be + called ohci1394. NOTE: + ohci1394 is superseded by the newer firewire-ohci driver. See + http://ieee1394.wiki.kernel.org/index.php/Juju_Migration for + further information on how to switch to the new FireWire drivers. + If you want to install firewire-ohci and ohci1394 together, you should configure them only as modules and blacklist the driver(s) which you don't want to have auto-loaded. Add either - blacklist firewire-ohci - or blacklist ohci1394 blacklist video1394 blacklist dv1394 + or + blacklist firewire-ohci to /etc/modprobe.conf or /etc/modprobe.d/* and update modprobe.conf depending on your distribution. @@ -58,8 +62,8 @@ config IEEE1394_PCILYNX Instruments PCILynx chip. Note: this driver is written for revision 2 of this chip and may not work with revision 0. - To compile this driver as a module, say M here: the - module will be called pcilynx. + To compile this driver as a module, say M here: the module will be + called pcilynx. Only some old and now very rare PCI and CardBus cards and PowerMacs G3 B&W contain the PCILynx controller. Therefore @@ -79,6 +83,14 @@ config IEEE1394_SBP2 You should also enable support for disks, CD-ROMs, etc. in the SCSI configuration section. + To compile this driver as a module, say M here: the module will be + called sbp2. + + NOTE: + sbp2 is superseded by the newer firewire-sbp2 driver. See + http://ieee1394.wiki.kernel.org/index.php/Juju_Migration for + further information on how to switch to the new FireWire drivers. 
+ config IEEE1394_SBP2_PHYS_DMA bool "Enable replacement for physical DMA in SBP2" depends on IEEE1394_SBP2 && VIRT_TO_BUS && EXPERIMENTAL @@ -111,6 +123,11 @@ config IEEE1394_ETH1394 The module is called eth1394 although it does not emulate Ethernet. + NOTE: + eth1394 is superseded by the newer firewire-net driver. See + http://ieee1394.wiki.kernel.org/index.php/Juju_Migration for + further information on how to switch to the new FireWire drivers. + config IEEE1394_RAWIO tristate "raw1394 userspace interface" depends on IEEE1394 @@ -123,6 +140,11 @@ config IEEE1394_RAWIO To compile this driver as a module, say M here: the module will be called raw1394. + NOTE: + raw1394 is superseded by the newer firewire-core driver. See + http://ieee1394.wiki.kernel.org/index.php/Juju_Migration for + further information on how to switch to the new FireWire drivers. + config IEEE1394_VIDEO1394 tristate "video1394 userspace interface" depends on IEEE1394 && IEEE1394_OHCI1394 @@ -136,13 +158,18 @@ config IEEE1394_VIDEO1394 To compile this driver as a module, say M here: the module will be called video1394. + NOTE: + video1394 is superseded by the newer firewire-core driver. See + http://ieee1394.wiki.kernel.org/index.php/Juju_Migration for + further information on how to switch to the new FireWire drivers. + config IEEE1394_DV1394 tristate "dv1394 userspace interface (deprecated)" depends on IEEE1394 && IEEE1394_OHCI1394 help The dv1394 driver is unsupported and may be removed from Linux in a - future release. Its functionality is now provided by raw1394 together - with libraries such as libiec61883. + future release. Its functionality is now provided by either + raw1394 or firewire-core together with libraries such as libiec61883. config IEEE1394_VERBOSEDEBUG bool "Excessive debugging output" @@ -153,5 +180,3 @@ config IEEE1394_VERBOSEDEBUG will quickly result in large amounts of data sent to the system log. Say Y if you really need the debugging output. Everyone else says N. 
- -endmenu diff --git a/drivers/input/ff-memless.c b/drivers/input/ff-memless.c index b483b29..f967008 100644 --- a/drivers/input/ff-memless.c +++ b/drivers/input/ff-memless.c @@ -221,11 +221,27 @@ static int get_compatible_type(struct ff_device *ff, int effect_type) } /* + * Only left/right direction should be used (under/over 0x8000) for + * forward/reverse motor direction (to keep calculation fast & simple). + */ +static u16 ml_calculate_direction(u16 direction, u16 force, + u16 new_direction, u16 new_force) +{ + if (!force) + return new_direction; + if (!new_force) + return direction; + return (((u32)(direction >> 1) * force + + (new_direction >> 1) * new_force) / + (force + new_force)) << 1; +} + +/* * Combine two effects and apply gain. */ static void ml_combine_effects(struct ff_effect *effect, struct ml_effect_state *state, - unsigned int gain) + int gain) { struct ff_effect *new = state->effect; unsigned int strong, weak, i; @@ -252,8 +268,21 @@ static void ml_combine_effects(struct ff_effect *effect, break; case FF_RUMBLE: - strong = new->u.rumble.strong_magnitude * gain / 0xffff; - weak = new->u.rumble.weak_magnitude * gain / 0xffff; + strong = (u32)new->u.rumble.strong_magnitude * gain / 0xffff; + weak = (u32)new->u.rumble.weak_magnitude * gain / 0xffff; + + if (effect->u.rumble.strong_magnitude + strong) + effect->direction = ml_calculate_direction( + effect->direction, + effect->u.rumble.strong_magnitude, + new->direction, strong); + else if (effect->u.rumble.weak_magnitude + weak) + effect->direction = ml_calculate_direction( + effect->direction, + effect->u.rumble.weak_magnitude, + new->direction, weak); + else + effect->direction = 0; effect->u.rumble.strong_magnitude = min(strong + effect->u.rumble.strong_magnitude, 0xffffU); @@ -268,6 +297,13 @@ static void ml_combine_effects(struct ff_effect *effect, /* here we also scale it 0x7fff => 0xffff */ i = i * gain / 0x7fff; + if (effect->u.rumble.strong_magnitude + i) + effect->direction = 
ml_calculate_direction( + effect->direction, + effect->u.rumble.strong_magnitude, + new->direction, i); + else + effect->direction = 0; effect->u.rumble.strong_magnitude = min(i + effect->u.rumble.strong_magnitude, 0xffffU); effect->u.rumble.weak_magnitude = @@ -411,8 +447,6 @@ static int ml_ff_playback(struct input_dev *dev, int effect_id, int value) msecs_to_jiffies(state->effect->replay.length); state->adj_at = state->play_at; - ml_schedule_timer(ml); - } else { debug("initiated stop"); @@ -420,10 +454,10 @@ static int ml_ff_playback(struct input_dev *dev, int effect_id, int value) __set_bit(FF_EFFECT_ABORTING, &state->flags); else __clear_bit(FF_EFFECT_STARTED, &state->flags); - - ml_play_effects(ml); } + ml_play_effects(ml); + return 0; } diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index f6c688c..b1edd77 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -210,7 +210,7 @@ static int iforce_open(struct input_dev *dev) return 0; } -static void iforce_release(struct input_dev *dev) +static void iforce_close(struct input_dev *dev) { struct iforce *iforce = input_get_drvdata(dev); int i; @@ -228,30 +228,17 @@ static void iforce_release(struct input_dev *dev) /* Disable force feedback playback */ iforce_send_packet(iforce, FF_CMD_ENABLE, "\001"); + /* Wait for the command to complete */ + wait_event_interruptible(iforce->wait, + !test_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags)); } switch (iforce->bus) { #ifdef CONFIG_JOYSTICK_IFORCE_USB - case IFORCE_USB: - usb_kill_urb(iforce->irq); - - /* The device was unplugged before the file - * was released */ - if (iforce->usbdev == NULL) { - iforce_delete_device(iforce); - kfree(iforce); - } - break; -#endif - } -} - -void iforce_delete_device(struct iforce *iforce) -{ - switch (iforce->bus) { -#ifdef CONFIG_JOYSTICK_IFORCE_USB case IFORCE_USB: - iforce_usb_delete(iforce); + usb_kill_urb(iforce->irq); + 
usb_kill_urb(iforce->out); + usb_kill_urb(iforce->ctrl); break; #endif #ifdef CONFIG_JOYSTICK_IFORCE_232 @@ -303,7 +290,7 @@ int iforce_init_device(struct iforce *iforce) input_dev->name = "Unknown I-Force device"; input_dev->open = iforce_open; - input_dev->close = iforce_release; + input_dev->close = iforce_close; /* * On-device memory allocation. diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index 9f289d8..b41303d 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -109,6 +109,7 @@ static void iforce_usb_out(struct urb *urb) struct iforce *iforce = urb->context; if (urb->status) { + clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags); dbg("urb->status %d, exiting", urb->status); return; } @@ -186,33 +187,19 @@ fail: return err; } -/* Called by iforce_delete() */ -void iforce_usb_delete(struct iforce* iforce) -{ - usb_kill_urb(iforce->irq); - usb_kill_urb(iforce->out); - usb_kill_urb(iforce->ctrl); - - usb_free_urb(iforce->irq); - usb_free_urb(iforce->out); - usb_free_urb(iforce->ctrl); -} - static void iforce_usb_disconnect(struct usb_interface *intf) { struct iforce *iforce = usb_get_intfdata(intf); - int open = 0; /* FIXME! 
iforce->dev.handle->open; */ usb_set_intfdata(intf, NULL); - if (iforce) { - iforce->usbdev = NULL; - input_unregister_device(iforce->dev); - if (!open) { - iforce_delete_device(iforce); - kfree(iforce); - } - } + input_unregister_device(iforce->dev); + + usb_free_urb(iforce->irq); + usb_free_urb(iforce->out); + usb_free_urb(iforce->ctrl); + + kfree(iforce); } static struct usb_device_id iforce_usb_ids [] = { diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h index f2d91f4..9f494b7 100644 --- a/drivers/input/joystick/iforce/iforce.h +++ b/drivers/input/joystick/iforce/iforce.h @@ -150,11 +150,9 @@ void iforce_serial_xmit(struct iforce *iforce); /* iforce-usb.c */ void iforce_usb_xmit(struct iforce *iforce); -void iforce_usb_delete(struct iforce *iforce); /* iforce-main.c */ int iforce_init_device(struct iforce *iforce); -void iforce_delete_device(struct iforce *iforce); /* iforce-packets.c */ int iforce_control_playback(struct iforce*, u16 id, unsigned int); diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index a357357..1f5e2ce 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -134,7 +134,8 @@ static const unsigned short atkbd_unxlate_table[128] = { #define ATKBD_CMD_GETID 0x02f2 #define ATKBD_CMD_SETREP 0x10f3 #define ATKBD_CMD_ENABLE 0x00f4 -#define ATKBD_CMD_RESET_DIS 0x00f5 +#define ATKBD_CMD_RESET_DIS 0x00f5 /* Reset to defaults and disable */ +#define ATKBD_CMD_RESET_DEF 0x00f6 /* Reset to defaults */ #define ATKBD_CMD_SETALL_MBR 0x00fa #define ATKBD_CMD_RESET_BAT 0x02ff #define ATKBD_CMD_RESEND 0x00fe @@ -836,7 +837,7 @@ static void atkbd_cleanup(struct serio *serio) struct atkbd *atkbd = serio_get_drvdata(serio); atkbd_disable(atkbd); - ps2_command(&atkbd->ps2dev, NULL, ATKBD_CMD_RESET_BAT); + ps2_command(&atkbd->ps2dev, NULL, ATKBD_CMD_RESET_DEF); } diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 
34f4a29..d3c8b61 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -29,11 +29,13 @@ struct matrix_keypad { unsigned short *keycodes; unsigned int row_shift; + DECLARE_BITMAP(disabled_gpios, MATRIX_MAX_ROWS); + uint32_t last_key_state[MATRIX_MAX_COLS]; struct delayed_work work; + spinlock_t lock; bool scan_pending; bool stopped; - spinlock_t lock; }; /* @@ -222,9 +224,16 @@ static int matrix_keypad_suspend(struct device *dev) matrix_keypad_stop(keypad->input_dev); - if (device_may_wakeup(&pdev->dev)) - for (i = 0; i < pdata->num_row_gpios; i++) - enable_irq_wake(gpio_to_irq(pdata->row_gpios[i])); + if (device_may_wakeup(&pdev->dev)) { + for (i = 0; i < pdata->num_row_gpios; i++) { + if (!test_bit(i, keypad->disabled_gpios)) { + unsigned int gpio = pdata->row_gpios[i]; + + if (enable_irq_wake(gpio_to_irq(gpio)) == 0) + __set_bit(i, keypad->disabled_gpios); + } + } + } return 0; } @@ -236,9 +245,15 @@ static int matrix_keypad_resume(struct device *dev) const struct matrix_keypad_platform_data *pdata = keypad->pdata; int i; - if (device_may_wakeup(&pdev->dev)) - for (i = 0; i < pdata->num_row_gpios; i++) - disable_irq_wake(gpio_to_irq(pdata->row_gpios[i])); + if (device_may_wakeup(&pdev->dev)) { + for (i = 0; i < pdata->num_row_gpios; i++) { + if (test_and_clear_bit(i, keypad->disabled_gpios)) { + unsigned int gpio = pdata->row_gpios[i]; + + disable_irq_wake(gpio_to_irq(gpio)); + } + } + } matrix_keypad_start(keypad->input_dev); diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c index eeaa7ac..21d6184 100644 --- a/drivers/input/keyboard/twl4030_keypad.c +++ b/drivers/input/keyboard/twl4030_keypad.c @@ -253,14 +253,6 @@ static irqreturn_t do_kp_irq(int irq, void *_kp) u8 reg; int ret; -#ifdef CONFIG_LOCKDEP - /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which - * we don't want and can't tolerate. Although it might be - * friendlier not to borrow this thread context... 
- */ - local_irq_enable(); -#endif - /* Read & Clear TWL4030 pending interrupt */ ret = twl4030_kpread(kp, ®, KEYP_ISR1, 1); @@ -403,7 +395,8 @@ static int __devinit twl4030_kp_probe(struct platform_device *pdev) * * NOTE: we assume this host is wired to TWL4040 INT1, not INT2 ... */ - error = request_irq(kp->irq, do_kp_irq, 0, pdev->name, kp); + error = request_threaded_irq(kp->irq, NULL, do_kp_irq, + 0, pdev->name, kp); if (error) { dev_info(kp->dbg_dev, "request_irq failed for irq no=%d\n", kp->irq); diff --git a/drivers/input/misc/twl4030-pwrbutton.c b/drivers/input/misc/twl4030-pwrbutton.c index bdde5c8..e9069b8 100644 --- a/drivers/input/misc/twl4030-pwrbutton.c +++ b/drivers/input/misc/twl4030-pwrbutton.c @@ -39,18 +39,8 @@ static irqreturn_t powerbutton_irq(int irq, void *_pwr) int err; u8 value; -#ifdef CONFIG_LOCKDEP - /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which - * we don't want and can't tolerate since this is a threaded - * IRQ and can sleep due to the i2c reads it has to issue. - * Although it might be friendlier not to borrow this thread - * context... 
- */ - local_irq_enable(); -#endif - err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &value, - STS_HW_CONDITIONS); + STS_HW_CONDITIONS); if (!err) { input_report_key(pwr, KEY_POWER, value & PWR_PWRON_IRQ); input_sync(pwr); @@ -80,7 +70,7 @@ static int __devinit twl4030_pwrbutton_probe(struct platform_device *pdev) pwr->phys = "twl4030_pwrbutton/input0"; pwr->dev.parent = &pdev->dev; - err = request_irq(irq, powerbutton_irq, + err = request_threaded_irq(irq, NULL, powerbutton_irq, IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, "twl4030_pwrbutton", pwr); if (err < 0) { diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c index 38da6ab..c0afb71 100644 --- a/drivers/input/misc/wistron_btns.c +++ b/drivers/input/misc/wistron_btns.c @@ -1328,7 +1328,7 @@ static struct platform_driver wistron_driver = { .driver = { .name = "wistron-bios", .owner = THIS_MODULE, -#if CONFIG_PM +#ifdef CONFIG_PM .pm = &wistron_pm_ops, #endif }, diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 3feeb3a..c714ca2 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -70,7 +70,7 @@ config MOUSE_PS2_SYNAPTICS config MOUSE_PS2_LIFEBOOK bool "Fujitsu Lifebook PS/2 mouse protocol extension" if EMBEDDED default y - depends on MOUSE_PS2 && X86 + depends on MOUSE_PS2 && X86 && DMI help Say Y here if you have a Fujitsu B-series Lifebook PS/2 TouchScreen connected to your system. 
diff --git a/drivers/input/mouse/hgpk.c b/drivers/input/mouse/hgpk.c index b146237..90be30e 100644 --- a/drivers/input/mouse/hgpk.c +++ b/drivers/input/mouse/hgpk.c @@ -427,7 +427,6 @@ static void hgpk_recalib_work(struct work_struct *work) static int hgpk_register(struct psmouse *psmouse) { - struct input_dev *dev = psmouse->dev; int err; /* register handlers */ diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c index 2e6bdfe..6d7aa10 100644 --- a/drivers/input/mouse/lifebook.c +++ b/drivers/input/mouse/lifebook.c @@ -44,7 +44,6 @@ static int lifebook_set_6byte_proto(const struct dmi_system_id *d) } static const struct dmi_system_id __initconst lifebook_dmi_table[] = { -#if defined(CONFIG_DMI) && defined(CONFIG_X86) { /* FLORA-ie 55mi */ .matches = { @@ -118,7 +117,6 @@ static const struct dmi_system_id __initconst lifebook_dmi_table[] = { }, }, { } -#endif }; void __init lifebook_module_init(void) diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index fd0bc09..401ac6b 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1137,7 +1137,10 @@ static void psmouse_cleanup(struct serio *serio) if (psmouse->cleanup) psmouse->cleanup(psmouse); - psmouse_reset(psmouse); +/* + * Reset the mouse to defaults (bare PS/2 protocol). + */ + ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); /* * Some boxes, such as HP nx7400, get terribly confused if mouse diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 0236f0d..e0f3018 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -284,13 +284,7 @@ static void serio_handle_event(void) mutex_lock(&serio_mutex); - /* - * Note that we handle only one event here to give swsusp - * a chance to freeze kseriod thread. Serio events should - * be pretty rare so we are not concerned about taking - * performance hit. 
- */ - if ((event = serio_get_event())) { + while ((event = serio_get_event())) { switch (event->type) { case SERIO_REGISTER_PORT: @@ -380,10 +374,9 @@ static struct serio *serio_get_pending_child(struct serio *parent) static int serio_thread(void *nothing) { - set_freezable(); do { serio_handle_event(); - wait_event_freezable(serio_wait, + wait_event_interruptible(serio_wait, kthread_should_stop() || !list_empty(&serio_event_list)); } while (!kthread_should_stop()); diff --git a/drivers/md/md.c b/drivers/md/md.c index f4f5f82..dd3dfe4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -386,7 +386,9 @@ static void mddev_put(mddev_t *mddev) if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; if (!mddev->raid_disks && list_empty(&mddev->disks) && - !mddev->hold_active) { + mddev->ctime == 0 && !mddev->hold_active) { + /* Array is not configured at all, and not held active, + * so destroy it */ list_del(&mddev->all_mddevs); if (mddev->gendisk) { /* we did a probe so need to clean up. @@ -4355,7 +4357,7 @@ static int do_md_run(mddev_t * mddev) mddev->barriers_work = 1; mddev->ok_start_degraded = start_dirty_degraded; - if (start_readonly) + if (start_readonly && mddev->ro == 0) mddev->ro = 2; /* read-only, but switch on first write */ err = mddev->pers->run(mddev); @@ -4419,33 +4421,6 @@ static int do_md_run(mddev_t * mddev) set_capacity(disk, mddev->array_sectors); - /* If there is a partially-recovered drive we need to - * start recovery here. 
If we leave it to md_check_recovery, - * it will remove the drives and not do the right thing - */ - if (mddev->degraded && !mddev->sync_thread) { - int spares = 0; - list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk >= 0 && - !test_bit(In_sync, &rdev->flags) && - !test_bit(Faulty, &rdev->flags)) - /* complete an interrupted recovery */ - spares++; - if (spares && mddev->pers->sync_request) { - mddev->recovery = 0; - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - mddev->sync_thread = md_register_thread(md_do_sync, - mddev, - "resync"); - if (!mddev->sync_thread) { - printk(KERN_ERR "%s: could not start resync" - " thread...\n", - mdname(mddev)); - /* leave the spares where they are, it shouldn't hurt */ - mddev->recovery = 0; - } - } - } md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ @@ -5262,6 +5237,10 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->minor_version = info->minor_version; mddev->patch_version = info->patch_version; mddev->persistent = !info->not_persistent; + /* ensure mddev_put doesn't delete this now that there + * is some minimal configuration. 
+ */ + mddev->ctime = get_seconds(); return 0; } mddev->major_version = MD_MAJOR_VERSION; @@ -6494,10 +6473,11 @@ void md_do_sync(mddev_t *mddev) mddev->curr_resync = 2; try_again: - if (kthread_should_stop()) { + if (kthread_should_stop()) set_bit(MD_RECOVERY_INTR, &mddev->recovery); + + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) goto skip; - } for_each_mddev(mddev2, tmp) { if (mddev2 == mddev) continue; diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index fe44789..6223bf0 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -202,14 +202,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, unsigned long flags; int su; - if ((tcode != TCODE_WRITE_QUADLET_REQUEST && - tcode != TCODE_WRITE_BLOCK_REQUEST) || - offset != CSR_REGISTER_BASE + CSR_FCP_RESPONSE || - length == 0 || - (((u8 *)payload)[0] & 0xf0) != 0) { - fw_send_response(card, request, RCODE_TYPE_ERROR); + if (length < 2 || (((u8 *)payload)[0] & 0xf0) != 0) return; - } su = ((u8 *)payload)[1] & 0x7; @@ -230,10 +224,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, } spin_unlock_irqrestore(&node_list_lock, flags); - if (fdtv) { + if (fdtv) avc_recv(fdtv, payload, length); - fw_send_response(card, request, RCODE_COMPLETE); - } } static struct fw_address_handler fcp_handler = { diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 20d29ba..9df9a5a 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -568,12 +568,12 @@ static void twl4030_sih_do_edge(struct work_struct *work) bytes[byte] &= ~(0x03 << off); - spin_lock_irq(&d->lock); + raw_spin_lock_irq(&d->lock); if (d->status & IRQ_TYPE_EDGE_RISING) bytes[byte] |= BIT(off + 1); if (d->status & IRQ_TYPE_EDGE_FALLING) bytes[byte] |= BIT(off + 0); - spin_unlock_irq(&d->lock); + raw_spin_unlock_irq(&d->lock); edge_change &= ~BIT(i); } diff --git a/drivers/net/3c507.c 
b/drivers/net/3c507.c index fbc2311..77cf090 100644 --- a/drivers/net/3c507.c +++ b/drivers/net/3c507.c @@ -56,6 +56,7 @@ static const char version[] = #include #include #include +#include #include #include #include @@ -734,8 +735,7 @@ static void init_82586_mem(struct net_device *dev) memcpy_toio(lp->base, init_words + 5, sizeof(init_words) - 10); /* Fill in the station address. */ - memcpy_toio(lp->base+SA_OFFSET, dev->dev_addr, - sizeof(dev->dev_addr)); + memcpy_toio(lp->base+SA_OFFSET, dev->dev_addr, ETH_ALEN); /* The Tx-block list is written as needed. We just set up the values. */ lp->tx_cmd_link = IDLELOOP + 4; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index e58a653..dd9a09c 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2346,6 +2346,7 @@ config GELIC_NET config GELIC_WIRELESS bool "PS3 Wireless support" + depends on WLAN depends on GELIC_NET select WIRELESS_EXT help @@ -2358,6 +2359,7 @@ config GELIC_WIRELESS config GELIC_WIRELESS_OLD_PSK_INTERFACE bool "PS3 Wireless private PSK interface (OBSOLETE)" depends on GELIC_WIRELESS + select WEXT_PRIV help This option retains the obsolete private interface to pass the PSK from user space programs to the driver. 
The PSK diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h index 9e56014..9fd8e5e 100644 --- a/drivers/net/benet/be.h +++ b/drivers/net/benet/be.h @@ -275,6 +275,7 @@ struct be_adapter { u32 tx_fc; /* Tx flow control */ int link_speed; u8 port_type; + u8 transceiver; }; extern const struct ethtool_ops be_ethtool_ops; diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c index 1b68bd9..102ade1 100644 --- a/drivers/net/benet/be_cmds.c +++ b/drivers/net/benet/be_cmds.c @@ -1479,6 +1479,41 @@ err: return status; } +int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, + u8 loopback_type, u8 enable) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_req_set_lmode *req; + int status; + + spin_lock_bh(&adapter->mcc_lock); + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = embedded_payload(wrb); + + be_wrb_hdr_prepare(wrb, sizeof(*req), true, 0, + OPCODE_LOWLEVEL_SET_LOOPBACK_MODE); + + be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_LOWLEVEL, + OPCODE_LOWLEVEL_SET_LOOPBACK_MODE, + sizeof(*req)); + + req->src_port = port_num; + req->dest_port = port_num; + req->loopback_type = loopback_type; + req->loopback_state = enable; + + status = be_mcc_notify_wait(adapter); +err: + spin_unlock_bh(&adapter->mcc_lock); + return status; +} + int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, u32 loopback_type, u32 pkt_size, u32 num_pkts, u64 pattern) { @@ -1501,6 +1536,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_LOWLEVEL, OPCODE_LOWLEVEL_LOOPBACK_TEST, sizeof(*req)); + req->hdr.timeout = 4; req->pattern = cpu_to_le64(pattern); req->src_port = cpu_to_le32(port_num); diff --git a/drivers/net/benet/be_cmds.h b/drivers/net/benet/be_cmds.h index 92b87ef..c002b83 100644 --- a/drivers/net/benet/be_cmds.h +++ b/drivers/net/benet/be_cmds.h @@ -155,6 +155,7 @@ struct be_mcc_mailbox { #define OPCODE_LOWLEVEL_HOST_DDR_DMA 17 #define 
OPCODE_LOWLEVEL_LOOPBACK_TEST 18 +#define OPCODE_LOWLEVEL_SET_LOOPBACK_MODE 19 struct be_cmd_req_hdr { u8 opcode; /* dword 0 */ @@ -821,6 +822,19 @@ struct be_cmd_resp_loopback_test { u32 ticks_compl; }; +struct be_cmd_req_set_lmode { + struct be_cmd_req_hdr hdr; + u8 src_port; + u8 dest_port; + u8 loopback_type; + u8 loopback_state; +}; + +struct be_cmd_resp_set_lmode { + struct be_cmd_resp_hdr resp_hdr; + u8 rsvd0[4]; +}; + /********************** DDR DMA test *********************/ struct be_cmd_req_ddrdma_test { struct be_cmd_req_hdr hdr; @@ -912,3 +926,5 @@ extern int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, u32 num_pkts, u64 pattern); extern int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, u32 byte_cnt, struct be_dma_mem *cmd); +extern int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, + u8 loopback_type, u8 enable); diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c index 298b92c..5d001c4 100644 --- a/drivers/net/benet/be_ethtool.c +++ b/drivers/net/benet/be_ethtool.c @@ -118,6 +118,7 @@ static const char et_self_tests[][ETH_GSTRING_LEN] = { #define BE_MAC_LOOPBACK 0x0 #define BE_PHY_LOOPBACK 0x1 #define BE_ONE_PORT_EXT_LOOPBACK 0x2 +#define BE_NO_LOOPBACK 0xff static void be_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) @@ -339,28 +340,50 @@ static int be_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) status = be_cmd_read_port_type(adapter, adapter->port_num, &connector); - switch (connector) { - case 7: - ecmd->port = PORT_FIBRE; - break; - default: - ecmd->port = PORT_TP; - break; + if (!status) { + switch (connector) { + case 7: + ecmd->port = PORT_FIBRE; + ecmd->transceiver = XCVR_EXTERNAL; + break; + case 0: + ecmd->port = PORT_TP; + ecmd->transceiver = XCVR_EXTERNAL; + break; + default: + ecmd->port = PORT_TP; + ecmd->transceiver = XCVR_INTERNAL; + break; + } + } else { + ecmd->port = PORT_AUI; + ecmd->transceiver = XCVR_INTERNAL; 
} /* Save for future use */ adapter->link_speed = ecmd->speed; adapter->port_type = ecmd->port; + adapter->transceiver = ecmd->transceiver; } else { ecmd->speed = adapter->link_speed; ecmd->port = adapter->port_type; + ecmd->transceiver = adapter->transceiver; } ecmd->duplex = DUPLEX_FULL; ecmd->autoneg = AUTONEG_DISABLE; - ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_TP); ecmd->phy_address = adapter->port_num; - ecmd->transceiver = XCVR_INTERNAL; + switch (ecmd->port) { + case PORT_FIBRE: + ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); + break; + case PORT_TP: + ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_TP); + break; + case PORT_AUI: + ecmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_AUI); + break; + } return 0; } @@ -489,6 +512,19 @@ err: return ret; } +static u64 be_loopback_test(struct be_adapter *adapter, u8 loopback_type, + u64 *status) +{ + be_cmd_set_loopback(adapter, adapter->port_num, + loopback_type, 1); + *status = be_cmd_loopback_test(adapter, adapter->port_num, + loopback_type, 1500, + 2, 0xabc); + be_cmd_set_loopback(adapter, adapter->port_num, + BE_NO_LOOPBACK, 1); + return *status; +} + static void be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data) { @@ -497,23 +533,18 @@ be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data) memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM); if (test->flags & ETH_TEST_FL_OFFLINE) { - data[0] = be_cmd_loopback_test(adapter, adapter->port_num, - BE_MAC_LOOPBACK, 1500, - 2, 0xabc); - if (data[0] != 0) + if (be_loopback_test(adapter, BE_MAC_LOOPBACK, + &data[0]) != 0) { test->flags |= ETH_TEST_FL_FAILED; - - data[1] = be_cmd_loopback_test(adapter, adapter->port_num, - BE_PHY_LOOPBACK, 1500, - 2, 0xabc); - if (data[1] != 0) + } + if (be_loopback_test(adapter, BE_PHY_LOOPBACK, + &data[1]) != 0) { test->flags |= ETH_TEST_FL_FAILED; - - data[2] = be_cmd_loopback_test(adapter, adapter->port_num, - 
BE_ONE_PORT_EXT_LOOPBACK, - 1500, 2, 0xabc); - if (data[2] != 0) + } + if (be_loopback_test(adapter, BE_ONE_PORT_EXT_LOOPBACK, + &data[2]) != 0) { test->flags |= ETH_TEST_FL_FAILED; + } data[3] = be_test_ddr_dma(adapter); if (data[3] != 0) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 77ba135..306c2b8 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -7593,6 +7593,8 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode) if (bp->cnic_eth_dev.drv_state & CNIC_DRV_STATE_REGD) { bnx2x_set_iscsi_eth_mac_addr(bp, 1); bp->cnic_flags |= BNX2X_CNIC_FLAG_MAC_SET; + bnx2x_init_sb(bp, bp->cnic_sb, bp->cnic_sb_mapping, + CNIC_SB_ID(bp)); } mutex_unlock(&bp->cnic_mutex); #endif diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 0fb7a49..822f586 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1580,7 +1580,7 @@ static void ad_agg_selection_logic(struct aggregator *agg) // check if any partner replys if (best->is_individual) { pr_warning("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", - best->slave->dev->master->name); + best->slave ? 
best->slave->dev->master->name : "NULL"); } best->is_active = 1; diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index e0620d0..8bd3c9f 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -143,7 +143,6 @@ void gfar_start(struct net_device *dev); static void gfar_clear_exact_match(struct net_device *dev); static void gfar_set_mac_for_addr(struct net_device *dev, int num, u8 *addr); static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -u16 gfar_select_queue(struct net_device *dev, struct sk_buff *skb); MODULE_AUTHOR("Freescale Semiconductor, Inc"); MODULE_DESCRIPTION("Gianfar Ethernet Driver"); @@ -455,7 +454,6 @@ static const struct net_device_ops gfar_netdev_ops = { .ndo_set_multicast_list = gfar_set_multi, .ndo_tx_timeout = gfar_timeout, .ndo_do_ioctl = gfar_ioctl, - .ndo_select_queue = gfar_select_queue, .ndo_get_stats = gfar_get_stats, .ndo_vlan_rx_register = gfar_vlan_rx_register, .ndo_set_mac_address = eth_mac_addr, @@ -506,10 +504,6 @@ static inline int gfar_uses_fcb(struct gfar_private *priv) return priv->vlgrp || priv->rx_csum_enable; } -u16 gfar_select_queue(struct net_device *dev, struct sk_buff *skb) -{ - return skb_get_queue_mapping(skb); -} static void free_tx_pointers(struct gfar_private *priv) { int i = 0; @@ -2470,10 +2464,11 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb, fcb = (struct rxfcb *)skb->data; /* Remove the FCB from the skb */ - skb_set_queue_mapping(skb, fcb->rq); /* Remove the padded bytes, if there are any */ - if (amount_pull) + if (amount_pull) { + skb_record_rx_queue(skb, fcb->rq); skb_pull(skb, amount_pull); + } if (priv->rx_csum_enable) gfar_rx_checksum(skb, fcb); @@ -2554,7 +2549,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) /* Remove the FCS from the packet length */ skb_put(skb, pkt_len); rx_queue->stats.rx_bytes += pkt_len; - + skb_record_rx_queue(skb, rx_queue->qindex); gfar_process_frame(dev, skb, amount_pull); } 
else { diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c index 090a6d3..052c740 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -87,6 +87,7 @@ History: #include #include #include +#include #include #include @@ -988,7 +989,7 @@ static int __devinit ibmlana_init_one(struct device *kdev) /* copy out MAC address */ - for (z = 0; z < sizeof(dev->dev_addr); z++) + for (z = 0; z < ETH_ALEN; z++) dev->dev_addr[z] = inb(dev->base_addr + MACADDRPROM + z); /* print config */ diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c index e8e9e91..c505b50 100644 --- a/drivers/net/igb/e1000_82575.c +++ b/drivers/net/igb/e1000_82575.c @@ -1096,9 +1096,7 @@ static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) hw_dbg("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); } else { /* Set PCS register for forced link */ - reg |= E1000_PCS_LCTL_FSD | /* Force Speed */ - E1000_PCS_LCTL_FORCE_LINK | /* Force Link */ - E1000_PCS_LCTL_FLV_LINK_UP; /* Force link value up */ + reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ hw_dbg("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); } diff --git a/drivers/net/igb/e1000_phy.c b/drivers/net/igb/e1000_phy.c index 5c9d73e..3670a66 100644 --- a/drivers/net/igb/e1000_phy.c +++ b/drivers/net/igb/e1000_phy.c @@ -457,15 +457,6 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw) phy_data |= I82580_CFG_ENABLE_DOWNSHIFT; ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data); - if (ret_val) - goto out; - - /* Set number of link attempts before downshift */ - ret_val = phy->ops.read_reg(hw, I82580_CTRL_REG, &phy_data); - if (ret_val) - goto out; - phy_data &= ~I82580_CTRL_DOWNSHIFT_MASK; - ret_val = phy->ops.write_reg(hw, I82580_CTRL_REG, phy_data); out: return ret_val; diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c index ac9d527..f771a6c 100644 --- a/drivers/net/igb/igb_ethtool.c +++ b/drivers/net/igb/igb_ethtool.c @@ -1795,7 +1795,7 @@ static int igb_wol_exclusion(struct 
igb_adapter *adapter, /* dual port cards only support WoL on port A from now on * unless it was enabled in the eeprom for port B * so exclude FUNC_1 ports from having WoL enabled */ - if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1 && + if ((rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) && !adapter->eeprom_wol) { wol->supported = 0; break; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 78963a0..933c64f 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -1306,13 +1306,8 @@ void igb_reset(struct igb_adapter *adapter) hwm = min(((pba << 10) * 9 / 10), ((pba << 10) - 2 * adapter->max_frame_size)); - if (mac->type < e1000_82576) { - fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ - fc->low_water = fc->high_water - 8; - } else { - fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ - fc->low_water = fc->high_water - 16; - } + fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; fc->pause_time = 0xFFFF; fc->send_xon = 1; fc->current_mode = fc->requested_mode; diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index e9dd95f..0dbd032 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -2763,7 +2763,8 @@ static int __devinit igbvf_probe(struct pci_dev *pdev, err = hw->mac.ops.reset_hw(hw); if (err) { dev_info(&pdev->dev, - "PF still in reset state, assigning new address\n"); + "PF still in reset state, assigning new address." + " Is the PF interface up?\n"); random_ether_addr(hw->mac.addr); } else { err = hw->mac.ops.read_mac_addr(hw); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index bd64387..1a2ea62 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4373,6 +4373,11 @@ static int ixgbe_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); + /* + * pci_restore_state clears dev->state_saved so call + * pci_save_state to restore it. 
+ */ + pci_save_state(pdev); err = pci_enable_device_mem(pdev); if (err) { diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index dcc67a3..e154677 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -45,6 +45,7 @@ static const char *const version = #include #include #include +#include #include #include #include @@ -1765,7 +1766,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ if (!is_valid_ether_addr(dev->perm_addr)) - memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); + memset(dev->dev_addr, 0, ETH_ALEN); if (pcnet32_debug & NETIF_MSG_PROBE) { printk(" %pM", dev->dev_addr); diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index f983e3b..103e8b0 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -741,14 +741,14 @@ static int efx_probe_port(struct efx_nic *efx) EFX_LOG(efx, "create port\n"); + if (phy_flash_cfg) + efx->phy_mode = PHY_MODE_SPECIAL; + /* Connect up MAC/PHY operations table */ rc = efx->type->probe_port(efx); if (rc) goto err; - if (phy_flash_cfg) - efx->phy_mode = PHY_MODE_SPECIAL; - /* Sanity check MAC address */ if (is_valid_ether_addr(efx->mac_address)) { memcpy(efx->net_dev->dev_addr, efx->mac_address, ETH_ALEN); diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c index 17afcd2..9d009c4 100644 --- a/drivers/net/sfc/falcon.c +++ b/drivers/net/sfc/falcon.c @@ -925,6 +925,7 @@ static int falcon_probe_port(struct efx_nic *efx) static void falcon_remove_port(struct efx_nic *efx) { + efx->phy_op->remove(efx); efx_nic_free_buffer(efx, &efx->stats_buffer); } diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c index 3da933f..8ccab2c 100644 --- a/drivers/net/sfc/falcon_xmac.c +++ b/drivers/net/sfc/falcon_xmac.c @@ -111,16 +111,12 @@ static void falcon_mask_status_intr(struct efx_nic *efx, bool enable) efx_writeo(efx, &reg, FR_AB_XM_MGT_INT_MASK); } -/* Get status of XAUI link */
-static bool falcon_xaui_link_ok(struct efx_nic *efx) +static bool falcon_xgxs_link_ok(struct efx_nic *efx) { efx_oword_t reg; bool align_done, link_ok = false; int sync_status; - if (LOOPBACK_INTERNAL(efx)) - return true; - /* Read link status */ efx_reado(efx, &reg, FR_AB_XX_CORE_STAT); @@ -135,14 +131,24 @@ static bool falcon_xaui_link_ok(struct efx_nic *efx) EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_DISPERR, FFE_AB_XX_STAT_ALL_LANES); efx_writeo(efx, &reg, FR_AB_XX_CORE_STAT); - /* If the link is up, then check the phy side of the xaui link */ - if (efx->link_state.up && link_ok) - if (efx->mdio.mmds & (1 << MDIO_MMD_PHYXS)) - link_ok = efx_mdio_phyxgxs_lane_sync(efx); - return link_ok; } +static bool falcon_xmac_link_ok(struct efx_nic *efx) +{ + /* + * Check MAC's XGXS link status except when using XGMII loopback + * which bypasses the XGXS block. + * If possible, check PHY's XGXS link status except when using + * MAC loopback. + */ + return (efx->loopback_mode == LOOPBACK_XGMII || + falcon_xgxs_link_ok(efx)) && + (!(efx->mdio.mmds & (1 << MDIO_MMD_PHYXS)) || + LOOPBACK_INTERNAL(efx) || + efx_mdio_phyxgxs_lane_sync(efx)); +} + void falcon_reconfigure_xmac_core(struct efx_nic *efx) { unsigned int max_frame_len; @@ -245,9 +251,9 @@ static void falcon_reconfigure_xgxs_core(struct efx_nic *efx) /* Try to bring up the Falcon side of the Falcon-Phy XAUI link */ -static bool falcon_check_xaui_link_up(struct efx_nic *efx, int tries) +static bool falcon_xmac_link_ok_retry(struct efx_nic *efx, int tries) { - bool mac_up = falcon_xaui_link_ok(efx); + bool mac_up = falcon_xmac_link_ok(efx); if (LOOPBACK_MASK(efx) & LOOPBACKS_EXTERNAL(efx) & LOOPBACKS_WS || efx_phy_mode_disabled(efx->phy_mode)) @@ -261,7 +267,7 @@ static bool falcon_check_xaui_link_up(struct efx_nic *efx, int tries) falcon_reset_xaui(efx); udelay(200); - mac_up = falcon_xaui_link_ok(efx); + mac_up = falcon_xmac_link_ok(efx); --tries; } @@ -272,7 +278,7 @@ static bool falcon_check_xaui_link_up(struct efx_nic *efx, int 
tries) static bool falcon_xmac_check_fault(struct efx_nic *efx) { - return !falcon_check_xaui_link_up(efx, 5); + return !falcon_xmac_link_ok_retry(efx, 5); } static int falcon_reconfigure_xmac(struct efx_nic *efx) @@ -284,7 +290,7 @@ static int falcon_reconfigure_xmac(struct efx_nic *efx) falcon_reconfigure_mac_wrapper(efx); - efx->xmac_poll_required = !falcon_check_xaui_link_up(efx, 5); + efx->xmac_poll_required = !falcon_xmac_link_ok_retry(efx, 5); falcon_mask_status_intr(efx, true); return 0; @@ -357,7 +363,7 @@ void falcon_poll_xmac(struct efx_nic *efx) return; falcon_mask_status_intr(efx, false); - efx->xmac_poll_required = !falcon_check_xaui_link_up(efx, 1); + efx->xmac_poll_required = !falcon_xmac_link_ok_retry(efx, 1); falcon_mask_status_intr(efx, true); } diff --git a/drivers/net/sfc/mcdi_phy.c b/drivers/net/sfc/mcdi_phy.c index 0e1bcc5..eb694af 100644 --- a/drivers/net/sfc/mcdi_phy.c +++ b/drivers/net/sfc/mcdi_phy.c @@ -304,31 +304,47 @@ static u32 mcdi_to_ethtool_media(u32 media) static int efx_mcdi_phy_probe(struct efx_nic *efx) { - struct efx_mcdi_phy_cfg *phy_cfg; + struct efx_mcdi_phy_cfg *phy_data; + u8 outbuf[MC_CMD_GET_LINK_OUT_LEN]; + u32 caps; int rc; - /* TODO: Move phy_data initialisation to - * phy_op->probe/remove, rather than init/fini */ - phy_cfg = kzalloc(sizeof(*phy_cfg), GFP_KERNEL); - if (phy_cfg == NULL) { - rc = -ENOMEM; - goto fail_alloc; - } - rc = efx_mcdi_get_phy_cfg(efx, phy_cfg); + /* Initialise and populate phy_data */ + phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); + if (phy_data == NULL) + return -ENOMEM; + + rc = efx_mcdi_get_phy_cfg(efx, phy_data); if (rc != 0) goto fail; - efx->phy_type = phy_cfg->type; + /* Read initial link advertisement */ + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + goto fail; + + /* Fill out nic state */ + efx->phy_data = phy_data; + efx->phy_type = phy_data->type; - efx->mdio_bus = phy_cfg->channel; 
- efx->mdio.prtad = phy_cfg->port; - efx->mdio.mmds = phy_cfg->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22); + efx->mdio_bus = phy_data->channel; + efx->mdio.prtad = phy_data->port; + efx->mdio.mmds = phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22); efx->mdio.mode_support = 0; - if (phy_cfg->mmd_mask & (1 << MC_CMD_MMD_CLAUSE22)) + if (phy_data->mmd_mask & (1 << MC_CMD_MMD_CLAUSE22)) efx->mdio.mode_support |= MDIO_SUPPORTS_C22; - if (phy_cfg->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22)) + if (phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22)) efx->mdio.mode_support |= MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; + caps = MCDI_DWORD(outbuf, GET_LINK_OUT_CAP); + if (caps & (1 << MC_CMD_PHY_CAP_AN_LBN)) + efx->link_advertising = + mcdi_to_ethtool_cap(phy_data->media, caps); + else + phy_data->forced_cap = caps; + /* Assert that we can map efx -> mcdi loopback modes */ BUILD_BUG_ON(LOOPBACK_NONE != MC_CMD_LOOPBACK_NONE); BUILD_BUG_ON(LOOPBACK_DATA != MC_CMD_LOOPBACK_DATA); @@ -365,46 +381,6 @@ static int efx_mcdi_phy_probe(struct efx_nic *efx) * but by convention we don't */ efx->loopback_modes &= ~(1 << LOOPBACK_NONE); - kfree(phy_cfg); - - return 0; - -fail: - kfree(phy_cfg); -fail_alloc: - return rc; -} - -static int efx_mcdi_phy_init(struct efx_nic *efx) -{ - struct efx_mcdi_phy_cfg *phy_data; - u8 outbuf[MC_CMD_GET_LINK_OUT_LEN]; - u32 caps; - int rc; - - phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); - if (phy_data == NULL) - return -ENOMEM; - - rc = efx_mcdi_get_phy_cfg(efx, phy_data); - if (rc != 0) - goto fail; - - efx->phy_data = phy_data; - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - goto fail; - - caps = MCDI_DWORD(outbuf, GET_LINK_OUT_CAP); - if (caps & (1 << MC_CMD_PHY_CAP_AN_LBN)) - efx->link_advertising = - mcdi_to_ethtool_cap(phy_data->media, caps); - else - phy_data->forced_cap = caps; - return 0; fail: @@ -504,7 +480,7 @@ static bool efx_mcdi_phy_poll(struct efx_nic 
*efx) return !efx_link_state_equal(&efx->link_state, &old_state); } -static void efx_mcdi_phy_fini(struct efx_nic *efx) +static void efx_mcdi_phy_remove(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data = efx->phy_data; @@ -586,10 +562,11 @@ static int efx_mcdi_phy_set_settings(struct efx_nic *efx, struct ethtool_cmd *ec struct efx_phy_operations efx_mcdi_phy_ops = { .probe = efx_mcdi_phy_probe, - .init = efx_mcdi_phy_init, + .init = efx_port_dummy_op_int, .reconfigure = efx_mcdi_phy_reconfigure, .poll = efx_mcdi_phy_poll, - .fini = efx_mcdi_phy_fini, + .fini = efx_port_dummy_op_void, + .remove = efx_mcdi_phy_remove, .get_settings = efx_mcdi_phy_get_settings, .set_settings = efx_mcdi_phy_set_settings, .run_tests = NULL, diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 34c381f..d5aab5b 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -524,6 +524,7 @@ struct efx_phy_operations { int (*probe) (struct efx_nic *efx); int (*init) (struct efx_nic *efx); void (*fini) (struct efx_nic *efx); + void (*remove) (struct efx_nic *efx); int (*reconfigure) (struct efx_nic *efx); bool (*poll) (struct efx_nic *efx); void (*get_settings) (struct efx_nic *efx, diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c index a577be2..db44224 100644 --- a/drivers/net/sfc/nic.c +++ b/drivers/net/sfc/nic.c @@ -1576,6 +1576,8 @@ void efx_nic_init_common(struct efx_nic *efx) EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_SOFT_EVT_EN, 1); /* Prefetch threshold 2 => fetch when descriptor cache half empty */ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_THRESHOLD, 2); + /* Disable hardware watchdog which can misfire */ + EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_WD_TMR, 0x3fffff); /* Squash TX of packets of 16 bytes or less */ if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1); diff --git a/drivers/net/sfc/qt202x_phy.c b/drivers/net/sfc/qt202x_phy.c index 3800fc7..ff8f0a4 100644 --- 
a/drivers/net/sfc/qt202x_phy.c +++ b/drivers/net/sfc/qt202x_phy.c @@ -33,6 +33,9 @@ #define PCS_FW_HEARTBEAT_REG 0xd7ee #define PCS_FW_HEARTB_LBN 0 #define PCS_FW_HEARTB_WIDTH 8 +#define PCS_FW_PRODUCT_CODE_1 0xd7f0 +#define PCS_FW_VERSION_1 0xd7f3 +#define PCS_FW_BUILD_1 0xd7f6 #define PCS_UC8051_STATUS_REG 0xd7fd #define PCS_UC_STATUS_LBN 0 #define PCS_UC_STATUS_WIDTH 8 @@ -52,14 +55,24 @@ void falcon_qt202x_set_led(struct efx_nic *p, int led, int mode) struct qt202x_phy_data { enum efx_phy_mode phy_mode; + bool bug17190_in_bad_state; + unsigned long bug17190_timer; + u32 firmware_ver; }; #define QT2022C2_MAX_RESET_TIME 500 #define QT2022C2_RESET_WAIT 10 -static int qt2025c_wait_reset(struct efx_nic *efx) +#define QT2025C_MAX_HEARTB_TIME (5 * HZ) +#define QT2025C_HEARTB_WAIT 100 +#define QT2025C_MAX_FWSTART_TIME (25 * HZ / 10) +#define QT2025C_FWSTART_WAIT 100 + +#define BUG17190_INTERVAL (2 * HZ) + +static int qt2025c_wait_heartbeat(struct efx_nic *efx) { - unsigned long timeout = jiffies + 10 * HZ; + unsigned long timeout = jiffies + QT2025C_MAX_HEARTB_TIME; int reg, old_counter = 0; /* Wait for firmware heartbeat to start */ @@ -74,11 +87,25 @@ static int qt2025c_wait_reset(struct efx_nic *efx) old_counter = counter; else if (counter != old_counter) break; - if (time_after(jiffies, timeout)) + if (time_after(jiffies, timeout)) { + /* Some cables have EEPROMs that conflict with the + * PHY's on-board EEPROM so it cannot load firmware */ + EFX_ERR(efx, "If an SFP+ direct attach cable is" + " connected, please check that it complies" + " with the SFP+ specification\n"); return -ETIMEDOUT; - msleep(10); + } + msleep(QT2025C_HEARTB_WAIT); } + return 0; +} + +static int qt2025c_wait_fw_status_good(struct efx_nic *efx) +{ + unsigned long timeout = jiffies + QT2025C_MAX_FWSTART_TIME; + int reg; + /* Wait for firmware status to look good */ for (;;) { reg = efx_mdio_read(efx, MDIO_MMD_PCS, PCS_UC8051_STATUS_REG); @@ -90,7 +117,178 @@ static int 
qt2025c_wait_reset(struct efx_nic *efx) break; if (time_after(jiffies, timeout)) return -ETIMEDOUT; + msleep(QT2025C_FWSTART_WAIT); + } + + return 0; +} + +static void qt2025c_restart_firmware(struct efx_nic *efx) +{ + /* Restart microcontroller execution of firmware from RAM */ + efx_mdio_write(efx, 3, 0xe854, 0x00c0); + efx_mdio_write(efx, 3, 0xe854, 0x0040); + msleep(50); +} + +static int qt2025c_wait_reset(struct efx_nic *efx) +{ + int rc; + + rc = qt2025c_wait_heartbeat(efx); + if (rc != 0) + return rc; + + rc = qt2025c_wait_fw_status_good(efx); + if (rc == -ETIMEDOUT) { + /* Bug 17689: occasionally heartbeat starts but firmware status + * code never progresses beyond 0x00. Try again, once, after + * restarting execution of the firmware image. */ + EFX_LOG(efx, "bashing QT2025C microcontroller\n"); + qt2025c_restart_firmware(efx); + rc = qt2025c_wait_heartbeat(efx); + if (rc != 0) + return rc; + rc = qt2025c_wait_fw_status_good(efx); + } + + return rc; +} + +static void qt2025c_firmware_id(struct efx_nic *efx) +{ + struct qt202x_phy_data *phy_data = efx->phy_data; + u8 firmware_id[9]; + size_t i; + + for (i = 0; i < sizeof(firmware_id); i++) + firmware_id[i] = efx_mdio_read(efx, MDIO_MMD_PCS, + PCS_FW_PRODUCT_CODE_1 + i); + EFX_INFO(efx, "QT2025C firmware %xr%d v%d.%d.%d.%d [20%02d-%02d-%02d]\n", + (firmware_id[0] << 8) | firmware_id[1], firmware_id[2], + firmware_id[3] >> 4, firmware_id[3] & 0xf, + firmware_id[4], firmware_id[5], + firmware_id[6], firmware_id[7], firmware_id[8]); + phy_data->firmware_ver = ((firmware_id[3] & 0xf0) << 20) | + ((firmware_id[3] & 0x0f) << 16) | + (firmware_id[4] << 8) | firmware_id[5]; +} + +static void qt2025c_bug17190_workaround(struct efx_nic *efx) +{ + struct qt202x_phy_data *phy_data = efx->phy_data; + + /* The PHY can get stuck in a state where it reports PHY_XS and PMA/PMD + * layers up, but PCS down (no block_lock). 
If we notice this state + * persisting for a couple of seconds, we switch PMA/PMD loopback + * briefly on and then off again, which is normally sufficient to + * recover it. + */ + if (efx->link_state.up || + !efx_mdio_links_ok(efx, MDIO_DEVS_PMAPMD | MDIO_DEVS_PHYXS)) { + phy_data->bug17190_in_bad_state = false; + return; + } + + if (!phy_data->bug17190_in_bad_state) { + phy_data->bug17190_in_bad_state = true; + phy_data->bug17190_timer = jiffies + BUG17190_INTERVAL; + return; + } + + if (time_after_eq(jiffies, phy_data->bug17190_timer)) { + EFX_LOG(efx, "bashing QT2025C PMA/PMD\n"); + efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1, + MDIO_PMA_CTRL1_LOOPBACK, true); msleep(100); + efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1, + MDIO_PMA_CTRL1_LOOPBACK, false); + phy_data->bug17190_timer = jiffies + BUG17190_INTERVAL; + } +} + +static int qt2025c_select_phy_mode(struct efx_nic *efx) +{ + struct qt202x_phy_data *phy_data = efx->phy_data; + struct falcon_board *board = falcon_board(efx); + int reg, rc, i; + uint16_t phy_op_mode; + + /* Only 2.0.1.0+ PHY firmware supports the more optimal SFP+ + * Self-Configure mode. Don't attempt any switching if we encounter + * older firmware. */ + if (phy_data->firmware_ver < 0x02000100) + return 0; + + /* In general we will get optimal behaviour in "SFP+ Self-Configure" + * mode; however, that powers down most of the PHY when no module is + * present, so we must use a different mode (any fixed mode will do) + * to be sure that loopbacks will work. */ + phy_op_mode = (efx->loopback_mode == LOOPBACK_NONE) ? 
0x0038 : 0x0020; + + /* Only change mode if really necessary */ + reg = efx_mdio_read(efx, 1, 0xc319); + if ((reg & 0x0038) == phy_op_mode) + return 0; + EFX_LOG(efx, "Switching PHY to mode 0x%04x\n", phy_op_mode); + + /* This sequence replicates the register writes configured in the boot + * EEPROM (including the differences between board revisions), except + * that the operating mode is changed, and the PHY is prevented from + * unnecessarily reloading the main firmware image again. */ + efx_mdio_write(efx, 1, 0xc300, 0x0000); + /* (Note: this portion of the boot EEPROM sequence, which bit-bashes 9 + * STOPs onto the firmware/module I2C bus to reset it, varies across + * board revisions, as the bus is connected to different GPIO/LED + * outputs on the PHY.) */ + if (board->major == 0 && board->minor < 2) { + efx_mdio_write(efx, 1, 0xc303, 0x4498); + for (i = 0; i < 9; i++) { + efx_mdio_write(efx, 1, 0xc303, 0x4488); + efx_mdio_write(efx, 1, 0xc303, 0x4480); + efx_mdio_write(efx, 1, 0xc303, 0x4490); + efx_mdio_write(efx, 1, 0xc303, 0x4498); + } + } else { + efx_mdio_write(efx, 1, 0xc303, 0x0920); + efx_mdio_write(efx, 1, 0xd008, 0x0004); + for (i = 0; i < 9; i++) { + efx_mdio_write(efx, 1, 0xc303, 0x0900); + efx_mdio_write(efx, 1, 0xd008, 0x0005); + efx_mdio_write(efx, 1, 0xc303, 0x0920); + efx_mdio_write(efx, 1, 0xd008, 0x0004); + } + efx_mdio_write(efx, 1, 0xc303, 0x4900); + } + efx_mdio_write(efx, 1, 0xc303, 0x4900); + efx_mdio_write(efx, 1, 0xc302, 0x0004); + efx_mdio_write(efx, 1, 0xc316, 0x0013); + efx_mdio_write(efx, 1, 0xc318, 0x0054); + efx_mdio_write(efx, 1, 0xc319, phy_op_mode); + efx_mdio_write(efx, 1, 0xc31a, 0x0098); + efx_mdio_write(efx, 3, 0x0026, 0x0e00); + efx_mdio_write(efx, 3, 0x0027, 0x0013); + efx_mdio_write(efx, 3, 0x0028, 0xa528); + efx_mdio_write(efx, 1, 0xd006, 0x000a); + efx_mdio_write(efx, 1, 0xd007, 0x0009); + efx_mdio_write(efx, 1, 0xd008, 0x0004); + /* This additional write is not present in the boot EEPROM. 
It + * prevents the PHY's internal boot ROM doing another pointless (and + * slow) reload of the firmware image (the microcontroller's code + * memory is not affected by the microcontroller reset). */ + efx_mdio_write(efx, 1, 0xc317, 0x00ff); + efx_mdio_write(efx, 1, 0xc300, 0x0002); + msleep(20); + + /* Restart microcontroller execution of firmware from RAM */ + qt2025c_restart_firmware(efx); + + /* Wait for the microcontroller to be ready again */ + rc = qt2025c_wait_reset(efx); + if (rc < 0) { + EFX_ERR(efx, "PHY microcontroller reset during mode switch " + "timed out\n"); + return rc; } return 0; @@ -137,6 +335,16 @@ static int qt202x_reset_phy(struct efx_nic *efx) static int qt202x_phy_probe(struct efx_nic *efx) { + struct qt202x_phy_data *phy_data; + + phy_data = kzalloc(sizeof(struct qt202x_phy_data), GFP_KERNEL); + if (!phy_data) + return -ENOMEM; + efx->phy_data = phy_data; + phy_data->phy_mode = efx->phy_mode; + phy_data->bug17190_in_bad_state = false; + phy_data->bug17190_timer = 0; + efx->mdio.mmds = QT202X_REQUIRED_DEVS; efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; efx->loopback_modes = QT202X_LOOPBACKS | FALCON_XMAC_LOOPBACKS; @@ -145,7 +353,6 @@ static int qt202x_phy_probe(struct efx_nic *efx) static int qt202x_phy_init(struct efx_nic *efx) { - struct qt202x_phy_data *phy_data; u32 devid; int rc; @@ -155,17 +362,14 @@ static int qt202x_phy_init(struct efx_nic *efx) return rc; } - phy_data = kzalloc(sizeof(struct qt202x_phy_data), GFP_KERNEL); - if (!phy_data) - return -ENOMEM; - efx->phy_data = phy_data; - devid = efx_mdio_read_id(efx, MDIO_MMD_PHYXS); EFX_INFO(efx, "PHY ID reg %x (OUI %06x model %02x revision %x)\n", devid, efx_mdio_id_oui(devid), efx_mdio_id_model(devid), efx_mdio_id_rev(devid)); - phy_data->phy_mode = efx->phy_mode; + if (efx->phy_type == PHY_TYPE_QT2025C) + qt2025c_firmware_id(efx); + return 0; } @@ -183,6 +387,9 @@ static bool qt202x_phy_poll(struct efx_nic *efx) efx->link_state.fd = true; efx->link_state.fc = 
efx->wanted_fc; + if (efx->phy_type == PHY_TYPE_QT2025C) + qt2025c_bug17190_workaround(efx); + return efx->link_state.up != was_up; } @@ -191,6 +398,10 @@ static int qt202x_phy_reconfigure(struct efx_nic *efx) struct qt202x_phy_data *phy_data = efx->phy_data; if (efx->phy_type == PHY_TYPE_QT2025C) { + int rc = qt2025c_select_phy_mode(efx); + if (rc) + return rc; + /* There are several different register bits which can * disable TX (and save power) on direct-attach cables * or optical transceivers, varying somewhat between @@ -224,7 +435,7 @@ static void qt202x_phy_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecm mdio45_ethtool_gset(&efx->mdio, ecmd); } -static void qt202x_phy_fini(struct efx_nic *efx) +static void qt202x_phy_remove(struct efx_nic *efx) { /* Free the context block */ kfree(efx->phy_data); @@ -236,7 +447,8 @@ struct efx_phy_operations falcon_qt202x_phy_ops = { .init = qt202x_phy_init, .reconfigure = qt202x_phy_reconfigure, .poll = qt202x_phy_poll, - .fini = qt202x_phy_fini, + .fini = efx_port_dummy_op_void, + .remove = qt202x_phy_remove, .get_settings = qt202x_phy_get_settings, .set_settings = efx_mdio_set_settings, }; diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index de07a4f..f8c6771 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -133,6 +133,7 @@ static int siena_probe_port(struct efx_nic *efx) void siena_remove_port(struct efx_nic *efx) { + efx->phy_op->remove(efx); efx_nic_free_buffer(efx, &efx->stats_buffer); } diff --git a/drivers/net/sfc/tenxpress.c b/drivers/net/sfc/tenxpress.c index ca11572..3009c29 100644 --- a/drivers/net/sfc/tenxpress.c +++ b/drivers/net/sfc/tenxpress.c @@ -202,10 +202,14 @@ static ssize_t set_phy_short_reach(struct device *dev, int rc; rtnl_lock(); - efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_PMA_10GBT_TXPWR, - MDIO_PMA_10GBT_TXPWR_SHORT, - count != 0 && *buf != '0'); - rc = efx_reconfigure_port(efx); + if (efx->state != STATE_RUNNING) { + rc = -EBUSY; + } else { + 
efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_PMA_10GBT_TXPWR, + MDIO_PMA_10GBT_TXPWR_SHORT, + count != 0 && *buf != '0'); + rc = efx_reconfigure_port(efx); + } rtnl_unlock(); return rc < 0 ? rc : (ssize_t)count; @@ -298,36 +302,62 @@ static int tenxpress_init(struct efx_nic *efx) return 0; } -static int sfx7101_phy_probe(struct efx_nic *efx) +static int tenxpress_phy_probe(struct efx_nic *efx) { - efx->mdio.mmds = TENXPRESS_REQUIRED_DEVS; - efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; - efx->loopback_modes = SFX7101_LOOPBACKS | FALCON_XMAC_LOOPBACKS; - return 0; -} + struct tenxpress_phy_data *phy_data; + int rc; + + /* Allocate phy private storage */ + phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); + if (!phy_data) + return -ENOMEM; + efx->phy_data = phy_data; + phy_data->phy_mode = efx->phy_mode; + + /* Create any special files */ + if (efx->phy_type == PHY_TYPE_SFT9001B) { + rc = device_create_file(&efx->pci_dev->dev, + &dev_attr_phy_short_reach); + if (rc) + goto fail; + } + + if (efx->phy_type == PHY_TYPE_SFX7101) { + efx->mdio.mmds = TENXPRESS_REQUIRED_DEVS; + efx->mdio.mode_support = MDIO_SUPPORTS_C45; + + efx->loopback_modes = SFX7101_LOOPBACKS | FALCON_XMAC_LOOPBACKS; + + efx->link_advertising = (ADVERTISED_TP | ADVERTISED_Autoneg | + ADVERTISED_10000baseT_Full); + } else { + efx->mdio.mmds = TENXPRESS_REQUIRED_DEVS; + efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; + + efx->loopback_modes = (SFT9001_LOOPBACKS | + FALCON_XMAC_LOOPBACKS | + FALCON_GMAC_LOOPBACKS); + + efx->link_advertising = (ADVERTISED_TP | ADVERTISED_Autoneg | + ADVERTISED_10000baseT_Full | + ADVERTISED_1000baseT_Full | + ADVERTISED_100baseT_Full); + } -static int sft9001_phy_probe(struct efx_nic *efx) -{ - efx->mdio.mmds = TENXPRESS_REQUIRED_DEVS; - efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; - efx->loopback_modes = (SFT9001_LOOPBACKS | FALCON_XMAC_LOOPBACKS | - FALCON_GMAC_LOOPBACKS); return 0; + +fail: + kfree(efx->phy_data); + 
efx->phy_data = NULL; + return rc; } static int tenxpress_phy_init(struct efx_nic *efx) { - struct tenxpress_phy_data *phy_data; - int rc = 0; + int rc; falcon_board(efx)->type->init_phy(efx); - phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); - if (!phy_data) - return -ENOMEM; - efx->phy_data = phy_data; - phy_data->phy_mode = efx->phy_mode; - if (!(efx->phy_mode & PHY_MODE_SPECIAL)) { if (efx->phy_type == PHY_TYPE_SFT9001A) { int reg; @@ -341,44 +371,27 @@ static int tenxpress_phy_init(struct efx_nic *efx) rc = efx_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS); if (rc < 0) - goto fail; + return rc; rc = efx_mdio_check_mmds(efx, TENXPRESS_REQUIRED_DEVS, 0); if (rc < 0) - goto fail; + return rc; } rc = tenxpress_init(efx); if (rc < 0) - goto fail; + return rc; - /* Initialise advertising flags */ - efx->link_advertising = (ADVERTISED_TP | ADVERTISED_Autoneg | - ADVERTISED_10000baseT_Full); - if (efx->phy_type != PHY_TYPE_SFX7101) - efx->link_advertising |= (ADVERTISED_1000baseT_Full | - ADVERTISED_100baseT_Full); + /* Reinitialise flow control settings */ efx_link_set_wanted_fc(efx, efx->wanted_fc); efx_mdio_an_reconfigure(efx); - if (efx->phy_type == PHY_TYPE_SFT9001B) { - rc = device_create_file(&efx->pci_dev->dev, - &dev_attr_phy_short_reach); - if (rc) - goto fail; - } - schedule_timeout_uninterruptible(HZ / 5); /* 200ms */ /* Let XGXS and SerDes out of reset */ falcon_reset_xaui(efx); return 0; - - fail: - kfree(efx->phy_data); - efx->phy_data = NULL; - return rc; } /* Perform a "special software reset" on the PHY. 
The caller is @@ -589,25 +602,26 @@ static bool tenxpress_phy_poll(struct efx_nic *efx) return !efx_link_state_equal(&efx->link_state, &old_state); } -static void tenxpress_phy_fini(struct efx_nic *efx) +static void sfx7101_phy_fini(struct efx_nic *efx) { int reg; + /* Power down the LNPGA */ + reg = (1 << PMA_PMD_LNPGA_POWERDOWN_LBN); + efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg); + + /* Waiting here ensures that the board fini, which can turn + * off the power to the PHY, won't get run until the LNPGA + * powerdown has been given long enough to complete. */ + schedule_timeout_uninterruptible(LNPGA_PDOWN_WAIT); /* 200 ms */ +} + +static void tenxpress_phy_remove(struct efx_nic *efx) +{ if (efx->phy_type == PHY_TYPE_SFT9001B) device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_short_reach); - if (efx->phy_type == PHY_TYPE_SFX7101) { - /* Power down the LNPGA */ - reg = (1 << PMA_PMD_LNPGA_POWERDOWN_LBN); - efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg); - - /* Waiting here ensures that the board fini, which can turn - * off the power to the PHY, won't get run until the LNPGA - * powerdown has been given long enough to complete. 
*/ - schedule_timeout_uninterruptible(LNPGA_PDOWN_WAIT); /* 200 ms */ - } - kfree(efx->phy_data); efx->phy_data = NULL; } @@ -819,11 +833,12 @@ static void sft9001_set_npage_adv(struct efx_nic *efx, u32 advertising) } struct efx_phy_operations falcon_sfx7101_phy_ops = { - .probe = sfx7101_phy_probe, + .probe = tenxpress_phy_probe, .init = tenxpress_phy_init, .reconfigure = tenxpress_phy_reconfigure, .poll = tenxpress_phy_poll, - .fini = tenxpress_phy_fini, + .fini = sfx7101_phy_fini, + .remove = tenxpress_phy_remove, .get_settings = tenxpress_get_settings, .set_settings = tenxpress_set_settings, .set_npage_adv = sfx7101_set_npage_adv, @@ -832,11 +847,12 @@ struct efx_phy_operations falcon_sfx7101_phy_ops = { }; struct efx_phy_operations falcon_sft9001_phy_ops = { - .probe = sft9001_phy_probe, + .probe = tenxpress_phy_probe, .init = tenxpress_phy_init, .reconfigure = tenxpress_phy_reconfigure, .poll = tenxpress_phy_poll, - .fini = tenxpress_phy_fini, + .fini = efx_port_dummy_op_void, + .remove = tenxpress_phy_remove, .get_settings = tenxpress_get_settings, .set_settings = tenxpress_set_settings, .set_npage_adv = sft9001_set_npage_adv, diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index e669f94..a8b70ef 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -821,8 +821,6 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) EFX_TXQ_MASK]; efx_tsoh_free(tx_queue, buffer); EFX_BUG_ON_PARANOID(buffer->skb); - buffer->len = 0; - buffer->continuation = true; if (buffer->unmap_len) { unmap_addr = (buffer->dma_addr + buffer->len - buffer->unmap_len); @@ -836,6 +834,8 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) PCI_DMA_TODEVICE); buffer->unmap_len = 0; } + buffer->len = 0; + buffer->continuation = true; } } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 01e99f2..2834a01 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -849,13 +849,13 @@ static void tun_sock_write_space(struct sock *sk) if (sk->sk_sleep && 
waitqueue_active(sk->sk_sleep)) wake_up_interruptible_sync(sk->sk_sleep); - tun = container_of(sk, struct tun_sock, sk)->tun; + tun = tun_sk(sk)->tun; kill_fasync(&tun->fasync, SIGIO, POLL_OUT); } static void tun_sock_destruct(struct sock *sk) { - free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev); + free_netdev(tun_sk(sk)->tun->dev); } static struct proto tun_proto = { @@ -990,7 +990,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) sk->sk_write_space = tun_sock_write_space; sk->sk_sndbuf = INT_MAX; - container_of(sk, struct tun_sock, sk)->tun = tun; + tun_sk(sk)->tun = tun; security_tun_dev_post_create(sk); diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index afaf088..41ad2f3 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -1563,7 +1563,10 @@ static int ugeth_disable(struct ucc_geth_private *ugeth, enum comm_dir mode) static void ugeth_quiesce(struct ucc_geth_private *ugeth) { - /* Wait for and prevent any further xmits. */ + /* Prevent any further xmits, plus detach the device. */ + netif_device_detach(ugeth->ndev); + + /* Wait for any current xmits to finish. */ netif_tx_disable(ugeth->ndev); /* Disable the interrupt to avoid NAPI rescheduling. */ @@ -1577,7 +1580,7 @@ static void ugeth_activate(struct ucc_geth_private *ugeth) { napi_enable(&ugeth->napi); enable_irq(ugeth->ug_info->uf_info.irq); - netif_tx_wake_all_queues(ugeth->ndev); + netif_device_attach(ugeth->ndev); } /* Called every time the controller might need to be made @@ -1648,25 +1651,28 @@ static void adjust_link(struct net_device *dev) ugeth->oldspeed = phydev->speed; } - /* - * To change the MAC configuration we need to disable the - * controller. To do so, we have to either grab ugeth->lock, - * which is a bad idea since 'graceful stop' commands might - * take quite a while, or we can quiesce driver's activity. 
- */ - ugeth_quiesce(ugeth); - ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); - - out_be32(&ug_regs->maccfg2, tempval); - out_be32(&uf_regs->upsmr, upsmr); - - ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); - ugeth_activate(ugeth); - if (!ugeth->oldlink) { new_state = 1; ugeth->oldlink = 1; } + + if (new_state) { + /* + * To change the MAC configuration we need to disable + * the controller. To do so, we have to either grab + * ugeth->lock, which is a bad idea since 'graceful + * stop' commands might take quite a while, or we can + * quiesce driver's activity. + */ + ugeth_quiesce(ugeth); + ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); + + out_be32(&ug_regs->maccfg2, tempval); + out_be32(&uf_regs->upsmr, upsmr); + + ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); + ugeth_activate(ugeth); + } } else if (ugeth->oldlink) { new_state = 1; ugeth->oldlink = 0; @@ -3273,7 +3279,7 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ) /* Handle the transmitted buffer and release */ /* the BD to be used with the current frame */ - if ((bd == ugeth->txBd[txQ]) && (netif_queue_stopped(dev) == 0)) + if (bd == ugeth->txBd[txQ]) /* queue empty? */ break; dev->stats.tx_packets++; diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index 593e01f..611b804 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -102,6 +102,7 @@ static const int multicast_filter_limit = 32; #include #include #include +#include #include /* Processor type for cache alignment. */ #include #include @@ -389,6 +390,7 @@ struct rhine_private { struct net_device *dev; struct napi_struct napi; spinlock_t lock; + struct work_struct reset_task; /* Frequently used values: keep some adjacent for cache effect. 
*/ u32 quirks; @@ -407,6 +409,7 @@ struct rhine_private { static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int rhine_open(struct net_device *dev); +static void rhine_reset_task(struct work_struct *work); static void rhine_tx_timeout(struct net_device *dev); static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev); @@ -775,6 +778,8 @@ static int __devinit rhine_init_one(struct pci_dev *pdev, dev->irq = pdev->irq; spin_lock_init(&rp->lock); + INIT_WORK(&rp->reset_task, rhine_reset_task); + rp->mii_if.dev = dev; rp->mii_if.mdio_read = mdio_read; rp->mii_if.mdio_write = mdio_write; @@ -1179,22 +1184,18 @@ static int rhine_open(struct net_device *dev) return 0; } -static void rhine_tx_timeout(struct net_device *dev) +static void rhine_reset_task(struct work_struct *work) { - struct rhine_private *rp = netdev_priv(dev); - void __iomem *ioaddr = rp->base; - - printk(KERN_WARNING "%s: Transmit timed out, status %4.4x, PHY status " - "%4.4x, resetting...\n", - dev->name, ioread16(ioaddr + IntrStatus), - mdio_read(dev, rp->mii_if.phy_id, MII_BMSR)); + struct rhine_private *rp = container_of(work, struct rhine_private, + reset_task); + struct net_device *dev = rp->dev; /* protect against concurrent rx interrupts */ disable_irq(rp->pdev->irq); napi_disable(&rp->napi); - spin_lock(&rp->lock); + spin_lock_bh(&rp->lock); /* clear all descriptors */ free_tbufs(dev); @@ -1206,7 +1207,7 @@ static void rhine_tx_timeout(struct net_device *dev) rhine_chip_reset(dev); init_registers(dev); - spin_unlock(&rp->lock); + spin_unlock_bh(&rp->lock); enable_irq(rp->pdev->irq); dev->trans_start = jiffies; @@ -1214,6 +1215,19 @@ static void rhine_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } +static void rhine_tx_timeout(struct net_device *dev) +{ + struct rhine_private *rp = netdev_priv(dev); + void __iomem *ioaddr = rp->base; + + 
printk(KERN_WARNING "%s: Transmit timed out, status %4.4x, PHY status " + "%4.4x, resetting...\n", + dev->name, ioread16(ioaddr + IntrStatus), + mdio_read(dev, rp->mii_if.phy_id, MII_BMSR)); + + schedule_work(&rp->reset_task); +} + static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev) { @@ -1830,10 +1844,11 @@ static int rhine_close(struct net_device *dev) struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; - spin_lock_irq(&rp->lock); - - netif_stop_queue(dev); napi_disable(&rp->napi); + cancel_work_sync(&rp->reset_task); + netif_stop_queue(dev); + + spin_lock_irq(&rp->lock); if (debug > 1) printk(KERN_DEBUG "%s: Shutting down ethercard, " diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index f1c4b2a..0fdfd58 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c @@ -4087,21 +4087,21 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre) goto _exit0; } - if (!pci_set_dma_mask(pdev, 0xffffffffffffffffULL)) { + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { vxge_debug_ll_config(VXGE_TRACE, "%s : using 64bit DMA", __func__); high_dma = 1; if (pci_set_consistent_dma_mask(pdev, - 0xffffffffffffffffULL)) { + DMA_BIT_MASK(64))) { vxge_debug_init(VXGE_ERR, "%s : unable to obtain 64bit DMA for " "consistent allocations", __func__); ret = -ENOMEM; goto _exit1; } - } else if (!pci_set_dma_mask(pdev, 0xffffffffUL)) { + } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { vxge_debug_ll_config(VXGE_TRACE, "%s : using 32bit DMA", __func__); } else { diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index a4c086f..e63b7c4 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1903,17 +1903,6 @@ accept: rxs->noise = sc->ah->ah_noise_floor; rxs->signal = rxs->noise + rs.rs_rssi; - /* An rssi of 35 indicates you should be able use - * 54 Mbps reliably. 
A more elaborate scheme can be used - * here but it requires a map of SNR/throughput for each - * possible mode used */ - rxs->qual = rs.rs_rssi * 100 / 35; - - /* rssi can be more than 35 though, anything above that - * should be considered at 100% */ - if (rxs->qual > 100) - rxs->qual = 100; - rxs->antenna = rs.rs_antenna; rxs->rate_idx = ath5k_hw_to_driver_rix(sc, rs.rs_rate); rxs->flag |= ath5k_rx_decrypted(sc, ds, skb, &rs); @@ -2381,6 +2370,9 @@ ath5k_init(struct ath5k_softc *sc) */ ath5k_stop_locked(sc); + /* Set PHY calibration interval */ + ah->ah_cal_intval = ath5k_calinterval; + /* * The basic interface to setting the hardware in a good * state is ``reset''. On return the hardware is known to @@ -2408,10 +2400,6 @@ ath5k_init(struct ath5k_softc *sc) /* Set ack to be sent at low bit-rates */ ath5k_hw_set_ack_bitrate_high(ah, false); - - /* Set PHY calibration inteval */ - ah->ah_cal_intval = ath5k_calinterval; - ret = 0; done: mmiowb(); diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c index 71b84d9..efc420c 100644 --- a/drivers/net/wireless/ath/ath9k/mac.c +++ b/drivers/net/wireless/ath/ath9k/mac.c @@ -186,7 +186,7 @@ bool ath9k_hw_stoptxdma(struct ath_hw *ah, u32 q) wait = wait_time; while (ath9k_hw_numtxpending(ah, q)) { if ((--wait) == 0) { - ath_print(common, ATH_DBG_QUEUE, + ath_print(common, ATH_DBG_FATAL, "Failed to stop TX DMA in 100 " "msec after killing last frame\n"); break; diff --git a/drivers/net/wireless/ath/ath9k/mac.h b/drivers/net/wireless/ath/ath9k/mac.h index 0c87771..e185479 100644 --- a/drivers/net/wireless/ath/ath9k/mac.h +++ b/drivers/net/wireless/ath/ath9k/mac.h @@ -77,6 +77,9 @@ #define ATH9K_TXERR_XTXOP 0x08 #define ATH9K_TXERR_TIMER_EXPIRED 0x10 #define ATH9K_TX_ACKED 0x20 +#define ATH9K_TXERR_MASK \ + (ATH9K_TXERR_XRETRY | ATH9K_TXERR_FILT | ATH9K_TXERR_FIFO | \ + ATH9K_TXERR_XTXOP | ATH9K_TXERR_TIMER_EXPIRED) #define ATH9K_TX_BA 0x01 #define ATH9K_TX_PWRMGMT 0x02 diff --git 
a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index c487434..996eb90 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1973,6 +1973,9 @@ int ath_reset(struct ath_softc *sc, bool retry_tx) struct ieee80211_hw *hw = sc->hw; int r; + /* Stop ANI */ + del_timer_sync(&common->ani.timer); + ath9k_hw_set_interrupts(ah, 0); ath_drain_all_txq(sc, retry_tx); ath_stoprecv(sc); @@ -2014,6 +2017,9 @@ int ath_reset(struct ath_softc *sc, bool retry_tx) } } + /* Start ANI */ + ath_start_ani(common); + return r; } @@ -2508,6 +2514,9 @@ static void ath9k_stop(struct ieee80211_hw *hw) return; /* another wiphy still in use */ } + /* Ensure HW is awake when we try to shut it down. */ + ath9k_ps_wakeup(sc); + if (ah->btcoex_hw.enabled) { ath9k_hw_btcoex_disable(ah); if (ah->btcoex_hw.scheme == ATH_BTCOEX_CFG_3WIRE) @@ -2528,6 +2537,9 @@ static void ath9k_stop(struct ieee80211_hw *hw) /* disable HAL and put h/w to sleep */ ath9k_hw_disable(ah); ath9k_hw_configpcipowersave(ah, 1, 1); + ath9k_ps_restore(sc); + + /* Finally, put the chip in FULL SLEEP mode */ ath9k_setpower(sc, ATH9K_PM_FULL_SLEEP); sc->sc_flags |= SC_OP_INVALID; @@ -2641,8 +2653,10 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw, if ((sc->sc_ah->opmode == NL80211_IFTYPE_AP) || (sc->sc_ah->opmode == NL80211_IFTYPE_ADHOC) || (sc->sc_ah->opmode == NL80211_IFTYPE_MESH_POINT)) { + ath9k_ps_wakeup(sc); ath9k_hw_stoptxdma(sc->sc_ah, sc->beacon.beaconq); ath_beacon_return(sc, avp); + ath9k_ps_restore(sc); } sc->sc_flags &= ~SC_OP_BEACONS; @@ -3091,15 +3105,21 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw, case IEEE80211_AMPDU_RX_STOP: break; case IEEE80211_AMPDU_TX_START: + ath9k_ps_wakeup(sc); ath_tx_aggr_start(sc, sta, tid, ssn); ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ath9k_ps_restore(sc); break; case IEEE80211_AMPDU_TX_STOP: + ath9k_ps_wakeup(sc); ath_tx_aggr_stop(sc, sta, tid); 
ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); + ath9k_ps_restore(sc); break; case IEEE80211_AMPDU_TX_OPERATIONAL: + ath9k_ps_wakeup(sc); ath_tx_aggr_resume(sc, sta, tid); + ath9k_ps_restore(sc); break; default: ath_print(ath9k_hw_common(sc->sc_ah), ATH_DBG_FATAL, diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 5321f73..f7af5ea 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -96,7 +96,7 @@ static void ath_pci_bt_coex_prep(struct ath_common *common) pci_write_config_byte(pdev, ATH_PCIE_CAP_LINK_CTRL, aspm); } -const static struct ath_bus_ops ath_pci_bus_ops = { +static const struct ath_bus_ops ath_pci_bus_ops = { .read_cachesize = ath_pci_read_cachesize, .cleanup = ath_pci_cleanup, .eeprom_read = ath_pci_eeprom_read, diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 2a11cc5..fa12b90 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1108,11 +1108,11 @@ void ath_drain_all_txq(struct ath_softc *sc, bool retry_tx) if (npend) { int r; - ath_print(common, ATH_DBG_XMIT, + ath_print(common, ATH_DBG_FATAL, "Unable to stop TxDMA. Reset HAL!\n"); spin_lock_bh(&sc->sc_resetlock); - r = ath9k_hw_reset(ah, sc->sc_ah->curchan, true); + r = ath9k_hw_reset(ah, sc->sc_ah->curchan, false); if (r) ath_print(common, ATH_DBG_FATAL, "Unable to reset hardware; reset status %d\n", @@ -1414,17 +1414,9 @@ static void assign_aggr_tid_seqno(struct sk_buff *skb, * For HT capable stations, we save tidno for later use. * We also override seqno set by upper layer with the one * in tx aggregation state. - * - * If fragmentation is on, the sequence number is - * not overridden, since it has been - * incremented by the fragmentation routine. - * - * FIXME: check if the fragmentation threshold exceeds - * IEEE80211 max. 
*/ tid = ATH_AN_2_TID(an, bf->bf_tidno); - hdr->seq_ctrl = cpu_to_le16(tid->seq_next << - IEEE80211_SEQ_SEQ_SHIFT); + hdr->seq_ctrl = cpu_to_le16(tid->seq_next << IEEE80211_SEQ_SEQ_SHIFT); bf->bf_seqno = tid->seq_next; INCR(tid->seq_next, IEEE80211_SEQ_MAX); } @@ -1636,7 +1628,8 @@ static int ath_tx_setup_buffer(struct ieee80211_hw *hw, struct ath_buf *bf, bf->bf_keyix = ATH9K_TXKEYIX_INVALID; } - if (ieee80211_is_data_qos(fc) && (sc->sc_flags & SC_OP_TXAGGR)) + if (ieee80211_is_data_qos(fc) && bf_isht(bf) && + (sc->sc_flags & SC_OP_TXAGGR)) assign_aggr_tid_seqno(skb, bf); bf->bf_mpdu = skb; @@ -1780,7 +1773,8 @@ void ath_tx_cabq(struct ieee80211_hw *hw, struct sk_buff *skb) struct ath_wiphy *aphy = hw->priv; struct ath_softc *sc = aphy->sc; struct ath_common *common = ath9k_hw_common(sc->sc_ah); - int hdrlen, padsize; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + int padpos, padsize; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ath_tx_control txctl; @@ -1792,7 +1786,6 @@ void ath_tx_cabq(struct ieee80211_hw *hw, struct sk_buff *skb) * BSSes. 
*/ if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) sc->tx.seq_no += 0x10; hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG); @@ -1800,9 +1793,9 @@ void ath_tx_cabq(struct ieee80211_hw *hw, struct sk_buff *skb) } /* Add the padding after the header if this is not already done */ - hdrlen = ieee80211_get_hdrlen_from_skb(skb); - if (hdrlen & 3) { - padsize = hdrlen % 4; + padpos = ath9k_cmn_padpos(hdr->frame_control); + padsize = padpos & 3; + if (padsize && skb->len>padpos) { if (skb_headroom(skb) < padsize) { ath_print(common, ATH_DBG_XMIT, "TX CABQ padding failed\n"); @@ -1810,7 +1803,7 @@ void ath_tx_cabq(struct ieee80211_hw *hw, struct sk_buff *skb) return; } skb_push(skb, padsize); - memmove(skb->data, skb->data + padsize, hdrlen); + memmove(skb->data, skb->data + padsize, padpos); } txctl.txq = sc->beacon.cabq; @@ -1838,7 +1831,8 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, struct ieee80211_hw *hw = sc->hw; struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ath_common *common = ath9k_hw_common(sc->sc_ah); - int hdrlen, padsize; + struct ieee80211_hdr * hdr = (struct ieee80211_hdr *)skb->data; + int padpos, padsize; ath_print(common, ATH_DBG_XMIT, "TX complete: skb: %p\n", skb); @@ -1853,14 +1847,14 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, tx_info->flags |= IEEE80211_TX_STAT_ACK; } - hdrlen = ieee80211_get_hdrlen_from_skb(skb); - padsize = hdrlen & 3; - if (padsize && hdrlen >= 24) { + padpos = ath9k_cmn_padpos(hdr->frame_control); + padsize = padpos & 3; + if (padsize && skb->len>padpos+padsize) { /* * Remove MAC header padding before giving the frame back to * mac80211. 
*/ - memmove(skb->data + padsize, skb->data, hdrlen); + memmove(skb->data + padsize, skb->data, padpos); skb_pull(skb, padsize); } @@ -2078,7 +2072,7 @@ static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) &txq->axq_q, lastbf->list.prev); txq->axq_depth--; - txok = !(ds->ds_txstat.ts_status & ATH9K_TXERR_FILT); + txok = !(ds->ds_txstat.ts_status & ATH9K_TXERR_MASK); txq->axq_tx_inprogress = false; spin_unlock_bh(&txq->axq_lock); diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c index 027be27..88d1fd0 100644 --- a/drivers/net/wireless/b43/dma.c +++ b/drivers/net/wireless/b43/dma.c @@ -383,160 +383,44 @@ static inline } } -/* Check if a DMA region fits the device constraints. - * Returns true, if the region is OK for usage with this device. */ -static inline bool b43_dma_address_ok(struct b43_dmaring *ring, - dma_addr_t addr, size_t size) -{ - switch (ring->type) { - case B43_DMA_30BIT: - if ((u64)addr + size > (1ULL << 30)) - return 0; - break; - case B43_DMA_32BIT: - if ((u64)addr + size > (1ULL << 32)) - return 0; - break; - case B43_DMA_64BIT: - /* Currently we can't have addresses beyond - * 64bit in the kernel. 
*/ - break; - } - return 1; -} - -#define is_4k_aligned(addr) (((u64)(addr) & 0x0FFFull) == 0) -#define is_8k_aligned(addr) (((u64)(addr) & 0x1FFFull) == 0) - -static void b43_unmap_and_free_ringmem(struct b43_dmaring *ring, void *base, - dma_addr_t dmaaddr, size_t size) -{ - ssb_dma_unmap_single(ring->dev->dev, dmaaddr, size, DMA_TO_DEVICE); - free_pages((unsigned long)base, get_order(size)); -} - -static void * __b43_get_and_map_ringmem(struct b43_dmaring *ring, - dma_addr_t *dmaaddr, size_t size, - gfp_t gfp_flags) -{ - void *base; - - base = (void *)__get_free_pages(gfp_flags, get_order(size)); - if (!base) - return NULL; - memset(base, 0, size); - *dmaaddr = ssb_dma_map_single(ring->dev->dev, base, size, - DMA_TO_DEVICE); - if (ssb_dma_mapping_error(ring->dev->dev, *dmaaddr)) { - free_pages((unsigned long)base, get_order(size)); - return NULL; - } - - return base; -} - -static void * b43_get_and_map_ringmem(struct b43_dmaring *ring, - dma_addr_t *dmaaddr, size_t size) -{ - void *base; - - base = __b43_get_and_map_ringmem(ring, dmaaddr, size, - GFP_KERNEL); - if (!base) { - b43err(ring->dev->wl, "Failed to allocate or map pages " - "for DMA ringmemory\n"); - return NULL; - } - if (!b43_dma_address_ok(ring, *dmaaddr, size)) { - /* The memory does not fit our device constraints. - * Retry with GFP_DMA set to get lower memory. */ - b43_unmap_and_free_ringmem(ring, base, *dmaaddr, size); - base = __b43_get_and_map_ringmem(ring, dmaaddr, size, - GFP_KERNEL | GFP_DMA); - if (!base) { - b43err(ring->dev->wl, "Failed to allocate or map pages " - "in the GFP_DMA region for DMA ringmemory\n"); - return NULL; - } - if (!b43_dma_address_ok(ring, *dmaaddr, size)) { - b43_unmap_and_free_ringmem(ring, base, *dmaaddr, size); - b43err(ring->dev->wl, "Failed to allocate DMA " - "ringmemory that fits device constraints\n"); - return NULL; - } - } - /* We expect the memory to be 4k aligned, at least. 
*/ - if (B43_WARN_ON(!is_4k_aligned(*dmaaddr))) { - b43_unmap_and_free_ringmem(ring, base, *dmaaddr, size); - return NULL; - } - - return base; -} - static int alloc_ringmemory(struct b43_dmaring *ring) { - unsigned int required; - void *base; - dma_addr_t dmaaddr; - - /* There are several requirements to the descriptor ring memory: - * - The memory region needs to fit the address constraints for the - * device (same as for frame buffers). - * - For 30/32bit DMA devices, the descriptor ring must be 4k aligned. - * - For 64bit DMA devices, the descriptor ring must be 8k aligned. + gfp_t flags = GFP_KERNEL; + + /* The specs call for 4K buffers for 30- and 32-bit DMA with 4K + * alignment and 8K buffers for 64-bit DMA with 8K alignment. Testing + * has shown that 4K is sufficient for the latter as long as the buffer + * does not cross an 8K boundary. + * + * For unknown reasons - possibly a hardware error - the BCM4311 rev + * 02, which uses 64-bit DMA, needs the ring buffer in very low memory, + * which accounts for the GFP_DMA flag below. + * + * The flags here must match the flags in free_ringmemory below! */ - if (ring->type == B43_DMA_64BIT) - required = ring->nr_slots * sizeof(struct b43_dmadesc64); - else - required = ring->nr_slots * sizeof(struct b43_dmadesc32); - if (B43_WARN_ON(required > 0x1000)) + flags |= GFP_DMA; + ring->descbase = ssb_dma_alloc_consistent(ring->dev->dev, + B43_DMA_RINGMEMSIZE, + &(ring->dmabase), flags); + if (!ring->descbase) { + b43err(ring->dev->wl, "DMA ringmemory allocation failed\n"); return -ENOMEM; - - ring->alloc_descsize = 0x1000; - base = b43_get_and_map_ringmem(ring, &dmaaddr, ring->alloc_descsize); - if (!base) - return -ENOMEM; - ring->alloc_descbase = base; - ring->alloc_dmabase = dmaaddr; - - if ((ring->type != B43_DMA_64BIT) || is_8k_aligned(dmaaddr)) { - /* We're on <=32bit DMA, or we already got 8k aligned memory. - * That's all we need, so we're fine. 
*/ - ring->descbase = base; - ring->dmabase = dmaaddr; - return 0; - } - b43_unmap_and_free_ringmem(ring, base, dmaaddr, ring->alloc_descsize); - - /* Ok, we failed at the 8k alignment requirement. - * Try to force-align the memory region now. */ - ring->alloc_descsize = 0x2000; - base = b43_get_and_map_ringmem(ring, &dmaaddr, ring->alloc_descsize); - if (!base) - return -ENOMEM; - ring->alloc_descbase = base; - ring->alloc_dmabase = dmaaddr; - - if (is_8k_aligned(dmaaddr)) { - /* We're already 8k aligned. That Ok, too. */ - ring->descbase = base; - ring->dmabase = dmaaddr; - return 0; } - /* Force-align it to 8k */ - ring->descbase = (void *)((u8 *)base + 0x1000); - ring->dmabase = dmaaddr + 0x1000; - B43_WARN_ON(!is_8k_aligned(ring->dmabase)); + memset(ring->descbase, 0, B43_DMA_RINGMEMSIZE); return 0; } static void free_ringmemory(struct b43_dmaring *ring) { - b43_unmap_and_free_ringmem(ring, ring->alloc_descbase, - ring->alloc_dmabase, ring->alloc_descsize); + gfp_t flags = GFP_KERNEL; + + if (ring->type == B43_DMA_64BIT) + flags |= GFP_DMA; + + ssb_dma_free_consistent(ring->dev->dev, B43_DMA_RINGMEMSIZE, + ring->descbase, ring->dmabase, flags); } /* Reset the RX DMA channel */ @@ -646,14 +530,29 @@ static bool b43_dma_mapping_error(struct b43_dmaring *ring, if (unlikely(ssb_dma_mapping_error(ring->dev->dev, addr))) return 1; - if (!b43_dma_address_ok(ring, addr, buffersize)) { - /* We can't support this address. Unmap it again. */ - unmap_descbuffer(ring, addr, buffersize, dma_to_device); - return 1; + switch (ring->type) { + case B43_DMA_30BIT: + if ((u64)addr + buffersize > (1ULL << 30)) + goto address_error; + break; + case B43_DMA_32BIT: + if ((u64)addr + buffersize > (1ULL << 32)) + goto address_error; + break; + case B43_DMA_64BIT: + /* Currently we can't have addresses beyond + * 64bit in the kernel. */ + break; } /* The address is OK. */ return 0; + +address_error: + /* We can't support this address. Unmap it again. 
*/ + unmap_descbuffer(ring, addr, buffersize, dma_to_device); + + return 1; } static bool b43_rx_buffer_is_poisoned(struct b43_dmaring *ring, struct sk_buff *skb) @@ -715,9 +614,6 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring, meta->dmaaddr = dmaaddr; ring->ops->fill_descriptor(ring, desc, dmaaddr, ring->rx_buffersize, 0, 0, 0); - ssb_dma_sync_single_for_device(ring->dev->dev, - ring->alloc_dmabase, - ring->alloc_descsize, DMA_TO_DEVICE); return 0; } @@ -1354,9 +1250,6 @@ static int dma_tx_fragment(struct b43_dmaring *ring, } /* Now transfer the whole frame. */ wmb(); - ssb_dma_sync_single_for_device(ring->dev->dev, - ring->alloc_dmabase, - ring->alloc_descsize, DMA_TO_DEVICE); ops->poke_tx(ring, next_slot(ring, slot)); return 0; diff --git a/drivers/net/wireless/b43/dma.h b/drivers/net/wireless/b43/dma.h index e607b39..f7ab37c 100644 --- a/drivers/net/wireless/b43/dma.h +++ b/drivers/net/wireless/b43/dma.h @@ -157,6 +157,7 @@ struct b43_dmadesc_generic { } __attribute__ ((__packed__)); /* Misc DMA constants */ +#define B43_DMA_RINGMEMSIZE PAGE_SIZE #define B43_DMA0_RX_FRAMEOFFSET 30 /* DMA engine tuning knobs */ @@ -246,12 +247,6 @@ struct b43_dmaring { /* The QOS priority assigned to this ring. Only used for TX rings. * This is the mac80211 "queue" value. */ u8 queue_prio; - /* Pointers and size of the originally allocated and mapped memory - * region for the descriptor ring. */ - void *alloc_descbase; - dma_addr_t alloc_dmabase; - unsigned int alloc_descsize; - /* Pointer to our wireless device. */ struct b43_wldev *dev; #ifdef CONFIG_B43_DEBUG /* Maximum number of used slots. 
*/ diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 7da1dab..234891d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -681,19 +681,13 @@ static void iwl3945_rx_reply_rx(struct iwl_priv *priv, snr = rx_stats_sig_avg / rx_stats_noise_diff; rx_status.noise = rx_status.signal - iwl3945_calc_db_from_ratio(snr); - rx_status.qual = iwl3945_calc_sig_qual(rx_status.signal, - rx_status.noise); - - /* If noise info not available, calculate signal quality indicator (%) - * using just the dBm signal level. */ } else { rx_status.noise = priv->last_rx_noise; - rx_status.qual = iwl3945_calc_sig_qual(rx_status.signal, 0); } - IWL_DEBUG_STATS(priv, "Rssi %d noise %d qual %d sig_avg %d noise_diff %d\n", - rx_status.signal, rx_status.noise, rx_status.qual, + IWL_DEBUG_STATS(priv, "Rssi %d noise %d sig_avg %d noise_diff %d\n", + rx_status.signal, rx_status.noise, rx_stats_sig_avg, rx_stats_noise_diff); header = (struct ieee80211_hdr *)IWL_RX_DATA(pkt); @@ -1835,8 +1829,7 @@ static int iwl3945_send_rxon_assoc(struct iwl_priv *priv) rc = -EIO; } - priv->alloc_rxb_page--; - free_pages(cmd.reply_page, priv->hw_params.rx_page_order); + iwl_free_pages(priv, cmd.reply_page); return rc; } @@ -2836,6 +2829,7 @@ static struct iwl_cfg iwl3945_bg_cfg = { .use_isr_legacy = true, .ht_greenfield_support = false, .led_compensation = 64, + .broken_powersave = true, }; static struct iwl_cfg iwl3945_abg_cfg = { @@ -2852,6 +2846,7 @@ static struct iwl_cfg iwl3945_abg_cfg = { .use_isr_legacy = true, .ht_greenfield_support = false, .led_compensation = 64, + .broken_powersave = true, }; struct pci_device_id iwl3945_hw_card_ids[] = { diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index ecc23ec..531fa12 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h @@ -222,7 +222,6 @@ struct iwl3945_ibss_seq { * 
*****************************************************************************/ extern int iwl3945_calc_db_from_ratio(int sig_ratio); -extern int iwl3945_calc_sig_qual(int rssi_dbm, int noise_dbm); extern void iwl3945_rx_replenish(void *data); extern void iwl3945_rx_queue_reset(struct iwl_priv *priv, struct iwl_rx_queue *rxq); extern unsigned int iwl3945_fill_beacon_frame(struct iwl_priv *priv, diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index 386513b..484c5fd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -1204,7 +1204,7 @@ static int iwl4965_fill_txpower_tbl(struct iwl_priv *priv, u8 band, u16 channel, iwl4965_interpolate_chan(priv, channel, &ch_eeprom_info); /* calculate tx gain adjustment based on power supply voltage */ - voltage = priv->calib_info->voltage; + voltage = le16_to_cpu(priv->calib_info->voltage); init_voltage = (s32)le32_to_cpu(priv->card_alive_init.voltage); voltage_compensation = iwl4965_get_voltage_compensation(voltage, init_voltage); diff --git a/drivers/net/wireless/iwlwifi/iwl-5000-hw.h b/drivers/net/wireless/iwlwifi/iwl-5000-hw.h index 4ef6804..bc056e9 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000-hw.h +++ b/drivers/net/wireless/iwlwifi/iwl-5000-hw.h @@ -92,11 +92,15 @@ static inline s32 iwl_temp_calib_to_offset(struct iwl_priv *priv) { - u16 *temp_calib = (u16 *)iwl_eeprom_query_addr(priv, - EEPROM_5000_TEMPERATURE); - /* offset = temperature - voltage / coef */ - s32 offset = (s32)(temp_calib[0] - temp_calib[1] / IWL_5150_VOLTAGE_TO_TEMPERATURE_COEFF); - return offset; + u16 temperature, voltage; + __le16 *temp_calib = + (__le16 *)iwl_eeprom_query_addr(priv, EEPROM_5000_TEMPERATURE); + + temperature = le16_to_cpu(temp_calib[0]); + voltage = le16_to_cpu(temp_calib[1]); + + /* offset = temp - volt / coeff */ + return (s32)(temperature - voltage / IWL_5150_VOLTAGE_TO_TEMPERATURE_COEFF); } /* Fixed (non-configurable) rx data from phy 
*/ diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index e2f8615..33a5866 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -333,14 +333,15 @@ static void iwl5000_set_ct_threshold(struct iwl_priv *priv) static int iwl5000_set_Xtal_calib(struct iwl_priv *priv) { struct iwl_calib_xtal_freq_cmd cmd; - u16 *xtal_calib = (u16 *)iwl_eeprom_query_addr(priv, EEPROM_5000_XTAL); + __le16 *xtal_calib = + (__le16 *)iwl_eeprom_query_addr(priv, EEPROM_5000_XTAL); cmd.hdr.op_code = IWL_PHY_CALIBRATE_CRYSTAL_FRQ_CMD; cmd.hdr.first_group = 0; cmd.hdr.groups_num = 1; cmd.hdr.data_valid = 1; - cmd.cap_pin1 = (u8)xtal_calib[0]; - cmd.cap_pin2 = (u8)xtal_calib[1]; + cmd.cap_pin1 = le16_to_cpu(xtal_calib[0]); + cmd.cap_pin2 = le16_to_cpu(xtal_calib[1]); return iwl_calib_set(&priv->calib_results[IWL_CALIB_XTAL], (u8 *)&cmd, sizeof(cmd)); } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index fe511cb..b93e491 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -150,7 +150,7 @@ static s32 expected_tpt_mimo3_40MHz[4][IWL_RATE_COUNT] = { }; /* mbps, mcs */ -const static struct iwl_rate_mcs_info iwl_rate_mcs[IWL_RATE_COUNT] = { +static const struct iwl_rate_mcs_info iwl_rate_mcs[IWL_RATE_COUNT] = { { "1", "BPSK DSSS"}, { "2", "QPSK DSSS"}, {"5.5", "BPSK CCK"}, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index b8377ef..1c9866d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -1842,7 +1842,7 @@ void iwl_dump_nic_event_log(struct iwl_priv *priv, bool full_log) } #ifdef CONFIG_IWLWIFI_DEBUG - if (!(iwl_get_debug_level(priv) & IWL_DL_FW_ERRORS)) + if (!(iwl_get_debug_level(priv) & IWL_DL_FW_ERRORS) && !full_log) size = (size > DEFAULT_DUMP_EVENT_LOG_ENTRIES) ? 
DEFAULT_DUMP_EVENT_LOG_ENTRIES : size; #else @@ -3173,7 +3173,6 @@ static int iwl_init_drv(struct iwl_priv *priv) priv->ibss_beacon = NULL; - spin_lock_init(&priv->lock); spin_lock_init(&priv->sta_lock); spin_lock_init(&priv->hcmd_lock); @@ -3361,10 +3360,11 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) (unsigned long long) pci_resource_len(pdev, 0)); IWL_DEBUG_INFO(priv, "pci_resource_base = %p\n", priv->hw_base); - /* this spin lock will be used in apm_ops.init and EEPROM access + /* these spin locks will be used in apm_ops.init and EEPROM access * we should init now */ spin_lock_init(&priv->reg_lock); + spin_lock_init(&priv->lock); iwl_hw_detect(priv); IWL_INFO(priv, "Detected Intel Wireless WiFi Link %s REV=0x%X\n", priv->cfg->name, priv->hw_rev); diff --git a/drivers/net/wireless/iwlwifi/iwl-csr.h b/drivers/net/wireless/iwlwifi/iwl-csr.h index a7bfae0..1ec8cb4 100644 --- a/drivers/net/wireless/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/iwlwifi/iwl-csr.h @@ -77,8 +77,7 @@ * The MAC (uCode processor, etc.) does not need to be powered up for accessing * the CSR registers. * - * NOTE: Newer devices using one-time-programmable (OTP) memory - * require device to be awake in order to read this memory + * NOTE: Device does need to be awake in order to read this memory * via CSR_EEPROM and CSR_OTP registers */ #define CSR_BASE (0x000) @@ -111,9 +110,8 @@ /* * EEPROM and OTP (one-time-programmable) memory reads * - * NOTE: For (newer) devices using OTP, device must be awake, initialized via - * apm_ops.init() in order to read. Older devices (3945/4965/5000) - * use EEPROM and do not require this. + * NOTE: Device must be awake, initialized via apm_ops.init(), + * in order to read. 
*/ #define CSR_EEPROM_REG (CSR_BASE+0x02c) #define CSR_EEPROM_GP (CSR_BASE+0x030) diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 2673e9a..165d1f6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -1168,7 +1168,7 @@ struct iwl_priv { u32 last_beacon_time; u64 last_tsf; - /* eeprom */ + /* eeprom -- this is in the card's little endian byte order */ u8 *eeprom; int nvm_device_type; struct iwl_eeprom_calib_info *calib_info; @@ -1353,4 +1353,15 @@ static inline int is_channel_ibss(const struct iwl_channel_info *ch) return ((ch->flags & EEPROM_CHANNEL_IBSS)) ? 1 : 0; } +static inline void __iwl_free_pages(struct iwl_priv *priv, struct page *page) +{ + __free_pages(page, priv->hw_params.rx_page_order); + priv->alloc_rxb_page--; +} + +static inline void iwl_free_pages(struct iwl_priv *priv, unsigned long page) +{ + free_pages(page, priv->hw_params.rx_page_order); + priv->alloc_rxb_page--; +} #endif /* __iwl_dev_h__ */ diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.c b/drivers/net/wireless/iwlwifi/iwl-eeprom.c index 3946e5c..4a30969 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom.c +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.c @@ -370,7 +370,7 @@ static int iwl_init_otp_access(struct iwl_priv *priv) return ret; } -static int iwl_read_otp_word(struct iwl_priv *priv, u16 addr, u16 *eeprom_data) +static int iwl_read_otp_word(struct iwl_priv *priv, u16 addr, __le16 *eeprom_data) { int ret = 0; u32 r; @@ -404,7 +404,7 @@ static int iwl_read_otp_word(struct iwl_priv *priv, u16 addr, u16 *eeprom_data) CSR_OTP_GP_REG_ECC_CORR_STATUS_MSK); IWL_ERR(priv, "Correctable OTP ECC error, continue read\n"); } - *eeprom_data = le16_to_cpu((__force __le16)(r >> 16)); + *eeprom_data = cpu_to_le16(r >> 16); return 0; } @@ -413,7 +413,8 @@ static int iwl_read_otp_word(struct iwl_priv *priv, u16 addr, u16 *eeprom_data) */ static bool iwl_is_otp_empty(struct iwl_priv *priv) { - u16 
next_link_addr = 0, link_value; + u16 next_link_addr = 0; + __le16 link_value; bool is_empty = false; /* locate the beginning of OTP link list */ @@ -443,7 +444,8 @@ static bool iwl_is_otp_empty(struct iwl_priv *priv) static int iwl_find_otp_image(struct iwl_priv *priv, u16 *validblockaddr) { - u16 next_link_addr = 0, link_value = 0, valid_addr; + u16 next_link_addr = 0, valid_addr; + __le16 link_value = 0; int usedblocks = 0; /* set addressing mode to absolute to traverse the link list */ @@ -463,7 +465,7 @@ static int iwl_find_otp_image(struct iwl_priv *priv, * check for more block on the link list */ valid_addr = next_link_addr; - next_link_addr = link_value * sizeof(u16); + next_link_addr = le16_to_cpu(link_value) * sizeof(u16); IWL_DEBUG_INFO(priv, "OTP blocks %d addr 0x%x\n", usedblocks, next_link_addr); if (iwl_read_otp_word(priv, next_link_addr, &link_value)) @@ -497,7 +499,7 @@ static int iwl_find_otp_image(struct iwl_priv *priv, */ int iwl_eeprom_init(struct iwl_priv *priv) { - u16 *e; + __le16 *e; u32 gp = iwl_read32(priv, CSR_EEPROM_GP); int sz; int ret; @@ -516,12 +518,9 @@ int iwl_eeprom_init(struct iwl_priv *priv) ret = -ENOMEM; goto alloc_err; } - e = (u16 *)priv->eeprom; + e = (__le16 *)priv->eeprom; - if (priv->nvm_device_type == NVM_DEVICE_TYPE_OTP) { - /* OTP reads require powered-up chip */ - priv->cfg->ops->lib->apm_ops.init(priv); - } + priv->cfg->ops->lib->apm_ops.init(priv); ret = priv->cfg->ops->lib->eeprom_ops.verify_signature(priv); if (ret < 0) { @@ -562,7 +561,7 @@ int iwl_eeprom_init(struct iwl_priv *priv) } for (addr = validblockaddr; addr < validblockaddr + sz; addr += sizeof(u16)) { - u16 eeprom_data; + __le16 eeprom_data; ret = iwl_read_otp_word(priv, addr, &eeprom_data); if (ret) @@ -570,13 +569,6 @@ int iwl_eeprom_init(struct iwl_priv *priv) e[cache_addr / 2] = eeprom_data; cache_addr += sizeof(u16); } - - /* - * Now that OTP reads are complete, reset chip to save - * power until we load uCode during "up". 
- */ - priv->cfg->ops->lib->apm_ops.stop(priv); - } else { /* eeprom is an array of 16bit values */ for (addr = 0; addr < sz; addr += sizeof(u16)) { @@ -594,7 +586,7 @@ int iwl_eeprom_init(struct iwl_priv *priv) goto done; } r = _iwl_read_direct32(priv, CSR_EEPROM_REG); - e[addr / 2] = le16_to_cpu((__force __le16)(r >> 16)); + e[addr / 2] = cpu_to_le16(r >> 16); } } ret = 0; @@ -603,6 +595,8 @@ done: err: if (ret) iwl_eeprom_free(priv); + /* Reset chip to save power until we load uCode during "up". */ + priv->cfg->ops->lib->apm_ops.stop(priv); alloc_err: return ret; } @@ -755,7 +749,8 @@ static int iwl_mod_ht40_chan_info(struct iwl_priv *priv, ch_info->ht40_eeprom = *eeprom_ch; ch_info->ht40_max_power_avg = eeprom_ch->max_power_avg; ch_info->ht40_flags = eeprom_ch->flags; - ch_info->ht40_extension_channel &= ~clear_ht40_extension_channel; + if (eeprom_ch->flags & EEPROM_CHANNEL_VALID) + ch_info->ht40_extension_channel &= ~clear_ht40_extension_channel; return 0; } diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.h b/drivers/net/wireless/iwlwifi/iwl-eeprom.h index 5cd2b66..0cd9c02 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom.h +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.h @@ -137,7 +137,7 @@ struct iwl_eeprom_channel { * */ struct iwl_eeprom_enhanced_txpwr { - u16 common; + __le16 common; s8 chain_a_max; s8 chain_b_max; s8 chain_c_max; @@ -360,7 +360,7 @@ struct iwl_eeprom_calib_subband_info { struct iwl_eeprom_calib_info { u8 saturation_power24; /* half-dBm (e.g. 
"34" = 17 dBm) */ u8 saturation_power52; /* half-dBm */ - s16 voltage; /* signed */ + __le16 voltage; /* signed */ struct iwl_eeprom_calib_subband_info band_info[EEPROM_TX_POWER_BANDS]; } __attribute__ ((packed)); diff --git a/drivers/net/wireless/iwlwifi/iwl-hcmd.c b/drivers/net/wireless/iwlwifi/iwl-hcmd.c index a231659..30e9ea6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-hcmd.c +++ b/drivers/net/wireless/iwlwifi/iwl-hcmd.c @@ -234,7 +234,7 @@ cancel: } fail: if (cmd->reply_page) { - free_pages(cmd->reply_page, priv->hw_params.rx_page_order); + iwl_free_pages(priv, cmd->reply_page); cmd->reply_page = 0; } out: diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c index 6090bc1..6f36b6e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rx.c +++ b/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -345,10 +345,8 @@ void iwl_rx_queue_free(struct iwl_priv *priv, struct iwl_rx_queue *rxq) pci_unmap_page(priv->pci_dev, rxq->pool[i].page_dma, PAGE_SIZE << priv->hw_params.rx_page_order, PCI_DMA_FROMDEVICE); - __free_pages(rxq->pool[i].page, - priv->hw_params.rx_page_order); + __iwl_free_pages(priv, rxq->pool[i].page); rxq->pool[i].page = NULL; - priv->alloc_rxb_page--; } } @@ -416,9 +414,7 @@ void iwl_rx_queue_reset(struct iwl_priv *priv, struct iwl_rx_queue *rxq) pci_unmap_page(priv->pci_dev, rxq->pool[i].page_dma, PAGE_SIZE << priv->hw_params.rx_page_order, PCI_DMA_FROMDEVICE); - priv->alloc_rxb_page--; - __free_pages(rxq->pool[i].page, - priv->hw_params.rx_page_order); + __iwl_free_pages(priv, rxq->pool[i].page); rxq->pool[i].page = NULL; } list_add_tail(&rxq->pool[i].list, &rxq->rx_used); @@ -654,47 +650,6 @@ void iwl_reply_statistics(struct iwl_priv *priv, } EXPORT_SYMBOL(iwl_reply_statistics); -#define PERFECT_RSSI (-20) /* dBm */ -#define WORST_RSSI (-95) /* dBm */ -#define RSSI_RANGE (PERFECT_RSSI - WORST_RSSI) - -/* Calculate an indication of rx signal quality (a percentage, not dBm!). 
- * See http://www.ces.clemson.edu/linux/signal_quality.shtml for info - * about formulas used below. */ -static int iwl_calc_sig_qual(int rssi_dbm, int noise_dbm) -{ - int sig_qual; - int degradation = PERFECT_RSSI - rssi_dbm; - - /* If we get a noise measurement, use signal-to-noise ratio (SNR) - * as indicator; formula is (signal dbm - noise dbm). - * SNR at or above 40 is a great signal (100%). - * Below that, scale to fit SNR of 0 - 40 dB within 0 - 100% indicator. - * Weakest usable signal is usually 10 - 15 dB SNR. */ - if (noise_dbm) { - if (rssi_dbm - noise_dbm >= 40) - return 100; - else if (rssi_dbm < noise_dbm) - return 0; - sig_qual = ((rssi_dbm - noise_dbm) * 5) / 2; - - /* Else use just the signal level. - * This formula is a least squares fit of data points collected and - * compared with a reference system that had a percentage (%) display - * for signal quality. */ - } else - sig_qual = (100 * (RSSI_RANGE * RSSI_RANGE) - degradation * - (15 * RSSI_RANGE + 62 * degradation)) / - (RSSI_RANGE * RSSI_RANGE); - - if (sig_qual > 100) - sig_qual = 100; - else if (sig_qual < 1) - sig_qual = 0; - - return sig_qual; -} - /* Calc max signal level (dBm) among 3 possible receivers */ static inline int iwl_calc_rssi(struct iwl_priv *priv, struct iwl_rx_phy_res *rx_resp) @@ -1105,11 +1060,8 @@ void iwl_rx_reply_rx(struct iwl_priv *priv, if (iwl_is_associated(priv) && !test_bit(STATUS_SCANNING, &priv->status)) { rx_status.noise = priv->last_rx_noise; - rx_status.qual = iwl_calc_sig_qual(rx_status.signal, - rx_status.noise); } else { rx_status.noise = IWL_NOISE_MEAS_NOT_AVAILABLE; - rx_status.qual = iwl_calc_sig_qual(rx_status.signal, 0); } /* Reset beacon noise level if not associated. 
*/ @@ -1122,8 +1074,8 @@ void iwl_rx_reply_rx(struct iwl_priv *priv, iwl_dbg_report_frame(priv, phy_res, len, header, 1); #endif iwl_dbg_log_rx_data_frame(priv, len, header); - IWL_DEBUG_STATS_LIMIT(priv, "Rssi %d, noise %d, qual %d, TSF %llu\n", - rx_status.signal, rx_status.noise, rx_status.qual, + IWL_DEBUG_STATS_LIMIT(priv, "Rssi %d, noise %d, TSF %llu\n", + rx_status.signal, rx_status.noise, (unsigned long long)rx_status.mactime); /* diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index a2b2b83..fa1c89b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -144,8 +144,7 @@ static int iwl_send_scan_abort(struct iwl_priv *priv) clear_bit(STATUS_SCAN_HW, &priv->status); } - priv->alloc_rxb_page--; - free_pages(cmd.reply_page, priv->hw_params.rx_page_order); + iwl_free_pages(priv, cmd.reply_page); return ret; } diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index cd6a690..cde09a8 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -164,9 +164,7 @@ int iwl_send_add_sta(struct iwl_priv *priv, break; } } - - priv->alloc_rxb_page--; - free_pages(cmd.reply_page, priv->hw_params.rx_page_order); + iwl_free_pages(priv, cmd.reply_page); return ret; } @@ -391,9 +389,7 @@ static int iwl_send_remove_station(struct iwl_priv *priv, const u8 *addr, break; } } - - priv->alloc_rxb_page--; - free_pages(cmd.reply_page, priv->hw_params.rx_page_order); + iwl_free_pages(priv, cmd.reply_page); return ret; } diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 00da5e1..87ce2bd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -407,13 +407,14 @@ void iwl_hw_txq_ctx_free(struct iwl_priv *priv) int txq_id; /* Tx queues */ - if (priv->txq) + if (priv->txq) { for (txq_id = 0; txq_id < priv->hw_params.max_txq_num; 
txq_id++) if (txq_id == IWL_CMD_QUEUE_NUM) iwl_cmd_queue_free(priv); else iwl_tx_queue_free(priv, txq_id); + } iwl_free_dma_ptr(priv, &priv->kw); iwl_free_dma_ptr(priv, &priv->scd_bc_tbls); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 2a28a1f..f8e4e4b 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -548,6 +548,9 @@ static int iwl3945_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) txq = &priv->txq[txq_id]; q = &txq->q; + if ((iwl_queue_space(q) < q->high_mark)) + goto drop; + spin_lock_irqsave(&priv->lock, flags); idx = get_cmd_index(q, q->write_ptr, 0); @@ -812,7 +815,7 @@ static int iwl3945_get_measurement(struct iwl_priv *priv, break; } - free_pages(cmd.reply_page, priv->hw_params.rx_page_order); + iwl_free_pages(priv, cmd.reply_page); return rc; } @@ -1198,9 +1201,7 @@ void iwl3945_rx_queue_reset(struct iwl_priv *priv, struct iwl_rx_queue *rxq) pci_unmap_page(priv->pci_dev, rxq->pool[i].page_dma, PAGE_SIZE << priv->hw_params.rx_page_order, PCI_DMA_FROMDEVICE); - priv->alloc_rxb_page--; - __free_pages(rxq->pool[i].page, - priv->hw_params.rx_page_order); + __iwl_free_pages(priv, rxq->pool[i].page); rxq->pool[i].page = NULL; } list_add_tail(&rxq->pool[i].list, &rxq->rx_used); @@ -1247,10 +1248,8 @@ static void iwl3945_rx_queue_free(struct iwl_priv *priv, struct iwl_rx_queue *rx pci_unmap_page(priv->pci_dev, rxq->pool[i].page_dma, PAGE_SIZE << priv->hw_params.rx_page_order, PCI_DMA_FROMDEVICE); - __free_pages(rxq->pool[i].page, - priv->hw_params.rx_page_order); + __iwl_free_pages(priv, rxq->pool[i].page); rxq->pool[i].page = NULL; - priv->alloc_rxb_page--; } } @@ -1300,47 +1299,6 @@ int iwl3945_calc_db_from_ratio(int sig_ratio) return (int)ratio2dB[sig_ratio]; } -#define PERFECT_RSSI (-20) /* dBm */ -#define WORST_RSSI (-95) /* dBm */ -#define RSSI_RANGE (PERFECT_RSSI - WORST_RSSI) - -/* Calculate an indication of rx signal quality (a 
percentage, not dBm!). - * See http://www.ces.clemson.edu/linux/signal_quality.shtml for info - * about formulas used below. */ -int iwl3945_calc_sig_qual(int rssi_dbm, int noise_dbm) -{ - int sig_qual; - int degradation = PERFECT_RSSI - rssi_dbm; - - /* If we get a noise measurement, use signal-to-noise ratio (SNR) - * as indicator; formula is (signal dbm - noise dbm). - * SNR at or above 40 is a great signal (100%). - * Below that, scale to fit SNR of 0 - 40 dB within 0 - 100% indicator. - * Weakest usable signal is usually 10 - 15 dB SNR. */ - if (noise_dbm) { - if (rssi_dbm - noise_dbm >= 40) - return 100; - else if (rssi_dbm < noise_dbm) - return 0; - sig_qual = ((rssi_dbm - noise_dbm) * 5) / 2; - - /* Else use just the signal level. - * This formula is a least squares fit of data points collected and - * compared with a reference system that had a percentage (%) display - * for signal quality. */ - } else - sig_qual = (100 * (RSSI_RANGE * RSSI_RANGE) - degradation * - (15 * RSSI_RANGE + 62 * degradation)) / - (RSSI_RANGE * RSSI_RANGE); - - if (sig_qual > 100) - sig_qual = 100; - else if (sig_qual < 1) - sig_qual = 0; - - return sig_qual; -} - /** * iwl3945_rx_handle - Main entry function for receiving responses from uCode * @@ -1688,7 +1646,7 @@ void iwl3945_dump_nic_event_log(struct iwl_priv *priv, bool full_log) } #ifdef CONFIG_IWLWIFI_DEBUG - if (!(iwl_get_debug_level(priv) & IWL_DL_FW_ERRORS)) + if (!(iwl_get_debug_level(priv) & IWL_DL_FW_ERRORS) && !full_log) size = (size > DEFAULT_IWL3945_DUMP_EVENT_LOG_ENTRIES) ? 
DEFAULT_IWL3945_DUMP_EVENT_LOG_ENTRIES : size; #else @@ -3867,7 +3825,6 @@ static int iwl3945_init_drv(struct iwl_priv *priv) priv->retry_rate = 1; priv->ibss_beacon = NULL; - spin_lock_init(&priv->lock); spin_lock_init(&priv->sta_lock); spin_lock_init(&priv->hcmd_lock); @@ -3936,9 +3893,11 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) /* Tell mac80211 our characteristics */ hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM | - IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_DYNAMIC_PS; + IEEE80211_HW_SPECTRUM_MGMT; + + if (!priv->cfg->broken_powersave) + hw->flags |= IEEE80211_HW_SUPPORTS_PS | + IEEE80211_HW_SUPPORTS_DYNAMIC_PS; hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | @@ -4057,10 +4016,11 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e * PCI Tx retries from interfering with C3 CPU state */ pci_write_config_byte(pdev, 0x41, 0x00); - /* this spin lock will be used in apm_ops.init and EEPROM access + /* these spin locks will be used in apm_ops.init and EEPROM access * we should init now */ spin_lock_init(&priv->reg_lock); + spin_lock_init(&priv->lock); /*********************** * 4. 
Read EEPROM diff --git a/drivers/net/wireless/iwmc3200wifi/iwm.h b/drivers/net/wireless/iwmc3200wifi/iwm.h index 5a26bb0..8428111 100644 --- a/drivers/net/wireless/iwmc3200wifi/iwm.h +++ b/drivers/net/wireless/iwmc3200wifi/iwm.h @@ -268,7 +268,7 @@ struct iwm_priv { struct sk_buff_head rx_list; struct list_head rx_tickets; - struct list_head rx_packets[IWM_RX_ID_HASH]; + struct list_head rx_packets[IWM_RX_ID_HASH + 1]; struct workqueue_struct *rx_wq; struct work_struct rx_worker; @@ -349,7 +349,7 @@ int iwm_up(struct iwm_priv *iwm); int iwm_down(struct iwm_priv *iwm); /* TX API */ -u16 iwm_tid_to_queue(u16 tid); +int iwm_tid_to_queue(u16 tid); void iwm_tx_credit_inc(struct iwm_priv *iwm, int id, int total_freed_pages); void iwm_tx_worker(struct work_struct *work); int iwm_xmit_frame(struct sk_buff *skb, struct net_device *netdev); diff --git a/drivers/net/wireless/iwmc3200wifi/netdev.c b/drivers/net/wireless/iwmc3200wifi/netdev.c index e4f0f87..c4c0d23 100644 --- a/drivers/net/wireless/iwmc3200wifi/netdev.c +++ b/drivers/net/wireless/iwmc3200wifi/netdev.c @@ -76,7 +76,7 @@ static int iwm_stop(struct net_device *ndev) */ static const u16 iwm_1d_to_queue[8] = { 1, 0, 0, 1, 2, 2, 3, 3 }; -u16 iwm_tid_to_queue(u16 tid) +int iwm_tid_to_queue(u16 tid) { if (tid > IWM_UMAC_TID_NR - 2) return -EINVAL; diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c index 1c57c1f..6d6ed74 100644 --- a/drivers/net/wireless/iwmc3200wifi/rx.c +++ b/drivers/net/wireless/iwmc3200wifi/rx.c @@ -1126,7 +1126,7 @@ static int iwm_ntf_stop_resume_tx(struct iwm_priv *iwm, u8 *buf, if (!stop) { struct iwm_tx_queue *txq; - u16 queue = iwm_tid_to_queue(bit); + int queue = iwm_tid_to_queue(bit); if (queue < 0) continue; diff --git a/drivers/net/wireless/libertas/mesh.c b/drivers/net/wireless/libertas/mesh.c index 2f91c9b..92b7a35 100644 --- a/drivers/net/wireless/libertas/mesh.c +++ b/drivers/net/wireless/libertas/mesh.c @@ -2,6 +2,7 @@ #include #include 
#include +#include #include #include #include @@ -351,8 +352,7 @@ int lbs_add_mesh(struct lbs_private *priv) mesh_dev->netdev_ops = &mesh_netdev_ops; mesh_dev->ethtool_ops = &lbs_ethtool_ops; - memcpy(mesh_dev->dev_addr, priv->dev->dev_addr, - sizeof(priv->dev->dev_addr)); + memcpy(mesh_dev->dev_addr, priv->dev->dev_addr, ETH_ALEN); SET_NETDEV_DEV(priv->mesh_dev, priv->dev->dev.parent); diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index c6a6c04..b0b1c78 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c @@ -567,11 +567,8 @@ int lbs_scan_networks(struct lbs_private *priv, int full_scan) chan_count = lbs_scan_create_channel_list(priv, chan_list); netif_stop_queue(priv->dev); - netif_carrier_off(priv->dev); - if (priv->mesh_dev) { + if (priv->mesh_dev) netif_stop_queue(priv->mesh_dev); - netif_carrier_off(priv->mesh_dev); - } /* Prepare to continue an interrupted scan */ lbs_deb_scan("chan_count %d, scan_channel %d\n", @@ -635,16 +632,13 @@ out2: priv->scan_channel = 0; out: - if (priv->connect_status == LBS_CONNECTED) { - netif_carrier_on(priv->dev); - if (!priv->tx_pending_len) - netif_wake_queue(priv->dev); - } - if (priv->mesh_dev && (priv->mesh_connect_status == LBS_CONNECTED)) { - netif_carrier_on(priv->mesh_dev); - if (!priv->tx_pending_len) - netif_wake_queue(priv->mesh_dev); - } + if (priv->connect_status == LBS_CONNECTED && !priv->tx_pending_len) + netif_wake_queue(priv->dev); + + if (priv->mesh_dev && (priv->mesh_connect_status == LBS_CONNECTED) && + !priv->tx_pending_len) + netif_wake_queue(priv->mesh_dev); + kfree(chan_list); lbs_deb_leave_args(LBS_DEB_SCAN, "ret %d", ret); diff --git a/drivers/net/wireless/libertas/wext.c b/drivers/net/wireless/libertas/wext.c index a8eb9e1..4b1aab5 100644 --- a/drivers/net/wireless/libertas/wext.c +++ b/drivers/net/wireless/libertas/wext.c @@ -2025,10 +2025,8 @@ static int lbs_get_essid(struct net_device *dev, struct iw_request_info 
*info, if (priv->connect_status == LBS_CONNECTED) { memcpy(extra, priv->curbssparams.ssid, priv->curbssparams.ssid_len); - extra[priv->curbssparams.ssid_len] = '\0'; } else { memset(extra, 0, 32); - extra[priv->curbssparams.ssid_len] = '\0'; } /* * If none, we may want to get the one that was set diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 019431d..26a1abd 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -495,7 +495,6 @@ int lbtf_rx(struct lbtf_private *priv, struct sk_buff *skb) stats.band = IEEE80211_BAND_2GHZ; stats.signal = prxpd->snr; stats.noise = prxpd->nf; - stats.qual = prxpd->snr - prxpd->nf; /* Marvell rate index has a hole at value 4 */ if (prxpd->rx_rate > 4) --prxpd->rx_rate; diff --git a/drivers/net/wireless/orinoco/wext.c b/drivers/net/wireless/orinoco/wext.c index 7698fdd..31ca241 100644 --- a/drivers/net/wireless/orinoco/wext.c +++ b/drivers/net/wireless/orinoco/wext.c @@ -23,7 +23,7 @@ #define MAX_RID_LEN 1024 /* Helper routine to record keys - * Do not call from interrupt context */ + * It is called under orinoco_lock so it may not sleep */ static int orinoco_set_key(struct orinoco_private *priv, int index, enum orinoco_alg alg, const u8 *key, int key_len, const u8 *seq, int seq_len) @@ -32,14 +32,14 @@ static int orinoco_set_key(struct orinoco_private *priv, int index, kzfree(priv->keys[index].seq); if (key_len) { - priv->keys[index].key = kzalloc(key_len, GFP_KERNEL); + priv->keys[index].key = kzalloc(key_len, GFP_ATOMIC); if (!priv->keys[index].key) goto nomem; } else priv->keys[index].key = NULL; if (seq_len) { - priv->keys[index].seq = kzalloc(seq_len, GFP_KERNEL); + priv->keys[index].seq = kzalloc(seq_len, GFP_ATOMIC); if (!priv->keys[index].seq) goto free_key; } else diff --git a/drivers/net/wireless/rt2x00/rt2800.h b/drivers/net/wireless/rt2x00/rt2800.h index c5fe867..1a7eae3 100644 --- a/drivers/net/wireless/rt2x00/rt2800.h +++ 
b/drivers/net/wireless/rt2x00/rt2800.h @@ -1323,7 +1323,7 @@ #define PAIRWISE_KEY_ENTRY(__idx) \ ( PAIRWISE_KEY_TABLE_BASE + ((__idx) * sizeof(struct hw_key_entry)) ) #define MAC_IVEIV_ENTRY(__idx) \ - ( MAC_IVEIV_TABLE_BASE + ((__idx) & sizeof(struct mac_iveiv_entry)) ) + ( MAC_IVEIV_TABLE_BASE + ((__idx) * sizeof(struct mac_iveiv_entry)) ) #define MAC_WCID_ATTR_ENTRY(__idx) \ ( MAC_WCID_ATTRIBUTE_BASE + ((__idx) * sizeof(u32)) ) #define SHARED_KEY_ENTRY(__idx) \ diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index eb1e1d0..27bf887 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -37,7 +37,7 @@ #include #include "rt2x00.h" -#ifdef CONFIG_RT2800USB +#if defined(CONFIG_RT2800USB) || defined(CONFIG_RT2800USB_MODULE) #include "rt2x00usb.h" #endif #include "rt2800lib.h" @@ -1121,7 +1121,7 @@ int rt2800_init_registers(struct rt2x00_dev *rt2x00dev) if (rt2x00_intf_is_usb(rt2x00dev)) { rt2800_register_write(rt2x00dev, USB_DMA_CFG, 0x00000000); -#ifdef CONFIG_RT2800USB +#if defined(CONFIG_RT2800USB) || defined(CONFIG_RT2800USB_MODULE) rt2x00usb_vendor_request_sw(rt2x00dev, USB_DEVICE_MODE, 0, USB_MODE_RESET, REGISTER_TIMEOUT); #endif @@ -2022,6 +2022,12 @@ int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) u16 eeprom; /* + * Disable powersaving as default on PCI devices. + */ + if (rt2x00_intf_is_pci(rt2x00dev)) + rt2x00dev->hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; + + /* * Initialize all hw fields. 
*/ rt2x00dev->hw->flags = @@ -2074,8 +2080,7 @@ int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_TX_STBC | - IEEE80211_HT_CAP_RX_STBC | - IEEE80211_HT_CAP_PSMP_SUPPORT; + IEEE80211_HT_CAP_RX_STBC; spec->ht.ampdu_factor = 3; spec->ht.ampdu_density = 4; spec->ht.mcs.tx_params = @@ -2140,8 +2145,8 @@ static void rt2800_get_tkip_seq(struct ieee80211_hw *hw, u8 hw_key_idx, rt2800_register_multiread(rt2x00dev, offset, &iveiv_entry, sizeof(iveiv_entry)); - memcpy(&iveiv_entry.iv[0], iv16, sizeof(iv16)); - memcpy(&iveiv_entry.iv[4], iv32, sizeof(iv32)); + memcpy(iv16, &iveiv_entry.iv[0], sizeof(*iv16)); + memcpy(iv32, &iveiv_entry.iv[4], sizeof(*iv32)); } static int rt2800_set_rts_threshold(struct ieee80211_hw *hw, u32 value) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index af85d18..ab95346 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -922,6 +922,7 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x1737, 0x0070), USB_DEVICE_DATA(&rt2800usb_ops) }, { USB_DEVICE(0x1737, 0x0071), USB_DEVICE_DATA(&rt2800usb_ops) }, { USB_DEVICE(0x1737, 0x0077), USB_DEVICE_DATA(&rt2800usb_ops) }, + { USB_DEVICE(0x1737, 0x0079), USB_DEVICE_DATA(&rt2800usb_ops) }, /* Logitec */ { USB_DEVICE(0x0789, 0x0162), USB_DEVICE_DATA(&rt2800usb_ops) }, { USB_DEVICE(0x0789, 0x0163), USB_DEVICE_DATA(&rt2800usb_ops) }, diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 687e17d..0ca5893 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -2539,6 +2539,11 @@ static int rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) unsigned int i; /* + * Disable powersaving as default. + */ + rt2x00dev->hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; + + /* * Initialize all hw fields. 
*/ rt2x00dev->hw->flags = diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c index a1a3dd1..8a40a14 100644 --- a/drivers/net/wireless/rtl818x/rtl8180_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c @@ -132,7 +132,6 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev) rx_status.antenna = (flags2 >> 15) & 1; /* TODO: improve signal/rssi reporting */ - rx_status.qual = flags2 & 0xFF; rx_status.signal = (flags2 >> 8) & 0x7F; /* XXX: is this correct? */ rx_status.rate_idx = (flags >> 20) & 0xF; diff --git a/drivers/net/wireless/wl12xx/wl1251_boot.c b/drivers/net/wireless/wl12xx/wl1251_boot.c index 2e733e7..28a8086 100644 --- a/drivers/net/wireless/wl12xx/wl1251_boot.c +++ b/drivers/net/wireless/wl12xx/wl1251_boot.c @@ -256,7 +256,7 @@ int wl1251_boot_run_firmware(struct wl1251 *wl) } } - if (loop >= INIT_LOOP) { + if (loop > INIT_LOOP) { wl1251_error("timeout waiting for the hardware to " "complete initialization"); return -EIO; diff --git a/drivers/net/wireless/wl12xx/wl1271_cmd.c b/drivers/net/wireless/wl12xx/wl1271_cmd.c index 886a9bc..c3385b3 100644 --- a/drivers/net/wireless/wl12xx/wl1271_cmd.c +++ b/drivers/net/wireless/wl12xx/wl1271_cmd.c @@ -777,7 +777,7 @@ out: return ret; } -static int wl1271_build_basic_rates(char *rates, u8 band) +static int wl1271_build_basic_rates(u8 *rates, u8 band) { u8 index = 0; @@ -804,7 +804,7 @@ static int wl1271_build_basic_rates(char *rates, u8 band) return index; } -static int wl1271_build_extended_rates(char *rates, u8 band) +static int wl1271_build_extended_rates(u8 *rates, u8 band) { u8 index = 0; diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c index dfa1b9b..7ca95c4 100644 --- a/drivers/net/wireless/zd1211rw/zd_chip.c +++ b/drivers/net/wireless/zd1211rw/zd_chip.c @@ -1325,151 +1325,11 @@ int zd_chip_set_basic_rates(struct zd_chip *chip, u16 cr_rates) return r; } -static int ofdm_qual_db(u8 status_quality, u8 zd_rate, 
unsigned int size) -{ - static const u16 constants[] = { - 715, 655, 585, 540, 470, 410, 360, 315, - 270, 235, 205, 175, 150, 125, 105, 85, - 65, 50, 40, 25, 15 - }; - - int i; - u32 x; - - /* It seems that their quality parameter is somehow per signal - * and is now transferred per bit. - */ - switch (zd_rate) { - case ZD_OFDM_RATE_6M: - case ZD_OFDM_RATE_12M: - case ZD_OFDM_RATE_24M: - size *= 2; - break; - case ZD_OFDM_RATE_9M: - case ZD_OFDM_RATE_18M: - case ZD_OFDM_RATE_36M: - case ZD_OFDM_RATE_54M: - size *= 4; - size /= 3; - break; - case ZD_OFDM_RATE_48M: - size *= 3; - size /= 2; - break; - default: - return -EINVAL; - } - - x = (10000 * status_quality)/size; - for (i = 0; i < ARRAY_SIZE(constants); i++) { - if (x > constants[i]) - break; - } - - switch (zd_rate) { - case ZD_OFDM_RATE_6M: - case ZD_OFDM_RATE_9M: - i += 3; - break; - case ZD_OFDM_RATE_12M: - case ZD_OFDM_RATE_18M: - i += 5; - break; - case ZD_OFDM_RATE_24M: - case ZD_OFDM_RATE_36M: - i += 9; - break; - case ZD_OFDM_RATE_48M: - case ZD_OFDM_RATE_54M: - i += 15; - break; - default: - return -EINVAL; - } - - return i; -} - -static int ofdm_qual_percent(u8 status_quality, u8 zd_rate, unsigned int size) -{ - int r; - - r = ofdm_qual_db(status_quality, zd_rate, size); - ZD_ASSERT(r >= 0); - if (r < 0) - r = 0; - - r = (r * 100)/29; - return r <= 100 ? 
r : 100; -} - -static unsigned int log10times100(unsigned int x) -{ - static const u8 log10[] = { - 0, - 0, 30, 47, 60, 69, 77, 84, 90, 95, 100, - 104, 107, 111, 114, 117, 120, 123, 125, 127, 130, - 132, 134, 136, 138, 139, 141, 143, 144, 146, 147, - 149, 150, 151, 153, 154, 155, 156, 157, 159, 160, - 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, - 170, 171, 172, 173, 174, 174, 175, 176, 177, 177, - 178, 179, 179, 180, 181, 181, 182, 183, 183, 184, - 185, 185, 186, 186, 187, 188, 188, 189, 189, 190, - 190, 191, 191, 192, 192, 193, 193, 194, 194, 195, - 195, 196, 196, 197, 197, 198, 198, 199, 199, 200, - 200, 200, 201, 201, 202, 202, 202, 203, 203, 204, - 204, 204, 205, 205, 206, 206, 206, 207, 207, 207, - 208, 208, 208, 209, 209, 210, 210, 210, 211, 211, - 211, 212, 212, 212, 213, 213, 213, 213, 214, 214, - 214, 215, 215, 215, 216, 216, 216, 217, 217, 217, - 217, 218, 218, 218, 219, 219, 219, 219, 220, 220, - 220, 220, 221, 221, 221, 222, 222, 222, 222, 223, - 223, 223, 223, 224, 224, 224, 224, - }; - - return x < ARRAY_SIZE(log10) ? log10[x] : 225; -} - -enum { - MAX_CCK_EVM_DB = 45, -}; - -static int cck_evm_db(u8 status_quality) -{ - return (20 * log10times100(status_quality)) / 100; -} - -static int cck_snr_db(u8 status_quality) -{ - int r = MAX_CCK_EVM_DB - cck_evm_db(status_quality); - ZD_ASSERT(r >= 0); - return r; -} - -static int cck_qual_percent(u8 status_quality) -{ - int r; - - r = cck_snr_db(status_quality); - r = (100*r)/17; - return r <= 100 ? r : 100; -} - static inline u8 zd_rate_from_ofdm_plcp_header(const void *rx_frame) { return ZD_OFDM | zd_ofdm_plcp_header_rate(rx_frame); } -u8 zd_rx_qual_percent(const void *rx_frame, unsigned int size, - const struct rx_status *status) -{ - return (status->frame_status&ZD_RX_OFDM) ? 
- ofdm_qual_percent(status->signal_quality_ofdm, - zd_rate_from_ofdm_plcp_header(rx_frame), - size) : - cck_qual_percent(status->signal_quality_cck); -} - /** * zd_rx_rate - report zd-rate * @rx_frame - received frame diff --git a/drivers/net/wireless/zd1211rw/zd_chip.h b/drivers/net/wireless/zd1211rw/zd_chip.h index 9fd8f35..f8bbf7d 100644 --- a/drivers/net/wireless/zd1211rw/zd_chip.h +++ b/drivers/net/wireless/zd1211rw/zd_chip.h @@ -929,9 +929,6 @@ static inline int zd_get_beacon_interval(struct zd_chip *chip, u32 *interval) struct rx_status; -u8 zd_rx_qual_percent(const void *rx_frame, unsigned int size, - const struct rx_status *status); - u8 zd_rx_rate(const void *rx_frame, const struct rx_status *status); struct zd_mc_hash { diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index cf51e8f..8ebf5c3 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -828,9 +828,6 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) stats.freq = zd_channels[_zd_chip_get_channel(&mac->chip) - 1].center_freq; stats.band = IEEE80211_BAND_2GHZ; stats.signal = status->signal_strength; - stats.qual = zd_rx_qual_percent(buffer, - length - sizeof(struct rx_status), - status); rate = zd_rx_rate(buffer, status); diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index bd588eb..8e210cd 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -121,7 +121,7 @@ struct controller { #define PCI_DEVICE_ID_AMD_GOLAM_7450 0x7450 #define PCI_DEVICE_ID_AMD_POGO_7458 0x7458 -/* AMD PCIX bridge registers */ +/* AMD PCI-X bridge registers */ #define PCIX_MEM_BASE_LIMIT_OFFSET 0x1C #define PCIX_MISCII_OFFSET 0x48 #define PCIX_MISC_BRIDGE_ERRORS_OFFSET 0x80 diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e56f9be..4173125 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -305,7 +305,7 @@ struct 
device_domain_info { int segment; /* PCI domain */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ }; @@ -1604,7 +1604,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, return ret; parent = parent->bus->self; } - if (pci_is_pcie(tmp)) /* this is a PCIE-to-PCI bridge */ + if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->subordinate), tmp->subordinate->number, 0, @@ -3325,7 +3325,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, parent->devfn); parent = parent->bus->self; } - if (pci_is_pcie(tmp)) /* this is a PCIE-to-PCI bridge */ + if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ iommu_detach_dev(iommu, tmp->subordinate->number, 0); else /* this is a legacy PCI bridge */ diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 8b65a48..95b8491 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -528,7 +528,7 @@ int set_msi_sid(struct irte *irte, struct pci_dev *dev) bridge = pci_find_upstream_pcie_bridge(dev); if (bridge) { - if (pci_is_pcie(bridge))/* this is a PCIE-to-PCI/PCIX bridge */ + if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */ set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, (bridge->bus->number << 8) | dev->bus->number); else /* this is a legacy PCI bridge */ diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index cc617dd..7e28295 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -112,11 +112,7 @@ static bool acpi_pci_can_wakeup(struct pci_dev *dev) static void acpi_pci_propagate_wakeup_enable(struct pci_bus *bus, bool enable) { while (bus->parent) { - struct pci_dev *bridge = bus->self; - 
int ret; - - ret = acpi_pm_device_sleep_wake(&bridge->dev, enable); - if (!ret || pci_is_pcie(bridge)) + if (!acpi_pm_device_sleep_wake(&bus->self->dev, enable)) return; bus = bus->parent; } @@ -131,9 +127,7 @@ static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable) if (acpi_pci_can_wakeup(dev)) return acpi_pm_device_sleep_wake(&dev->dev, enable); - if (!pci_is_pcie(dev)) - acpi_pci_propagate_wakeup_enable(dev->bus, enable); - + acpi_pci_propagate_wakeup_enable(dev->bus, enable); return 0; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0bc27e0..0906599 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1153,11 +1153,11 @@ pci_disable_device(struct pci_dev *dev) /** * pcibios_set_pcie_reset_state - set reset state for device dev - * @dev: the PCI-E device reset + * @dev: the PCIe device reset * @state: Reset state to enter into * * - * Sets the PCI-E reset state for the device. This is the default + * Sets the PCIe reset state for the device. This is the default * implementation. Architecture implementations can override this. 
*/ int __attribute__ ((weak)) pcibios_set_pcie_reset_state(struct pci_dev *dev, @@ -1168,7 +1168,7 @@ int __attribute__ ((weak)) pcibios_set_pcie_reset_state(struct pci_dev *dev, /** * pci_set_pcie_reset_state - set reset state for device dev - * @dev: the PCI-E device reset + * @dev: the PCIe device reset * @state: Reset state to enter into * * @@ -2296,6 +2296,10 @@ static int pci_dev_reset(struct pci_dev *dev, int probe) down(&dev->dev.sem); } + rc = pci_dev_specific_reset(dev, probe); + if (rc != -ENOTTY) + goto done; + rc = pcie_flr(dev, probe); if (rc != -ENOTTY) goto done; @@ -2779,6 +2783,11 @@ int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev) return 1; } +void __weak pci_fixup_cardbus(struct pci_bus *bus) +{ +} +EXPORT_SYMBOL(pci_fixup_cardbus); + static int __init pci_setup(char *str) { while (str) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 33ed8e0..fbd0e3a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -313,4 +313,12 @@ static inline int pci_resource_alignment(struct pci_dev *dev, extern void pci_enable_acs(struct pci_dev *dev); +struct pci_dev_reset_methods { + u16 vendor; + u16 device; + int (*reset)(struct pci_dev *dev, int probe); +}; + +extern int pci_dev_specific_reset(struct pci_dev *dev, int probe); + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/pcie/aer/Kconfig.debug b/drivers/pci/pcie/aer/Kconfig.debug index b8c925c..9142949 100644 --- a/drivers/pci/pcie/aer/Kconfig.debug +++ b/drivers/pci/pcie/aer/Kconfig.debug @@ -3,14 +3,14 @@ # config PCIEAER_INJECT - tristate "PCIE AER error injector support" + tristate "PCIe AER error injector support" depends on PCIEAER default n help This enables PCI Express Root Port Advanced Error Reporting (AER) software error injector. - Debuging PCIE AER code is quite difficult because it is hard + Debugging PCIe AER code is quite difficult because it is hard to trigger various real hardware errors. 
Software based error injection can fake almost all kinds of errors with the help of a user space helper tool aer-inject, which can be diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c index 7fcd533..797d478 100644 --- a/drivers/pci/pcie/aer/aer_inject.c +++ b/drivers/pci/pcie/aer/aer_inject.c @@ -1,7 +1,7 @@ /* - * PCIE AER software error injection support. + * PCIe AER software error injection support. * - * Debuging PCIE AER code is quite difficult because it is hard to + * Debuging PCIe AER code is quite difficult because it is hard to * trigger various real hardware errors. Software based error * injection can fake almost all kinds of errors with the help of a * user space helper tool aer-inject, which can be gotten from: @@ -484,5 +484,5 @@ static void __exit aer_inject_exit(void) module_init(aer_inject_init); module_exit(aer_inject_exit); -MODULE_DESCRIPTION("PCIE AER software error injector"); +MODULE_DESCRIPTION("PCIe AER software error injector"); MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 97a3459..21f215f 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -155,7 +155,7 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) mutex_init(&rpc->rpc_mutex); init_waitqueue_head(&rpc->wait_release); - /* Use PCIE bus function to store rpc into PCIE device */ + /* Use PCIe bus function to store rpc into PCIe device */ set_service_data(dev, rpc); return rpc; diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index 8edb2f3..0481408 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -24,7 +24,7 @@ * * @return: Zero on success. Nonzero otherwise. * - * Invoked when PCIE bus loads AER service driver. To avoid conflict with + * Invoked when PCIe bus loads AER service driver. 
To avoid conflict with * BIOS AER support requires BIOS to yield AER control to OS native driver. **/ int aer_osc_setup(struct pcie_device *pciedev) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index ae672ca..c843a79 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -587,7 +587,7 @@ static void handle_error_source(struct pcie_device *aerdev, * aer_enable_rootport - enable Root Port's interrupts when receiving messages * @rpc: pointer to a Root Port data structure * - * Invoked when PCIE bus loads AER service driver. + * Invoked when PCIe bus loads AER service driver. */ void aer_enable_rootport(struct aer_rpc *rpc) { @@ -597,7 +597,7 @@ void aer_enable_rootport(struct aer_rpc *rpc) u32 reg32; pos = pci_pcie_cap(pdev); - /* Clear PCIE Capability's Device Status */ + /* Clear PCIe Capability's Device Status */ pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, &reg16); pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16); @@ -631,7 +631,7 @@ void aer_enable_rootport(struct aer_rpc *rpc) * disable_root_aer - disable Root Port's interrupts when receiving messages * @rpc: pointer to a Root Port data structure * - * Invoked when PCIE bus unloads AER service driver. + * Invoked when PCIe bus unloads AER service driver. 
*/ static void disable_root_aer(struct aer_rpc *rpc) { diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 44acde7..9d3e4c8 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -184,7 +184,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) if (info->status == 0) { AER_PR(info, dev, - "PCIE Bus Error: severity=%s, type=Unaccessible, " + "PCIe Bus Error: severity=%s, type=Unaccessible, " "id=%04x(Unregistered Agent ID)\n", aer_error_severity_string[info->severity], id); } else { @@ -194,7 +194,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) agent = AER_GET_AGENT(info->severity, info->status); AER_PR(info, dev, - "PCIE Bus Error: severity=%s, type=%s, id=%04x(%s)\n", + "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", aer_error_severity_string[info->severity], aer_error_layer[layer], id, aer_agent_string[agent]); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 5a01fc7..be53d98 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1,6 +1,6 @@ /* * File: drivers/pci/pcie/aspm.c - * Enabling PCIE link L0s/L1 state and Clock Power Management + * Enabling PCIe link L0s/L1 state and Clock Power Management * * Copyright (C) 2007 Intel * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com) @@ -499,7 +499,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) int pos; u32 reg32; /* - * Some functions in a slot might not all be PCIE functions, + * Some functions in a slot might not all be PCIe functions, * very strange. 
Disable ASPM for the whole slot */ list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index a49452e..34d6517 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -24,7 +24,7 @@ */ #define DRIVER_VERSION "v1.0" #define DRIVER_AUTHOR "tom.l.nguyen@intel.com" -#define DRIVER_DESC "PCIE Port Bus Driver" +#define DRIVER_DESC "PCIe Port Bus Driver" MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 7cfa7c3..c746943 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2629,14 +2629,86 @@ static int __init pci_apply_final_quirks(void) if (!pci_cache_line_size) { printk(KERN_DEBUG "PCI: CLS %u bytes, default %u\n", cls << 2, pci_dfl_cache_line_size << 2); - pci_cache_line_size = cls; + pci_cache_line_size = cls ? cls : pci_dfl_cache_line_size; } return 0; } fs_initcall_sync(pci_apply_final_quirks); + +/* + * Followings are device-specific reset methods which can be used to + * reset a single function if other methods (e.g. FLR, PM D0->D3) are + * not available. 
+ */ +static int reset_intel_generic_dev(struct pci_dev *dev, int probe) +{ + int pos; + + /* only implement PCI_CLASS_SERIAL_USB at present */ + if (dev->class == PCI_CLASS_SERIAL_USB) { + pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); + if (!pos) + return -ENOTTY; + + if (probe) + return 0; + + pci_write_config_byte(dev, pos + 0x4, 1); + msleep(100); + + return 0; + } else { + return -ENOTTY; + } +} + +static int reset_intel_82599_sfp_virtfn(struct pci_dev *dev, int probe) +{ + int pos; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return -ENOTTY; + + if (probe) + return 0; + + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_BCR_FLR); + msleep(100); + + return 0; +} + +#define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed + +static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF, + reset_intel_82599_sfp_virtfn }, + { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, + reset_intel_generic_dev }, + { 0 } +}; + +int pci_dev_specific_reset(struct pci_dev *dev, int probe) +{ + const struct pci_dev_reset_methods *i; + + for (i = pci_dev_reset_methods; i->reset; i++) { + if ((i->vendor == dev->vendor || + i->vendor == (u16)PCI_ANY_ID) && + (i->device == dev->device || + i->device == (u16)PCI_ANY_ID)) + return i->reset(dev, probe); + } + + return -ENOTTY; +} + #else void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) {} +int pci_dev_specific_reset(struct pci_dev *dev, int probe) { return -ENOTTY; } #endif EXPORT_SYMBOL(pci_fixup_device); diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 6dae871..4a471dc 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -15,9 +15,9 @@ DECLARE_RWSEM(pci_bus_sem); /* - * find the upstream PCIE-to-PCI bridge of a PCI device + * find the upstream PCIe-to-PCI bridge of a PCI device * if the device is PCIE, return NULL - * if the device isn't connected to a PCIE bridge (that is its parent is a + * if the device 
isn't connected to a PCIe bridge (that is its parent is a * legacy PCI bridge and the bridge is directly connected to bus 0), return its * parent */ @@ -37,7 +37,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev) tmp = pdev; continue; } - /* PCI device should connect to a PCIE bridge */ + /* PCI device should connect to a PCIe bridge */ if (pdev->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE) { /* Busted hardware? */ WARN_ON_ONCE(1); diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index cdf50f3..d99f846 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -222,7 +222,7 @@ int __ref cb_alloc(struct pcmcia_socket *s) unsigned int max, pass; s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0)); -/* pcibios_fixup_bus(bus); */ + pci_fixup_cardbus(bus); max = bus->secondary; for (pass = 0; pass < 2; pass++) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index ec4faff..db32c25 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -231,8 +231,36 @@ config THINKPAD_ACPI This driver was formerly known as ibm-acpi. + Extra functionality will be available if the rfkill (CONFIG_RFKILL) + and/or ALSA (CONFIG_SND) subsystems are available in the kernel. + Note that if you want ThinkPad-ACPI to be built-in instead of + modular, ALSA and rfkill will also have to be built-in. + If you have an IBM or Lenovo ThinkPad laptop, say Y or M here. +config THINKPAD_ACPI_ALSA_SUPPORT + bool "Console audio control ALSA interface" + depends on THINKPAD_ACPI + depends on SND + depends on SND = y || THINKPAD_ACPI = SND + default y + ---help--- + Enables monitoring of the built-in console audio output control + (headphone and speakers), which is operated by the mute and (in + some ThinkPad models) volume hotkeys. + + If this option is enabled, ThinkPad-ACPI will export an ALSA card + with a single read-only mixer control, which should be used for + on-screen-display feedback purposes by the Desktop Environment. 
+ + Optionally, the driver will also allow software control (the + ALSA mixer will be made read-write). Please refer to the driver + documentation for details. + + All IBM models have both volume and mute control. Newer Lenovo + models only have mute control (the volume hotkeys are just normal + keys and volume control is done through the main HDA mixer). + config THINKPAD_ACPI_DEBUGFACILITIES bool "Maintainer debug facilities" depends on THINKPAD_ACPI diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 916ccb2..1b1dddb 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -202,8 +202,13 @@ static void dell_wmi_notify(u32 value, void *context) struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL }; static struct key_entry *key; union acpi_object *obj; + acpi_status status; - wmi_get_event_data(value, &response); + status = wmi_get_event_data(value, &response); + if (status != AE_OK) { + printk(KERN_INFO "dell-wmi: bad event status 0x%x\n", status); + return; + } obj = (union acpi_object *)response.pointer; @@ -323,8 +328,9 @@ static int __init dell_wmi_input_setup(void) static int __init dell_wmi_init(void) { int err; + acpi_status status; - if (wmi_has_guid(DELL_EVENT_GUID)) { + if (!wmi_has_guid(DELL_EVENT_GUID)) { printk(KERN_WARNING "dell-wmi: No known WMI GUID found\n"); return -ENODEV; } @@ -336,14 +342,14 @@ static int __init dell_wmi_init(void) if (err) return err; - err = wmi_install_notify_handler(DELL_EVENT_GUID, + status = wmi_install_notify_handler(DELL_EVENT_GUID, dell_wmi_notify, NULL); - if (err) { + if (ACPI_FAILURE(status)) { input_unregister_device(dell_wmi_input_dev); printk(KERN_ERR "dell-wmi: Unable to register notify handler - %d\n", - err); - return err; + status); + return -ENODEV; } return 0; diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 8781d8f..5b648f0 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ 
-338,8 +338,13 @@ static void hp_wmi_notify(u32 value, void *context) static struct key_entry *key; union acpi_object *obj; int eventcode; + acpi_status status; - wmi_get_event_data(value, &response); + status = wmi_get_event_data(value, &response); + if (status != AE_OK) { + printk(KERN_INFO "hp-wmi: bad event status 0x%x\n", status); + return; + } obj = (union acpi_object *)response.pointer; @@ -581,7 +586,7 @@ static int __init hp_wmi_init(void) if (wmi_has_guid(HPWMI_EVENT_GUID)) { err = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL); - if (!err) + if (ACPI_SUCCESS(err)) hp_wmi_input_setup(); } diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c index 7f77f90..f5f70d4 100644 --- a/drivers/platform/x86/msi-wmi.c +++ b/drivers/platform/x86/msi-wmi.c @@ -149,8 +149,13 @@ static void msi_wmi_notify(u32 value, void *context) static struct key_entry *key; union acpi_object *obj; ktime_t cur; + acpi_status status; - wmi_get_event_data(value, &response); + status = wmi_get_event_data(value, &response); + if (status != AE_OK) { + printk(KERN_INFO DRV_PFX "bad event status 0x%x\n", status); + return; + } obj = (union acpi_object *)response.pointer; @@ -236,7 +241,7 @@ static int __init msi_wmi_init(void) } err = wmi_install_notify_handler(MSIWMI_EVENT_GUID, msi_wmi_notify, NULL); - if (err) + if (ACPI_FAILURE(err)) return -EINVAL; err = msi_wmi_input_setup(); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 448c8ae..e67e4fe 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6384,11 +6384,13 @@ static struct ibm_struct brightness_driver_data = { * and we leave them unchanged. 
*/ +#ifdef CONFIG_THINKPAD_ACPI_ALSA_SUPPORT + #define TPACPI_ALSA_DRVNAME "ThinkPad EC" #define TPACPI_ALSA_SHRTNAME "ThinkPad Console Audio Control" #define TPACPI_ALSA_MIXERNAME TPACPI_ALSA_SHRTNAME -static int alsa_index = SNDRV_DEFAULT_IDX1; +static int alsa_index = ~((1 << (SNDRV_CARDS - 3)) - 1); /* last three slots */ static char *alsa_id = "ThinkPadEC"; static int alsa_enable = SNDRV_DEFAULT_ENABLE1; @@ -6705,10 +6707,11 @@ static int __init volume_create_alsa_mixer(void) rc = snd_card_create(alsa_index, alsa_id, THIS_MODULE, sizeof(struct tpacpi_alsa_data), &card); - if (rc < 0) - return rc; - if (!card) - return -ENOMEM; + if (rc < 0 || !card) { + printk(TPACPI_ERR + "Failed to create ALSA card structures: %d\n", rc); + return 1; + } BUG_ON(!card->private_data); data = card->private_data; @@ -6741,8 +6744,9 @@ static int __init volume_create_alsa_mixer(void) rc = snd_ctl_add(card, ctl_vol); if (rc < 0) { printk(TPACPI_ERR - "Failed to create ALSA volume control\n"); - goto err_out; + "Failed to create ALSA volume control: %d\n", + rc); + goto err_exit; } data->ctl_vol_id = &ctl_vol->id; } @@ -6750,22 +6754,25 @@ static int __init volume_create_alsa_mixer(void) ctl_mute = snd_ctl_new1(&volume_alsa_control_mute, NULL); rc = snd_ctl_add(card, ctl_mute); if (rc < 0) { - printk(TPACPI_ERR "Failed to create ALSA mute control\n"); - goto err_out; + printk(TPACPI_ERR "Failed to create ALSA mute control: %d\n", + rc); + goto err_exit; } data->ctl_mute_id = &ctl_mute->id; snd_card_set_dev(card, &tpacpi_pdev->dev); rc = snd_card_register(card); - -err_out: if (rc < 0) { - snd_card_free(card); - card = NULL; + printk(TPACPI_ERR "Failed to register ALSA card: %d\n", rc); + goto err_exit; } alsa_card = card; - return rc; + return 0; + +err_exit: + snd_card_free(card); + return 1; } #define TPACPI_VOL_Q_MUTEONLY 0x0001 /* Mute-only control available */ @@ -7016,6 +7023,28 @@ static struct ibm_struct volume_driver_data = { .shutdown = volume_shutdown, }; +#else /* 
!CONFIG_THINKPAD_ACPI_ALSA_SUPPORT */ + +#define alsa_card NULL + +static void inline volume_alsa_notify_change(void) +{ +} + +static int __init volume_init(struct ibm_init_struct *iibm) +{ + printk(TPACPI_INFO + "volume: disabled as there is no ALSA support in this kernel\n"); + + return 1; +} + +static struct ibm_struct volume_driver_data = { + .name = "volume", +}; + +#endif /* CONFIG_THINKPAD_ACPI_ALSA_SUPPORT */ + /************************************************************************* * Fan subdriver */ @@ -8738,6 +8767,7 @@ MODULE_PARM_DESC(hotkey_report_mode, "used for backwards compatibility with userspace, " "see documentation"); +#ifdef CONFIG_THINKPAD_ACPI_ALSA_SUPPORT module_param_named(volume_mode, volume_mode, uint, 0444); MODULE_PARM_DESC(volume_mode, "Selects volume control strategy: " @@ -8760,6 +8790,7 @@ module_param_named(id, alsa_id, charp, 0444); MODULE_PARM_DESC(id, "ALSA id for the ACPI EC Mixer"); module_param_named(enable, alsa_enable, bool, 0444); MODULE_PARM_DESC(enable, "Enable the ALSA interface for the ACPI EC Mixer"); +#endif /* CONFIG_THINKPAD_ACPI_ALSA_SUPPORT */ #define TPACPI_PARAM(feature) \ module_param_call(feature, set_ibm_param, NULL, NULL, 0); \ diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 9f93d6c..b104302 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -492,8 +492,7 @@ wmi_notify_handler handler, void *data) if (!guid || !handler) return AE_BAD_PARAMETER; - find_guid(guid, &block); - if (!block) + if (!find_guid(guid, &block)) return AE_NOT_EXIST; if (block->handler) @@ -521,8 +520,7 @@ acpi_status wmi_remove_notify_handler(const char *guid) if (!guid) return AE_BAD_PARAMETER; - find_guid(guid, &block); - if (!block) + if (!find_guid(guid, &block)) return AE_NOT_EXIST; if (!block->handler) @@ -716,6 +714,22 @@ static int wmi_class_init(void) return ret; } +static bool guid_already_parsed(const char *guid_string) +{ + struct guid_block *gblock; + struct wmi_block 
*wblock; + struct list_head *p; + + list_for_each(p, &wmi_blocks.list) { + wblock = list_entry(p, struct wmi_block, list); + gblock = &wblock->gblock; + + if (strncmp(gblock->guid, guid_string, 16) == 0) + return true; + } + return false; +} + /* * Parse the _WDG method for the GUID data blocks */ @@ -725,6 +739,7 @@ static __init acpi_status parse_wdg(acpi_handle handle) union acpi_object *obj; struct guid_block *gblock; struct wmi_block *wblock; + char guid_string[37]; acpi_status status; u32 i, total; @@ -747,6 +762,19 @@ static __init acpi_status parse_wdg(acpi_handle handle) memcpy(gblock, obj->buffer.pointer, obj->buffer.length); for (i = 0; i < total; i++) { + /* + Some WMI devices, like those for nVidia hooks, have a + duplicate GUID. It's not clear what we should do in this + case yet, so for now, we'll just ignore the duplicate. + Anyone who wants to add support for that device can come + up with a better workaround for the mess then. + */ + if (guid_already_parsed(gblock[i].guid) == true) { + wmi_gtoa(gblock[i].guid, guid_string); + printk(KERN_INFO PREFIX "Skipping duplicate GUID %s\n", + guid_string); + continue; + } wblock = kzalloc(sizeof(struct wmi_block), GFP_KERNEL); if (!wblock) return AE_NO_MEMORY; diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 094ea65..7b2600b 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -5,7 +5,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session. Disable use of server inode numbers when server only partially supports them (e.g. for one server querying inode numbers on -FindFirst fails but QPathInfo queries works). +FindFirst fails but QPathInfo queries works). Fix oops with dfs in +cifs_put_smb_ses. Fix mmap to work on directio mounts (needed +for OpenOffice when on forcedirectio mount e.g.) 
Version 1.60 ------------- diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 29f1da7..8c6a036 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = { }; const struct file_operations cifs_file_direct_ops = { - /* no mmap, no aio, no readv - + /* no aio, no readv - BB reevaluate whether they can be done with directio, no cache */ .read = cifs_user_read, .write = cifs_user_write, @@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = { .lock = cifs_lock, .fsync = cifs_fsync, .flush = cifs_flush, + .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, #ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 63ea83f..3bbcaa7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2287,12 +2287,12 @@ int cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, char *mount_data_global, const char *devname) { - int rc = 0; + int rc; int xid; struct smb_vol *volume_info; - struct cifsSesInfo *pSesInfo = NULL; - struct cifsTconInfo *tcon = NULL; - struct TCP_Server_Info *srvTcp = NULL; + struct cifsSesInfo *pSesInfo; + struct cifsTconInfo *tcon; + struct TCP_Server_Info *srvTcp; char *full_path; char *mount_data = mount_data_global; #ifdef CONFIG_CIFS_DFS_UPCALL @@ -2301,6 +2301,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, int referral_walks_count = 0; try_mount_again: #endif + rc = 0; + tcon = NULL; + pSesInfo = NULL; + srvTcp = NULL; full_path = NULL; xid = GetXid(); @@ -2597,6 +2601,7 @@ remote_path_check: cleanup_volume_info(&volume_info); referral_walks_count++; + FreeXid(xid); goto try_mount_again; } #else /* No DFS support, return error on mount */ diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9acf7e8..9ed1bb1 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -28,6 +28,7 @@ config EXT4_FS config EXT4_USE_FOR_EXT23 bool "Use ext4 for ext2/ext3 file systems" + depends on 
EXT4_FS depends on EXT3_FS=n || EXT2_FS=n default y help diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 4df8621..a60ab9a 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include "ext4.h" diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3a7928f..91ae460 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3023,6 +3023,14 @@ out: return err; } +static void unmap_underlying_metadata_blocks(struct block_device *bdev, + sector_t block, int count) +{ + int i; + for (i = 0; i < count; i++) + unmap_underlying_metadata(bdev, block + i); +} + static int ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, @@ -3098,6 +3106,18 @@ out: } else allocated = ret; set_buffer_new(bh_result); + /* + * if we allocated more blocks than requested + * we need to make sure we unmap the extra block + * allocated. The actual needed block will get + * unmapped later when we find the buffer_head marked + * new. 
+ */ + if (allocated > max_blocks) { + unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, + newblock + max_blocks, + allocated - max_blocks); + } map_out: set_buffer_mapped(bh_result); out1: @@ -3190,7 +3210,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_ext_find_extent() */ - BUG_ON(path[depth].p_ext == NULL && depth != 0); + if (path[depth].p_ext == NULL && depth != 0) { + ext4_error(inode->i_sb, __func__, "bad extent address " + "inode: %lu, iblock: %d, depth: %d", + inode->i_ino, iblock, depth); + err = -EIO; + goto out2; + } eh = path[depth].p_hdr; ex = path[depth].p_ext; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0b22497..98bd140 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; - if (jbd2_log_start_commit(journal, commit_tid)) + if (jbd2_log_start_commit(journal, commit_tid)) { + /* + * When the journal is on a different device than the + * fs data disk, we need to issue the barrier in + * writeback mode. (In ordered mode, the jbd2 layer + * will take care of issuing the barrier. In + * data=journal, all of the data blocks are written to + * the journal device.) 
+ */ + if (ext4_should_writeback_data(inode) && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); jbd2_log_wait_commit(journal, commit_tid); - else if (journal->j_flags & JBD2_BARRIER) + } else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ab80796..84eeb8f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1043,43 +1043,47 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks) return ext4_indirect_calc_metadata_amount(inode, blocks); } +/* + * Called with i_data_sem down, which is important since we can call + * ext4_discard_preallocations() from here. + */ static void ext4_da_update_reserve_space(struct inode *inode, int used) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int total, mdb, mdb_free, mdb_claim = 0; - - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - used; - mdb = ext4_calc_metadata_amount(inode, total); - - /* figure out how many metablocks to release */ - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - - if (mdb_free) { - /* Account for allocated meta_blocks */ - mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; - BUG_ON(mdb_free < mdb_claim); - mdb_free -= mdb_claim; - - /* update fs dirty blocks counter */ + struct ext4_inode_info *ei = EXT4_I(inode); + int mdb_free = 0; + + spin_lock(&ei->i_block_reservation_lock); + if (unlikely(used > ei->i_reserved_data_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " + "with only %d reserved data blocks\n", + __func__, inode->i_ino, used, + ei->i_reserved_data_blocks); + WARN_ON(1); + used = ei->i_reserved_data_blocks; + } + + /* Update per-inode reservations */ + 
ei->i_reserved_data_blocks -= used; + used += ei->i_allocated_meta_blocks; + ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; + ei->i_allocated_meta_blocks = 0; + percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); + + if (ei->i_reserved_data_blocks == 0) { + /* + * We can release all of the reserved metadata blocks + * only when we have written all of the delayed + * allocation blocks. + */ + mdb_free = ei->i_allocated_meta_blocks; percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); - EXT4_I(inode)->i_allocated_meta_blocks = 0; - EXT4_I(inode)->i_reserved_meta_blocks = mdb; + ei->i_allocated_meta_blocks = 0; } - - /* update per-inode reservations */ - BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= used; - percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_claim_block(inode, used + mdb_claim); - - /* - * free those over-booking quota for metadata blocks - */ + /* Update quota subsystem */ + vfs_dq_claim_block(inode, used); if (mdb_free) vfs_dq_release_reservation_block(inode, mdb_free); @@ -1088,7 +1092,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * there aren't any writers on the inode, we can discard the * inode's preallocations. 
*/ - if (!total && (atomic_read(&inode->i_writecount) == 0)) + if ((ei->i_reserved_data_blocks == 0) && + (atomic_read(&inode->i_writecount) == 0)) ext4_discard_preallocations(inode); } @@ -1801,7 +1806,8 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) { int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - unsigned long md_needed, mdblocks, total = 0; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long md_needed, md_reserved, total = 0; /* * recalculate the amount of metadata blocks to reserve @@ -1809,35 +1815,44 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) * worse case is one extent per block */ repeat: - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; - mdblocks = ext4_calc_metadata_amount(inode, total); - BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); - - md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; + spin_lock(&ei->i_block_reservation_lock); + md_reserved = ei->i_reserved_meta_blocks; + md_needed = ext4_calc_metadata_amount(inode, nrblocks); total = md_needed + nrblocks; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + spin_unlock(&ei->i_block_reservation_lock); /* * Make quota reservation here to prevent quota overflow * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, total)) + if (vfs_dq_reserve_block(inode, total)) { + /* + * We tend to badly over-estimate the amount of + * metadata blocks which are needed, so if we have + * reserved any metadata blocks, try to force out the + * inode and see if we have any better luck. 
+ */ + if (md_reserved && retries++ <= 3) + goto retry; return -EDQUOT; + } if (ext4_claim_free_blocks(sbi, total)) { vfs_dq_release_reservation_block(inode, total); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { + retry: + if (md_reserved) + write_inode_now(inode, (retries == 3)); yield(); goto repeat; } return -ENOSPC; } - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - EXT4_I(inode)->i_reserved_data_blocks += nrblocks; - EXT4_I(inode)->i_reserved_meta_blocks += md_needed; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + spin_lock(&ei->i_block_reservation_lock); + ei->i_reserved_data_blocks += nrblocks; + ei->i_reserved_meta_blocks += md_needed; + spin_unlock(&ei->i_block_reservation_lock); return 0; /* success */ } @@ -1845,49 +1860,45 @@ repeat: static void ext4_da_release_space(struct inode *inode, int to_free) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int total, mdb, mdb_free, release; + struct ext4_inode_info *ei = EXT4_I(inode); if (!to_free) return; /* Nothing to release, exit */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - if (!EXT4_I(inode)->i_reserved_data_blocks) { + if (unlikely(to_free > ei->i_reserved_data_blocks)) { /* - * if there is no reserved blocks, but we try to free some - * then the counter is messed up somewhere. - * but since this function is called from invalidate - * page, it's harmless to return without any action + * if there aren't enough reserved blocks, then the + * counter is messed up somewhere. Since this + * function is called from invalidate page, it's + * harmless to return without any action. 
*/ - printk(KERN_INFO "ext4 delalloc try to release %d reserved " - "blocks for inode %lu, but there is no reserved " - "data blocks\n", to_free, inode->i_ino); - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - return; + ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " + "ino %lu, to_free %d with only %d reserved " + "data blocks\n", inode->i_ino, to_free, + ei->i_reserved_data_blocks); + WARN_ON(1); + to_free = ei->i_reserved_data_blocks; } + ei->i_reserved_data_blocks -= to_free; - /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - to_free; - mdb = ext4_calc_metadata_amount(inode, total); - - /* figure out how many metablocks to release */ - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - - release = to_free + mdb_free; - - /* update fs dirty blocks counter for truncate case */ - percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); + if (ei->i_reserved_data_blocks == 0) { + /* + * We can release all of the reserved metadata blocks + * only when we have written all of the delayed + * allocation blocks. 
+ */ + to_free += ei->i_allocated_meta_blocks; + ei->i_allocated_meta_blocks = 0; + } - /* update per-inode reservations */ - BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= to_free; + /* update fs dirty blocks counter */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - EXT4_I(inode)->i_reserved_meta_blocks = mdb; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_release_reservation_block(inode, release); + vfs_dq_release_reservation_block(inode, to_free); } static void ext4_da_page_release_reservation(struct page *page, @@ -2967,8 +2978,7 @@ retry: out_writepages: if (!no_nrwrite_index_update) wbc->no_nrwrite_index_update = 0; - if (wbc->nr_to_write > nr_to_writebump) - wbc->nr_to_write -= nr_to_writebump; + wbc->nr_to_write -= nr_to_writebump; wbc->range_start = range_start; trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); return ret; @@ -2993,11 +3003,18 @@ static int ext4_nonda_switch(struct super_block *sb) if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { /* - * free block count is less that 150% of dirty blocks - * or free blocks is less that watermark + * free block count is less than 150% of dirty blocks + * or free blocks is less than watermark */ return 1; } + /* + * Even if we don't switch but are nearing capacity, + * start pushing delalloc when 1/2 of free blocks are dirty. 
+ */ + if (free_blocks < 2 * dirty_blocks) + writeback_inodes_sb_if_idle(sb); + return 0; } diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 0ca8110..436521c 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include "ext4_jbd2.h" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6ed9aa9..7cccb35 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2174,9 +2174,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, struct super_block *sb = sbi->s_buddy_cache->i_sb; return snprintf(buf, PAGE_SIZE, "%llu\n", - sbi->s_kbytes_written + + (unsigned long long)(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); + EXT4_SB(sb)->s_sectors_written_start) >> 1))); } static ssize_t inode_readahead_blks_store(struct ext4_attr *a, @@ -4005,6 +4005,7 @@ static inline void unregister_as_ext2(void) { unregister_filesystem(&ext2_fs_type); } +MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } @@ -4031,6 +4032,7 @@ static inline void unregister_as_ext3(void) { unregister_filesystem(&ext3_fs_type); } +MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 83218be..f3a2f7e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1332,6 +1332,8 @@ retry: goto cleanup; kfree(b_entry_name); kfree(buffer); + b_entry_name = NULL; + buffer = NULL; brelse(is->iloc.bh); kfree(is); kfree(bs); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 49bc1b8..1a7c42c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, /** * bdi_start_writeback - start writeback * @bdi: the backing device to write from + * @sb: write inodes from this super_block * @nr_pages: 
the number of pages to write * * Description: @@ -1187,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb) EXPORT_SYMBOL(writeback_inodes_sb); /** + * writeback_inodes_sb_if_idle - start writeback if none underway + * @sb: the superblock + * + * Invoke writeback_inodes_sb if no writeback is currently underway. + * Returns 1 if writeback was started, 0 if not. + */ +int writeback_inodes_sb_if_idle(struct super_block *sb) +{ + if (!writeback_in_progress(sb->s_bdi)) { + writeback_inodes_sb(sb); + return 1; + } else + return 0; +} +EXPORT_SYMBOL(writeback_inodes_sb_if_idle); + +/** * sync_inodes_sb - sync sb inode pages * @sb: the superblock * diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index ca0f5eb..8868493 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* @@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal) journal->j_tail_sequence = first_tid; journal->j_tail = blocknr; spin_unlock(&journal->j_state_lock); + + /* + * If there is an external journal, we need to make sure that + * any data blocks that were recently written out --- perhaps + * by jbd2_log_do_checkpoint() --- are flushed out before we + * drop the transactions from the external journal. It's + * unlikely this will be necessary, especially with a + * appropriately sized journal, but we need this to guarantee + * correctness. Fortunately jbd2_cleanup_journal_tail() + * doesn't get called all that often. 
+ */ + if ((journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); if (!(journal->j_flags & JBD2_ABORT)) jbd2_journal_update_superblock(journal, 1); return 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6a10238..1bc74b6 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); + commit_transaction->t_flushed_data_blocks = 1; jinode->i_flags &= ~JI_COMMIT_RUNNING; wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } @@ -708,8 +709,17 @@ start_journal_io: } } - /* Done it all: now write the commit record asynchronously. */ + /* + * If the journal is not located on the file system device, + * then we must flush the file system device before we issue + * the commit record + */ + if (commit_transaction->t_flushed_data_blocks && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); + /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { err = journal_submit_commit_record(journal, commit_transaction, @@ -720,13 +730,6 @@ start_journal_io: blkdev_issue_flush(journal->j_dev, NULL); } - /* - * This is the right place to wait for data buffers both for ASYNC - * and !ASYNC commit. If commit is ASYNC, we need to wait only after - * the commit block went to disk (which happens above). If commit is - * SYNC, we need to wait for data buffers before we start writing - * commit block, which happens below in such setting. 
- */ err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { printk(KERN_WARNING diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 17af879..ac0d027 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -814,7 +814,7 @@ static journal_t * journal_init_common (void) journal_t *journal; int err; - journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); + journal = kzalloc(sizeof(*journal), GFP_KERNEL); if (!journal) goto fail; diff --git a/fs/namei.c b/fs/namei.c index 68921d9..b55440b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -232,6 +232,7 @@ int generic_permission(struct inode *inode, int mask, /* * Searching includes executable on directories, else just read. */ + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) if (capable(CAP_DAC_READ_SEARCH)) return 0; diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 6854957..65c8727 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1277,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb) struct reiserfs_bitmap_info *bitmap; unsigned int bmap_nr = reiserfs_bmap_count(sb); + /* Avoid lock recursion in fault case */ + reiserfs_write_unlock(sb); bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); + reiserfs_write_lock(sb); if (bitmap == NULL) return -ENOMEM; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 290ae38..1150ebb 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode) JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); struct reiserfs_transaction_handle th; + int depth; int err; truncate_inode_pages(&inode->i_data, 0); - reiserfs_write_lock(inode->i_sb); + depth = reiserfs_write_lock_once(inode->i_sb); /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. 
*/ if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ @@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode) out: clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ inode->i_blocks = 0; - reiserfs_write_unlock(inode->i_sb); + reiserfs_write_unlock_once(inode->i_sb, depth); } static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 2f8a7e7..83ac4d3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2009,10 +2009,11 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, destroy_workqueue(commit_wq); commit_wq = NULL; } - reiserfs_write_lock(sb); free_journal_ram(sb); + reiserfs_write_lock(sb); + return 0; } @@ -2758,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name, struct reiserfs_journal *journal; struct reiserfs_journal_list *jl; char b[BDEVNAME_SIZE]; + int ret; + /* + * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS + * dependency inversion warnings. 
+ */ + reiserfs_write_unlock(sb); journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); if (!journal) { reiserfs_warning(sb, "journal-1256", "unable to get memory for journal structure"); + reiserfs_write_lock(sb); return 1; } memset(journal, 0, sizeof(struct reiserfs_journal)); @@ -2771,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name, INIT_LIST_HEAD(&journal->j_working_list); INIT_LIST_HEAD(&journal->j_journal_list); journal->j_persistent_trans = 0; - if (reiserfs_allocate_list_bitmaps(sb, - journal->j_list_bitmap, - reiserfs_bmap_count(sb))) + ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, + reiserfs_bmap_count(sb)); + reiserfs_write_lock(sb); + if (ret) goto free_and_return; + allocate_bitmap_nodes(sb); /* reserved for journal area support */ diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index ee2cfc0..b87aa2c 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller) reiserfs_panic(sb, "%s called without kernel lock held %d", caller); } + +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *sb) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); + + WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n"); +} +#endif diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index e296ff7..9d4dcf0 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -921,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) struct reiserfs_transaction_handle th; int jbegin_count; unsigned long savelink; + int depth; inode = dentry->d_inode; @@ -932,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - reiserfs_write_lock(dir->i_sb); + depth = reiserfs_write_lock_once(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count); if 
(retval) goto out_unlink; @@ -993,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) retval = journal_end(&th, dir->i_sb, jbegin_count); reiserfs_check_path(&path); - reiserfs_write_unlock(dir->i_sb); + reiserfs_write_unlock_once(dir->i_sb, depth); return retval; end_unlink: @@ -1003,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) if (err) retval = err; out_unlink: - reiserfs_write_unlock(dir->i_sb); + reiserfs_write_unlock_once(dir->i_sb, depth); return retval; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8c7033a..c3b004e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -83,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry) BUG_ON(!mutex_is_locked(&dir->i_mutex)); vfs_dq_init(dir); - mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, + I_MUTEX_CHILD, dir->i_sb); error = dir->i_op->unlink(dir, dentry); mutex_unlock(&dentry->d_inode->i_mutex); @@ -98,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) BUG_ON(!mutex_is_locked(&dir->i_mutex)); vfs_dq_init(dir); - mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, + I_MUTEX_CHILD, dir->i_sb); dentry_unhash(dentry); error = dir->i_op->rmdir(dir, dentry); if (!error) @@ -235,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode, if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) return 0; + reiserfs_write_unlock(inode->i_sb); dir = open_xa_dir(inode, XATTR_REPLACE); if (IS_ERR(dir)) { err = PTR_ERR(dir); + reiserfs_write_lock(inode->i_sb); goto out; } else if (!dir->d_inode) { err = 0; + reiserfs_write_lock(inode->i_sb); goto out_dir; } mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); + + reiserfs_write_lock(inode->i_sb); + buf.xadir = dir; err = reiserfs_readdir_dentry(dir, &buf, 
fill_with_dentries, &pos); while ((err == 0 || err == -ENOSPC) && buf.count) { @@ -283,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode, err = journal_begin(&th, inode->i_sb, blocks); if (!err) { int jerror; - mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, - I_MUTEX_XATTR); + reiserfs_mutex_lock_nested_safe( + &dir->d_parent->d_inode->i_mutex, + I_MUTEX_XATTR, inode->i_sb); err = action(dir, data); jerror = journal_end(&th, inode->i_sb, blocks); mutex_unlock(&dir->d_parent->d_inode->i_mutex); @@ -480,11 +489,16 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, if (!buffer) return lookup_and_delete_xattr(inode, name); + reiserfs_write_unlock(inode->i_sb); dentry = xattr_lookup(inode, name, flags); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + reiserfs_write_lock(inode->i_sb); return PTR_ERR(dentry); + } - down_write(&REISERFS_I(inode)->i_xattr_sem); + down_read(&REISERFS_I(inode)->i_xattr_sem); + + reiserfs_write_lock(inode->i_sb); xahash = xattr_hash(buffer, buffer_size); while (buffer_pos < buffer_size || buffer_pos == 0) { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4..3692425 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 784a919..9b98173 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_err_sectors() : sectors 
left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } -static inline unsigned int blk_rq_err_sectors(const struct request *rq) -{ - return blk_rq_err_bytes(rq) >> 9; -} - /* * Request issue related functions. */ @@ -1116,11 +1110,18 @@ static inline int queue_alignment_offset(struct request_queue *q) return q->limits.alignment_offset; } +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) +{ + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + + offset &= granularity - 1; + return (granularity + lim->alignment_offset - offset) & (granularity - 1); +} + static inline int queue_sector_alignment_offset(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.alignment_offset) - & (q->limits.io_min - 1); + return queue_limit_alignment_offset(&q->limits, sector << 9); } static inline int bdev_alignment_offset(struct block_device *bdev) diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index c6b3ca3..1f716d9 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -340,6 +340,9 @@ struct fw_cdev_send_response { * The @closure field is passed back to userspace in the response event. * The @handle field is an out parameter, returning a handle to the allocated * range to be used for later deallocation of the range. + * + * The address range is allocated on all local nodes. The address allocation + * is exclusive except for the FCP command and response registers. 
*/ struct fw_cdev_allocate { __u64 offset; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 9416a46..a0e6715 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, void *data, size_t length, void *callback_data); /* - * Important note: The callback must guarantee that either fw_send_response() - * or kfree() is called on the @request. + * Important note: Except for the FCP registers, the callback must guarantee + * that either fw_send_response() or kfree() is called on the @request. */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9724a2..163c840 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -832,7 +832,7 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 #define IEEE80211_HT_CAP_DSSSCCK40 0x1000 -#define IEEE80211_HT_CAP_PSMP_SUPPORT 0x2000 +#define IEEE80211_HT_CAP_RESERVED 0x2000 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c..b230492 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) +#define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f1011f7..638ce45 100644 
--- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -653,6 +653,7 @@ struct transaction_s * waiting for it to finish. */ unsigned int t_synchronous_commit:1; + unsigned int t_flushed_data_blocks:1; /* * For use by the filesystem to store fs-specific data diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 3d44e9c..7c6b32a 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -81,7 +81,7 @@ union { \ } /** - * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO + * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO * @name: name of the declared kfifo datatype */ #define INIT_KFIFO(name) \ diff --git a/include/linux/pci.h b/include/linux/pci.h index bf1e670..5da0690 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t, resource_size_t); void pcibios_update_irq(struct pci_dev *, int irq); +/* Weak but can be overriden by arch */ +void pci_fixup_cardbus(struct pci_bus *); + /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index c96c185..1ba3cf6 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *s); +#else +static inline void reiserfs_lock_check_recursive(struct super_block *s) { } +#endif + /* * Several mutexes depend on the write lock. 
* However sometimes we want to relax the write lock while we hold @@ -92,11 +98,31 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock(m); reiserfs_write_lock(s); } +static inline void +reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, + struct super_block *s) +{ + reiserfs_lock_check_recursive(s); + reiserfs_write_unlock(s); + mutex_lock_nested(m, subclass); + reiserfs_write_lock(s); +} + +static inline void +reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) +{ + reiserfs_lock_check_recursive(s); + reiserfs_write_unlock(s); + down_read(sem); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e9..207466a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -195,7 +195,7 @@ struct perf_event_attr; static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ - __syscall_meta_##sname = { \ + __syscall_meta__##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba03..bd27fbc 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_SRC_VMARK=24, __NET_IPV4_CONF_MAX }; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008..76e8903 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -70,6 +70,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void 
writeback_inodes_sb(struct super_block *); +int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2aff490..0bf3697 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -547,7 +547,6 @@ enum mac80211_rx_flags { * unspecified depending on the hardware capabilities flags * @IEEE80211_HW_SIGNAL_* * @noise: noise when receiving this frame, in dBm. - * @qual: overall signal quality indication, in percent (0-100). * @antenna: antenna used * @rate_idx: index of data rate into band's supported rates or MCS index if * HT rates are use (RX_FLAG_HT) @@ -559,7 +558,6 @@ struct ieee80211_rx_status { int freq; int signal; int noise; - int __deprecated qual; int antenna; int rate_idx; int flag; @@ -1737,6 +1735,12 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, local_bh_enable(); } +/* + * The TX headroom reserved by mac80211 for its own tx_status functions. + * This is enough for the radiotap header. 
+ */ +#define IEEE80211_TX_STATUS_HEADROOM 13 + /** * ieee80211_tx_status - transmit status callback * diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h index 07e3add..f4105c9 100644 --- a/include/scsi/libsrp.h +++ b/include/scsi/libsrp.h @@ -2,6 +2,7 @@ #define __LIBSRP_H__ #include +#include #include #include #include diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 7352315..c6fe03e 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0, \ - FILTER_OTHER); + sizeof(field.__data_loc_##item), \ + is_signed_type(type), FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index dbcbf6a..50dbd59 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, if (!cpu_events) return ERR_PTR(-ENOMEM); - for_each_possible_cpu(cpu) { + get_online_cpus(); + for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); @@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, goto fail; } } + put_online_cpus(); return cpu_events; fail: - for_each_possible_cpu(cpu) { + 
for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); if (IS_ERR(*pevent)) break; unregister_hw_breakpoint(*pevent); } + put_online_cpus(); + free_percpu(cpu_events); - /* return the error if any */ return ERR_PTR(err); } EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e5342a3..b7df302 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1035,7 +1035,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) /* Pre-allocate memory for max kretprobe instances */ if (rp->maxactive <= 0) { #ifdef CONFIG_PREEMPT - rp->maxactive = max(10, 2 * num_possible_cpus()); + rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus()); #else rp->maxactive = num_possible_cpus(); #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 1f38270..603c0d8 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5148,7 +5148,7 @@ int perf_event_init_task(struct task_struct *child) GFP_KERNEL); if (!child_ctx) { ret = -ENOMEM; - goto exit; + break; } __perf_event_init_context(child_ctx, child); @@ -5164,7 +5164,7 @@ int perf_event_init_task(struct task_struct *child) } } - if (inherited_all) { + if (child_ctx && inherited_all) { /* * Mark the child context as a clone of the parent * context, or of whatever the parent is a clone of. 
@@ -5184,7 +5184,6 @@ int perf_event_init_task(struct task_struct *child) get_ctx(child_ctx->parent_ctx); } -exit: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d006554..6c22d8a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,17 +12,17 @@ config NOP_TRACER config HAVE_FTRACE_NMI_ENTER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_TRACER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_GRAPH_TRACER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_GRAPH_FP_TEST bool @@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_DYNAMIC_FTRACE bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FTRACE_MCOUNT_RECORD bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_HW_BRANCH_TRACER bool @@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER config HAVE_SYSCALL_TRACEPOINTS bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config TRACER_MAX_TRACE bool @@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP # This allows those options to appear when no other tracer is selected. But the # options do not appear when something else selects it. We need the two options # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the -# hidding of the automatic options. +# hiding of the automatic options. 
config TRACING bool @@ -119,7 +119,7 @@ menuconfig FTRACE bool "Tracers" default y if DEBUG_KERNEL help - Enable the kernel tracing infrastructure. + Enable the kernel tracing infrastructure. if FTRACE @@ -133,7 +133,7 @@ config FUNCTION_TRACER help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation - instruction to the beginning of every kernel function, which NOP + instruction at the beginning of every kernel function, which NOP sequence is then dynamically patched into a tracer call when tracing is enabled by the administrator. If it's runtime disabled (the bootup default), then the overhead of the instructions is very @@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER and its entry. Its first purpose is to trace the duration of functions and draw a call graph for each thread with some information like - the return value. This is done by setting the current return + the return value. This is done by setting the current return address on the current task structure into a stack of calls. @@ -173,7 +173,7 @@ config IRQSOFF_TRACER echo 0 > /sys/kernel/debug/tracing/tracing_max_latency - (Note that kernel size and overhead increases with this option + (Note that kernel size and overhead increase with this option enabled. This option and the preempt-off timing option can be used together or separately.) @@ -186,7 +186,7 @@ config PREEMPT_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP help - This option measures the time spent in preemption off critical + This option measures the time spent in preemption-off critical sections, with microsecond accuracy. The default measurement method is a maximum search, which is @@ -195,7 +195,7 @@ config PREEMPT_TRACER echo 0 > /sys/kernel/debug/tracing/tracing_max_latency - (Note that kernel size and overhead increases with this option + (Note that kernel size and overhead increase with this option enabled. 
This option and the irqs-off timing option can be used together or separately.) @@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS depends on !GENERIC_TRACER select TRACING help - This tracer hooks to various trace points in the kernel + This tracer hooks to various trace points in the kernel, allowing the user to pick and choose which trace point they want to trace. It also includes the sched_switch tracer plugin. @@ -265,19 +265,19 @@ choice The likely/unlikely profiler only looks at the conditions that are annotated with a likely or unlikely macro. - The "all branch" profiler will profile every if statement in the + The "all branch" profiler will profile every if-statement in the kernel. This profiler will also enable the likely/unlikely - profiler as well. + profiler. - Either of the above profilers add a bit of overhead to the system. - If unsure choose "No branch profiling". + Either of the above profilers adds a bit of overhead to the system. + If unsure, choose "No branch profiling". config BRANCH_PROFILE_NONE bool "No branch profiling" help - No branch profiling. Branch profiling adds a bit of overhead. - Only enable it if you want to analyse the branching behavior. - Otherwise keep it disabled. + No branch profiling. Branch profiling adds a bit of overhead. + Only enable it if you want to analyse the branching behavior. + Otherwise keep it disabled. config PROFILE_ANNOTATED_BRANCHES bool "Trace likely/unlikely profiler" @@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES /sys/kernel/debug/tracing/profile_annotated_branch - Note: this will add a significant overhead, only turn this + Note: this will add a significant overhead; only turn this on if you need to profile the system's use of these macros. config PROFILE_ALL_BRANCHES @@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES This configuration, when enabled, will impose a great overhead on the system. This should only be enabled when the system - is to be analyzed + is to be analyzed in much detail. 
endchoice config TRACING_BRANCHES @@ -335,7 +335,7 @@ config POWER_TRACER depends on X86 select GENERIC_TRACER help - This tracer helps developers to analyze and optimize the kernels + This tracer helps developers to analyze and optimize the kernel's power management decisions, specifically the C-state and P-state behavior. @@ -391,14 +391,14 @@ config HW_BRANCH_TRACER select GENERIC_TRACER help This tracer records all branches on the system in a circular - buffer giving access to the last N branches for each cpu. + buffer, giving access to the last N branches for each cpu. config KMEMTRACE bool "Trace SLAB allocations" select GENERIC_TRACER help kmemtrace provides tracing for slab allocator functions, such as - kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected + kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected data is then fed to the userspace application in order to analyse allocation hotspots, internal fragmentation and so on, making it possible to see how well an allocator performs, as well as debug @@ -417,15 +417,15 @@ config WORKQUEUE_TRACER bool "Trace workqueues" select GENERIC_TRACER help - The workqueue tracer provides some statistical informations + The workqueue tracer provides some statistical information about each cpu workqueue thread such as the number of the works inserted and executed since their creation. It can help - to evaluate the amount of work each of them have to perform. + to evaluate the amount of work each of them has to perform. For example it can help a developer to decide whether he should - choose a per cpu workqueue instead of a singlethreaded one. + choose a per-cpu workqueue instead of a singlethreaded one. 
config BLK_DEV_IO_TRACE - bool "Support for tracing block io actions" + bool "Support for tracing block IO actions" depends on SYSFS depends on BLOCK select RELAY @@ -456,15 +456,15 @@ config KPROBE_EVENT select TRACING default y help - This allows the user to add tracing events (similar to tracepoints) on the fly - via the ftrace interface. See Documentation/trace/kprobetrace.txt - for more details. + This allows the user to add tracing events (similar to tracepoints) + on the fly via the ftrace interface. See + Documentation/trace/kprobetrace.txt for more details. Those events can be inserted wherever kprobes can probe, and record various register and memory values. - This option is also required by perf-probe subcommand of perf tools. If - you want to use perf tools, this option is strongly recommended. + This option is also required by perf-probe subcommand of perf tools. + If you want to use perf tools, this option is strongly recommended. config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" @@ -472,32 +472,32 @@ config DYNAMIC_FTRACE depends on HAVE_DYNAMIC_FTRACE default y help - This option will modify all the calls to ftrace dynamically - (will patch them out of the binary image and replaces them - with a No-Op instruction) as they are called. A table is - created to dynamically enable them again. + This option will modify all the calls to ftrace dynamically + (will patch them out of the binary image and replace them + with a No-Op instruction) as they are called. A table is + created to dynamically enable them again. - This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise - has native performance as long as no tracing is active. + This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but + otherwise has native performance as long as no tracing is active. - The changes to the code are done by a kernel thread that - wakes up once a second and checks to see if any ftrace calls - were made. 
If so, it runs stop_machine (stops all CPUS) - and modifies the code to jump over the call to ftrace. + The changes to the code are done by a kernel thread that + wakes up once a second and checks to see if any ftrace calls + were made. If so, it runs stop_machine (stops all CPUS) + and modifies the code to jump over the call to ftrace. config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER default n help - This option enables the kernel function profiler. A file is created - in debugfs called function_profile_enabled which defaults to zero. - When a 1 is echoed into this file profiling begins, and when a - zero is entered, profiling stops. A file in the trace_stats - directory called functions, that show the list of functions that - have been hit and their counters. + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A "functions" file is created in + the trace_stats directory; this file shows the list of functions that + have been hit and their counters. - If in doubt, say N + If in doubt, say N. config FTRACE_MCOUNT_RECORD def_bool y @@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK tristate "Ring buffer benchmark stress tester" depends on RING_BUFFER help - This option creates a test to stress the ring buffer and bench mark it. - It creates its own ring buffer such that it will not interfer with + This option creates a test to stress the ring buffer and benchmark it. + It creates its own ring buffer such that it will not interfere with any other users of the ring buffer (such as ftrace). It then creates a producer and consumer that will run for 10 seconds and sleep for 10 seconds. 
Each interval it will print out the number of events @@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK It does not disable interrupts or raise its priority, so it may be affected by processes that are running. - If unsure, say N + If unsure, say N. endif # FTRACE diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b9f20a..0df1b0f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, if (!!(topt->flags->val & topt->opt->bit) != val) { mutex_lock(&trace_types_lock); ret = __set_tracer_option(current_trace, topt->flags, - topt->opt, val); + topt->opt, !val); mutex_unlock(&trace_types_lock); if (ret) return ret; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bf..d4fa5dc 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), \ container.item), \ - sizeof(field.container.item), 0, \ - FILTER_OTHER); \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 375f81a..6ea90c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1201,10 +1201,11 @@ static int __probe_event_show_format(struct trace_seq *s, #undef SHOW_FIELD #define SHOW_FIELD(type, item, name) \ do { \ - ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ - "offset:%u;\tsize:%u;\n", name, \ 
+ ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ + "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ (unsigned int)offsetof(typeof(field), item),\ - (unsigned int)sizeof(type)); \ + (unsigned int)sizeof(type), \ + is_signed_type(type)); \ if (!ret) \ return 0; \ } while (0) diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index faf37fa..94103cd 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -26,12 +26,13 @@ #include #include "trace_output.h" -#include "trace_stat.h" #include "trace.h" #include #include +#include + /* * For now, let us restrict the no. of symbols traced simultaneously to number * of available hardware breakpoint registers. @@ -44,7 +45,7 @@ struct trace_ksym { struct perf_event **ksym_hbp; struct perf_event_attr attr; #ifdef CONFIG_PROFILE_KSYM_TRACER - unsigned long counter; + atomic64_t counter; #endif struct hlist_node ksym_hlist; }; @@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) rcu_read_lock(); hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - if ((entry->attr.bp_addr == hbp_hit_addr) && - (entry->counter <= MAX_UL_INT)) { - entry->counter++; + if (entry->attr.bp_addr == hbp_hit_addr) { + atomic64_inc(&entry->counter); break; } } @@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) entry->attr.bp_addr = addr; entry->attr.bp_len = HW_BREAKPOINT_LEN_4; - ret = -EAGAIN; entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, ksym_hbp_handler); @@ -300,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file, * 2: echo 0 > ksym_trace_filter * 3: echo "*:---" > ksym_trace_filter */ - if (!buf[0] || !strcmp(buf, "0") || - !strcmp(buf, "*:---")) { + if (!input_string[0] || !strcmp(input_string, "0") || + !strcmp(input_string, "*:---")) { __ksym_trace_reset(); ret = 0; goto out; @@ -444,102 +443,77 @@ struct tracer ksym_tracer __read_mostly = .print_line = ksym_trace_output }; -__init static int 
init_ksym_trace(void) -{ - struct dentry *d_tracer; - struct dentry *entry; - - d_tracer = tracing_init_dentry(); - ksym_filter_entry_count = 0; - - entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, - NULL, &ksym_tracing_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'ksym_trace_filter' file\n"); - - return register_tracer(&ksym_tracer); -} -device_initcall(init_ksym_trace); - - #ifdef CONFIG_PROFILE_KSYM_TRACER -static int ksym_tracer_stat_headers(struct seq_file *m) +static int ksym_profile_show(struct seq_file *m, void *v) { + struct hlist_node *node; + struct trace_ksym *entry; + int access_type = 0; + char fn_name[KSYM_NAME_LEN]; + seq_puts(m, " Access Type "); seq_puts(m, " Symbol Counter\n"); seq_puts(m, " ----------- "); seq_puts(m, " ------ -------\n"); - return 0; -} -static int ksym_tracer_stat_show(struct seq_file *m, void *v) -{ - struct hlist_node *stat = v; - struct trace_ksym *entry; - int access_type = 0; - char fn_name[KSYM_NAME_LEN]; + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); + access_type = entry->attr.bp_type; - access_type = entry->attr.bp_type; + switch (access_type) { + case HW_BREAKPOINT_R: + seq_puts(m, " R "); + break; + case HW_BREAKPOINT_W: + seq_puts(m, " W "); + break; + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: + seq_puts(m, " RW "); + break; + default: + seq_puts(m, " NA "); + } - switch (access_type) { - case HW_BREAKPOINT_R: - seq_puts(m, " R "); - break; - case HW_BREAKPOINT_W: - seq_puts(m, " W "); - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - seq_puts(m, " RW "); - break; - default: - seq_puts(m, " NA "); + if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) + seq_printf(m, " %-36s", fn_name); + else + seq_printf(m, " %-36s", ""); + seq_printf(m, " %15llu\n", + (unsigned long long)atomic64_read(&entry->counter)); } - - if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) 
- seq_printf(m, " %-36s", fn_name); - else - seq_printf(m, " %-36s", ""); - seq_printf(m, " %15lu\n", entry->counter); + rcu_read_unlock(); return 0; } -static void *ksym_tracer_stat_start(struct tracer_stat *trace) +static int ksym_profile_open(struct inode *node, struct file *file) { - return ksym_filter_head.first; -} - -static void * -ksym_tracer_stat_next(void *v, int idx) -{ - struct hlist_node *stat = v; - - return stat->next; + return single_open(file, ksym_profile_show, NULL); } -static struct tracer_stat ksym_tracer_stats = { - .name = "ksym_tracer", - .stat_start = ksym_tracer_stat_start, - .stat_next = ksym_tracer_stat_next, - .stat_headers = ksym_tracer_stat_headers, - .stat_show = ksym_tracer_stat_show +static const struct file_operations ksym_profile_fops = { + .open = ksym_profile_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; +#endif /* CONFIG_PROFILE_KSYM_TRACER */ -__init static int ksym_tracer_stat_init(void) +__init static int init_ksym_trace(void) { - int ret; + struct dentry *d_tracer; - ret = register_stat_tracer(&ksym_tracer_stats); - if (ret) { - printk(KERN_WARNING "Warning: could not register " - "ksym tracer stats\n"); - return 1; - } + d_tracer = tracing_init_dentry(); - return 0; + trace_create_file("ksym_trace_filter", 0644, d_tracer, + NULL, &ksym_tracing_fops); + +#ifdef CONFIG_PROFILE_KSYM_TRACER + trace_create_file("ksym_profile", 0444, d_tracer, + NULL, &ksym_profile_fops); +#endif + + return register_tracer(&ksym_tracer); } -fs_initcall(ksym_tracer_stat_init); -#endif /* CONFIG_PROFILE_KSYM_TRACER */ +device_initcall(init_ksym_trace); diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d9b08e0..cf90620 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -670,12 +670,13 @@ static int device_dma_allocations(struct device *dev) return count; } -static int dma_debug_device_change(struct notifier_block *nb, - unsigned long action, void *data) +static int dma_debug_device_change(struct 
notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; int count; + if (global_disable) + return 0; switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: @@ -697,6 +698,9 @@ void dma_debug_add_bus(struct bus_type *bus) { struct notifier_block *nb; + if (global_disable) + return; + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); if (nb == NULL) { pr_err("dma_debug_add_bus: out of memory\n"); diff --git a/mm/mmap.c b/mm/mmap.c index d9c77b2..ee22989 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1043,6 +1043,46 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } EXPORT_SYMBOL(do_mmap_pgoff); +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) +{ + struct file *file = NULL; + unsigned long retval = -EBADF; + + if (!(flags & MAP_ANONYMOUS)) { + if (unlikely(flags & MAP_HUGETLB)) + return -EINVAL; + file = fget(fd); + if (!file) + goto out; + } else if (flags & MAP_HUGETLB) { + struct user_struct *user = NULL; + /* + * VM_NORESERVE is used because the reservations will be + * taken when vm_ops->mmap() is called + * A dummy user value is used because we are not locking + * memory so no accounting is necessary + */ + len = ALIGN(len, huge_page_size(&default_hstate)); + file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, + &user, HUGETLB_ANONHUGE_INODE); + if (IS_ERR(file)) + return PTR_ERR(file); + } + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + + down_write(&current->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + + if (file) + fput(file); +out: + return retval; +} + /* * Some shared mappigns will want the pages marked read-only * to track write events.
If so, we'll downgrade vm_page_prot diff --git a/mm/nommu.c b/mm/nommu.c index 8687973..6f9248f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1398,6 +1398,31 @@ error_getting_region: } EXPORT_SYMBOL(do_mmap_pgoff); +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) +{ + struct file *file = NULL; + unsigned long retval = -EBADF; + + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + + down_write(&current->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + + if (file) + fput(file); +out: + return retval; +} + /* * split a vma into two pieces at address 'addr', a new vma is allocated either * for the first part or the tail. diff --git a/mm/slab.c b/mm/slab.c index 7d41f15..7451bda 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -654,7 +654,7 @@ static void init_node_lock_keys(int q) l3 = s->cs_cachep->nodelists[q]; if (!l3 || OFF_SLAB(s->cs_cachep)) - return; + continue; lockdep_set_class(&l3->list_lock, &on_slab_l3_key); alc = l3->alien; /* @@ -665,7 +665,7 @@ static void init_node_lock_keys(int q) * for alloc_alien_cache, */ if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) - return; + continue; for_each_node(r) { if (alc[r]) lockdep_set_class(&alc[r]->lock, diff --git a/mm/util.c b/mm/util.c index b377ce4..7c35ad9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,10 +4,6 @@ #include #include #include -#include -#include -#include -#include #include #define CREATE_TRACE_POINTS @@ -272,46 +268,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, } EXPORT_SYMBOL_GPL(get_user_pages_fast); -SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, - unsigned long, prot, unsigned long, flags, - unsigned long, fd, unsigned long, pgoff) -{ - struct file * file = NULL; - unsigned long retval = -EBADF; - - if (!(flags &
MAP_ANONYMOUS)) { - if (unlikely(flags & MAP_HUGETLB)) - return -EINVAL; - file = fget(fd); - if (!file) - goto out; - } else if (flags & MAP_HUGETLB) { - struct user_struct *user = NULL; - /* - * VM_NORESERVE is used because the reservations will be - * taken when vm_ops->mmap() is called - * A dummy user value is used because we are not locking - * memory so no accounting is necessary - */ - len = ALIGN(len, huge_page_size(&default_hstate)); - file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, - &user, HUGETLB_ANONHUGE_INODE); - if (IS_ERR(file)) - return PTR_ERR(file); - } - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - down_write(&current->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); - - if (file) - fput(file); -out: - return retval; -} - /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a23b45f..de0c2c7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -250,8 +250,7 @@ struct pktgen_dev { __u64 count; /* Default No packets to send */ __u64 sofar; /* How many pkts we've sent so far */ __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, - pkts will be re-sent */ + __u64 errors; /* Errors when trying to transmit, */ /* runtime counters relating to clone_skb */ @@ -3465,6 +3464,12 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->seq_num++; pkt_dev->tx_bytes += pkt_dev->last_pkt_size; break; + case NET_XMIT_DROP: + case NET_XMIT_CN: + case NET_XMIT_POLICED: + /* skb has been consumed */ + pkt_dev->errors++; + break; default: /* Drivers are not supposed to return other values!
*/ if (net_ratelimit()) pr_info("pktgen: %s xmit error: %d\n", diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc10..040c4f0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), + DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3323168..82dbf71 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); + if (mark && !IN_DEV_SRC_VMARK(in_dev)) + fl.mark = 0; } rcu_read_unlock(); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 3787455..d7dcee6 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, ht_cap->ht_supported = true; - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap; - ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS; - ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; + /* + * The bits listed in this expression should be + * the same for the peer and us, if the station + * advertises more then we can't use those thus + * we mask them out. + */ + ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & + (sband->ht_cap.cap | + ~(IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40)); + /* + * The STBC bits are asymmetric -- if we don't have + * TX then mask out the peer's RX and vice versa. 
+ */ + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC; + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; ht_cap->ampdu_factor = diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 10d1385..1f2db64 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -382,6 +382,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, u8 *bssid,u8 *addr, u32 supp_rates) { + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; int band = local->hw.conf.channel->band; @@ -397,6 +398,9 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, return NULL; } + if (ifibss->state == IEEE80211_IBSS_MLME_SEARCH) + return NULL; + if (compare_ether_addr(bssid, sdata->u.ibss.bssid)) return NULL; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8116d1a..0d2d948 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -515,6 +515,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) * and we need some headroom for passing the frame to monitor * interfaces, but never both at the same time. */ + BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM != + sizeof(struct ieee80211_tx_status_rtap_hdr)); local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, sizeof(struct ieee80211_tx_status_rtap_hdr)); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d8d50fb..c79e59f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -915,6 +915,14 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | IEEE80211_STA_BEACON_POLL); + /* + * Always handle WMM once after association regardless + * of the first value the AP uses. 
Setting -1 here has + * that effect because the AP values is an unsigned + * 4-bit value. + */ + sdata->u.mgd.wmm_last_param_set = -1; + ieee80211_led_assoc(local, 1); sdata->vif.bss_conf.assoc = 1; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8834cc9..27ceaef 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1419,6 +1419,10 @@ static bool need_dynamic_ps(struct ieee80211_local *local) if (!local->ps_sdata) return false; + /* No point if we're going to suspend */ + if (local->quiescing) + return false; + return true; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 78a6e92..dc76267 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1039,7 +1039,19 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* restart hardware */ if (local->open_count) { + /* + * Upon resume hardware can sometimes be goofy due to + * various platform / driver / bus issues, so restarting + * the device may at times not work immediately. Propagate + * the error. + */ res = drv_start(local); + if (res) { + WARN(local->suspended, "Harware became unavailable " + "upon resume. This is could be a software issue" + "prior to suspend or a harware issue\n"); + return res; + } ieee80211_led_radio(local, true); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1001db4..82e6002 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -93,7 +93,18 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) } } - WARN_ON(!bss); + /* + * We might be coming here because the driver reported + * a successful association at the same time as the + * user requested a deauth. In that case, we will have + * removed the BSS from the auth_bsses list due to the + * deauth request when the assoc response makes it. If + * the two code paths acquire the lock the other way + * around, that's just the standard situation of a + * deauth being requested while connected. 
+ */ + if (!bss) + goto out; } else if (wdev->conn) { cfg80211_sme_failed_assoc(wdev); /* diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 12dfa62..0c2cbbe 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -601,7 +601,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct cfg80211_registered_device *rdev; struct wiphy *wiphy; struct iw_scan_req *wreq = NULL; - struct cfg80211_scan_request *creq; + struct cfg80211_scan_request *creq = NULL; int i, err, n_channels = 0; enum ieee80211_band band; @@ -694,8 +694,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* translate "Scan for SSID" request */ if (wreq) { if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { - if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) - return -EINVAL; + if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out; + } memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); creq->ssids[0].ssid_len = wreq->essid_len; } @@ -707,12 +709,15 @@ int cfg80211_wext_siwscan(struct net_device *dev, err = rdev->ops->scan(wiphy, dev, creq); if (err) { rdev->scan_req = NULL; - kfree(creq); + /* creq will be freed below */ } else { nl80211_send_scan_start(rdev, dev); + /* creq now owned by driver */ + creq = NULL; dev_hold(dev); } out: + kfree(creq); cfg80211_unlock_rdev(rdev); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cb81ca3..4725a54 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1445,7 +1445,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (!dev) goto free_dst; - /* Copy neighbout for reachability confirmation */ + /* Copy neighbour for reachability confirmation */ dst0->neighbour = neigh_clone(dst->neighbour); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index c569986..656e474 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -441,6 +441,7 @@ static int aaci_pcm_hw_params(struct snd_pcm_substream 
*substream, struct snd_pcm_hw_params *params) { int err; + struct aaci *aaci = substream->private_data; aaci_pcm_hw_free(substream); if (aacirun->pcm_open) { @@ -560,7 +561,6 @@ static int aaci_pcm_open(struct snd_pcm_substream *substream) static int aaci_pcm_playback_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { - struct aaci *aaci = substream->private_data; struct aaci_runtime *aacirun = substream->runtime->private_data; unsigned int channels = params_channels(params); int ret; @@ -659,7 +659,6 @@ static struct snd_pcm_ops aaci_playback_ops = { static int aaci_pcm_capture_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { - struct aaci *aaci = substream->private_data; struct aaci_runtime *aacirun = substream->runtime->private_data; int ret; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 29ab46a..25b0641 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1918,13 +1918,13 @@ int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream) err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE, hw->rate_min, hw->rate_max); - if (err < 0) - return err; + if (err < 0) + return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, hw->period_bytes_min, hw->period_bytes_max); - if (err < 0) - return err; + if (err < 0) + return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIODS, hw->periods_min, hw->periods_max); diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c index 5fe34a8..e4581a4 100644 --- a/sound/pci/hda/hda_beep.c +++ b/sound/pci/hda/hda_beep.c @@ -42,7 +42,7 @@ static void snd_hda_generate_beep(struct work_struct *work) return; /* generate tone */ - snd_hda_codec_write_cache(codec, beep->nid, 0, + snd_hda_codec_write(codec, beep->nid, 0, AC_VERB_SET_BEEP_CONTROL, beep->tone); } @@ -119,7 +119,7 @@ static void snd_hda_do_detach(struct hda_beep *beep) beep->dev = 
NULL; cancel_work_sync(&beep->beep_work); /* turn off beep for sure */ - snd_hda_codec_write_cache(beep->codec, beep->nid, 0, + snd_hda_codec_write(beep->codec, beep->nid, 0, AC_VERB_SET_BEEP_CONTROL, 0); } @@ -192,7 +192,7 @@ int snd_hda_enable_beep_device(struct hda_codec *codec, int enable) beep->enabled = enable; if (!enable) { /* turn off beep */ - snd_hda_codec_write_cache(beep->codec, beep->nid, 0, + snd_hda_codec_write(beep->codec, beep->nid, 0, AC_VERB_SET_BEEP_CONTROL, 0); } if (beep->mode == HDA_BEEP_MODE_SWREG) { @@ -239,8 +239,12 @@ int snd_hda_attach_beep_device(struct hda_codec *codec, int nid) mutex_init(&beep->mutex); if (beep->mode == HDA_BEEP_MODE_ON) { - beep->enabled = 1; - snd_hda_do_register(&beep->register_work); + int err = snd_hda_do_attach(beep); + if (err < 0) { + kfree(beep); + codec->beep = NULL; + return err; + } } return 0; @@ -253,7 +257,7 @@ void snd_hda_detach_beep_device(struct hda_codec *codec) if (beep) { cancel_work_sync(&beep->register_work); cancel_delayed_work(&beep->unregister_work); - if (beep->enabled) + if (beep->dev) snd_hda_do_detach(beep); codec->beep = NULL; kfree(beep); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 950ee5c..f98b47c 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1327,11 +1327,13 @@ EXPORT_SYMBOL_HDA(snd_hda_query_pin_caps); */ u32 snd_hda_pin_sense(struct hda_codec *codec, hda_nid_t nid) { - u32 pincap = snd_hda_query_pin_caps(codec, nid); - - if (pincap & AC_PINCAP_TRIG_REQ) /* need trigger? */ - snd_hda_codec_read(codec, nid, 0, AC_VERB_SET_PIN_SENSE, 0); + u32 pincap; + if (!codec->no_trigger_sense) { + pincap = snd_hda_query_pin_caps(codec, nid); + if (pincap & AC_PINCAP_TRIG_REQ) /* need trigger? 
*/ + snd_hda_codec_read(codec, nid, 0, AC_VERB_SET_PIN_SENSE, 0); + } return snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_SENSE, 0); } diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 1d541b7..0a770a2 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -817,6 +817,7 @@ struct hda_codec { unsigned int pin_amp_workaround:1; /* pin out-amp takes index * (e.g. Conexant codecs) */ + unsigned int no_trigger_sense:1; /* don't trigger at pin-sensing */ #ifdef CONFIG_SND_HDA_POWER_SAVE unsigned int power_on :1; /* current (global) power-state */ unsigned int power_transition :1; /* power-state in transition */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index ff8ad46..ec9c348 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -356,6 +356,7 @@ struct azx_dev { */ unsigned char stream_tag; /* assigned stream */ unsigned char index; /* stream index */ + int device; /* last device number assigned to */ unsigned int opened :1; unsigned int running :1; @@ -1441,10 +1442,13 @@ static int __devinit azx_codec_configure(struct azx *chip) */ /* assign a stream for the PCM */ -static inline struct azx_dev *azx_assign_device(struct azx *chip, int stream) +static inline struct azx_dev * +azx_assign_device(struct azx *chip, struct snd_pcm_substream *substream) { int dev, i, nums; - if (stream == SNDRV_PCM_STREAM_PLAYBACK) { + struct azx_dev *res = NULL; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { dev = chip->playback_index_offset; nums = chip->playback_streams; } else { @@ -1453,10 +1457,15 @@ static inline struct azx_dev *azx_assign_device(struct azx *chip, int stream) } for (i = 0; i < nums; i++, dev++) if (!chip->azx_dev[dev].opened) { - chip->azx_dev[dev].opened = 1; - return &chip->azx_dev[dev]; + res = &chip->azx_dev[dev]; + if (res->device == substream->pcm->device) + break; } - return NULL; + if (res) { + res->opened = 1; + res->device = substream->pcm->device; + } + 
return res; } /* release the assigned stream */ @@ -1505,7 +1514,7 @@ static int azx_pcm_open(struct snd_pcm_substream *substream) int err; mutex_lock(&chip->open_mutex); - azx_dev = azx_assign_device(chip, substream->stream); + azx_dev = azx_assign_device(chip, substream); if (azx_dev == NULL) { mutex_unlock(&chip->open_mutex); return -EBUSY; diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 1a36137..69a941c 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -1186,6 +1186,8 @@ static int patch_ad1986a(struct hda_codec *codec) */ spec->multiout.no_share_stream = 1; + codec->no_trigger_sense = 1; + return 0; } @@ -1371,6 +1373,8 @@ static int patch_ad1983(struct hda_codec *codec) codec->patch_ops = ad198x_patch_ops; + codec->no_trigger_sense = 1; + return 0; } @@ -1813,6 +1817,9 @@ static int patch_ad1981(struct hda_codec *codec) codec->patch_ops.unsol_event = ad1981_hp_unsol_event; break; } + + codec->no_trigger_sense = 1; + return 0; } @@ -3118,6 +3125,8 @@ static int patch_ad1988(struct hda_codec *codec) #endif spec->vmaster_nid = 0x04; + codec->no_trigger_sense = 1; + return 0; } @@ -3330,6 +3339,8 @@ static int patch_ad1884(struct hda_codec *codec) codec->patch_ops = ad198x_patch_ops; + codec->no_trigger_sense = 1; + return 0; } @@ -4287,6 +4298,8 @@ static int patch_ad1884a(struct hda_codec *codec) break; } + codec->no_trigger_sense = 1; + return 0; } @@ -4623,6 +4636,9 @@ static int patch_ad1882(struct hda_codec *codec) spec->mixers[2] = ad1882_6stack_mixers; break; } + + codec->no_trigger_sense = 1; + return 0; } diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index eeda7be..2291a83 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4453,14 +4453,7 @@ static inline int get_pin_presence(struct hda_codec *codec, hda_nid_t nid) { if (!nid) return 0; - /* NOTE: we can't use snd_hda_jack_detect() here because STAC/IDT - * codecs behave wrongly 
when SET_PIN_SENSE is triggered, although - * the pincap gives TRIG_REQ bit. - */ - if (snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_SENSE, 0) & - AC_PINSENSE_PRESENCE) - return 1; - return 0; + return snd_hda_jack_detect(codec, nid); } static void stac92xx_line_out_detect(struct hda_codec *codec, @@ -4962,6 +4955,7 @@ static int patch_stac9200(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; spec->num_pins = ARRAY_SIZE(stac9200_pin_nids); spec->pin_nids = stac9200_pin_nids; @@ -5024,6 +5018,7 @@ static int patch_stac925x(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; spec->num_pins = ARRAY_SIZE(stac925x_pin_nids); spec->pin_nids = stac925x_pin_nids; @@ -5108,6 +5103,7 @@ static int patch_stac92hd73xx(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; codec->slave_dig_outs = stac92hd73xx_slave_dig_outs; spec->num_pins = ARRAY_SIZE(stac92hd73xx_pin_nids); @@ -5255,6 +5251,7 @@ static int patch_stac92hd83xxx(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; codec->slave_dig_outs = stac92hd83xxx_slave_dig_outs; spec->digbeep_nid = 0x21; @@ -5418,6 +5415,7 @@ static int patch_stac92hd71bxx(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; codec->patch_ops = stac92xx_patch_ops; spec->num_pins = STAC92HD71BXX_NUM_PINS; @@ -5661,6 +5659,7 @@ static int patch_stac922x(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; spec->num_pins = ARRAY_SIZE(stac922x_pin_nids); spec->pin_nids = stac922x_pin_nids; @@ -5764,6 +5763,7 @@ static int patch_stac927x(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; codec->slave_dig_outs = stac927x_slave_dig_outs; 
spec->num_pins = ARRAY_SIZE(stac927x_pin_nids); @@ -5898,6 +5898,7 @@ static int patch_stac9205(struct hda_codec *codec) if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; spec->num_pins = ARRAY_SIZE(stac9205_pin_nids); spec->pin_nids = stac9205_pin_nids; @@ -6053,6 +6054,7 @@ static int patch_stac9872(struct hda_codec *codec) spec = kzalloc(sizeof(*spec), GFP_KERNEL); if (spec == NULL) return -ENOMEM; + codec->no_trigger_sense = 1; codec->spec = spec; spec->num_pins = ARRAY_SIZE(stac9872_pin_nids); spec->pin_nids = stac9872_pin_nids; diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4390d22..652a470 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -343,13 +343,18 @@ LIB_H += util/include/linux/string.h LIB_H += util/include/linux/types.h LIB_H += util/include/asm/asm-offsets.h LIB_H += util/include/asm/bitops.h +LIB_H += util/include/asm/bug.h LIB_H += util/include/asm/byteorder.h LIB_H += util/include/asm/swab.h LIB_H += util/include/asm/system.h LIB_H += util/include/asm/uaccess.h LIB_H += perf.h +LIB_H += util/cache.h +LIB_H += util/callchain.h +LIB_H += util/debug.h LIB_H += util/debugfs.h LIB_H += util/event.h +LIB_H += util/exec_cmd.h LIB_H += util/types.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h @@ -362,6 +367,7 @@ LIB_H += util/session.h LIB_H += util/strbuf.h LIB_H += util/string.h LIB_H += util/strlist.h +LIB_H += util/svghelper.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h @@ -370,6 +376,8 @@ LIB_H += util/values.h LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h +LIB_H += util/trace-event.h +LIB_H += util/trace-event-perl.h LIB_H += util/probe-finder.h LIB_H += util/probe-event.h diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index e693e67..1e99ac8 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -17,7 +17,7 @@ static char const *input_name = "perf.data"; 
static int force; -static const char *const buildid_list_usage[] = { +static const char * const buildid_list_usage[] = { "perf buildid-list [<options>]", NULL }; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 4d33b55..bd71b8c 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -189,8 +189,9 @@ out_delete: return ret; } -static const char *const diff_usage[] = { +static const char * const diff_usage[] = { "perf diff [<options>] [old_file] [new_file]", + NULL, }; static const struct option options[] = { diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fc21ad7..7ceb741 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -137,7 +137,7 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, if (data && data->ptr == ptr) { data->hit++; data->bytes_req += bytes_req; - data->bytes_alloc += bytes_req; + data->bytes_alloc += bytes_alloc; } else { data = malloc(sizeof(*data)); if (!data) @@ -177,7 +177,7 @@ static void insert_caller_stat(unsigned long call_site, if (data && data->call_site == call_site) { data->hit++; data->bytes_req += bytes_req; - data->bytes_alloc += bytes_req; + data->bytes_alloc += bytes_alloc; } else { data = malloc(sizeof(*data)); if (!data) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 63136d0..2654253 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -402,7 +402,7 @@ static void atexit_header(void) perf_header__write(&session->header, output, true); } -static int __cmd_record(int argc __used, const char **argv) +static int __cmd_record(int argc, const char **argv) { int i, counter; struct stat st; @@ -411,6 +411,7 @@ static int __cmd_record(int argc __used, const char **argv) int err; unsigned long waking = 0; int child_ready_pipe[2], go_pipe[2]; + const bool forks = target_pid == -1 && argc > 0; char buf; page_size = sysconf(_SC_PAGE_SIZE); @@ -422,7 +423,7 @@ static int __cmd_record(int argc 
__used, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); - if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { + if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { perror("failed to create pipes"); exit(-1); } @@ -483,7 +484,7 @@ static int __cmd_record(int argc __used, const char **argv) atexit(atexit_header); - if (target_pid == -1) { + if (forks) { pid = fork(); if (pid < 0) { perror("failed to fork"); @@ -550,7 +551,7 @@ static int __cmd_record(int argc __used, const char **argv) return err; } - if (!system_wide) + if (!system_wide && profile_cpu == -1) event__synthesize_thread(pid, process_synthesized_event, session); else @@ -569,7 +570,8 @@ static int __cmd_record(int argc __used, const char **argv) /* * Let the child rip */ - close(go_pipe[1]); + if (forks) + close(go_pipe[1]); for (;;) { int hits = samples; @@ -667,7 +669,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) argc = parse_options(argc, argv, options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && target_pid == -1 && (!system_wide || profile_cpu == -1)) + if (!argc && target_pid == -1 && !system_wide && profile_cpu == -1) usage_with_options(record_usage, options); symbol__init(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5c2ab53..db10c0e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -294,8 +294,7 @@ setup: return 0; } -//static const char * const report_usage[] = { -const char * const report_usage[] = { +static const char * const report_usage[] = { "perf report [<options>] <command>", NULL }; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e2285e2..574a215 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -512,7 +512,7 @@ static char *get_script_path(const char *script_root, const char *suffix) return path; } -static const char * const annotate_usage[] = { +static const char * const trace_usage[] = { "perf 
trace [<options>] <command>", NULL }; @@ -581,7 +581,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) setup_scripting(); - argc = parse_options(argc, argv, options, annotate_usage, + argc = parse_options(argc, argv, options, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (symbol__init() < 0) diff --git a/tools/perf/design.txt b/tools/perf/design.txt index f000c30..8d0de51 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -21,7 +21,7 @@ There's one file descriptor per virtual counter used. The special file descriptor is opened via the perf_event_open() system call: - int sys_perf_event_open(struct perf_event_hw_event *hw_event_uptr, + int sys_perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); @@ -32,9 +32,9 @@ can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. -When creating a new counter fd, 'perf_event_hw_event' is: +When creating a new counter fd, 'perf_event_attr' is: -struct perf_event_hw_event { +struct perf_event_attr { /* * The MSB of the config word signifies if the rest contains cpu * specific (raw) counter configuration data, if unset, the next @@ -399,7 +399,7 @@ Notification of new events is possible through poll()/select()/epoll() and fcntl() managing signals. Normally a notification is generated for every page filled, however one can -additionally set perf_event_hw_event.wakeup_events to generate one every +additionally set perf_event_attr.wakeup_events to generate one every so many counter overflow events. Future work will include a splice() interface to the ring-buffer. 
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index fd9c097..f73de63 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -508,8 +508,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; - down_read(&kvm->slots_lock); mutex_lock(&kvm->lock); + down_read(&kvm->slots_lock); match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, assigned_dev->assigned_dev_id); @@ -573,8 +573,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, } out: - mutex_unlock(&kvm->lock); up_read(&kvm->slots_lock); + mutex_unlock(&kvm->lock); return r; out_list_del: list_del(&match->list); @@ -585,8 +585,8 @@ out_put: pci_dev_put(dev); out_free: kfree(match); - mutex_unlock(&kvm->lock); up_read(&kvm->slots_lock); + mutex_unlock(&kvm->lock); return r; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b5af881..a944be3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -64,7 +64,7 @@ MODULE_LICENSE("GPL"); /* * Ordering of locks: * - * kvm->slots_lock --> kvm->lock --> kvm->irq_lock + * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ DEFINE_SPINLOCK(kvm_lock); @@ -406,8 +406,11 @@ static struct kvm *kvm_create_vm(void) out: return kvm; +#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \ + (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) out_err: hardware_disable_all(); +#endif out_err_nodisable: kfree(kvm); return ERR_PTR(r);