From 31ba7346f082f3468b8e06b45db475a6e25f01fc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:00:53 +0000 Subject: [PATCH 0001/3163] slab: Use proper formatting specs for unsigned size_t Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 3f3cd97d3fdf..53adfbf2f3b2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -299,7 +299,7 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz err = __kmem_cache_create(s, flags); if (err) - panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n", + panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n", name, size, err); s->refcount = -1; /* Exempt from merging for now */ -- GitLab From 345046673449b5c35840e5cc34a60059cbec9305 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:00:53 +0000 Subject: [PATCH 0002/3163] slab: Move kmalloc related function defs Move these functions higher up in slab.h so that they are grouped with other generic kmalloc related definitions. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 5d168d7e0a28..ccbb37685c6c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -147,6 +147,15 @@ void kmem_cache_free(struct kmem_cache *, void *); sizeof(struct __struct), __alignof__(struct __struct),\ (__flags), NULL) +/* + * Common kmalloc functions provided by all allocators + */ +void * __must_check __krealloc(const void *, size_t, gfp_t); +void * __must_check krealloc(const void *, size_t, gfp_t); +void kfree(const void *); +void kzfree(const void *); +size_t ksize(const void *); + /* * The largest kmalloc size supported by the slab allocators is * 32 megabyte (2^25) or the maximum allocatable page order if that is @@ -224,15 +233,6 @@ struct seq_file; int cache_show(struct kmem_cache *s, struct seq_file *m); void print_slabinfo_header(struct seq_file *m); -/* - * Common kmalloc functions provided by all allocators - */ -void * __must_check __krealloc(const void *, size_t, gfp_t); -void * __must_check krealloc(const void *, size_t, gfp_t); -void kfree(const void *); -void kzfree(const void *); -size_t ksize(const void *); - /* * Allocator specific definitions. These are mainly used to establish optimized * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by -- GitLab From ce6a50263d4ddeba1f0d08f16716a82770c03690 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: [PATCH 0003/3163] slab: Common kmalloc slab index determination Extract the function to determine the index of the slab within the array of kmalloc caches as well as a function to determine maximum object size from the nr of the kmalloc slab. This is used here only to simplify slub bootstrap but will be used later also for SLAB. 
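As an illustration (hypothetical caller, not part of this patch), the two helpers are inverses over the kmalloc array: kmalloc_index() maps an allocation size to a slot in the kmalloc cache array, and kmalloc_size() maps a slot back to the largest object size that slot serves:

	/* Sketch only: assumes KMALLOC_MIN_SIZE <= 32 so the 96 byte slot exists. */
	int i = kmalloc_index(96);	/* returns 1, the 65..96 byte slot */
	size_t sz = kmalloc_size(i);	/* returns 96 */
	BUG_ON(sz < 96);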
Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 172 +++++++++++++++++++++++++++------------ include/linux/slub_def.h | 63 -------------- 2 files changed, 122 insertions(+), 113 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index ccbb37685c6c..c97fe92532d1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -94,29 +94,6 @@ #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ (unsigned long)ZERO_SIZE_PTR) -/* - * Common fields provided in kmem_cache by all slab allocators - * This struct is either used directly by the allocator (SLOB) - * or the allocator must include definitions for all fields - * provided in kmem_cache_common in their definition of kmem_cache. - * - * Once we can do anonymous structs (C11 standard) we could put a - * anonymous struct definition in these allocators so that the - * separate allocations in the kmem_cache structure of SLAB and - * SLUB is no longer needed. - */ -#ifdef CONFIG_SLOB -struct kmem_cache { - unsigned int object_size;/* The original size of the object */ - unsigned int size; /* The aligned/padded/added on size */ - unsigned int align; /* Alignment as calculated */ - unsigned long flags; /* Active flags on the slab */ - const char *name; /* Slab name for sysfs */ - int refcount; /* Use counter */ - void (*ctor)(void *); /* Called on object slot creation */ - struct list_head list; /* List of all slab caches on the system */ -}; -#endif struct mem_cgroup; /* @@ -156,6 +133,35 @@ void kfree(const void *); void kzfree(const void *); size_t ksize(const void *); +#ifdef CONFIG_SLOB +/* + * Common fields provided in kmem_cache by all slab allocators + * This struct is either used directly by the allocator (SLOB) + * or the allocator must include definitions for all fields + * provided in kmem_cache_common in their definition of kmem_cache. + * + * Once we can do anonymous structs (C11 standard) we could put a + * anonymous struct definition in these allocators so that the + * separate allocations in the kmem_cache structure of SLAB and + * SLUB is no longer needed. + */ +struct kmem_cache { + unsigned int object_size;/* The original size of the object */ + unsigned int size; /* The aligned/padded/added on size */ + unsigned int align; /* Alignment as calculated */ + unsigned long flags; /* Active flags on the slab */ + const char *name; /* Slab name for sysfs */ + int refcount; /* Use counter */ + void (*ctor)(void *); /* Called on object slot creation */ + struct list_head list; /* List of all slab caches on the system */ +}; + +#define KMALLOC_MAX_SIZE (1UL << 30) + +#include <linux/slob_def.h> + +#else /* CONFIG_SLOB */ + /* * The largest kmalloc size supported by the slab allocators is * 32 megabyte (2^25) or the maximum allocatable page order if that is * less than 32 MB. * @@ -171,6 +177,99 @@ size_t ksize(const void *); #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) +/* + * Kmalloc subsystem. + */ +#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 +#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN +#else +#ifdef CONFIG_SLAB +#define KMALLOC_MIN_SIZE 32 +#else +#define KMALLOC_MIN_SIZE 8 +#endif +#endif + +#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) + +/* + * Figure out which kmalloc slab an allocation of a certain size + * belongs to. + * 0 = zero alloc + * 1 = 65 .. 96 bytes + * 2 = 120 .. 192 bytes + * n = 2^(n-1) .. 
2^n -1 */ +static __always_inline int kmalloc_index(size_t size) +{ + if (!size) + return 0; + + if (size <= KMALLOC_MIN_SIZE) + return KMALLOC_SHIFT_LOW; + + if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) + return 1; + if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) + return 2; + if (size <= 8) return 3; + if (size <= 16) return 4; + if (size <= 32) return 5; + if (size <= 64) return 6; + if (size <= 128) return 7; + if (size <= 256) return 8; + if (size <= 512) return 9; + if (size <= 1024) return 10; + if (size <= 2 * 1024) return 11; + if (size <= 4 * 1024) return 12; + if (size <= 8 * 1024) return 13; + if (size <= 16 * 1024) return 14; + if (size <= 32 * 1024) return 15; + if (size <= 64 * 1024) return 16; + if (size <= 128 * 1024) return 17; + if (size <= 256 * 1024) return 18; + if (size <= 512 * 1024) return 19; + if (size <= 1024 * 1024) return 20; + if (size <= 2 * 1024 * 1024) return 21; + if (size <= 4 * 1024 * 1024) return 22; + if (size <= 8 * 1024 * 1024) return 23; + if (size <= 16 * 1024 * 1024) return 24; + if (size <= 32 * 1024 * 1024) return 25; + if (size <= 64 * 1024 * 1024) return 26; + BUG(); + + /* Will never be reached. Needed because the compiler may complain */ + return -1; +} + +#ifdef CONFIG_SLAB +#include <linux/slab_def.h> +#elif defined(CONFIG_SLUB) +#include <linux/slub_def.h> +#else +#error "Unknown slab allocator" +#endif + +/* + * Determine size used for the nth kmalloc cache. + * return size or 0 if a kmalloc cache for that + * size does not exist + */ +static __always_inline int kmalloc_size(int n) +{ + if (n > 2) + return 1 << n; + + if (n == 1 && KMALLOC_MIN_SIZE <= 32) + return 96; + + if (n == 2 && KMALLOC_MIN_SIZE <= 64) + return 192; + + return 0; +} +#endif /* !CONFIG_SLOB */ + /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. @@ -233,33 +332,6 @@ struct seq_file; int cache_show(struct kmem_cache *s, struct seq_file *m); void print_slabinfo_header(struct seq_file *m); -/* - * Allocator specific definitions. These are mainly used to establish optimized - * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by - * selecting the appropriate general cache at compile time. - * - * Allocators must define at least: - * - * kmem_cache_alloc() - * __kmalloc() - * kmalloc() - * - * Those wishing to support NUMA must also define: - * - * kmem_cache_alloc_node() - * kmalloc_node() - * - * See each allocator definition file for additional comments and - * implementation notes. - */ -#ifdef CONFIG_SLUB -#include <linux/slub_def.h> -#elif defined(CONFIG_SLOB) -#include <linux/slob_def.h> -#else -#include <linux/slab_def.h> -#endif - /** * kmalloc_array - allocate memory for an array. * @n: number of elements. diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9db4825cd393..99c3e05ff1f0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -115,17 +115,6 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -/* - * Kmalloc subsystem. - */ -#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 -#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN -#else -#define KMALLOC_MIN_SIZE 8 -#endif - -#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) - /* * Maximum kmalloc object size handled by SLUB. Larger object allocations * are passed through to the page allocator. 
The page allocator "fastpath" @@ -152,58 +141,6 @@ struct kmem_cache { */ extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; -/* - * Sorry that the following has to be that ugly but some versions of GCC - * have trouble with constant propagation and loops. - */ -static __always_inline int kmalloc_index(size_t size) -{ - if (!size) - return 0; - - if (size <= KMALLOC_MIN_SIZE) - return KMALLOC_SHIFT_LOW; - - if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) - return 1; - if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) - return 2; - if (size <= 8) return 3; - if (size <= 16) return 4; - if (size <= 32) return 5; - if (size <= 64) return 6; - if (size <= 128) return 7; - if (size <= 256) return 8; - if (size <= 512) return 9; - if (size <= 1024) return 10; - if (size <= 2 * 1024) return 11; - if (size <= 4 * 1024) return 12; -/* - * The following is only needed to support architectures with a larger page - * size than 4k. We need to support 2 * PAGE_SIZE here. So for a 64k page - * size we would have to go up to 128k. - */ - if (size <= 8 * 1024) return 13; - if (size <= 16 * 1024) return 14; - if (size <= 32 * 1024) return 15; - if (size <= 64 * 1024) return 16; - if (size <= 128 * 1024) return 17; - if (size <= 256 * 1024) return 18; - if (size <= 512 * 1024) return 19; - if (size <= 1024 * 1024) return 20; - if (size <= 2 * 1024 * 1024) return 21; - BUG(); - return -1; /* Will never be reached */ - -/* - * What we really wanted to do and cannot do because of compiler issues is: - * int i; - * for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) - * if (size <= (1 << i)) - * return i; - */ -} - /* * Find the slab cache for a given combination of allocation flags and size. * -- GitLab From e33660165c901d18e7d3df2290db070d3e4b46df Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:18 +0000 Subject: [PATCH 0004/3163] slab: Use common kmalloc_index/kmalloc_size functions Make slab use the common functions. We can get rid of a lot of old ugly stuff as a result. Among them are the sizes array, the weird include/linux/kmalloc_sizes file, and some pretty bad #include statements in slab_def.h. The one thing that is different in slab is that the 32 byte cache will also be created for arches that have page sizes larger than 4K. There are numerous smaller allocations that SLOB and SLUB can handle better because of their support for smaller allocation sizes, so let's keep the 32 byte slab also for arches with > 4K pages. 
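To illustrate the effect (a sketch, not part of the patch): with the common helpers, a constant-size kmalloc() in SLAB folds down to a direct array lookup instead of stepping through the old CACHE(x) macro list:

	/* kmalloc(128, GFP_KERNEL) now compiles down to roughly: */
	struct kmem_cache *cachep = kmalloc_caches[kmalloc_index(128)];	/* index 7 */
	void *p = kmem_cache_alloc_trace(cachep, GFP_KERNEL, 128);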
Reviewed-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/kmalloc_sizes.h | 45 --------- include/linux/slab_def.h | 47 +++------- mm/slab.c | 169 +++++++++++++++++++++++++++------------------- 3 files changed, 88 insertions(+), 173 deletions(-) delete mode 100644 include/linux/kmalloc_sizes.h diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h deleted file mode 100644 index e576b848ce10..000000000000 --- a/include/linux/kmalloc_sizes.h +++ /dev/null @@ -1,45 +0,0 @@ -#if (PAGE_SIZE == 4096) - CACHE(32) -#endif - CACHE(64) -#if L1_CACHE_BYTES < 64 - CACHE(96) -#endif - CACHE(128) -#if L1_CACHE_BYTES < 128 - CACHE(192) -#endif - CACHE(256) - CACHE(512) - CACHE(1024) - CACHE(2048) - CACHE(4096) - CACHE(8192) - CACHE(16384) - CACHE(32768) - CACHE(65536) - CACHE(131072) -#if KMALLOC_MAX_SIZE >= 262144 - CACHE(262144) -#endif -#if KMALLOC_MAX_SIZE >= 524288 - CACHE(524288) -#endif -#if KMALLOC_MAX_SIZE >= 1048576 - CACHE(1048576) -#endif -#if KMALLOC_MAX_SIZE >= 2097152 - CACHE(2097152) -#endif -#if KMALLOC_MAX_SIZE >= 4194304 - CACHE(4194304) -#endif -#if KMALLOC_MAX_SIZE >= 8388608 - CACHE(8388608) -#endif -#if KMALLOC_MAX_SIZE >= 16777216 - CACHE(16777216) -#endif -#if KMALLOC_MAX_SIZE >= 33554432 - CACHE(33554432) -#endif diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8bb6e0eaf3c6..e0f30ef9525d 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -11,8 +11,6 @@ */ #include <linux/gfp.h> -#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ -#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include <linux/compiler.h> /* @@ -104,15 +102,8 @@ struct kmem_cache { */ }; -/* Size description struct for general caches. */ -struct cache_sizes { - size_t cs_size; - struct kmem_cache *cs_cachep; -#ifdef CONFIG_ZONE_DMA - struct kmem_cache *cs_dmacachep; -#endif -}; -extern struct cache_sizes malloc_sizes[]; +extern struct kmem_cache *kmalloc_caches[PAGE_SHIFT + MAX_ORDER]; +extern struct kmem_cache *kmalloc_dma_caches[PAGE_SHIFT + MAX_ORDER]; void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); @@ -133,26 +124,19 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) void *ret; if (__builtin_constant_p(size)) { - int i = 0; + int i; if (!size) return ZERO_SIZE_PTR; -#define CACHE(x) \ - if (size <= x) \ - goto found; \ - else \ - i++; -#include <linux/kmalloc_sizes.h> -#undef CACHE - return NULL; -found: + i = kmalloc_index(size); + #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - cachep = malloc_sizes[i].cs_dmacachep; + cachep = kmalloc_dma_caches[i]; else #endif - cachep = malloc_sizes[i].cs_cachep; + cachep = kmalloc_caches[i]; ret = kmem_cache_alloc_trace(cachep, flags, size); @@ -186,26 +170,19 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) struct kmem_cache *cachep; if (__builtin_constant_p(size)) { - int i = 0; + int i; if (!size) return ZERO_SIZE_PTR; -#define CACHE(x) \ - if (size <= x) \ - goto found; \ - else \ - i++; -#include <linux/kmalloc_sizes.h> -#undef CACHE - return NULL; -found: + i = kmalloc_index(size); + #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) - cachep = malloc_sizes[i].cs_dmacachep; + cachep = kmalloc_dma_caches[i]; else #endif - cachep = malloc_sizes[i].cs_cachep; + cachep = kmalloc_caches[i]; return kmem_cache_alloc_node_trace(cachep, flags, node, size); } diff --git a/mm/slab.c b/mm/slab.c index e7667a3584bc..2a7132ec4ff6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -318,34 +318,18 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int len, static int 
enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); -/* - * This function must be completely optimized away if a constant is passed to - * it. Mostly the same as what is in linux/slab.h except it returns an index. - */ -static __always_inline int index_of(const size_t size) -{ - extern void __bad_size(void); - - if (__builtin_constant_p(size)) { - int i = 0; +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_caches); -#define CACHE(x) \ - if (size <=x) \ - return i; \ - else \ - i++; -#include <linux/kmalloc_sizes.h> -#undef CACHE - __bad_size(); - } else - __bad_size(); - return 0; -} +#ifdef CONFIG_ZONE_DMA +struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_dma_caches); +#endif static int slab_early_init = 1; -#define INDEX_AC index_of(sizeof(struct arraycache_init)) -#define INDEX_L3 index_of(sizeof(struct kmem_list3)) +#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init)) +#define INDEX_L3 kmalloc_index(sizeof(struct kmem_list3)) static void kmem_list3_init(struct kmem_list3 *parent) { @@ -524,30 +508,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache, return reciprocal_divide(offset, cache->reciprocal_buffer_size); } -/* - * These are the default caches for kmalloc. Custom caches can have other sizes. - */ -struct cache_sizes malloc_sizes[] = { -#define CACHE(x) { .cs_size = (x) }, -#include <linux/kmalloc_sizes.h> - CACHE(ULONG_MAX) -#undef CACHE -}; -EXPORT_SYMBOL(malloc_sizes); - -/* Must match cache_sizes above. Out of line to keep cache footprint low. */ -struct cache_names { - char *name; - char *name_dma; -}; - -static struct cache_names __initdata cache_names[] = { -#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, -#include <linux/kmalloc_sizes.h> - {NULL,} -#undef CACHE -}; - static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; @@ -625,19 +585,23 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) static void init_node_lock_keys(int q) { - struct cache_sizes *s = malloc_sizes; + int i; if (slab_state < UP) return; - for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { + for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { struct kmem_list3 *l3; + struct kmem_cache *cache = kmalloc_caches[i]; + + if (!cache) + continue; - l3 = s->cs_cachep->nodelists[q]; - if (!l3 || OFF_SLAB(s->cs_cachep)) + l3 = cache->nodelists[q]; + if (!l3 || OFF_SLAB(cache)) continue; - slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, + slab_set_lock_classes(cache, &on_slab_l3_key, &on_slab_alc_key, q); } } @@ -705,20 +669,19 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) { - struct cache_sizes *csizep = malloc_sizes; + int i; #if DEBUG /* This happens if someone tries to call * kmem_cache_create(), or __kmalloc(), before * the generic caches are initialized. 
*/ - BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); + BUG_ON(kmalloc_caches[INDEX_AC] == NULL); #endif if (!size) return ZERO_SIZE_PTR; - while (size > csizep->cs_size) - csizep++; + i = kmalloc_index(size); /* * Really subtle: The last entry with cs->cs_size==ULONG_MAX @@ -727,9 +690,9 @@ static inline struct kmem_cache *__find_general_cachep(size_t size, */ #ifdef CONFIG_ZONE_DMA if (unlikely(gfpflags & GFP_DMA)) - return csizep->cs_dmacachep; + return kmalloc_dma_caches[i]; #endif - return csizep->cs_cachep; + return kmalloc_caches[i]; } static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) @@ -1602,8 +1565,6 @@ static void setup_nodelists_pointer(struct kmem_cache *cachep) */ void __init kmem_cache_init(void) { - struct cache_sizes *sizes; - struct cache_names *names; int i; kmem_cache = &kmem_cache_boot; @@ -1657,8 +1618,6 @@ void __init kmem_cache_init(void) list_add(&kmem_cache->list, &slab_caches); /* 2+3) create the kmalloc caches */ - sizes = malloc_sizes; - names = cache_names; /* * Initialize the caches that provide memory for the array cache and the @@ -1666,35 +1625,39 @@ void __init kmem_cache_init(void) * bug. */ - sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name, - sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS); + kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac", + kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS); if (INDEX_AC != INDEX_L3) - sizes[INDEX_L3].cs_cachep = - create_kmalloc_cache(names[INDEX_L3].name, - sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS); + kmalloc_caches[INDEX_L3] = + create_kmalloc_cache("kmalloc-l3", + kmalloc_size(INDEX_L3), ARCH_KMALLOC_FLAGS); slab_early_init = 0; - while (sizes->cs_size != ULONG_MAX) { - /* - * For performance, all the general caches are L1 aligned. - * This should be particularly beneficial on SMP boxes, as it - * eliminates "false sharing". - * Note for systems short on memory removing the alignment will - * allow tighter packing of the smaller caches. - */ - if (!sizes->cs_cachep) - sizes->cs_cachep = create_kmalloc_cache(names->name, - sizes->cs_size, ARCH_KMALLOC_FLAGS); + for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { + size_t cs_size = kmalloc_size(i); + + if (cs_size < KMALLOC_MIN_SIZE) + continue; + + if (!kmalloc_caches[i]) { + /* + * For performance, all the general caches are L1 aligned. + * This should be particularly beneficial on SMP boxes, as it + * eliminates "false sharing". + * Note for systems short on memory removing the alignment will + * allow tighter packing of the smaller caches. 
+ */ + kmalloc_caches[i] = create_kmalloc_cache("kmalloc", + cs_size, ARCH_KMALLOC_FLAGS); + } #ifdef CONFIG_ZONE_DMA - sizes->cs_dmacachep = create_kmalloc_cache( - names->name_dma, sizes->cs_size, + kmalloc_dma_caches[i] = create_kmalloc_cache( + "kmalloc-dma", cs_size, SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS); #endif - sizes++; - names++; } /* 4) Replace the bootstrap head arrays */ { @@ -1713,17 +1676,16 @@ void __init kmem_cache_init(void) ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) + BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC]) != &initarray_generic.cache); - memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), + memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]), sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = - ptr; + kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr; } /* 5) Replace the bootstrap kmem_list3's */ { @@ -1732,17 +1694,39 @@ void __init kmem_cache_init(void) for_each_online_node(nid) { init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid); - init_list(malloc_sizes[INDEX_AC].cs_cachep, + init_list(kmalloc_caches[INDEX_AC], &initkmem_list3[SIZE_AC + nid], nid); if (INDEX_AC != INDEX_L3) { - init_list(malloc_sizes[INDEX_L3].cs_cachep, + init_list(kmalloc_caches[INDEX_L3], &initkmem_list3[SIZE_L3 + nid], nid); } } } slab_state = UP; + + /* Create the proper names */ + for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { + char *s; + struct kmem_cache *c = kmalloc_caches[i]; + + if (!c) + continue; + + s = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i)); + + BUG_ON(!s); + c->name = s; + +#ifdef CONFIG_ZONE_DMA + c = kmalloc_dma_caches[i]; + BUG_ON(!c); + s = kasprintf(GFP_NOWAIT, "dma-kmalloc-%d", kmalloc_size(i)); + BUG_ON(!s); + c->name = s; +#endif + } } void __init kmem_cache_init_late(void) @@ -2428,10 +2412,9 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= malloc_sizes[INDEX_L3 + 1].cs_size - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { - cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); + if (size >= kmalloc_size(INDEX_L3 + 1) + && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { + cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); size = PAGE_SIZE; } #endif -- GitLab From 6744f087ba2a49f6d6935d9daa0b20a0f03567b5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:12:17 +0000 Subject: [PATCH 0005/3163] slab: Common name for the per node structures Rename the structure used for the per node structures in slab to have a name that expresses that fact. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 2 +- mm/slab.c | 87 ++++++++++++++++++++-------------------- 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index e0f30ef9525d..8b5b2f6b36d3 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -95,7 +95,7 @@ struct kmem_cache { * pointer for each node since "nodelists" uses the remainder of * available pointers. 
*/ - struct kmem_list3 **nodelists; + struct kmem_cache_node **nodelists; struct array_cache *array[NR_CPUS + MAX_NUMNODES]; /* * Do not add fields after array[] diff --git a/mm/slab.c b/mm/slab.c index 2a7132ec4ff6..7c0da4c86973 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -288,7 +288,7 @@ struct arraycache_init { /* * The slab lists for all objects. */ -struct kmem_list3 { +struct kmem_cache_node { struct list_head slabs_partial; /* partial list first, better asm code */ struct list_head slabs_full; struct list_head slabs_free; @@ -306,13 +306,13 @@ struct kmem_list3 { * Need this for bootstrapping a per node allocator. */ #define NUM_INIT_LISTS (3 * MAX_NUMNODES) -static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; +static struct kmem_cache_node __initdata initkmem_list3[NUM_INIT_LISTS]; #define CACHE_CACHE 0 #define SIZE_AC MAX_NUMNODES #define SIZE_L3 (2 * MAX_NUMNODES) static int drain_freelist(struct kmem_cache *cache, - struct kmem_list3 *l3, int tofree); + struct kmem_cache_node *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); @@ -329,9 +329,9 @@ EXPORT_SYMBOL(kmalloc_dma_caches); static int slab_early_init = 1; #define INDEX_AC kmalloc_index(sizeof(struct arraycache_init)) -#define INDEX_L3 kmalloc_index(sizeof(struct kmem_list3)) +#define INDEX_L3 kmalloc_index(sizeof(struct kmem_cache_node)) -static void kmem_list3_init(struct kmem_list3 *parent) +static void kmem_list3_init(struct kmem_cache_node *parent) { INIT_LIST_HEAD(&parent->slabs_full); INIT_LIST_HEAD(&parent->slabs_partial); @@ -546,7 +546,7 @@ static void slab_set_lock_classes(struct kmem_cache *cachep, int q) { struct array_cache **alc; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int r; l3 = cachep->nodelists[q]; @@ -591,7 +591,7 @@ static void init_node_lock_keys(int q) return; for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct kmem_cache *cache = kmalloc_caches[i]; if (!cache) @@ -608,9 +608,8 @@ static void init_node_lock_keys(int q) static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) { - struct kmem_list3 *l3; - l3 = cachep->nodelists[q]; - if (!l3) + + if (!cachep->nodelists[q]) return; slab_set_lock_classes(cachep, &on_slab_l3_key, @@ -901,7 +900,7 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp) static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { - struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()]; + struct kmem_cache_node *l3 = cachep->nodelists[numa_mem_id()]; struct slab *slabp; unsigned long flags; @@ -934,7 +933,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ if (unlikely(is_obj_pfmemalloc(objp))) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; if (gfp_pfmemalloc_allowed(flags)) { clear_obj_pfmemalloc(&objp); @@ -1106,7 +1105,7 @@ static void free_alien_cache(struct array_cache **ac_ptr) static void __drain_alien_cache(struct kmem_cache *cachep, struct array_cache *ac, int node) { - struct kmem_list3 *rl3 = cachep->nodelists[node]; + struct kmem_cache_node *rl3 = cachep->nodelists[node]; if (ac->avail) { spin_lock(&rl3->list_lock); @@ -1127,7 +1126,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, /* * Called from cache_reap() to regularly drain alien caches round robin. 
*/ -static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) +static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *l3) { int node = __this_cpu_read(slab_reap_node); @@ -1162,7 +1161,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) { struct slab *slabp = virt_to_slab(objp); int nodeid = slabp->nodeid; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct array_cache *alien = NULL; int node; @@ -1207,8 +1206,8 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) static int init_cache_nodelists_node(int node) { struct kmem_cache *cachep; - struct kmem_list3 *l3; - const int memsize = sizeof(struct kmem_list3); + struct kmem_cache_node *l3; + const int memsize = sizeof(struct kmem_cache_node); list_for_each_entry(cachep, &slab_caches, list) { /* @@ -1244,7 +1243,7 @@ static int init_cache_nodelists_node(int node) static void __cpuinit cpuup_canceled(long cpu) { struct kmem_cache *cachep; - struct kmem_list3 *l3 = NULL; + struct kmem_cache_node *l3 = NULL; int node = cpu_to_mem(cpu); const struct cpumask *mask = cpumask_of_node(node); @@ -1309,7 +1308,7 @@ static void __cpuinit cpuup_canceled(long cpu) static int __cpuinit cpuup_prepare(long cpu) { struct kmem_cache *cachep; - struct kmem_list3 *l3 = NULL; + struct kmem_cache_node *l3 = NULL; int node = cpu_to_mem(cpu); int err; @@ -1463,7 +1462,7 @@ static int __meminit drain_cache_nodelists_node(int node) int ret = 0; list_for_each_entry(cachep, &slab_caches, list) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; l3 = cachep->nodelists[node]; if (!l3) @@ -1516,15 +1515,15 @@ static int __meminit slab_memory_callback(struct notifier_block *self, /* * swap the static kmem_list3 with kmalloced memory */ -static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list, +static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list, int nodeid) { - struct kmem_list3 *ptr; + struct kmem_cache_node *ptr; - ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); + ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid); BUG_ON(!ptr); - memcpy(ptr, list, sizeof(struct kmem_list3)); + memcpy(ptr, list, sizeof(struct kmem_cache_node)); /* * Do not assume that spinlocks can be initialized via memcpy: */ @@ -1556,7 +1555,7 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) */ static void setup_nodelists_pointer(struct kmem_cache *cachep) { - cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; + cachep->nodelists = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids]; } /* @@ -1613,7 +1612,7 @@ void __init kmem_cache_init(void) */ create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, array[nr_cpu_ids]) + - nr_node_ids * sizeof(struct kmem_list3 *), + nr_node_ids * sizeof(struct kmem_cache_node *), SLAB_HWCACHE_ALIGN); list_add(&kmem_cache->list, &slab_caches); @@ -1787,7 +1786,7 @@ __initcall(cpucache_init); static noinline void slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct slab *slabp; unsigned long flags; int node; @@ -2279,7 +2278,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) int node; for_each_online_node(node) { cachep->nodelists[node] = - kmalloc_node(sizeof(struct kmem_list3), + kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); BUG_ON(!cachep->nodelists[node]); 
kmem_list3_init(cachep->nodelists[node]); @@ -2547,7 +2546,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) #define check_spinlock_acquired_node(x, y) do { } while(0) #endif -static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, struct array_cache *ac, int force, int node); @@ -2567,7 +2566,7 @@ static void do_drain(void *arg) static void drain_cpu_caches(struct kmem_cache *cachep) { - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int node; on_each_cpu(do_drain, cachep, 1); @@ -2592,7 +2591,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep) * Returns the actual number of slabs released. */ static int drain_freelist(struct kmem_cache *cache, - struct kmem_list3 *l3, int tofree) + struct kmem_cache_node *l3, int tofree) { struct list_head *p; int nr_freed; @@ -2630,7 +2629,7 @@ static int drain_freelist(struct kmem_cache *cache, static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; drain_cpu_caches(cachep); @@ -2672,7 +2671,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); int __kmem_cache_shutdown(struct kmem_cache *cachep) { int i; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int rc = __cache_shrink(cachep); if (rc) @@ -2869,7 +2868,7 @@ static int cache_grow(struct kmem_cache *cachep, struct slab *slabp; size_t offset; gfp_t local_flags; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; /* * Be lazy and only check for valid flags here, keeping it out of the @@ -3059,7 +3058,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, bool force_refill) { int batchcount; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct array_cache *ac; int node; @@ -3391,7 +3390,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, { struct list_head *entry; struct slab *slabp; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; void *obj; int x; @@ -3586,7 +3585,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, int node) { int i; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; for (i = 0; i < nr_objects; i++) { void *objp; @@ -3632,7 +3631,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) { int batchcount; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int node = numa_mem_id(); batchcount = ac->batchcount; @@ -3924,7 +3923,7 @@ EXPORT_SYMBOL(kfree); static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) { int node; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; struct array_cache *new_shared; struct array_cache **new_alien = NULL; @@ -3969,7 +3968,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) free_alien_cache(new_alien); continue; } - l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); + l3 = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); if (!l3) { free_alien_cache(new_alien); kfree(new_shared); @@ -4165,7 +4164,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) * necessary. Note that the l3 listlock also protects the array_cache * if drain_array() is used on the shared array. 
*/ -static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, struct array_cache *ac, int force, int node) { int tofree; @@ -4204,7 +4203,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, static void cache_reap(struct work_struct *w) { struct kmem_cache *searchp; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; int node = numa_mem_id(); struct delayed_work *work = to_delayed_work(w); @@ -4268,7 +4267,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) const char *name; char *error = NULL; int node; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; active_objs = 0; num_slabs = 0; @@ -4482,7 +4481,7 @@ static int leaks_show(struct seq_file *m, void *p) { struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); struct slab *slabp; - struct kmem_list3 *l3; + struct kmem_cache_node *l3; const char *name; unsigned long *n = m->private; int node; -- GitLab From 6a67368c36e2c0c2578ba62f6264ab739af08cce Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: [PATCH 0006/3163] slab: Rename nodelists to node Have a common naming between both slab caches for future changes. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 2 +- mm/slab.c | 135 +++++++++++++++++++-------------------- 2 files changed, 68 insertions(+), 69 deletions(-) diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8b5b2f6b36d3..4ff50e8d1a2c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -95,7 +95,7 @@ struct kmem_cache { * pointer for each node since "nodelists" uses the remainder of * available pointers. 
*/ - struct kmem_cache_node **nodelists; + struct kmem_cache_node **node; struct array_cache *array[NR_CPUS + MAX_NUMNODES]; /* * Do not add fields after array[] diff --git a/mm/slab.c b/mm/slab.c index 7c0da4c86973..3416f4c544b3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -347,7 +347,7 @@ static void kmem_list3_init(struct kmem_cache_node *parent) #define MAKE_LIST(cachep, listp, slab, nodeid) \ do { \ INIT_LIST_HEAD(listp); \ - list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ + list_splice(&(cachep->node[nodeid]->slab), listp); \ } while (0) #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ @@ -549,7 +549,7 @@ static void slab_set_lock_classes(struct kmem_cache *cachep, struct kmem_cache_node *l3; int r; - l3 = cachep->nodelists[q]; + l3 = cachep->node[q]; if (!l3) return; @@ -597,7 +597,7 @@ static void init_node_lock_keys(int q) if (!cache) continue; - l3 = cache->nodelists[q]; + l3 = cache->node[q]; if (!l3 || OFF_SLAB(cache)) continue; @@ -608,8 +608,7 @@ static void init_node_lock_keys(int q) static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) { - - if (!cachep->nodelists[q]) + if (!cachep->node[q]) return; slab_set_lock_classes(cachep, &on_slab_l3_key, @@ -900,7 +899,7 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp) static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { - struct kmem_cache_node *l3 = cachep->nodelists[numa_mem_id()]; + struct kmem_cache_node *l3 = cachep->node[numa_mem_id()]; struct slab *slabp; unsigned long flags; @@ -955,7 +954,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, * If there are empty slabs on the slabs_free list and we are * being forced to refill the cache, mark this one !pfmemalloc. */ - l3 = cachep->nodelists[numa_mem_id()]; + l3 = cachep->node[numa_mem_id()]; if (!list_empty(&l3->slabs_free) && force_refill) { struct slab *slabp = virt_to_slab(objp); ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); @@ -1105,7 +1104,7 @@ static void free_alien_cache(struct array_cache **ac_ptr) static void __drain_alien_cache(struct kmem_cache *cachep, struct array_cache *ac, int node) { - struct kmem_cache_node *rl3 = cachep->nodelists[node]; + struct kmem_cache_node *rl3 = cachep->node[node]; if (ac->avail) { spin_lock(&rl3->list_lock); @@ -1174,7 +1173,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) if (likely(slabp->nodeid == node)) return 0; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; STATS_INC_NODEFREES(cachep); if (l3->alien && l3->alien[nodeid]) { alien = l3->alien[nodeid]; @@ -1186,24 +1185,24 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ac_put_obj(cachep, alien, objp); spin_unlock(&alien->lock); } else { - spin_lock(&(cachep->nodelists[nodeid])->list_lock); + spin_lock(&(cachep->node[nodeid])->list_lock); free_block(cachep, &objp, 1, nodeid); - spin_unlock(&(cachep->nodelists[nodeid])->list_lock); + spin_unlock(&(cachep->node[nodeid])->list_lock); } return 1; } #endif /* - * Allocates and initializes nodelists for a node on each slab cache, used for + * Allocates and initializes node for a node on each slab cache, used for * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 * will be allocated off-node since memory is not yet online for the new node. - * When hotplugging memory or a cpu, existing nodelists are not replaced if + * When hotplugging memory or a cpu, existing node are not replaced if * already in use. * * Must hold slab_mutex. 
*/ -static int init_cache_nodelists_node(int node) +static int init_cache_node_node(int node) { struct kmem_cache *cachep; struct kmem_cache_node *l3; @@ -1215,7 +1214,7 @@ static int init_cache_nodelists_node(int node) * begin anything. Make sure some other cpu on this * node has not already allocated this */ - if (!cachep->nodelists[node]) { + if (!cachep->node[node]) { l3 = kmalloc_node(memsize, GFP_KERNEL, node); if (!l3) return -ENOMEM; @@ -1228,14 +1227,14 @@ static int init_cache_nodelists_node(int node) * go. slab_mutex is sufficient * protection here. */ - cachep->nodelists[node] = l3; + cachep->node[node] = l3; } - spin_lock_irq(&cachep->nodelists[node]->list_lock); - cachep->nodelists[node]->free_limit = + spin_lock_irq(&cachep->node[node]->list_lock); + cachep->node[node]->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->nodelists[node]->list_lock); + spin_unlock_irq(&cachep->node[node]->list_lock); } return 0; } @@ -1255,7 +1254,7 @@ static void __cpuinit cpuup_canceled(long cpu) /* cpu is dead; no one can alloc from it. */ nc = cachep->array[cpu]; cachep->array[cpu] = NULL; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) goto free_array_cache; @@ -1298,7 +1297,7 @@ static void __cpuinit cpuup_canceled(long cpu) * shrink each nodelist to its limit. */ list_for_each_entry(cachep, &slab_caches, list) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; drain_freelist(cachep, l3, l3->free_objects); @@ -1318,7 +1317,7 @@ static int __cpuinit cpuup_prepare(long cpu) * kmalloc_node allows us to add the slab to the right * kmem_list3 and not this cpu's kmem_list3 */ - err = init_cache_nodelists_node(node); + err = init_cache_node_node(node); if (err < 0) goto bad; @@ -1353,7 +1352,7 @@ static int __cpuinit cpuup_prepare(long cpu) } } cachep->array[cpu] = nc; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; BUG_ON(!l3); spin_lock_irq(&l3->list_lock); @@ -1456,7 +1455,7 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { * * Must hold slab_mutex. 
*/ -static int __meminit drain_cache_nodelists_node(int node) +static int __meminit drain_cache_node_node(int node) { struct kmem_cache *cachep; int ret = 0; @@ -1464,7 +1463,7 @@ static int __meminit drain_cache_nodelists_node(int node) list_for_each_entry(cachep, &slab_caches, list) { struct kmem_cache_node *l3; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; @@ -1493,12 +1492,12 @@ static int __meminit slab_memory_callback(struct notifier_block *self, switch (action) { case MEM_GOING_ONLINE: mutex_lock(&slab_mutex); - ret = init_cache_nodelists_node(nid); + ret = init_cache_node_node(nid); mutex_unlock(&slab_mutex); break; case MEM_GOING_OFFLINE: mutex_lock(&slab_mutex); - ret = drain_cache_nodelists_node(nid); + ret = drain_cache_node_node(nid); mutex_unlock(&slab_mutex); break; case MEM_ONLINE: @@ -1530,7 +1529,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node * spin_lock_init(&ptr->list_lock); MAKE_ALL_LISTS(cachep, ptr, nodeid); - cachep->nodelists[nodeid] = ptr; + cachep->node[nodeid] = ptr; } /* @@ -1542,8 +1541,8 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) int node; for_each_online_node(node) { - cachep->nodelists[node] = &initkmem_list3[index + node]; - cachep->nodelists[node]->next_reap = jiffies + + cachep->node[node] = &initkmem_list3[index + node]; + cachep->node[node]->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; } @@ -1551,11 +1550,11 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) /* * The memory after the last cpu cache pointer is used for the - * the nodelists pointer. + * the node pointer. */ -static void setup_nodelists_pointer(struct kmem_cache *cachep) +static void setup_node_pointer(struct kmem_cache *cachep) { - cachep->nodelists = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids]; + cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids]; } /* @@ -1567,7 +1566,7 @@ void __init kmem_cache_init(void) int i; kmem_cache = &kmem_cache_boot; - setup_nodelists_pointer(kmem_cache); + setup_node_pointer(kmem_cache); if (num_possible_nodes() == 1) use_alien_caches = 0; @@ -1756,7 +1755,7 @@ void __init kmem_cache_init_late(void) #ifdef CONFIG_NUMA /* * Register a memory hotplug callback that initializes and frees - * nodelists. + * node. 
*/ hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); #endif @@ -1801,7 +1800,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) unsigned long active_objs = 0, num_objs = 0, free_objects = 0; unsigned long active_slabs = 0, num_slabs = 0; - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; @@ -2277,15 +2276,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) } else { int node; for_each_online_node(node) { - cachep->nodelists[node] = + cachep->node[node] = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); - BUG_ON(!cachep->nodelists[node]); - kmem_list3_init(cachep->nodelists[node]); + BUG_ON(!cachep->node[node]); + kmem_list3_init(cachep->node[node]); } } } - cachep->nodelists[numa_mem_id()]->next_reap = + cachep->node[numa_mem_id()]->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; @@ -2388,7 +2387,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) else gfp = GFP_NOWAIT; - setup_nodelists_pointer(cachep); + setup_node_pointer(cachep); #if DEBUG /* @@ -2527,7 +2526,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock); + assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock); #endif } @@ -2535,7 +2534,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&cachep->nodelists[node]->list_lock); + assert_spin_locked(&cachep->node[node]->list_lock); #endif } @@ -2558,9 +2557,9 @@ static void do_drain(void *arg) check_irq_off(); ac = cpu_cache_get(cachep); - spin_lock(&cachep->nodelists[node]->list_lock); + spin_lock(&cachep->node[node]->list_lock); free_block(cachep, ac->entry, ac->avail, node); - spin_unlock(&cachep->nodelists[node]->list_lock); + spin_unlock(&cachep->node[node]->list_lock); ac->avail = 0; } @@ -2572,13 +2571,13 @@ static void drain_cpu_caches(struct kmem_cache *cachep) on_each_cpu(do_drain, cachep, 1); check_irq_on(); for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (l3 && l3->alien) drain_alien_cache(cachep, l3->alien); } for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (l3) drain_array(cachep, l3, l3->shared, 1, node); } @@ -2635,7 +2634,7 @@ static int __cache_shrink(struct kmem_cache *cachep) check_irq_on(); for_each_online_node(i) { - l3 = cachep->nodelists[i]; + l3 = cachep->node[i]; if (!l3) continue; @@ -2682,7 +2681,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) /* NUMA: free the list3 structures */ for_each_online_node(i) { - l3 = cachep->nodelists[i]; + l3 = cachep->node[i]; if (l3) { kfree(l3->shared); free_alien_cache(l3->alien); @@ -2879,7 +2878,7 @@ static int cache_grow(struct kmem_cache *cachep, /* Take the l3 list lock to change the colour_next on this node */ check_irq_off(); - l3 = cachep->nodelists[nodeid]; + l3 = cachep->node[nodeid]; spin_lock(&l3->list_lock); /* Get colour for the slab, and cal the next value. 
*/ @@ -3077,7 +3076,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, */ batchcount = BATCHREFILL_LIMIT; } - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; BUG_ON(ac->avail > 0 || !l3); spin_lock(&l3->list_lock); @@ -3299,7 +3298,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) /* * Fallback function if there was no memory available and no objects on a * certain node and fall back is permitted. First we scan all the - * available nodelists for available objects. If that fails then we + * available node for available objects. If that fails then we * perform an allocation without specifying a node. This allows the page * allocator to do its reclaim / fallback magic. We then insert the * slab into the proper nodelist and then allocate from it. @@ -3333,8 +3332,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) nid = zone_to_nid(zone); if (cpuset_zone_allowed_hardwall(zone, flags) && - cache->nodelists[nid] && - cache->nodelists[nid]->free_objects) { + cache->node[nid] && + cache->node[nid]->free_objects) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); if (obj) @@ -3394,7 +3393,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, void *obj; int x; - l3 = cachep->nodelists[nodeid]; + l3 = cachep->node[nodeid]; BUG_ON(!l3); retry: @@ -3479,7 +3478,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, if (nodeid == NUMA_NO_NODE) nodeid = slab_node; - if (unlikely(!cachep->nodelists[nodeid])) { + if (unlikely(!cachep->node[nodeid])) { /* Node not bootstrapped yet */ ptr = fallback_alloc(cachep, flags); goto out; @@ -3595,7 +3594,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, objp = objpp[i]; slabp = virt_to_slab(objp); - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; list_del(&slabp->list); check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); @@ -3639,7 +3638,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) BUG_ON(!batchcount || batchcount > ac->avail); #endif check_irq_off(); - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; spin_lock(&l3->list_lock); if (l3->shared) { struct array_cache *shared_array = l3->shared; @@ -3946,7 +3945,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) } } - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (l3) { struct array_cache *shared = l3->shared; @@ -3982,7 +3981,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) l3->alien = new_alien; l3->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - cachep->nodelists[node] = l3; + cachep->node[node] = l3; } return 0; @@ -3991,13 +3990,13 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) /* Cache is not active yet. 
Roll back what we did */ node--; while (node >= 0) { - if (cachep->nodelists[node]) { - l3 = cachep->nodelists[node]; + if (cachep->node[node]) { + l3 = cachep->node[node]; kfree(l3->shared); free_alien_cache(l3->alien); kfree(l3); - cachep->nodelists[node] = NULL; + cachep->node[node] = NULL; } node--; } @@ -4057,9 +4056,9 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, struct array_cache *ccold = new->new[i]; if (!ccold) continue; - spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); - spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); + spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); kfree(ccold); } kfree(new); @@ -4219,7 +4218,7 @@ static void cache_reap(struct work_struct *w) * have established with reasonable certainty that * we can do some work if the lock was obtained. */ - l3 = searchp->nodelists[node]; + l3 = searchp->node[node]; reap_alien(searchp, l3); @@ -4272,7 +4271,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) active_objs = 0; num_slabs = 0; for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; @@ -4497,7 +4496,7 @@ static int leaks_show(struct seq_file *m, void *p) n[1] = 0; for_each_online_node(node) { - l3 = cachep->nodelists[node]; + l3 = cachep->node[node]; if (!l3) continue; -- GitLab From 95a05b428cc675694321c8f762591984f3fd2b1e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: [PATCH 0007/3163] slab: Common constants for kmalloc boundaries Standardize the constants that describe the smallest and largest object kept in the kmalloc arrays for SLAB and SLUB. Differentiate between the maximum size for which a slab cache is used (KMALLOC_MAX_CACHE_SIZE) and the maximum allocatable size (KMALLOC_MAX_SIZE, KMALLOC_MAX_ORDER). Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 34 ++++++++++++++++++++++++---------- include/linux/slub_def.h | 19 +++---------------- mm/slub.c | 22 +++++++++++----------- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index c97fe92532d1..c01780540054 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -163,7 +163,12 @@ struct kmem_cache { #else /* CONFIG_SLOB */ /* - * The largest kmalloc size supported by the slab allocators is + * Kmalloc array related definitions + */ + +#ifdef CONFIG_SLAB +/* + * The largest kmalloc size supported by the SLAB allocators is * 32 megabyte (2^25) or the maximum allocatable page order if that is * less than 32 MB. * @@ -173,9 +178,24 @@ struct kmem_cache { */ #define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ (MAX_ORDER + PAGE_SHIFT - 1) : 25) +#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH +#define KMALLOC_SHIFT_LOW 5 +#else +/* + * SLUB allocates up to order 2 pages directly and otherwise + * passes the request to the page allocator. 
+ */ +#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_LOW 3 +#endif -#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) -#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) +/* Maximum allocatable size */ +#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) +/* Maximum size for which we actually use a slab cache */ +#define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH) +/* Maximum order allocatable via the slab allocagtor */ +#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT) /* * Kmalloc subsystem. @@ -183,15 +203,9 @@ struct kmem_cache { #if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN #else -#ifdef CONFIG_SLAB -#define KMALLOC_MIN_SIZE 32 -#else -#define KMALLOC_MIN_SIZE 8 -#endif +#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif -#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) - /* * Figure out which kmalloc slab an allocation of a certain size * belongs to. diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 99c3e05ff1f0..032028ef9a34 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -115,19 +115,6 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -/* - * Maximum kmalloc object size handled by SLUB. Larger object allocations - * are passed through to the page allocator. The page allocator "fastpath" - * is relatively slow so we need this value sufficiently high so that - * performance critical objects are allocated through the SLUB fastpath. - * - * This should be dropped to PAGE_SIZE / 2 once the page allocator - * "fastpath" becomes competitive with the slab allocator fastpaths. - */ -#define SLUB_MAX_SIZE (2 * PAGE_SIZE) - -#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2) - #ifdef CONFIG_ZONE_DMA #define SLUB_DMA __GFP_DMA #else @@ -139,7 +126,7 @@ struct kmem_cache { * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; +extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; /* * Find the slab cache for a given combination of allocation flags and size. 
@@ -211,7 +198,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) static __always_inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { - if (size > SLUB_MAX_SIZE) + if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); if (!(flags & SLUB_DMA)) { @@ -247,7 +234,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && - size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { + size <= KMALLOC_MAX_CACHE_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); if (!s) diff --git a/mm/slub.c b/mm/slub.c index ba2ca53f6c3a..d0f72ee06310 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2775,7 +2775,7 @@ init_kmem_cache_node(struct kmem_cache_node *n) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < - SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); + KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu)); /* * Must align to double word boundary for the double cmpxchg @@ -3174,11 +3174,11 @@ int __kmem_cache_shutdown(struct kmem_cache *s) * Kmalloc subsystem *******************************************************************/ -struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; EXPORT_SYMBOL(kmalloc_caches); #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; +static struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; #endif static int __init setup_slub_min_order(char *str) @@ -3280,7 +3280,7 @@ void *__kmalloc(size_t size, gfp_t flags) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -3316,7 +3316,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = kmalloc_large_node(size, flags, node); trace_kmalloc_node(_RET_IP_, ret, @@ -3721,7 +3721,7 @@ void __init kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); caches++; } @@ -3739,7 +3739,7 @@ void __init kmem_cache_init(void) BUG_ON(!kmalloc_caches[2]->name); } - for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); BUG_ON(!s); @@ -3751,7 +3751,7 @@ void __init kmem_cache_init(void) #endif #ifdef CONFIG_ZONE_DMA - for (i = 0; i < SLUB_PAGE_SHIFT; i++) { + for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { struct kmem_cache *s = kmalloc_caches[i]; if (s && s->size) { @@ -3930,7 +3930,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags); @@ -3953,7 +3953,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, struct kmem_cache *s; void *ret; - if (unlikely(size > SLUB_MAX_SIZE)) { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = kmalloc_large_node(size, gfpflags, node); trace_kmalloc_node(caller, ret, @@ -4312,7 +4312,7 @@ static void 
resiliency_test(void)
 {
 	u8 *p;

-	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
+	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);

 	printk(KERN_ERR "SLUB resiliency testing\n");
 	printk(KERN_ERR "-----------------------\n");
--
GitLab

From 9425c58e5445277699ff3c2a87bac1cfebc1b48d Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 10 Jan 2013 19:12:17 +0000
Subject: [PATCH 0008/3163] slab: Common definition for the array of kmalloc caches

Have a common definition for the kmalloc cache arrays in SLAB and SLUB

Acked-by: Glauber Costa
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 include/linux/slab.h | 5 +++++
 include/linux/slab_def.h | 3 ---
 include/linux/slub_def.h | 6 ------
 mm/slab.c | 8 --------
 mm/slab_common.c | 8 ++++++++
 mm/slub.c | 7 -------
 6 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index c01780540054..f2327a898a85 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -206,6 +206,11 @@ struct kmem_cache {
 #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
 #endif

+extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
+#ifdef CONFIG_ZONE_DMA
+extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
+#endif
+
 /*
  * Figure out which kmalloc slab an allocation of a certain size
  * belongs to.
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 4ff50e8d1a2c..113ec080313f 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -102,9 +102,6 @@ struct kmem_cache {
 	 */
 };

-extern struct kmem_cache *kmalloc_caches[PAGE_SHIFT + MAX_ORDER];
-extern struct kmem_cache *kmalloc_dma_caches[PAGE_SHIFT + MAX_ORDER];
-
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 032028ef9a34..3701896f7f8a 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -122,12 +122,6 @@ struct kmem_cache {
 #define SLUB_DMA (__force gfp_t)0
 #endif

-/*
- * We keep the general caches in an array of slab caches that are used for
- * 2^x bytes of allocations.
- */
-extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
-
 /*
  * Find the slab cache for a given combination of allocation flags and size.
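The +1 in the shared declaration is there because the array is indexed directly by kmalloc cache index, 0 through KMALLOC_SHIFT_HIGH inclusive. A sketch of the slot layout, assuming the SLUB value KMALLOC_SHIFT_HIGH = 13; slot 0 stays empty and slots 1 and 2 carry the non-power-of-two 96 and 192 byte caches, as the kmalloc_index() comment earlier in slab.h lays out:

	#include <stdio.h>

	int main(void)
	{
		printf("slot 0 -> unused (zero-size allocations)\n");
		printf("slot 1 -> 96 bytes, slot 2 -> 192 bytes\n");
		for (int shift = 3; shift <= 13; shift++)	/* KMALLOC_SHIFT_LOW..HIGH */
			printf("slot %d -> %d bytes\n", shift, 1 << shift);
		return 0;
	}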
* diff --git a/mm/slab.c b/mm/slab.c index 3416f4c544b3..357f0bdc5e43 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -318,14 +318,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int len, static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); -struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_caches); - -#ifdef CONFIG_ZONE_DMA -struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_dma_caches); -#endif - static int slab_early_init = 1; #define INDEX_AC kmalloc_index(sizeof(struct arraycache_init)) diff --git a/mm/slab_common.c b/mm/slab_common.c index 53adfbf2f3b2..0437b8189b8a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -319,6 +319,14 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, return s; } +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_caches); + +#ifdef CONFIG_ZONE_DMA +struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_dma_caches); +#endif + #endif /* !CONFIG_SLOB */ diff --git a/mm/slub.c b/mm/slub.c index d0f72ee06310..527cbfb5c49b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3174,13 +3174,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s) * Kmalloc subsystem *******************************************************************/ -struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_caches); - -#ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; -#endif - static int __init setup_slub_min_order(char *str) { get_option(&str, &slub_min_order); -- GitLab From f97d5f634d3b5133951424fae751db1f339548bd Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:12:17 +0000 Subject: [PATCH 0009/3163] slab: Common function to create the kmalloc array The kmalloc array is created in similar ways in both SLAB and SLUB. Create a common function and have both allocators call that function. V1->V2: Whitespace cleanup Reviewed-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slab.c | 48 +----------------------------------------- mm/slab.h | 6 ++++++ mm/slab_common.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ mm/slub.c | 55 +++--------------------------------------------- 4 files changed, 64 insertions(+), 99 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 357f0bdc5e43..08ba44f81a28 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1625,30 +1625,6 @@ void __init kmem_cache_init(void) slab_early_init = 0; - for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { - size_t cs_size = kmalloc_size(i); - - if (cs_size < KMALLOC_MIN_SIZE) - continue; - - if (!kmalloc_caches[i]) { - /* - * For performance, all the general caches are L1 aligned. - * This should be particularly beneficial on SMP boxes, as it - * eliminates "false sharing". - * Note for systems short on memory removing the alignment will - * allow tighter packing of the smaller caches. 
- */ - kmalloc_caches[i] = create_kmalloc_cache("kmalloc", - cs_size, ARCH_KMALLOC_FLAGS); - } - -#ifdef CONFIG_ZONE_DMA - kmalloc_dma_caches[i] = create_kmalloc_cache( - "kmalloc-dma", cs_size, - SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS); -#endif - } /* 4) Replace the bootstrap head arrays */ { struct array_cache *ptr; @@ -1694,29 +1670,7 @@ void __init kmem_cache_init(void) } } - slab_state = UP; - - /* Create the proper names */ - for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { - char *s; - struct kmem_cache *c = kmalloc_caches[i]; - - if (!c) - continue; - - s = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i)); - - BUG_ON(!s); - c->name = s; - -#ifdef CONFIG_ZONE_DMA - c = kmalloc_dma_caches[i]; - BUG_ON(!c); - s = kasprintf(GFP_NOWAIT, "dma-kmalloc-%d", kmalloc_size(i)); - BUG_ON(!s); - c->name = s; -#endif - } + create_kmalloc_caches(ARCH_KMALLOC_FLAGS); } void __init kmem_cache_init_late(void) diff --git a/mm/slab.h b/mm/slab.h index 34a98d642196..44c0bd6dc19e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -35,6 +35,12 @@ extern struct kmem_cache *kmem_cache; unsigned long calculate_alignment(unsigned long flags, unsigned long align, unsigned long size); +#ifndef CONFIG_SLOB +/* Kmalloc array related functions */ +void create_kmalloc_caches(unsigned long); +#endif + + /* Functions provided by the slab allocators */ extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags); diff --git a/mm/slab_common.c b/mm/slab_common.c index 0437b8189b8a..2b0ebb6d071d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -327,6 +327,60 @@ struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; EXPORT_SYMBOL(kmalloc_dma_caches); #endif +/* + * Create the kmalloc array. Some of the regular kmalloc arrays + * may already have been created because they were needed to + * enable allocations for slab creation. 
+ */
+void __init create_kmalloc_caches(unsigned long flags)
+{
+	int i;
+
+	/* Caches that are not of the two-to-the-power-of size */
+	if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1])
+		kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);
+
+	if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2])
+		kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
+
+	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
+		if (!kmalloc_caches[i])
+			kmalloc_caches[i] = create_kmalloc_cache(NULL,
+							1 << i, flags);
+
+	/* Kmalloc array is now usable */
+	slab_state = UP;
+
+	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
+		struct kmem_cache *s = kmalloc_caches[i];
+		char *n;
+
+		if (s) {
+			n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));
+
+			BUG_ON(!n);
+			s->name = n;
+		}
+	}
+
+#ifdef CONFIG_ZONE_DMA
+	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
+		struct kmem_cache *s = kmalloc_caches[i];
+
+		if (s) {
+			int size = kmalloc_size(i);
+			char *n = kasprintf(GFP_NOWAIT,
+				 "dma-kmalloc-%d", size);
+
+			BUG_ON(!n);
+			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
+				size, SLAB_CACHE_DMA | flags);
+		}
+	}
+#endif
+}
+
+
 #endif /* !CONFIG_SLOB */

diff --git a/mm/slub.c b/mm/slub.c
index 527cbfb5c49b..e813c2d30fe0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3633,7 +3633,6 @@ void __init kmem_cache_init(void)
 	static __initdata struct kmem_cache boot_kmem_cache,
 		boot_kmem_cache_node;
 	int i;
-	int caches = 2;

 	if (debug_guardpage_minorder())
 		slub_max_order = 0;
@@ -3703,64 +3702,16 @@ void __init kmem_cache_init(void)
 		size_index[size_index_elem(i)] = 8;
 	}

-	/* Caches that are not of the two-to-the-power-of size */
-	if (KMALLOC_MIN_SIZE <= 32) {
-		kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
-		caches++;
-	}
-
-	if (KMALLOC_MIN_SIZE <= 64) {
-		kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
-		caches++;
-	}
-
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
-		caches++;
-	}
-
-	slab_state = UP;
-
-	/* Provide the correct kmalloc names now that the caches are up */
-	if (KMALLOC_MIN_SIZE <= 32) {
-		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
-		BUG_ON(!kmalloc_caches[1]->name);
-	}
-
-	if (KMALLOC_MIN_SIZE <= 64) {
-		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
-		BUG_ON(!kmalloc_caches[2]->name);
-	}
-
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
-
-		BUG_ON(!s);
-		kmalloc_caches[i]->name = s;
-	}
+	create_kmalloc_caches(0);

 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
 #endif
-#ifdef CONFIG_ZONE_DMA
-	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
-		struct kmem_cache *s = kmalloc_caches[i];
-
-		if (s && s->size) {
-			char *name = kasprintf(GFP_NOWAIT,
-				 "dma-kmalloc-%d", s->object_size);
-
-			BUG_ON(!name);
-			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
-				s->object_size, SLAB_CACHE_DMA);
-		}
-	}
-#endif

 	printk(KERN_INFO
-		"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
+		"SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
 		" CPUs=%d, Nodes=%d\n",
-		caches, cache_line_size(),
+		cache_line_size(),
 		slub_min_order, slub_max_order, slub_min_objects,
 		nr_cpu_ids, nr_node_ids);
 }
--
GitLab

From 9e5e8deca74603357626471a9b44f05dea9e32b1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 10 Jan 2013 19:14:17 +0000
Subject: [PATCH 0010/3163] stat: Use size_t for sizes instead of unsigned

On some platforms (such as IA64) the large page size may result
in slab allocation sizes that do not fit in 32 bits.
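What the type change buys, in a user-space sketch: on an LP64 platform unsigned is 32 bits wide while size_t is 64, so a sufficiently large byte count is silently truncated when stored in an unsigned. The 5 GiB figure below is illustrative only, not taken from the patch:

	#include <stdio.h>
	#include <stddef.h>

	int main(void)
	{
		size_t big = (size_t)5 << 30;	/* 5 GiB fits in a 64-bit size_t */
		unsigned truncated = big;	/* silently reduced mod 2^32 */

		printf("size_t  : %zu\n", big);		/* 5368709120 */
		printf("unsigned: %u\n", truncated);	/* 1073741824 */
		return 0;
	}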
Acked-by: Glauber Costa
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 fs/proc/stat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index e296572c73ed..1cf86c0e8689 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -184,7 +184,7 @@ static int show_stat(struct seq_file *p, void *v)

 static int stat_open(struct inode *inode, struct file *file)
 {
-	unsigned size = 1024 + 128 * num_possible_cpus();
+	size_t size = 1024 + 128 * num_possible_cpus();
 	char *buf;
 	struct seq_file *m;
 	int res;
--
GitLab

From 2c59dd6544212faa5ce761920d2251f4152f408d Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 10 Jan 2013 19:14:19 +0000
Subject: [PATCH 0011/3163] slab: Common Kmalloc cache determination

Extract the optimized lookup functions from slub and put them into
slab_common.c. Then make slab use these functions as well.

Joonsoo notes that this fixes some issues with constant folding which
also reduces the code size for slub.

https://lkml.org/lkml/2012/10/20/82

Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 include/linux/slub_def.h | 41 ++++-----------
 mm/slab.c | 40 ++------------
 mm/slab.h | 3 ++
 mm/slab_common.c | 105 ++++++++++++++++++++++++++++++++++++-
 mm/slub.c | 108 ++-------------------------------------
 5 files changed, 124 insertions(+), 173 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 3701896f7f8a..16341e5316de 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -115,29 +115,6 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };

-#ifdef CONFIG_ZONE_DMA
-#define SLUB_DMA __GFP_DMA
-#else
-/* Disable DMA functionality */
-#define SLUB_DMA (__force gfp_t)0
-#endif
-
-/*
- * Find the slab cache for a given combination of allocation flags and size.
- *
- * This ought to end up with a global pointer to the right cache
- * in kmalloc_caches.
- */ -static __always_inline struct kmem_cache *kmalloc_slab(size_t size) -{ - int index = kmalloc_index(size); - - if (index == 0) - return NULL; - - return kmalloc_caches[index]; -} - void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); @@ -195,13 +172,14 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); - if (!(flags & SLUB_DMA)) { - struct kmem_cache *s = kmalloc_slab(size); + if (!(flags & GFP_DMA)) { + int index = kmalloc_index(size); - if (!s) + if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_trace(s, flags, size); + return kmem_cache_alloc_trace(kmalloc_caches[index], + flags, size); } } return __kmalloc(size, flags); @@ -228,13 +206,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && - size <= KMALLOC_MAX_CACHE_SIZE && !(flags & SLUB_DMA)) { - struct kmem_cache *s = kmalloc_slab(size); + size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { + int index = kmalloc_index(size); - if (!s) + if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node_trace(s, flags, node, size); + return kmem_cache_alloc_node_trace(kmalloc_caches[index], + flags, node, size); } return __kmalloc_node(size, flags, node); } diff --git a/mm/slab.c b/mm/slab.c index 08ba44f81a28..62629b11df38 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -656,40 +656,6 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) return cachep->array[smp_processor_id()]; } -static inline struct kmem_cache *__find_general_cachep(size_t size, - gfp_t gfpflags) -{ - int i; - -#if DEBUG - /* This happens if someone tries to call - * kmem_cache_create(), or __kmalloc(), before - * the generic caches are initialized. - */ - BUG_ON(kmalloc_caches[INDEX_AC] == NULL); -#endif - if (!size) - return ZERO_SIZE_PTR; - - i = kmalloc_index(size); - - /* - * Really subtle: The last entry with cs->cs_size==ULONG_MAX - * has cs_{dma,}cachep==NULL. Thus no special case - * for large kmalloc calls required. - */ -#ifdef CONFIG_ZONE_DMA - if (unlikely(gfpflags & GFP_DMA)) - return kmalloc_dma_caches[i]; -#endif - return kmalloc_caches[i]; -} - -static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) -{ - return __find_general_cachep(size, gfpflags); -} - static size_t slab_mgmt_size(size_t nr_objs, size_t align) { return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); @@ -2426,7 +2392,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) cachep->reciprocal_buffer_size = reciprocal_value(size); if (flags & CFLGS_OFF_SLAB) { - cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); + cachep->slabp_cache = kmalloc_slab(slab_size, 0u); /* * This is a possibility for one of the malloc_sizes caches. * But since we go off slab only for object size greater than @@ -3729,7 +3695,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) { struct kmem_cache *cachep; - cachep = kmem_find_general_cachep(size, flags); + cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; return kmem_cache_alloc_node_trace(cachep, flags, node, size); @@ -3774,7 +3740,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, * Then kmalloc uses the uninlined functions instead of the inline * functions. 
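The constant-folding benefit Joonsoo refers to is easiest to see in a small user-space analogue of the dispatch above: when the requested size is a compile-time constant, the whole index computation can be folded away and the compiler emits a direct reference to one cache. Everything here is a simplified stand-in; in particular small_index() ignores the 96/192 byte special cases the real kmalloc_index() handles:

	#include <stdio.h>
	#include <stdlib.h>

	static void *cache_alloc(int index)
	{
		printf("constant path, cache index %d\n", index);
		return malloc(1UL << index);
	}

	static void *generic_alloc(unsigned long size)
	{
		printf("runtime path\n");
		return malloc(size);
	}

	/* Round up to the next power of two and return its log2, from 2^3 up. */
	static inline int small_index(unsigned long size)
	{
		int i = 3;

		while ((1UL << i) < size)
			i++;
		return i;
	}

	static inline void *my_alloc(unsigned long size)
	{
		if (__builtin_constant_p(size) && size <= 8192)
			return cache_alloc(small_index(size)); /* folds to a constant */
		return generic_alloc(size);
	}

	int main(void)
	{
		free(my_alloc(128));	/* compiler can resolve index 7 statically */
		return 0;
	}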
*/ - cachep = __find_general_cachep(size, flags); + cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; ret = slab_alloc(cachep, flags, caller); diff --git a/mm/slab.h b/mm/slab.h index 44c0bd6dc19e..c01bc8921ac5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -38,6 +38,9 @@ unsigned long calculate_alignment(unsigned long flags, #ifndef CONFIG_SLOB /* Kmalloc array related functions */ void create_kmalloc_caches(unsigned long); + +/* Find the kmalloc slab corresponding for a certain size */ +struct kmem_cache *kmalloc_slab(size_t, gfp_t); #endif diff --git a/mm/slab_common.c b/mm/slab_common.c index 2b0ebb6d071d..6d73f0b7f21c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -327,6 +327,68 @@ struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; EXPORT_SYMBOL(kmalloc_dma_caches); #endif +/* + * Conversion table for small slabs sizes / 8 to the index in the + * kmalloc array. This is necessary for slabs < 192 since we have non power + * of two cache sizes there. The size of larger slabs can be determined using + * fls. + */ +static s8 size_index[24] = { + 3, /* 8 */ + 4, /* 16 */ + 5, /* 24 */ + 5, /* 32 */ + 6, /* 40 */ + 6, /* 48 */ + 6, /* 56 */ + 6, /* 64 */ + 1, /* 72 */ + 1, /* 80 */ + 1, /* 88 */ + 1, /* 96 */ + 7, /* 104 */ + 7, /* 112 */ + 7, /* 120 */ + 7, /* 128 */ + 2, /* 136 */ + 2, /* 144 */ + 2, /* 152 */ + 2, /* 160 */ + 2, /* 168 */ + 2, /* 176 */ + 2, /* 184 */ + 2 /* 192 */ +}; + +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + +/* + * Find the kmem_cache structure that serves a given size of + * allocation + */ +struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) +{ + int index; + + if (size <= 192) { + if (!size) + return ZERO_SIZE_PTR; + + index = size_index[size_index_elem(size)]; + } else + index = fls(size - 1); + +#ifdef CONFIG_ZONE_DMA + if (unlikely((flags & SLAB_CACHE_DMA))) + return kmalloc_dma_caches[index]; + +#endif + return kmalloc_caches[index]; +} + /* * Create the kmalloc array. Some of the regular kmalloc arrays * may already have been created because they were needed to @@ -336,6 +398,47 @@ void __init create_kmalloc_caches(unsigned long flags) { int i; + /* + * Patch up the size_index table if we have strange large alignment + * requirements for the kmalloc array. This is only the case for + * MIPS it seems. The standard arches will not generate any code here. + * + * Largest permitted alignment is 256 bytes due to the way we + * handle the index determination for the smaller caches. + * + * Make sure that nothing crazy happens if someone starts tinkering + * around with ARCH_KMALLOC_MINALIGN + */ + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || + (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); + + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } + + if (KMALLOC_MIN_SIZE >= 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + + } + + if (KMALLOC_MIN_SIZE >= 128) { + /* + * The 192 byte sized cache is not used if the alignment + * is 128 byte. Redirect kmalloc to use the 256 byte cache + * instead. 
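A worked example of the lookup this table implements, runnable in user space (the table contents and the (bytes - 1) / 8 indexing are taken verbatim from the patch): a 100 byte request maps to element (100 - 1) / 8 = 12, and size_index[12] = 7, i.e. the 128 byte cache; a 72 byte request maps to element 8 and index 1, the 96 byte cache.

	#include <stdio.h>

	static signed char size_index[24] = {
		3, 4, 5, 5, 6, 6, 6, 6, 1, 1, 1, 1,
		7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2
	};

	static int size_index_elem(unsigned long bytes)
	{
		return (bytes - 1) / 8;
	}

	int main(void)
	{
		printf("100 bytes -> index %d\n", size_index[size_index_elem(100)]); /* 7 */
		printf(" 72 bytes -> index %d\n", size_index[size_index_elem(72)]);  /* 1 */
		return 0;
	}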
+ */ + for (i = 128 + 8; i <= 192; i += 8) + size_index[size_index_elem(i)] = 8; + } /* Caches that are not of the two-to-the-power-of size */ if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1]) kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags); @@ -379,8 +482,6 @@ void __init create_kmalloc_caches(unsigned long flags) } #endif } - - #endif /* !CONFIG_SLOB */ diff --git a/mm/slub.c b/mm/slub.c index e813c2d30fe0..6184b0821f7e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2982,7 +2982,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) s->allocflags |= __GFP_COMP; if (s->flags & SLAB_CACHE_DMA) - s->allocflags |= SLUB_DMA; + s->allocflags |= GFP_DMA; if (s->flags & SLAB_RECLAIM_ACCOUNT) s->allocflags |= __GFP_RECLAIMABLE; @@ -3210,64 +3210,6 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -/* - * Conversion table for small slabs sizes / 8 to the index in the - * kmalloc array. This is necessary for slabs < 192 since we have non power - * of two cache sizes there. The size of larger slabs can be determined using - * fls. - */ -static s8 size_index[24] = { - 3, /* 8 */ - 4, /* 16 */ - 5, /* 24 */ - 5, /* 32 */ - 6, /* 40 */ - 6, /* 48 */ - 6, /* 56 */ - 6, /* 64 */ - 1, /* 72 */ - 1, /* 80 */ - 1, /* 88 */ - 1, /* 96 */ - 7, /* 104 */ - 7, /* 112 */ - 7, /* 120 */ - 7, /* 128 */ - 2, /* 136 */ - 2, /* 144 */ - 2, /* 152 */ - 2, /* 160 */ - 2, /* 168 */ - 2, /* 176 */ - 2, /* 184 */ - 2 /* 192 */ -}; - -static inline int size_index_elem(size_t bytes) -{ - return (bytes - 1) / 8; -} - -static struct kmem_cache *get_slab(size_t size, gfp_t flags) -{ - int index; - - if (size <= 192) { - if (!size) - return ZERO_SIZE_PTR; - - index = size_index[size_index_elem(size)]; - } else - index = fls(size - 1); - -#ifdef CONFIG_ZONE_DMA - if (unlikely((flags & SLUB_DMA))) - return kmalloc_dma_caches[index]; - -#endif - return kmalloc_caches[index]; -} - void *__kmalloc(size_t size, gfp_t flags) { struct kmem_cache *s; @@ -3276,7 +3218,7 @@ void *__kmalloc(size_t size, gfp_t flags) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, flags); - s = get_slab(size, flags); + s = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3319,7 +3261,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) return ret; } - s = get_slab(size, flags); + s = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3632,7 +3574,6 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; - int i; if (debug_guardpage_minorder()) slub_max_order = 0; @@ -3663,45 +3604,6 @@ void __init kmem_cache_init(void) kmem_cache_node = bootstrap(&boot_kmem_cache_node); /* Now we can use the kmem_cache to allocate kmalloc slabs */ - - /* - * Patch up the size_index table if we have strange large alignment - * requirements for the kmalloc array. This is only the case for - * MIPS it seems. The standard arches will not generate any code here. - * - * Largest permitted alignment is 256 bytes due to the way we - * handle the index determination for the smaller caches. 
- * - * Make sure that nothing crazy happens if someone starts tinkering - * around with ARCH_KMALLOC_MINALIGN - */ - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || - (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { - int elem = size_index_elem(i); - if (elem >= ARRAY_SIZE(size_index)) - break; - size_index[elem] = KMALLOC_SHIFT_LOW; - } - - if (KMALLOC_MIN_SIZE == 64) { - /* - * The 96 byte size cache is not used if the alignment - * is 64 byte. - */ - for (i = 64 + 8; i <= 96; i += 8) - size_index[size_index_elem(i)] = 7; - } else if (KMALLOC_MIN_SIZE == 128) { - /* - * The 192 byte sized cache is not used if the alignment - * is 128 byte. Redirect kmalloc to use the 256 byte cache - * instead. - */ - for (i = 128 + 8; i <= 192; i += 8) - size_index[size_index_elem(i)] = 8; - } - create_kmalloc_caches(0); #ifdef CONFIG_SMP @@ -3877,7 +3779,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return kmalloc_large(size, gfpflags); - s = get_slab(size, gfpflags); + s = kmalloc_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; @@ -3907,7 +3809,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return ret; } - s = get_slab(size, gfpflags); + s = kmalloc_slab(size, gfpflags); if (unlikely(ZERO_OR_NULL_PTR(s))) return s; -- GitLab From ce8eb6c424c794d7fb4d1a6667d267990ca28072 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: [PATCH 0012/3163] slab: Rename list3/l3 to node The list3 or l3 pointers are pointing to per node structures. Reflect that in the names of variables used. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slab.c | 516 +++++++++++++++++++++++++++--------------------------- mm/slab.h | 2 +- 2 files changed, 259 insertions(+), 259 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 62629b11df38..c162b2eb493a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -306,13 +306,13 @@ struct kmem_cache_node { * Need this for bootstrapping a per node allocator. 
*/ #define NUM_INIT_LISTS (3 * MAX_NUMNODES) -static struct kmem_cache_node __initdata initkmem_list3[NUM_INIT_LISTS]; +static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS]; #define CACHE_CACHE 0 #define SIZE_AC MAX_NUMNODES -#define SIZE_L3 (2 * MAX_NUMNODES) +#define SIZE_NODE (2 * MAX_NUMNODES) static int drain_freelist(struct kmem_cache *cache, - struct kmem_cache_node *l3, int tofree); + struct kmem_cache_node *n, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); @@ -321,9 +321,9 @@ static void cache_reap(struct work_struct *unused); static int slab_early_init = 1; #define INDEX_AC kmalloc_index(sizeof(struct arraycache_init)) -#define INDEX_L3 kmalloc_index(sizeof(struct kmem_cache_node)) +#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node)) -static void kmem_list3_init(struct kmem_cache_node *parent) +static void kmem_cache_node_init(struct kmem_cache_node *parent) { INIT_LIST_HEAD(&parent->slabs_full); INIT_LIST_HEAD(&parent->slabs_partial); @@ -538,15 +538,15 @@ static void slab_set_lock_classes(struct kmem_cache *cachep, int q) { struct array_cache **alc; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; int r; - l3 = cachep->node[q]; - if (!l3) + n = cachep->node[q]; + if (!n) return; - lockdep_set_class(&l3->list_lock, l3_key); - alc = l3->alien; + lockdep_set_class(&n->list_lock, l3_key); + alc = n->alien; /* * FIXME: This check for BAD_ALIEN_MAGIC * should go away when common slab code is taught to @@ -583,14 +583,14 @@ static void init_node_lock_keys(int q) return; for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) { - struct kmem_cache_node *l3; + struct kmem_cache_node *n; struct kmem_cache *cache = kmalloc_caches[i]; if (!cache) continue; - l3 = cache->node[q]; - if (!l3 || OFF_SLAB(cache)) + n = cache->node[q]; + if (!n || OFF_SLAB(cache)) continue; slab_set_lock_classes(cache, &on_slab_l3_key, @@ -857,29 +857,29 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp) static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { - struct kmem_cache_node *l3 = cachep->node[numa_mem_id()]; + struct kmem_cache_node *n = cachep->node[numa_mem_id()]; struct slab *slabp; unsigned long flags; if (!pfmemalloc_active) return; - spin_lock_irqsave(&l3->list_lock, flags); - list_for_each_entry(slabp, &l3->slabs_full, list) + spin_lock_irqsave(&n->list_lock, flags); + list_for_each_entry(slabp, &n->slabs_full, list) if (is_slab_pfmemalloc(slabp)) goto out; - list_for_each_entry(slabp, &l3->slabs_partial, list) + list_for_each_entry(slabp, &n->slabs_partial, list) if (is_slab_pfmemalloc(slabp)) goto out; - list_for_each_entry(slabp, &l3->slabs_free, list) + list_for_each_entry(slabp, &n->slabs_free, list) if (is_slab_pfmemalloc(slabp)) goto out; pfmemalloc_active = false; out: - spin_unlock_irqrestore(&l3->list_lock, flags); + spin_unlock_irqrestore(&n->list_lock, flags); } static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, @@ -890,7 +890,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ if (unlikely(is_obj_pfmemalloc(objp))) { - struct kmem_cache_node *l3; + struct kmem_cache_node *n; if (gfp_pfmemalloc_allowed(flags)) { clear_obj_pfmemalloc(&objp); @@ -912,8 +912,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, * If there are empty slabs on 
the slabs_free list and we are * being forced to refill the cache, mark this one !pfmemalloc. */ - l3 = cachep->node[numa_mem_id()]; - if (!list_empty(&l3->slabs_free) && force_refill) { + n = cachep->node[numa_mem_id()]; + if (!list_empty(&n->slabs_free) && force_refill) { struct slab *slabp = virt_to_slab(objp); ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); clear_obj_pfmemalloc(&objp); @@ -990,7 +990,7 @@ static int transfer_objects(struct array_cache *to, #ifndef CONFIG_NUMA #define drain_alien_cache(cachep, alien) do { } while (0) -#define reap_alien(cachep, l3) do { } while (0) +#define reap_alien(cachep, n) do { } while (0) static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { @@ -1062,33 +1062,33 @@ static void free_alien_cache(struct array_cache **ac_ptr) static void __drain_alien_cache(struct kmem_cache *cachep, struct array_cache *ac, int node) { - struct kmem_cache_node *rl3 = cachep->node[node]; + struct kmem_cache_node *n = cachep->node[node]; if (ac->avail) { - spin_lock(&rl3->list_lock); + spin_lock(&n->list_lock); /* * Stuff objects into the remote nodes shared array first. * That way we could avoid the overhead of putting the objects * into the free lists and getting them back later. */ - if (rl3->shared) - transfer_objects(rl3->shared, ac, ac->limit); + if (n->shared) + transfer_objects(n->shared, ac, ac->limit); free_block(cachep, ac->entry, ac->avail, node); ac->avail = 0; - spin_unlock(&rl3->list_lock); + spin_unlock(&n->list_lock); } } /* * Called from cache_reap() to regularly drain alien caches round robin. */ -static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *l3) +static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n) { int node = __this_cpu_read(slab_reap_node); - if (l3->alien) { - struct array_cache *ac = l3->alien[node]; + if (n->alien) { + struct array_cache *ac = n->alien[node]; if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { __drain_alien_cache(cachep, ac, node); @@ -1118,7 +1118,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) { struct slab *slabp = virt_to_slab(objp); int nodeid = slabp->nodeid; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; struct array_cache *alien = NULL; int node; @@ -1131,10 +1131,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) if (likely(slabp->nodeid == node)) return 0; - l3 = cachep->node[node]; + n = cachep->node[node]; STATS_INC_NODEFREES(cachep); - if (l3->alien && l3->alien[nodeid]) { - alien = l3->alien[nodeid]; + if (n->alien && n->alien[nodeid]) { + alien = n->alien[nodeid]; spin_lock(&alien->lock); if (unlikely(alien->avail == alien->limit)) { STATS_INC_ACOVERFLOW(cachep); @@ -1153,7 +1153,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) /* * Allocates and initializes node for a node on each slab cache, used for - * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 + * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node * will be allocated off-node since memory is not yet online for the new node. * When hotplugging memory or a cpu, existing node are not replaced if * already in use. 
@@ -1163,7 +1163,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) static int init_cache_node_node(int node) { struct kmem_cache *cachep; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; const int memsize = sizeof(struct kmem_cache_node); list_for_each_entry(cachep, &slab_caches, list) { @@ -1173,11 +1173,11 @@ static int init_cache_node_node(int node) * node has not already allocated this */ if (!cachep->node[node]) { - l3 = kmalloc_node(memsize, GFP_KERNEL, node); - if (!l3) + n = kmalloc_node(memsize, GFP_KERNEL, node); + if (!n) return -ENOMEM; - kmem_list3_init(l3); - l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + + kmem_cache_node_init(n); + n->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; /* @@ -1185,7 +1185,7 @@ static int init_cache_node_node(int node) * go. slab_mutex is sufficient * protection here. */ - cachep->node[node] = l3; + cachep->node[node] = n; } spin_lock_irq(&cachep->node[node]->list_lock); @@ -1200,7 +1200,7 @@ static int init_cache_node_node(int node) static void __cpuinit cpuup_canceled(long cpu) { struct kmem_cache *cachep; - struct kmem_cache_node *l3 = NULL; + struct kmem_cache_node *n = NULL; int node = cpu_to_mem(cpu); const struct cpumask *mask = cpumask_of_node(node); @@ -1212,34 +1212,34 @@ static void __cpuinit cpuup_canceled(long cpu) /* cpu is dead; no one can alloc from it. */ nc = cachep->array[cpu]; cachep->array[cpu] = NULL; - l3 = cachep->node[node]; + n = cachep->node[node]; - if (!l3) + if (!n) goto free_array_cache; - spin_lock_irq(&l3->list_lock); + spin_lock_irq(&n->list_lock); - /* Free limit for this kmem_list3 */ - l3->free_limit -= cachep->batchcount; + /* Free limit for this kmem_cache_node */ + n->free_limit -= cachep->batchcount; if (nc) free_block(cachep, nc->entry, nc->avail, node); if (!cpumask_empty(mask)) { - spin_unlock_irq(&l3->list_lock); + spin_unlock_irq(&n->list_lock); goto free_array_cache; } - shared = l3->shared; + shared = n->shared; if (shared) { free_block(cachep, shared->entry, shared->avail, node); - l3->shared = NULL; + n->shared = NULL; } - alien = l3->alien; - l3->alien = NULL; + alien = n->alien; + n->alien = NULL; - spin_unlock_irq(&l3->list_lock); + spin_unlock_irq(&n->list_lock); kfree(shared); if (alien) { @@ -1255,17 +1255,17 @@ static void __cpuinit cpuup_canceled(long cpu) * shrink each nodelist to its limit. */ list_for_each_entry(cachep, &slab_caches, list) { - l3 = cachep->node[node]; - if (!l3) + n = cachep->node[node]; + if (!n) continue; - drain_freelist(cachep, l3, l3->free_objects); + drain_freelist(cachep, n, n->free_objects); } } static int __cpuinit cpuup_prepare(long cpu) { struct kmem_cache *cachep; - struct kmem_cache_node *l3 = NULL; + struct kmem_cache_node *n = NULL; int node = cpu_to_mem(cpu); int err; @@ -1273,7 +1273,7 @@ static int __cpuinit cpuup_prepare(long cpu) * We need to do this right in the beginning since * alloc_arraycache's are going to use this list. 
* kmalloc_node allows us to add the slab to the right - * kmem_list3 and not this cpu's kmem_list3 + * kmem_cache_node and not this cpu's kmem_cache_node */ err = init_cache_node_node(node); if (err < 0) @@ -1310,25 +1310,25 @@ static int __cpuinit cpuup_prepare(long cpu) } } cachep->array[cpu] = nc; - l3 = cachep->node[node]; - BUG_ON(!l3); + n = cachep->node[node]; + BUG_ON(!n); - spin_lock_irq(&l3->list_lock); - if (!l3->shared) { + spin_lock_irq(&n->list_lock); + if (!n->shared) { /* * We are serialised from CPU_DEAD or * CPU_UP_CANCELLED by the cpucontrol lock */ - l3->shared = shared; + n->shared = shared; shared = NULL; } #ifdef CONFIG_NUMA - if (!l3->alien) { - l3->alien = alien; + if (!n->alien) { + n->alien = alien; alien = NULL; } #endif - spin_unlock_irq(&l3->list_lock); + spin_unlock_irq(&n->list_lock); kfree(shared); free_alien_cache(alien); if (cachep->flags & SLAB_DEBUG_OBJECTS) @@ -1383,9 +1383,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, case CPU_DEAD_FROZEN: /* * Even if all the cpus of a node are down, we don't free the - * kmem_list3 of any cache. This to avoid a race between + * kmem_cache_node of any cache. This to avoid a race between * cpu_down, and a kmalloc allocation from another cpu for - * memory from the node of the cpu going down. The list3 + * memory from the node of the cpu going down. The node * structure is usually allocated from kmem_cache_create() and * gets destroyed at kmem_cache_destroy(). */ @@ -1419,16 +1419,16 @@ static int __meminit drain_cache_node_node(int node) int ret = 0; list_for_each_entry(cachep, &slab_caches, list) { - struct kmem_cache_node *l3; + struct kmem_cache_node *n; - l3 = cachep->node[node]; - if (!l3) + n = cachep->node[node]; + if (!n) continue; - drain_freelist(cachep, l3, l3->free_objects); + drain_freelist(cachep, n, n->free_objects); - if (!list_empty(&l3->slabs_full) || - !list_empty(&l3->slabs_partial)) { + if (!list_empty(&n->slabs_full) || + !list_empty(&n->slabs_partial)) { ret = -EBUSY; break; } @@ -1470,7 +1470,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self, #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ /* - * swap the static kmem_list3 with kmalloced memory + * swap the static kmem_cache_node with kmalloced memory */ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list, int nodeid) @@ -1491,15 +1491,15 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node * } /* - * For setting up all the kmem_list3s for cache whose buffer_size is same as - * size of kmem_list3. + * For setting up all the kmem_cache_node for cache whose buffer_size is same as + * size of kmem_cache_node. 
*/ -static void __init set_up_list3s(struct kmem_cache *cachep, int index) +static void __init set_up_node(struct kmem_cache *cachep, int index) { int node; for_each_online_node(node) { - cachep->node[node] = &initkmem_list3[index + node]; + cachep->node[node] = &init_kmem_cache_node[index + node]; cachep->node[node]->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; @@ -1530,9 +1530,9 @@ void __init kmem_cache_init(void) use_alien_caches = 0; for (i = 0; i < NUM_INIT_LISTS; i++) - kmem_list3_init(&initkmem_list3[i]); + kmem_cache_node_init(&init_kmem_cache_node[i]); - set_up_list3s(kmem_cache, CACHE_CACHE); + set_up_node(kmem_cache, CACHE_CACHE); /* * Fragmentation resistance on low memory - only use bigger @@ -1548,7 +1548,7 @@ void __init kmem_cache_init(void) * kmem_cache structures of all caches, except kmem_cache itself: * kmem_cache is statically allocated. * Initially an __init data area is used for the head array and the - * kmem_list3 structures, it's replaced with a kmalloc allocated + * kmem_cache_node structures, it's replaced with a kmalloc allocated * array at the end of the bootstrap. * 2) Create the first kmalloc cache. * The struct kmem_cache for the new cache is allocated normally. @@ -1557,7 +1557,7 @@ void __init kmem_cache_init(void) * head arrays. * 4) Replace the __init data head arrays for kmem_cache and the first * kmalloc cache with kmalloc allocated arrays. - * 5) Replace the __init data for kmem_list3 for kmem_cache and + * 5) Replace the __init data for kmem_cache_node for kmem_cache and * the other cache's with kmalloc allocated memory. * 6) Resize the head arrays of the kmalloc caches to their final sizes. */ @@ -1577,17 +1577,17 @@ void __init kmem_cache_init(void) /* * Initialize the caches that provide memory for the array cache and the - * kmem_list3 structures first. Without this, further allocations will + * kmem_cache_node structures first. Without this, further allocations will * bug. 
 */
 	kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
 					kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);

-	if (INDEX_AC != INDEX_L3)
-		kmalloc_caches[INDEX_L3] =
-			create_kmalloc_cache("kmalloc-l3",
-				kmalloc_size(INDEX_L3), ARCH_KMALLOC_FLAGS);
+	if (INDEX_AC != INDEX_NODE)
+		kmalloc_caches[INDEX_NODE] =
+			create_kmalloc_cache("kmalloc-node",
+				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);

 	slab_early_init = 0;

@@ -1619,19 +1619,19 @@ void __init kmem_cache_init(void)
 		kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
 	}
-	/* 5) Replace the bootstrap kmem_list3's */
+	/* 5) Replace the bootstrap kmem_cache_node */
 	{
 		int nid;

 		for_each_online_node(nid) {
-			init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
+			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);

 			init_list(kmalloc_caches[INDEX_AC],
-				  &initkmem_list3[SIZE_AC + nid], nid);
+				  &init_kmem_cache_node[SIZE_AC + nid], nid);

-			if (INDEX_AC != INDEX_L3) {
-				init_list(kmalloc_caches[INDEX_L3],
-					  &initkmem_list3[SIZE_L3 + nid], nid);
+			if (INDEX_AC != INDEX_NODE) {
+				init_list(kmalloc_caches[INDEX_NODE],
+					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
 			}
 		}
 	}
@@ -1697,7 +1697,7 @@ __initcall(cpucache_init);
 static noinline void
 slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 {
-	struct kmem_cache_node *l3;
+	struct kmem_cache_node *n;
 	struct slab *slabp;
 	unsigned long flags;
 	int node;
@@ -1712,24 +1712,24 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 		unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
 		unsigned long active_slabs = 0, num_slabs = 0;

-		l3 = cachep->node[node];
-		if (!l3)
+		n = cachep->node[node];
+		if (!n)
 			continue;

-		spin_lock_irqsave(&l3->list_lock, flags);
-		list_for_each_entry(slabp, &l3->slabs_full, list) {
+		spin_lock_irqsave(&n->list_lock, flags);
+		list_for_each_entry(slabp, &n->slabs_full, list) {
 			active_objs += cachep->num;
 			active_slabs++;
 		}
-		list_for_each_entry(slabp, &l3->slabs_partial, list) {
+		list_for_each_entry(slabp, &n->slabs_partial, list) {
 			active_objs += slabp->inuse;
 			active_slabs++;
 		}
-		list_for_each_entry(slabp, &l3->slabs_free, list)
+		list_for_each_entry(slabp, &n->slabs_free, list)
 			num_slabs++;

-		free_objects += l3->free_objects;
-		spin_unlock_irqrestore(&l3->list_lock, flags);
+		free_objects += n->free_objects;
+		spin_unlock_irqrestore(&n->list_lock, flags);

 		num_slabs += active_slabs;
 		num_objs = num_slabs * cachep->num;
@@ -2154,7 +2154,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 	if (slab_state == DOWN) {
 		/*
 		 * Note: Creation of first cache (kmem_cache).
-		 * The setup_list3s is taken care
+		 * The setup_node is taken care
 		 * of by the caller of __kmem_cache_create
 		 */
 		cachep->array[smp_processor_id()] = &initarray_generic.cache;
@@ -2168,13 +2168,13 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 		cachep->array[smp_processor_id()] = &initarray_generic.cache;

 		/*
-		 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
-		 * the second cache, then we need to set up all its list3s,
+		 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
+		 * the second cache, then we need to set up all its per-node structures,
 		 * otherwise the creation of further caches will BUG().
*/ - set_up_list3s(cachep, SIZE_AC); - if (INDEX_AC == INDEX_L3) - slab_state = PARTIAL_L3; + set_up_node(cachep, SIZE_AC); + if (INDEX_AC == INDEX_NODE) + slab_state = PARTIAL_NODE; else slab_state = PARTIAL_ARRAYCACHE; } else { @@ -2183,8 +2183,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) kmalloc(sizeof(struct arraycache_init), gfp); if (slab_state == PARTIAL_ARRAYCACHE) { - set_up_list3s(cachep, SIZE_L3); - slab_state = PARTIAL_L3; + set_up_node(cachep, SIZE_NODE); + slab_state = PARTIAL_NODE; } else { int node; for_each_online_node(node) { @@ -2192,7 +2192,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); BUG_ON(!cachep->node[node]); - kmem_list3_init(cachep->node[node]); + kmem_cache_node_init(cachep->node[node]); } } } @@ -2322,7 +2322,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= kmalloc_size(INDEX_L3 + 1) + if (size >= kmalloc_size(INDEX_NODE + 1) && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); size = PAGE_SIZE; @@ -2457,7 +2457,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) #define check_spinlock_acquired_node(x, y) do { } while(0) #endif -static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, +static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, struct array_cache *ac, int force, int node); @@ -2477,21 +2477,21 @@ static void do_drain(void *arg) static void drain_cpu_caches(struct kmem_cache *cachep) { - struct kmem_cache_node *l3; + struct kmem_cache_node *n; int node; on_each_cpu(do_drain, cachep, 1); check_irq_on(); for_each_online_node(node) { - l3 = cachep->node[node]; - if (l3 && l3->alien) - drain_alien_cache(cachep, l3->alien); + n = cachep->node[node]; + if (n && n->alien) + drain_alien_cache(cachep, n->alien); } for_each_online_node(node) { - l3 = cachep->node[node]; - if (l3) - drain_array(cachep, l3, l3->shared, 1, node); + n = cachep->node[node]; + if (n) + drain_array(cachep, n, n->shared, 1, node); } } @@ -2502,19 +2502,19 @@ static void drain_cpu_caches(struct kmem_cache *cachep) * Returns the actual number of slabs released. */ static int drain_freelist(struct kmem_cache *cache, - struct kmem_cache_node *l3, int tofree) + struct kmem_cache_node *n, int tofree) { struct list_head *p; int nr_freed; struct slab *slabp; nr_freed = 0; - while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { + while (nr_freed < tofree && !list_empty(&n->slabs_free)) { - spin_lock_irq(&l3->list_lock); - p = l3->slabs_free.prev; - if (p == &l3->slabs_free) { - spin_unlock_irq(&l3->list_lock); + spin_lock_irq(&n->list_lock); + p = n->slabs_free.prev; + if (p == &n->slabs_free) { + spin_unlock_irq(&n->list_lock); goto out; } @@ -2527,8 +2527,8 @@ static int drain_freelist(struct kmem_cache *cache, * Safe to drop the lock. The slab is no longer linked * to the cache. 
*/ - l3->free_objects -= cache->num; - spin_unlock_irq(&l3->list_lock); + n->free_objects -= cache->num; + spin_unlock_irq(&n->list_lock); slab_destroy(cache, slabp); nr_freed++; } @@ -2540,20 +2540,20 @@ static int drain_freelist(struct kmem_cache *cache, static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; drain_cpu_caches(cachep); check_irq_on(); for_each_online_node(i) { - l3 = cachep->node[i]; - if (!l3) + n = cachep->node[i]; + if (!n) continue; - drain_freelist(cachep, l3, l3->free_objects); + drain_freelist(cachep, n, n->free_objects); - ret += !list_empty(&l3->slabs_full) || - !list_empty(&l3->slabs_partial); + ret += !list_empty(&n->slabs_full) || + !list_empty(&n->slabs_partial); } return (ret ? 1 : 0); } @@ -2582,7 +2582,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); int __kmem_cache_shutdown(struct kmem_cache *cachep) { int i; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; int rc = __cache_shrink(cachep); if (rc) @@ -2591,13 +2591,13 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) for_each_online_cpu(i) kfree(cachep->array[i]); - /* NUMA: free the list3 structures */ + /* NUMA: free the node structures */ for_each_online_node(i) { - l3 = cachep->node[i]; - if (l3) { - kfree(l3->shared); - free_alien_cache(l3->alien); - kfree(l3); + n = cachep->node[i]; + if (n) { + kfree(n->shared); + free_alien_cache(n->alien); + kfree(n); } } return 0; @@ -2779,7 +2779,7 @@ static int cache_grow(struct kmem_cache *cachep, struct slab *slabp; size_t offset; gfp_t local_flags; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; /* * Be lazy and only check for valid flags here, keeping it out of the @@ -2788,17 +2788,17 @@ static int cache_grow(struct kmem_cache *cachep, BUG_ON(flags & GFP_SLAB_BUG_MASK); local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); - /* Take the l3 list lock to change the colour_next on this node */ + /* Take the node list lock to change the colour_next on this node */ check_irq_off(); - l3 = cachep->node[nodeid]; - spin_lock(&l3->list_lock); + n = cachep->node[nodeid]; + spin_lock(&n->list_lock); /* Get colour for the slab, and cal the next value. */ - offset = l3->colour_next; - l3->colour_next++; - if (l3->colour_next >= cachep->colour) - l3->colour_next = 0; - spin_unlock(&l3->list_lock); + offset = n->colour_next; + n->colour_next++; + if (n->colour_next >= cachep->colour) + n->colour_next = 0; + spin_unlock(&n->list_lock); offset *= cachep->colour_off; @@ -2835,13 +2835,13 @@ static int cache_grow(struct kmem_cache *cachep, if (local_flags & __GFP_WAIT) local_irq_disable(); check_irq_off(); - spin_lock(&l3->list_lock); + spin_lock(&n->list_lock); /* Make slab active. 
*/ - list_add_tail(&slabp->list, &(l3->slabs_free)); + list_add_tail(&slabp->list, &(n->slabs_free)); STATS_INC_GROWN(cachep); - l3->free_objects += cachep->num; - spin_unlock(&l3->list_lock); + n->free_objects += cachep->num; + spin_unlock(&n->list_lock); return 1; opps1: kmem_freepages(cachep, objp); @@ -2969,7 +2969,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, bool force_refill) { int batchcount; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; struct array_cache *ac; int node; @@ -2988,14 +2988,14 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, */ batchcount = BATCHREFILL_LIMIT; } - l3 = cachep->node[node]; + n = cachep->node[node]; - BUG_ON(ac->avail > 0 || !l3); - spin_lock(&l3->list_lock); + BUG_ON(ac->avail > 0 || !n); + spin_lock(&n->list_lock); /* See if we can refill from the shared array */ - if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) { - l3->shared->touched = 1; + if (n->shared && transfer_objects(ac, n->shared, batchcount)) { + n->shared->touched = 1; goto alloc_done; } @@ -3003,11 +3003,11 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, struct list_head *entry; struct slab *slabp; /* Get slab alloc is to come from. */ - entry = l3->slabs_partial.next; - if (entry == &l3->slabs_partial) { - l3->free_touched = 1; - entry = l3->slabs_free.next; - if (entry == &l3->slabs_free) + entry = n->slabs_partial.next; + if (entry == &n->slabs_partial) { + n->free_touched = 1; + entry = n->slabs_free.next; + if (entry == &n->slabs_free) goto must_grow; } @@ -3035,15 +3035,15 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, /* move slabp to correct slabp list: */ list_del(&slabp->list); if (slabp->free == BUFCTL_END) - list_add(&slabp->list, &l3->slabs_full); + list_add(&slabp->list, &n->slabs_full); else - list_add(&slabp->list, &l3->slabs_partial); + list_add(&slabp->list, &n->slabs_partial); } must_grow: - l3->free_objects -= ac->avail; + n->free_objects -= ac->avail; alloc_done: - spin_unlock(&l3->list_lock); + spin_unlock(&n->list_lock); if (unlikely(!ac->avail)) { int x; @@ -3301,21 +3301,21 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, { struct list_head *entry; struct slab *slabp; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; void *obj; int x; - l3 = cachep->node[nodeid]; - BUG_ON(!l3); + n = cachep->node[nodeid]; + BUG_ON(!n); retry: check_irq_off(); - spin_lock(&l3->list_lock); - entry = l3->slabs_partial.next; - if (entry == &l3->slabs_partial) { - l3->free_touched = 1; - entry = l3->slabs_free.next; - if (entry == &l3->slabs_free) + spin_lock(&n->list_lock); + entry = n->slabs_partial.next; + if (entry == &n->slabs_partial) { + n->free_touched = 1; + entry = n->slabs_free.next; + if (entry == &n->slabs_free) goto must_grow; } @@ -3331,20 +3331,20 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, obj = slab_get_obj(cachep, slabp, nodeid); check_slabp(cachep, slabp); - l3->free_objects--; + n->free_objects--; /* move slabp to correct slabp list: */ list_del(&slabp->list); if (slabp->free == BUFCTL_END) - list_add(&slabp->list, &l3->slabs_full); + list_add(&slabp->list, &n->slabs_full); else - list_add(&slabp->list, &l3->slabs_partial); + list_add(&slabp->list, &n->slabs_partial); - spin_unlock(&l3->list_lock); + spin_unlock(&n->list_lock); goto done; must_grow: - spin_unlock(&l3->list_lock); + spin_unlock(&n->list_lock); x = cache_grow(cachep, flags | GFP_THISNODE, 
nodeid, NULL); if (x) goto retry; @@ -3496,7 +3496,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, int node) { int i; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; for (i = 0; i < nr_objects; i++) { void *objp; @@ -3506,19 +3506,19 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, objp = objpp[i]; slabp = virt_to_slab(objp); - l3 = cachep->node[node]; + n = cachep->node[node]; list_del(&slabp->list); check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); slab_put_obj(cachep, slabp, objp, node); STATS_DEC_ACTIVE(cachep); - l3->free_objects++; + n->free_objects++; check_slabp(cachep, slabp); /* fixup slab chains */ if (slabp->inuse == 0) { - if (l3->free_objects > l3->free_limit) { - l3->free_objects -= cachep->num; + if (n->free_objects > n->free_limit) { + n->free_objects -= cachep->num; /* No need to drop any previously held * lock here, even if we have a off-slab slab * descriptor it is guaranteed to come from @@ -3527,14 +3527,14 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, */ slab_destroy(cachep, slabp); } else { - list_add(&slabp->list, &l3->slabs_free); + list_add(&slabp->list, &n->slabs_free); } } else { /* Unconditionally move a slab to the end of the * partial list on free - maximum time for the * other objects to be freed, too. */ - list_add_tail(&slabp->list, &l3->slabs_partial); + list_add_tail(&slabp->list, &n->slabs_partial); } } } @@ -3542,7 +3542,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) { int batchcount; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; int node = numa_mem_id(); batchcount = ac->batchcount; @@ -3550,10 +3550,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) BUG_ON(!batchcount || batchcount > ac->avail); #endif check_irq_off(); - l3 = cachep->node[node]; - spin_lock(&l3->list_lock); - if (l3->shared) { - struct array_cache *shared_array = l3->shared; + n = cachep->node[node]; + spin_lock(&n->list_lock); + if (n->shared) { + struct array_cache *shared_array = n->shared; int max = shared_array->limit - shared_array->avail; if (max) { if (batchcount > max) @@ -3572,8 +3572,8 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) int i = 0; struct list_head *p; - p = l3->slabs_free.next; - while (p != &(l3->slabs_free)) { + p = n->slabs_free.next; + while (p != &(n->slabs_free)) { struct slab *slabp; slabp = list_entry(p, struct slab, list); @@ -3585,7 +3585,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) STATS_SET_FREEABLE(cachep, i); } #endif - spin_unlock(&l3->list_lock); + spin_unlock(&n->list_lock); ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); } @@ -3829,12 +3829,12 @@ void kfree(const void *objp) EXPORT_SYMBOL(kfree); /* - * This initializes kmem_list3 or resizes various caches for all nodes. + * This initializes kmem_cache_node or resizes various caches for all nodes. 
*/ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) { int node; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; struct array_cache *new_shared; struct array_cache **new_alien = NULL; @@ -3857,43 +3857,43 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) } } - l3 = cachep->node[node]; - if (l3) { - struct array_cache *shared = l3->shared; + n = cachep->node[node]; + if (n) { + struct array_cache *shared = n->shared; - spin_lock_irq(&l3->list_lock); + spin_lock_irq(&n->list_lock); if (shared) free_block(cachep, shared->entry, shared->avail, node); - l3->shared = new_shared; - if (!l3->alien) { - l3->alien = new_alien; + n->shared = new_shared; + if (!n->alien) { + n->alien = new_alien; new_alien = NULL; } - l3->free_limit = (1 + nr_cpus_node(node)) * + n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&l3->list_lock); + spin_unlock_irq(&n->list_lock); kfree(shared); free_alien_cache(new_alien); continue; } - l3 = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); - if (!l3) { + n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node); + if (!n) { free_alien_cache(new_alien); kfree(new_shared); goto fail; } - kmem_list3_init(l3); - l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + + kmem_cache_node_init(n); + n->next_reap = jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - l3->shared = new_shared; - l3->alien = new_alien; - l3->free_limit = (1 + nr_cpus_node(node)) * + n->shared = new_shared; + n->alien = new_alien; + n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - cachep->node[node] = l3; + cachep->node[node] = n; } return 0; @@ -3903,11 +3903,11 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) node--; while (node >= 0) { if (cachep->node[node]) { - l3 = cachep->node[node]; + n = cachep->node[node]; - kfree(l3->shared); - free_alien_cache(l3->alien); - kfree(l3); + kfree(n->shared); + free_alien_cache(n->alien); + kfree(n); cachep->node[node] = NULL; } node--; @@ -4071,11 +4071,11 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) } /* - * Drain an array if it contains any elements taking the l3 lock only if - * necessary. Note that the l3 listlock also protects the array_cache + * Drain an array if it contains any elements taking the node lock only if + * necessary. Note that the node listlock also protects the array_cache * if drain_array() is used on the shared array. */ -static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, +static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, struct array_cache *ac, int force, int node) { int tofree; @@ -4085,7 +4085,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, if (ac->touched && !force) { ac->touched = 0; } else { - spin_lock_irq(&l3->list_lock); + spin_lock_irq(&n->list_lock); if (ac->avail) { tofree = force ? 
ac->avail : (ac->limit + 4) / 5; if (tofree > ac->avail) @@ -4095,7 +4095,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); } - spin_unlock_irq(&l3->list_lock); + spin_unlock_irq(&n->list_lock); } } @@ -4114,7 +4114,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *l3, static void cache_reap(struct work_struct *w) { struct kmem_cache *searchp; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; int node = numa_mem_id(); struct delayed_work *work = to_delayed_work(w); @@ -4126,33 +4126,33 @@ static void cache_reap(struct work_struct *w) check_irq_on(); /* - * We only take the l3 lock if absolutely necessary and we + * We only take the node lock if absolutely necessary and we * have established with reasonable certainty that * we can do some work if the lock was obtained. */ - l3 = searchp->node[node]; + n = searchp->node[node]; - reap_alien(searchp, l3); + reap_alien(searchp, n); - drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); + drain_array(searchp, n, cpu_cache_get(searchp), 0, node); /* * These are racy checks but it does not matter * if we skip one check or scan twice. */ - if (time_after(l3->next_reap, jiffies)) + if (time_after(n->next_reap, jiffies)) goto next; - l3->next_reap = jiffies + REAPTIMEOUT_LIST3; + n->next_reap = jiffies + REAPTIMEOUT_LIST3; - drain_array(searchp, l3, l3->shared, 0, node); + drain_array(searchp, n, n->shared, 0, node); - if (l3->free_touched) - l3->free_touched = 0; + if (n->free_touched) + n->free_touched = 0; else { int freed; - freed = drain_freelist(searchp, l3, (l3->free_limit + + freed = drain_freelist(searchp, n, (n->free_limit + 5 * searchp->num - 1) / (5 * searchp->num)); STATS_ADD_REAPED(searchp, freed); } @@ -4178,25 +4178,25 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) const char *name; char *error = NULL; int node; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; active_objs = 0; num_slabs = 0; for_each_online_node(node) { - l3 = cachep->node[node]; - if (!l3) + n = cachep->node[node]; + if (!n) continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + spin_lock_irq(&n->list_lock); - list_for_each_entry(slabp, &l3->slabs_full, list) { + list_for_each_entry(slabp, &n->slabs_full, list) { if (slabp->inuse != cachep->num && !error) error = "slabs_full accounting error"; active_objs += cachep->num; active_slabs++; } - list_for_each_entry(slabp, &l3->slabs_partial, list) { + list_for_each_entry(slabp, &n->slabs_partial, list) { if (slabp->inuse == cachep->num && !error) error = "slabs_partial inuse accounting error"; if (!slabp->inuse && !error) @@ -4204,16 +4204,16 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) active_objs += slabp->inuse; active_slabs++; } - list_for_each_entry(slabp, &l3->slabs_free, list) { + list_for_each_entry(slabp, &n->slabs_free, list) { if (slabp->inuse && !error) error = "slabs_free/inuse accounting error"; num_slabs++; } - free_objects += l3->free_objects; - if (l3->shared) - shared_avail += l3->shared->avail; + free_objects += n->free_objects; + if (n->shared) + shared_avail += n->shared->avail; - spin_unlock_irq(&l3->list_lock); + spin_unlock_irq(&n->list_lock); } num_slabs += active_slabs; num_objs = num_slabs * cachep->num; @@ -4239,7 +4239,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep) { #if STATS - { /* 
list3 stats */ + { /* node stats */ unsigned long high = cachep->high_mark; unsigned long allocs = cachep->num_allocations; unsigned long grown = cachep->grown; @@ -4392,7 +4392,7 @@ static int leaks_show(struct seq_file *m, void *p) { struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); struct slab *slabp; - struct kmem_cache_node *l3; + struct kmem_cache_node *n; const char *name; unsigned long *n = m->private; int node; @@ -4408,18 +4408,18 @@ static int leaks_show(struct seq_file *m, void *p) n[1] = 0; for_each_online_node(node) { - l3 = cachep->node[node]; - if (!l3) + n = cachep->node[node]; + if (!n) continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + spin_lock_irq(&n->list_lock); - list_for_each_entry(slabp, &l3->slabs_full, list) + list_for_each_entry(slabp, &n->slabs_full, list) handle_slab(n, cachep, slabp); - list_for_each_entry(slabp, &l3->slabs_partial, list) + list_for_each_entry(slabp, &n->slabs_partial, list) handle_slab(n, cachep, slabp); - spin_unlock_irq(&l3->list_lock); + spin_unlock_irq(&n->list_lock); } name = cachep->name; if (n[0] == n[1]) { diff --git a/mm/slab.h b/mm/slab.h index c01bc8921ac5..f0a552ff7b9b 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -16,7 +16,7 @@ enum slab_state { DOWN, /* No slab functionality yet */ PARTIAL, /* SLUB: kmem_cache_node available */ PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */ - PARTIAL_L3, /* SLAB: kmalloc size for l3 struct available */ + PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */ UP, /* Slab caches usable but not all extras yet */ FULL /* Everything is working */ }; -- GitLab From ca34956b804b7554fc4e88826773380d9d5122a8 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 10 Jan 2013 19:14:19 +0000 Subject: [PATCH 0013/3163] slab: Common definition for kmem_cache_node Put the definitions for the kmem_cache_node structures together so that we have one structure. That will allow us to create more common fields in the future which could yield more opportunities to share code. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 11 ----------- mm/slab.c | 17 ----------------- mm/slab.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 16341e5316de..027276fa8713 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -53,17 +53,6 @@ struct kmem_cache_cpu { #endif }; -struct kmem_cache_node { - spinlock_t list_lock; /* Protect partial list and nr_partial */ - unsigned long nr_partial; - struct list_head partial; -#ifdef CONFIG_SLUB_DEBUG - atomic_long_t nr_slabs; - atomic_long_t total_objects; - struct list_head full; -#endif -}; - /* * Word size structure that can be atomically updated or read and that * contains both the order and the number of objects that a slab of the diff --git a/mm/slab.c b/mm/slab.c index c162b2eb493a..17f859614546 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -285,23 +285,6 @@ struct arraycache_init { void *entries[BOOT_CPUCACHE_ENTRIES]; }; -/* - * The slab lists for all objects. 
- */ -struct kmem_cache_node { - struct list_head slabs_partial; /* partial list first, better asm code */ - struct list_head slabs_full; - struct list_head slabs_free; - unsigned long free_objects; - unsigned int free_limit; - unsigned int colour_next; /* Per-node cache coloring */ - spinlock_t list_lock; - struct array_cache *shared; /* shared per node */ - struct array_cache **alien; /* on other nodes */ - unsigned long next_reap; /* updated without locking */ - int free_touched; /* updated without locking */ -}; - /* * Need this for bootstrapping a per node allocator. */ diff --git a/mm/slab.h b/mm/slab.h index f0a552ff7b9b..f96b49e4704e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -239,3 +239,35 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) return s; } #endif + + +/* + * The slab lists for all objects. + */ +struct kmem_cache_node { + spinlock_t list_lock; + +#ifdef CONFIG_SLAB + struct list_head slabs_partial; /* partial list first, better asm code */ + struct list_head slabs_full; + struct list_head slabs_free; + unsigned long free_objects; + unsigned int free_limit; + unsigned int colour_next; /* Per-node cache coloring */ + struct array_cache *shared; /* shared per node */ + struct array_cache **alien; /* on other nodes */ + unsigned long next_reap; /* updated without locking */ + int free_touched; /* updated without locking */ +#endif + +#ifdef CONFIG_SLUB + unsigned long nr_partial; + struct list_head partial; +#ifdef CONFIG_SLUB_DEBUG + atomic_long_t nr_slabs; + atomic_long_t total_objects; + struct list_head full; +#endif +#endif + +}; -- GitLab From c601fd6956e92b0eb268d4af754073c76155b99d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Feb 2013 16:36:47 +0000 Subject: [PATCH 0014/3163] slab: Handle ARCH_DMA_MINALIGN correctly James Hogan hit boot problems in next-20130204 on Meta: META213-Thread0 DSP [LogF] kobject (4fc03980): tried to init an initialized object, something is seriously wrong. META213-Thread0 DSP [LogF] META213-Thread0 DSP [LogF] Call trace: META213-Thread0 DSP [LogF] [<4000888c>] _show_stack+0x68/0x7c META213-Thread0 DSP [LogF] [<400088b4>] _dump_stack+0x14/0x28 META213-Thread0 DSP [LogF] [<40103794>] _kobject_init+0x58/0x9c META213-Thread0 DSP [LogF] [<40103810>] _kobject_create+0x38/0x64 META213-Thread0 DSP [LogF] [<40103eac>] _kobject_create_and_add+0x14/0x8c META213-Thread0 DSP [LogF] [<40190ac4>] _mnt_init+0xd8/0x220 META213-Thread0 DSP [LogF] [<40190508>] _vfs_caches_init+0xb0/0x160 META213-Thread0 DSP [LogF] [<401851f4>] _start_kernel+0x274/0x340 META213-Thread0 DSP [LogF] [<40188424>] _metag_start_kernel+0x58/0x6c META213-Thread0 DSP [LogF] [<40000044>] __start+0x44/0x48 META213-Thread0 DSP [LogF] META213-Thread0 DSP [LogF] devtmpfs: initialized META213-Thread0 DSP [LogF] L2 Cache: Not present META213-Thread0 DSP [LogF] BUG: failure at fs/sysfs/dir.c:736/sysfs_read_ns_type()! META213-Thread0 DSP [LogF] Kernel panic - not syncing: BUG! META213-Thread0 DSP [Thread Exit] Thread has exited - return code = 4294967295 And bisected the problem to commit 95a05b4 ("slab: Common constants for kmalloc boundaries"). As it turns out, a fixed KMALLOC_SHIFT_LOW does not work for arches with higher alignment requirements. Determine KMALLOC_SHIFT_LOW from ARCH_DMA_MINALIGN instead. 
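As an illustration (not part of the original patch), the derivation can be checked with a small standalone C program. ARCH_DMA_MINALIGN is given a made-up example value of 64 here, and ilog2u() is a hypothetical stand-in for the kernel's ilog2():

    #include <stdio.h>

    /* Made-up example value; a real arch defines this in its headers. */
    #define ARCH_DMA_MINALIGN 64

    /* Hypothetical stand-in for the kernel's ilog2() on a power of two. */
    static unsigned int ilog2u(unsigned long x)
    {
            unsigned int shift = 0;

            while (x >>= 1)
                    shift++;
            return shift;
    }

    int main(void)
    {
            unsigned int kmalloc_shift_low = ilog2u(ARCH_DMA_MINALIGN);

            /* With ARCH_DMA_MINALIGN == 64: shift 6, minimum size 64 bytes. */
            printf("KMALLOC_SHIFT_LOW = %u, KMALLOC_MIN_SIZE = %lu\n",
                   kmalloc_shift_low, 1UL << kmalloc_shift_low);
            return 0;
    }

With that example value, KMALLOC_SHIFT_LOW becomes 6 and the smallest kmalloc cache is 64 bytes, so the minimum cache size honors the DMA alignment guarantee the arch asked for.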
Reported-and-tested-by: James Hogan Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index f2327a898a85..0c621752caa6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -133,6 +133,19 @@ void kfree(const void *); void kzfree(const void *); size_t ksize(const void *); +/* + * Some archs want to perform DMA into kmalloc caches and need a guaranteed + * alignment larger than the alignment of a 64-bit integer. + * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. + */ +#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 +#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN +#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN +#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN) +#else +#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +#endif + #ifdef CONFIG_SLOB /* * Common fields provided in kmem_cache by all slab allocators @@ -179,7 +192,9 @@ struct kmem_cache { #define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ (MAX_ORDER + PAGE_SHIFT - 1) : 25) #define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH +#ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 +#endif #else /* * SLUB allocates up to order 2 pages directly and otherwise @@ -187,8 +202,10 @@ struct kmem_cache { */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif +#endif /* Maximum allocatable size */ #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) @@ -200,9 +217,7 @@ struct kmem_cache { /* * Kmalloc subsystem. */ -#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8 -#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN -#else +#ifndef KMALLOC_MIN_SIZE #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif @@ -289,17 +304,6 @@ static __always_inline int kmalloc_size(int n) } #endif /* !CONFIG_SLOB */ -/* - * Some archs want to perform DMA into kmalloc caches and need a guaranteed - * alignment larger than the alignment of a 64-bit integer. - * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that. - */ -#ifdef ARCH_DMA_MINALIGN -#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN -#else -#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -#endif - /* * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. * Intended for arches that get misalignment faults even for 64 bit integer -- GitLab From db8450673458e724229e91050ac9a92253b01234 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Feb 2013 18:45:23 +0000 Subject: [PATCH 0015/3163] slab: Fixup CONFIG_PAGE_ALLOC/DEBUG_SLAB_LEAK sections Variables were not properly converted and the conversion caused a naming conflict. 
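For readers skimming the diff below: the conflict reduces to two local variables wanting the same name in one scope. A contrived, compilable sketch of the problem (the identifiers here are invented for illustration):

    #include <stdio.h>

    struct kmem_cache_node { int free_objects; };

    int main(void)
    {
            unsigned long stats[2] = { 0, 0 };
            /* Before the fix, both of these wanted to be called 'n': */
            unsigned long *x = stats;           /* per-file stats buffer */
            struct kmem_cache_node *n = NULL;   /* per-node cache state  */

            /* Declaring both as 'n' in one scope does not compile, which
             * is why the stats buffer is renamed to 'x' in the patch. */
            (void)n;
            printf("%lu\n", x[0]);
            return 0;
    }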
Signed-off-by: Tetsuo Handa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slab.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 17f859614546..910df6326831 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2306,8 +2306,9 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) if (size >= kmalloc_size(INDEX_NODE + 1) - && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { - cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); + && cachep->object_size > cache_line_size() + && ALIGN(size, cachep->align) < PAGE_SIZE) { + cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); size = PAGE_SIZE; } #endif @@ -4377,7 +4378,7 @@ static int leaks_show(struct seq_file *m, void *p) struct slab *slabp; struct kmem_cache_node *n; const char *name; - unsigned long *n = m->private; + unsigned long *x = m->private; int node; int i; @@ -4388,7 +4389,7 @@ static int leaks_show(struct seq_file *m, void *p) /* OK, we can do it */ - n[1] = 0; + x[1] = 0; for_each_online_node(node) { n = cachep->node[node]; @@ -4399,32 +4400,32 @@ static int leaks_show(struct seq_file *m, void *p) spin_lock_irq(&n->list_lock); list_for_each_entry(slabp, &n->slabs_full, list) - handle_slab(n, cachep, slabp); + handle_slab(x, cachep, slabp); list_for_each_entry(slabp, &n->slabs_partial, list) - handle_slab(n, cachep, slabp); + handle_slab(x, cachep, slabp); spin_unlock_irq(&n->list_lock); } name = cachep->name; - if (n[0] == n[1]) { + if (x[0] == x[1]) { /* Increase the buffer size */ mutex_unlock(&slab_mutex); - m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); + m->private = kzalloc(x[0] * 4 * sizeof(unsigned long), GFP_KERNEL); if (!m->private) { /* Too bad, we are really out */ - m->private = n; + m->private = x; mutex_lock(&slab_mutex); return -ENOMEM; } - *(unsigned long *)m->private = n[0] * 2; - kfree(n); + *(unsigned long *)m->private = x[0] * 2; + kfree(x); mutex_lock(&slab_mutex); /* Now make sure this entry will be retried */ m->count = m->size; return 0; } - for (i = 0; i < n[1]; i++) { - seq_printf(m, "%s: %lu ", name, n[2*i+3]); - show_symbol(m, n[2*i+2]); + for (i = 0; i < x[1]; i++) { + seq_printf(m, "%s: %lu ", name, x[2*i+3]); + show_symbol(m, x[2*i+2]); seq_putc(m, '\n'); } -- GitLab From b1e0541674904db00e17ce646b0a1d54f59136a5 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 4 Feb 2013 23:46:46 +0900 Subject: [PATCH 0016/3163] mm/sl[au]b: correct allocation type check in kmalloc_slab() commit "slab: Common Kmalloc cache determination" made a mistake in kmalloc_slab(). SLAB_CACHE_DMA is for kmem_cache creation, not for allocation. For allocation, we should use GFP_XXX to identify the type of allocation. So, change SLAB_CACHE_DMA to GFP_DMA.
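To make the flag-namespace distinction concrete, here is a standalone sketch (not part of the patch); the bit values below are illustrative only and do not match the kernel's real definitions:

    #include <stdio.h>

    /* Illustrative bit values only; not the kernel's real definitions. */
    #define SLAB_CACHE_DMA  0x00004000UL    /* kmem_cache creation flag */
    #define GFP_DMA         0x00000001UL    /* per-allocation flag */

    /* Sketch of the corrected decision in kmalloc_slab(): the function is
     * handed GFP flags, so it must test GFP_DMA; SLAB_CACHE_DMA lives in a
     * different flag namespace entirely. */
    static const char *pick_cache_array(unsigned long gfp_flags)
    {
            if (gfp_flags & GFP_DMA)
                    return "kmalloc_dma_caches";
            return "kmalloc_caches";
    }

    int main(void)
    {
            printf("%s\n", pick_cache_array(GFP_DMA)); /* kmalloc_dma_caches */
            printf("%s\n", pick_cache_array(0));       /* kmalloc_caches */
            return 0;
    }

The design point is that kmalloc_slab() sits on the allocation path, so the request's GFP mask, not the cache's creation flags, is the only meaningful input.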
Acked-by: Christoph Lameter Reported-by: Fengguang Wu Signed-off-by: Joonsoo Kim Signed-off-by: Pekka Enberg --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 6d73f0b7f21c..2f0e7d5976cb 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -382,7 +382,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) index = fls(size - 1); #ifdef CONFIG_ZONE_DMA - if (unlikely((flags & SLAB_CACHE_DMA))) + if (unlikely((flags & GFP_DMA))) return kmalloc_dma_caches[index]; #endif -- GitLab From 908a7bdd6adba3dfd35d8a74a48aed90593de178 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 18 Feb 2013 11:21:16 +0100 Subject: [PATCH 0017/3163] KVM: nVMX: Improve I/O exit handling This prevents trapping L2 I/O exits if L1 has neither unconditional nor bitmap-based exiting enabled. Furthermore, it implements I/O bitmap handling. Reviewed-by: Gleb Natapov Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6667042714cc..b4ce43c82748 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5908,6 +5908,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + gpa_t bitmap, last_bitmap; + unsigned int port; + int size; + u8 b; + + if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) + return 1; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return 0; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + last_bitmap = (gpa_t)-1; + b = -1; + + while (size > 0) { + if (port < 0x8000) + bitmap = vmcs12->io_bitmap_a; + else if (port < 0x10000) + bitmap = vmcs12->io_bitmap_b; + else + return 1; + bitmap += (port & 0x7fff) / 8; + + if (last_bitmap != bitmap) + if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) + return 1; + if (b & (1 << (port & 7))) + return 1; + + port++; + size--; + last_bitmap = bitmap; + } + + return 0; +} + /* * Return 1 if we should exit from L2 to L1 to handle an MSR access access, * rather than handle it ourselves in L0. I.e., check whether L1 expressed @@ -6097,8 +6143,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_DR_ACCESS: return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); case EXIT_REASON_IO_INSTRUCTION: - /* TODO: support IO bitmaps */ - return 1; + return nested_vmx_exit_handled_io(vcpu, vmcs12); case EXIT_REASON_MSR_READ: case EXIT_REASON_MSR_WRITE: return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); -- GitLab From bd31a7f55777a46063ebe3d50e6536cfff51a757 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 14 Feb 2013 19:46:27 +0100 Subject: [PATCH 0018/3163] KVM: nVMX: Trap unconditionally if msr bitmap access fails This avoids basing decisions on uninitialized variables, potentially leaking kernel data to the L1 guest. 
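The underlying pattern is general: a read that can fail must not leave its caller consuming an uninitialized buffer. A minimal sketch with a hypothetical read_guest() helper standing in for kvm_read_guest():

    #include <stdio.h>

    /* Hypothetical stand-in for kvm_read_guest(): returns nonzero on failure
     * and may leave *out completely untouched in that case. */
    static int read_guest(unsigned long gpa, unsigned char *out)
    {
            (void)gpa;
            (void)out;
            return 1;       /* simulate a failed guest-memory access */
    }

    int main(void)
    {
            unsigned char b;        /* deliberately uninitialized */

            /* Wrong: ignoring the return value and then testing bits of 'b'
             * bases the trap decision on stack garbage. Right, as in the
             * patch: on failure, trap unconditionally and never look at 'b'. */
            if (read_guest(0x1000, &b))
                    return 1;

            printf("bit: %d\n", b & 1);
            return 0;
    }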
Reviewed-by: Gleb Natapov Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b4ce43c82748..c1b3041a1f86 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5985,7 +5985,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, /* Then read the msr_index'th bit from this bitmap: */ if (msr_index < 1024*8) { unsigned char b; - kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1); + if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) + return 1; return 1 & (b >> (msr_index & 7)); } else return 1; /* let L1 handle the wrong parameter */ -- GitLab From fe1140cc369410a9c206fdb7aaabc644bd213dc2 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 23 Feb 2013 17:05:29 +0100 Subject: [PATCH 0019/3163] x86: kvmclock: Do not setup kvmclock vsyscall in the absence of that clock This fixes boot lockups with "no-kvmclock", when the host is not exposing this particular feature (QEMU: -cpu ...,-kvmclock) or when the kvmclock initialization failed for whatever reason. Reviewed-by: Marcelo Tosatti Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kernel/kvmclock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 5bedbdddf1f2..b730efad6fe9 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -160,8 +160,12 @@ int kvm_register_clock(char *txt) { int cpu = smp_processor_id(); int low, high, ret; - struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; + struct pvclock_vcpu_time_info *src; + + if (!hv_clock) + return 0; + src = &hv_clock[cpu].pvti; low = (int)__pa(src) | 1; high = ((u64)__pa(src) >> 32); ret = native_write_msr_safe(msr_kvm_system_time, low, high); @@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void) struct pvclock_vcpu_time_info *vcpu_time; unsigned int size; + if (!hv_clock) + return 0; + size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); preempt_disable(); -- GitLab From 733568f9cecc061eca213ba0e877a1f820a40de5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 23 Feb 2013 15:07:47 +0100 Subject: [PATCH 0020/3163] KVM: VMX: Make prepare_vmcs12 and load_vmcs12_host_state static Both are only used locally. Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c1b3041a1f86..8a99a62d98a6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7330,7 +7330,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * exit-information fields only. Other fields are modified by L1 with VMWRITE, * which already writes to vmcs12 directly. */ -void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { /* update guest state fields: */ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); @@ -7421,7 +7421,8 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Failures During or After Loading Guest State"). * This function should be called when the active VMCS is L1's (vmcs01). 
*/ -void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) { if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; -- GitLab From d6851fbeee3f79ac2629f823e15ac2a7f6f54e0e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 23 Feb 2013 22:34:39 +0100 Subject: [PATCH 0021/3163] KVM: nVMX: Advertise PAUSE and WBINVD exiting support These exits have no preconditions, and we already process the corresponding reasons in nested_vmx_exit_handled correctly. Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8a99a62d98a6..329836af3240 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2080,6 +2080,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | + CPU_BASED_PAUSE_EXITING | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the @@ -2094,7 +2095,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_WBINVD_EXITING; } static inline bool vmx_control_verify(u32 control, u32 low, u32 high) -- GitLab From 36c3cc422b7c5d3cd84cbac769758b197e08f221 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 23 Feb 2013 22:35:37 +0100 Subject: [PATCH 0022/3163] KVM: nVMX: Clear segment cache after switching between L1 and L2 Switching the VMCS obviously invalidates what may have been cached about the guest segments. Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 329836af3240..3a58c1b8764e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7271,6 +7271,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vcpu->cpu = cpu; put_cpu(); + vmx_segment_cache_clear(vmx); + vmcs12->launch_state = 1; prepare_vmcs02(vcpu, vmcs12); @@ -7517,6 +7519,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) vcpu->cpu = cpu; put_cpu(); + vmx_segment_cache_clear(vmx); + /* if no vmcs02 cache requested, remove the one we used */ if (VMCS02_POOL_SIZE == 0) nested_free_vmcs02(vmx, vmx->nested.current_vmptr); -- GitLab From 957c897e8cf5a26abbce1a1a38833251339d596d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 24 Feb 2013 14:11:34 +0100 Subject: [PATCH 0023/3163] KVM: nVMX: Use cached exit reason No need to re-read what vmx_vcpu_run already picked up for us. 
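The change is a standard read-caching pattern, sketched below with a hypothetical expensive_read() standing in for vmcs_read32(VM_EXIT_REASON):

    #include <stdio.h>

    static unsigned int vmreads;

    /* Hypothetical stand-in for vmcs_read32(VM_EXIT_REASON). */
    static unsigned int expensive_read(void)
    {
            vmreads++;
            return 30;      /* some exit reason code */
    }

    struct vcpu {
            unsigned int exit_reason;       /* filled in once per VM exit */
    };

    int main(void)
    {
            struct vcpu v;

            v.exit_reason = expensive_read();       /* done once, at exit time */

            /* Later consumers read the cached field instead of the VMCS. */
            printf("reason=%u reason=%u vmreads=%u\n",
                   v.exit_reason, v.exit_reason, vmreads); /* vmreads == 1 */
            return 0;
    }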
Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3a58c1b8764e..f7d2242e5d00 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6082,10 +6082,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, */ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) { - u32 exit_reason = vmcs_read32(VM_EXIT_REASON); u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 exit_reason = vmx->exit_reason; if (vmx->nested.nested_run_pending) return 0; @@ -7399,7 +7399,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) /* update exit information fields: */ - vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); + vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); -- GitLab From 02cdb50fd7e4b5ce1f6f70e27f74283ced0e1872 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 27 Feb 2013 11:33:25 +0800 Subject: [PATCH 0024/3163] arch/x86/kvm: beautify source code for __u32 irq which is never < 0 irq->irq is __u32 which is never < 0. Signed-off-by: Chen Gang Signed-off-by: Gleb Natapov --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c5bb6fe5280..d0cf7371a558 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2696,7 +2696,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) + if (irq->irq >= KVM_NR_INTERRUPTS) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; -- GitLab From 7d557b3cb69398d83ceabad9cf147c93a3aa97fd Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 22 Feb 2013 20:20:00 +0400 Subject: [PATCH 0025/3163] slub: correctly bootstrap boot caches After we create a boot cache, we may allocate from it until it is bootstrapped. This will move the page from the partial list to the cpu slab list. If this happens, the loop: list_for_each_entry(p, &n->partial, lru) that we use to scan for all partial pages will yield nothing, and the pages will keep pointing to the boot cpu cache, which is, of course, invalid. To fix that, we should flush the cache to make sure that the cpu slab is back to the partial list. Signed-off-by: Glauber Costa Reported-by: Steffen Michalke Tested-by: KAMEZAWA Hiroyuki Acked-by: Christoph Lameter Cc: Andrew Morton Cc: Tejun Heo Signed-off-by: Pekka Enberg --- mm/slub.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 6184b0821f7e..aa0728daf8bb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3552,6 +3552,12 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) memcpy(s, static_cache, kmem_cache->object_size); + /* + * This runs very early, and only the boot processor is supposed to be + * up. Even if it weren't true, IRQs are not up so we couldn't fire + * IPIs around.
+ */ + __flush_cpu_slab(s, smp_processor_id()); for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); struct page *p; -- GitLab From 44ceb9d6653306f73fb40cbeca303b23937efd85 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 20 Feb 2013 14:02:23 +0100 Subject: [PATCH 0026/3163] KVM: nVMX: Avoid one redundant vmcs_read in prepare_vmcs12 IDT_VECTORING_INFO_FIELD was already read right after vmexit. Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7d2242e5d00..238c59b73144 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7404,8 +7404,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - vmcs12->idt_vectoring_info_field = - vmcs_read32(IDT_VECTORING_INFO_FIELD); + vmcs12->idt_vectoring_info_field = to_vmx(vcpu)->idt_vectoring_info; vmcs12->idt_vectoring_error_code = vmcs_read32(IDT_VECTORING_ERROR_CODE); vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); -- GitLab From 3ab66e8a455a4877889c65a848f2fb32be502f2c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 20 Feb 2013 14:03:24 +0100 Subject: [PATCH 0027/3163] KVM: VMX: Pass vcpu to __vmx_complete_interrupts Cleanup: __vmx_complete_interrupts has no use for the vmx structure. Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 238c59b73144..7cc566b09ff2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6436,7 +6436,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); } -static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, +static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, u32 idt_vectoring_info, int instr_len_field, int error_code_field) @@ -6447,46 +6447,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; - vmx->vcpu.arch.nmi_injected = false; - kvm_clear_exception_queue(&vmx->vcpu); - kvm_clear_interrupt_queue(&vmx->vcpu); + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); if (!idtv_info_valid) return; - kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); + kvm_make_request(KVM_REQ_EVENT, vcpu); vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; switch (type) { case INTR_TYPE_NMI_INTR: - vmx->vcpu.arch.nmi_injected = true; + vcpu->arch.nmi_injected = true; /* * SDM 3: 27.7.1.2 (September 2008) * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. 
*/ - vmx_set_nmi_mask(&vmx->vcpu, false); + vmx_set_nmi_mask(vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(instr_len_field); + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); /* fall through */ case INTR_TYPE_HARD_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { u32 err = vmcs_read32(error_code_field); - kvm_queue_exception_e(&vmx->vcpu, vector, err); + kvm_queue_exception_e(vcpu, vector, err); } else - kvm_queue_exception(&vmx->vcpu, vector); + kvm_queue_exception(vcpu, vector); break; case INTR_TYPE_SOFT_INTR: - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(instr_len_field); + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); /* fall through */ case INTR_TYPE_EXT_INTR: - kvm_queue_interrupt(&vmx->vcpu, vector, - type == INTR_TYPE_SOFT_INTR); + kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); break; default: break; @@ -6497,7 +6494,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { if (is_guest_mode(&vmx->vcpu)) return; - __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, + __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, VM_EXIT_INSTRUCTION_LEN, IDT_VECTORING_ERROR_CODE); } @@ -6506,7 +6503,7 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) return; - __vmx_complete_interrupts(to_vmx(vcpu), + __vmx_complete_interrupts(vcpu, vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), VM_ENTRY_INSTRUCTION_LEN, VM_ENTRY_EXCEPTION_ERROR_CODE); -- GitLab From 165b6c2f33860e13d20cd7ac5993ea3acfb5ea34 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 15 Feb 2013 14:54:48 -0700 Subject: [PATCH 0028/3163] gpio/tegra: assume CONFIG_OF Tegra only supports, and always enables, device tree. Remove all ifdefs and runtime checks for DT support from the driver. Signed-off-by: Stephen Warren Signed-off-by: Grant Likely --- drivers/gpio/gpio-tegra.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 414ad912232f..a78a81fbc2b0 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -398,10 +398,11 @@ static int tegra_gpio_probe(struct platform_device *pdev) int j; match = of_match_device(tegra_gpio_of_match, &pdev->dev); - if (match) - config = (struct tegra_gpio_soc_config *)match->data; - else - config = &tegra20_gpio_config; + if (!match) { + dev_err(&pdev->dev, "Error: No device match found\n"); + return -ENODEV; + } + config = (struct tegra_gpio_soc_config *)match->data; tegra_gpio_bank_stride = config->bank_stride; tegra_gpio_upper_offset = config->upper_offset; @@ -462,9 +463,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) } } -#ifdef CONFIG_OF_GPIO tegra_gpio_chip.of_node = pdev->dev.of_node; -#endif gpiochip_add(&tegra_gpio_chip); -- GitLab From 462fce46065ec4b200c08619c047b9e5a8fd154a Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:41:56 +0900 Subject: [PATCH 0029/3163] KVM: set_memory_region: Drop user_alloc from prepare/commit_memory_region() X86 does not use this any more. The remaining user, s390's !user_alloc check, can be simply removed since KVM_SET_MEMORY_REGION ioctl is no longer supported. Note: fixed powerpc's indentations with spaces to suppress checkpatch errors. 
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/arm/kvm/arm.c | 6 ++---- arch/ia64/kvm/kvm-ia64.c | 6 ++---- arch/powerpc/kvm/powerpc.c | 12 +++++------- arch/s390/kvm/kvm-s390.c | 9 ++------- arch/x86/kvm/x86.c | 6 ++---- include/linux/kvm_host.h | 6 ++---- virt/kvm/kvm_main.c | 4 ++-- 7 files changed, 17 insertions(+), 32 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5a936988eb24..24cb5f66787d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -231,16 +231,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index ad3126a58644..cbc5b0417dab 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1579,8 +1579,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { unsigned long i; unsigned long pfn; @@ -1610,8 +1609,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { return; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 934413cd3a1b..22b33159fbc4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -411,18 +411,16 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) } int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) { kvmppc_core_commit_memory_region(kvm, mem, old); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cf35a0a79e7..07ac302ce246 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -975,8 +975,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { /* A few sanity checks. 
We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a @@ -997,16 +996,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; - if (!user_alloc) - return -EINVAL; - return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { int rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 811c5c9c8880..26216bb4403f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6907,8 +6907,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int npages = memslot->npages; @@ -6938,8 +6937,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_memory_slot old) { int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe09d77..b4757a1cc4c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -464,12 +464,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc); + struct kvm_memory_slot old); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); /* flush all memory translations */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index adc68feb5c5a..fd3037010e75 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -875,7 +875,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); + r = kvm_arch_prepare_memory_region(kvm, &new, old, mem); if (r) goto out_slots; @@ -915,7 +915,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); + kvm_arch_commit_memory_region(kvm, mem, old); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- GitLab From 47ae31e257c548abdb199e0d26723139a9a967ba Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:43:00 +0900 Subject: [PATCH 0030/3163] KVM: set_memory_region: Drop user_alloc from set_memory_region() Except ia64's stale code, KVM_SET_MEMORY_REGION support, this is only used for sanity checks in __kvm_set_memory_region() which can easily be changed to use slot id instead. 
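A sketch of the idea (KVM_USER_MEM_SLOTS is given an example value here; the real constant is arch-dependent): the slot id range already says whether a slot is user-visible, so the extra user_alloc flag carries no information.

    #include <stdio.h>

    #define KVM_USER_MEM_SLOTS 125 /* example value; arch-dependent in reality */

    /* User-visible slots occupy ids below KVM_USER_MEM_SLOTS; internal slots
     * sit above, so "user allocated" is implied by the slot id alone. */
    static int is_user_slot(unsigned int slot)
    {
            return slot < KVM_USER_MEM_SLOTS;
    }

    int main(void)
    {
            printf("%d %d\n", is_user_slot(0), is_user_slot(200)); /* 1 0 */
            return 0;
    }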
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 18 ------------------ arch/x86/kvm/vmx.c | 6 +++--- include/linux/kvm_host.h | 10 +++------- virt/kvm/kvm_main.c | 18 +++++++----------- 4 files changed, 13 insertions(+), 39 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index cbc5b0417dab..43701f0c0f71 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -942,24 +942,6 @@ long kvm_arch_vm_ioctl(struct file *filp, int r = -ENOTTY; switch (ioctl) { - case KVM_SET_MEMORY_REGION: { - struct kvm_memory_region kvm_mem; - struct kvm_userspace_memory_region kvm_userspace_mem; - - r = -EFAULT; - if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) - goto out; - kvm_userspace_mem.slot = kvm_mem.slot; - kvm_userspace_mem.flags = kvm_mem.flags; - kvm_userspace_mem.guest_phys_addr = - kvm_mem.guest_phys_addr; - kvm_userspace_mem.memory_size = kvm_mem.memory_size; - r = kvm_vm_ioctl_set_memory_region(kvm, - &kvm_userspace_mem, false); - if (r) - goto out; - break; - } case KVM_CREATE_IRQCHIP: r = -EFAULT; r = kvm_ioapic_init(kvm); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b09ff2..58fb7c27e3b5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3694,7 +3694,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -3724,7 +3724,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -4364,7 +4364,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem, false); + ret = kvm_set_memory_region(kvm, &tss_mem); if (ret) return ret; kvm->arch.tss_addr = addr; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b4757a1cc4c4..84a994c7a5c5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -453,11 +453,9 @@ id_to_memslot(struct kvm_memslots *slots, int id) } int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); @@ -553,9 +551,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fd3037010e75..5b3e41b81f0d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -745,8 +745,7 @@ enum kvm_mr_change { * Must be called holding mmap_sem for write. 
*/ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -767,7 +766,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. */ - if (user_alloc && + if ((mem->slot < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, @@ -932,26 +931,23 @@ int __kvm_set_memory_region(struct kvm *kvm, EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; mutex_lock(&kvm->slots_lock); - r = __kvm_set_memory_region(kvm, mem, user_alloc); + r = __kvm_set_memory_region(kvm, mem); mutex_unlock(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; - return kvm_set_memory_region(kvm, mem, user_alloc); + return kvm_set_memory_region(kvm, mem); } int kvm_get_dirty_log(struct kvm *kvm, @@ -2198,7 +2194,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); break; } case KVM_GET_DIRTY_LOG: { -- GitLab From 74d0727cb7aaaea48a6353209093be26abc8d160 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:43:44 +0900 Subject: [PATCH 0031/3163] KVM: set_memory_region: Make kvm_mr_change available to arch code This will be used for cleaning up prepare/commit_memory_region() later. 
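A sketch of how arch code can consume the now-exported enum; describe() is a hypothetical helper, not a kernel function:

    #include <stdio.h>

    enum kvm_mr_change {
            KVM_MR_CREATE,
            KVM_MR_DELETE,
            KVM_MR_MOVE,
            KVM_MR_FLAGS_ONLY,
    };

    /* Hypothetical arch-side helper: with the enum visible, arch code can
     * branch on the requested operation instead of re-deriving it from old
     * and new memory slot copies. */
    static const char *describe(enum kvm_mr_change change)
    {
            switch (change) {
            case KVM_MR_CREATE:     return "create a new slot";
            case KVM_MR_DELETE:     return "delete an existing slot";
            case KVM_MR_MOVE:       return "move a slot in guest physical space";
            case KVM_MR_FLAGS_ONLY: return "change flags only";
            }
            return "unknown";
    }

    int main(void)
    {
            printf("%s\n", describe(KVM_MR_MOVE));
            return 0;
    }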
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 18 ++++++++++++++++++ virt/kvm/kvm_main.c | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 84a994c7a5c5..8eaf61f7b02d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -452,6 +452,24 @@ id_to_memslot(struct kvm_memslots *slots, int id) return slot; } +/* + * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: + * - create a new memory slot + * - delete an existing memory slot + * - modify an existing memory slot + * -- move it in the guest physical memory space + * -- just change its flags + * + * Since flags can be changed by some of these operations, the following + * differentiation is the best we can do for __kvm_set_memory_region(): + */ +enum kvm_mr_change { + KVM_MR_CREATE, + KVM_MR_DELETE, + KVM_MR_MOVE, + KVM_MR_FLAGS_ONLY, +}; + int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5b3e41b81f0d..c7979ed41923 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -718,24 +718,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, return old_memslots; } -/* - * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: - * - create a new memory slot - * - delete an existing memory slot - * - modify an existing memory slot - * -- move it in the guest physical memory space - * -- just change its flags - * - * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): - */ -enum kvm_mr_change { - KVM_MR_CREATE, - KVM_MR_DELETE, - KVM_MR_MOVE, - KVM_MR_FLAGS_ONLY, -}; - /* * Allocate some memory and give it an address in the guest physical address * space. -- GitLab From 7b6195a91d60909a2834ab7181e2b9476e6fe749 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:44:34 +0900 Subject: [PATCH 0032/3163] KVM: set_memory_region: Refactor prepare_memory_region() This patch drops the parameter old, a copy of the old memory slot, and adds a new parameter named change to know the change being requested. This not only cleans up the code but also removes extra copying of the memory slot structure. 
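The refactoring relies on a simple equivalence: the old test "npages && !old.npages" is exactly the KVM_MR_CREATE case. A standalone sketch of that classification (simplified; the real logic in __kvm_set_memory_region() also distinguishes moves by base_gfn):

    #include <stdio.h>

    enum kvm_mr_change {
            KVM_MR_CREATE,
            KVM_MR_DELETE,
            KVM_MR_MOVE,
            KVM_MR_FLAGS_ONLY,
    };

    int main(void)
    {
            unsigned long old_npages = 0, npages = 16;
            enum kvm_mr_change change;

            /* A slot gaining pages from nothing is a create; losing all its
             * pages is a delete; everything else is a move or a flags-only
             * change (the move case is omitted here for brevity). */
            if (npages && !old_npages)
                    change = KVM_MR_CREATE;
            else if (!npages && old_npages)
                    change = KVM_MR_DELETE;
            else
                    change = KVM_MR_FLAGS_ONLY;

            printf("change = %d (0 == KVM_MR_CREATE)\n", (int)change);
            return 0;
    }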
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/arm/kvm/arm.c | 4 ++-- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/x86.c | 10 ++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 2 +- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 24cb5f66787d..96ebab7a1959 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -230,8 +230,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return 0; } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 43701f0c0f71..5c2b07e8c3d6 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1560,8 +1560,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { unsigned long i; unsigned long pfn; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 22b33159fbc4..8aa51cd67c28 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -412,8 +412,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 07ac302ce246..4288780c86b8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -974,8 +974,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { /* A few sanity checks. We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26216bb4403f..7198234fa088 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6906,23 +6906,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { - int npages = memslot->npages; - /* * Only private memory slots need to be mapped here since * KVM_SET_MEMORY_REGION ioctl is no longer supported. */ - if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { + if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { unsigned long userspace_addr; /* * MAP_SHARED to prevent internal slot pages from being moved * by fork()/COW. 
*/ - userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, + userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8eaf61f7b02d..caa72cf7e8e7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -479,8 +479,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem); + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c7979ed41923..8f85bae862c7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -856,7 +856,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem); + r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); if (r) goto out_slots; -- GitLab From 8482644aea11e0647867732319ccf35879a9acc2 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:45:25 +0900 Subject: [PATCH 0033/3163] KVM: set_memory_region: Refactor commit_memory_region() This patch makes the parameter old a const pointer to the old memory slot and adds a new parameter named change to know the change being requested: the former is for removing extra copying and the latter is for cleaning up the code. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/arm/kvm/arm.c | 3 ++- arch/ia64/kvm/kvm-ia64.c | 3 ++- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_hv.c | 4 ++-- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/powerpc.c | 3 ++- arch/s390/kvm/kvm-s390.c | 3 ++- arch/x86/kvm/x86.c | 15 ++++++++------- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 2 +- 11 files changed, 24 insertions(+), 18 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 96ebab7a1959..b32dc446e802 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -238,7 +238,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5c2b07e8c3d6..7a54455dde39 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1591,7 +1591,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { return; } diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657adf416..44fa9ad1d62c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old); + const struct kvm_memory_slot *old); extern 
int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 80dcc53a1aba..1e521baf9a7d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1639,12 +1639,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - if (npages && old.npages) { + if (npages && old->npages) { /* * If modifying a memslot, reset all the rmap dirty bits. * If this is a new memslot, we don't need to do anything diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 5e93438afb06..286e23e6b92d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1283,7 +1283,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 020923e43134..eb88fa621073 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1531,7 +1531,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8aa51cd67c28..7b5d4d20cdc5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -420,7 +420,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { kvmppc_core_commit_memory_region(kvm, mem, old); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4288780c86b8..6cae4ad647a9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1001,7 +1001,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { int rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7198234fa088..35b491229c3a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6935,16 +6935,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { - int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; + int nr_mmu_pages = 0; - if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { + if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { int ret; - ret = vm_munmap(old.userspace_addr, - old.npages * PAGE_SIZE); + ret = vm_munmap(old->userspace_addr, + old->npages * PAGE_SIZE); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " @@ -6961,13 +6962,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Existing largepage mappings are 
destroyed here and new ones will * not be created until the end of the logging. */ - if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_slot_remove_write_access(kvm, mem->slot); /* * If memory slot is created, or moved, we need to clear all * mmio sptes. */ - if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { kvm_mmu_zap_all(kvm); kvm_reload_remote_mmus(kvm); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index caa72cf7e8e7..ac584cc53581 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -483,7 +483,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old); + const struct kvm_memory_slot *old, + enum kvm_mr_change change); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); /* flush all memory translations */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8f85bae862c7..0e919a1d4d56 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -896,7 +896,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old); + kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- GitLab From 16014753b10b76385600cd59450a70b8650c72cb Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:46:27 +0900 Subject: [PATCH 0034/3163] KVM: ARM: Remove kvm_arch_set_memory_region() This was replaced with prepare/commit long before: commit f7784b8ec9b6a041fa828cfbe9012fe51933f5ac KVM: split kvm_arch_set_memory_region into prepare and commit Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/arm/kvm/arm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index b32dc446e802..e4ad0bb01843 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -220,14 +220,6 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) -{ - return 0; -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, -- GitLab From 503cd0c50ac7161eb5c3891b48b620cb0a5521cd Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 3 Mar 2013 13:05:44 +0100 Subject: [PATCH 0035/3163] KVM: nVMX: Fix switching of debug state First of all, do not blindly overwrite GUEST_DR7 on L2 entry. The host may have guest debugging enabled. Then properly reset DR7 and DEBUG_CTL on L2->L1 switch as specified in the SDM. 
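A minimal sketch of the SDM rule being enforced here (the helper name is made up for illustration; the values are the architected VM-exit state, which loads DR7 with 0x400 and clears IA32_DEBUGCTL):

	static void nested_vmx_restore_host_debug_state(struct kvm_vcpu *vcpu)
	{
		kvm_set_dr(vcpu, 7, 0x400);		/* DR7 after VM-exit */
		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);	/* DEBUGCTL cleared */
	}

These are exactly the two writes added to load_vmcs12_host_state() in the diff below.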
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 58fb7c27e3b5..097f5d662275 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6978,7 +6978,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_interruptibility_info); vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); - vmcs_writel(GUEST_DR7, vmcs12->guest_dr7); + kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, vmcs12->guest_pending_dbg_exceptions); @@ -7492,6 +7492,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, vmcs12->host_ia32_perf_global_ctrl); + + kvm_set_dr(vcpu, 7, 0x400); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } /* -- GitLab From 07e169335ff0570c6e67b5ccf74d793f00ab0834 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 28 Feb 2013 12:33:16 +0100 Subject: [PATCH 0036/3163] virtio_ccw: pass a cookie value to kvm hypercall Lookups by channel/vq pair on host during virtio notifications might be expensive. Interpret hypercall return value as a cookie which host can use to do device lookups for the next notification more efficiently. [CH: Fix line > 80 chars] Tested-by: Christian Borntraeger Reviewed-by: Christian Borntraeger Signed-off-by: Michael S. Tsirkin Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- drivers/s390/kvm/virtio_ccw.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 2029b6caa595..3d657522b3e2 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -77,6 +77,7 @@ struct virtio_ccw_vq_info { void *queue; struct vq_info_block *info_block; struct list_head node; + long cookie; }; #define KVM_VIRTIO_CCW_RING_ALIGN 4096 @@ -145,15 +146,18 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, } static inline long do_kvm_notify(struct subchannel_id schid, - unsigned long queue_index) + unsigned long queue_index, + long cookie) { register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY; register struct subchannel_id __schid asm("2") = schid; register unsigned long __index asm("3") = queue_index; register long __rc asm("2"); + register long __cookie asm("4") = cookie; asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index) + : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index), + "d"(__cookie) : "memory", "cc"); return __rc; } @@ -166,7 +170,8 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq) vcdev = to_vc_device(info->vq->vdev); ccw_device_get_schid(vcdev->cdev, &schid); - do_kvm_notify(schid, virtqueue_get_queue_index(vq)); + info->cookie = do_kvm_notify(schid, virtqueue_get_queue_index(vq), + info->cookie); } static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, -- GitLab From 6a773cb825afb74a600a08fe87fab55ee98ec2ac Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 28 Feb 2013 12:33:17 +0100 Subject: [PATCH 0037/3163] KVM: s390: Export virtio-ccw api. Export the virtio-ccw api in a header for usage by other code. 
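As an illustration of the intended usage (the calling context is hypothetical), other code can now pull the constants from the uapi header instead of keeping private copies:

	#include <asm/virtio-ccw.h>

	/* only handle the virtio-ccw notify subcode of diagnose 500 */
	if (subcode != KVM_S390_VIRTIO_CCW_NOTIFY)
		return -EOPNOTSUPP;

The diagnose handler wired up later in this series performs this exact check.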
Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- arch/s390/include/uapi/asm/Kbuild | 1 + arch/s390/include/uapi/asm/virtio-ccw.h | 21 +++++++++++++++++++++ drivers/s390/kvm/virtio_ccw.c | 5 +---- 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 arch/s390/include/uapi/asm/virtio-ccw.h diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 7bf68fff7c5d..9ccd1905bdad 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -44,5 +44,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += virtio-ccw.h header-y += vtoc.h header-y += zcrypt.h diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 000000000000..a9a4ebf79fa7 --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -0,0 +1,21 @@ +/* + * Definitions for virtio-ccw devices. + * + * Copyright IBM Corp. 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck + */ +#ifndef __KVM_VIRTIO_CCW_H +#define __KVM_VIRTIO_CCW_H + +/* Alignment of vring buffers. */ +#define KVM_VIRTIO_CCW_RING_ALIGN 4096 + +/* Subcode for diagnose 500 (virtio hypercall). */ +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 + +#endif diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 3d657522b3e2..42d507c4e06b 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * virtio related functions @@ -80,10 +81,6 @@ struct virtio_ccw_vq_info { long cookie; }; -#define KVM_VIRTIO_CCW_RING_ALIGN 4096 - -#define KVM_S390_VIRTIO_CCW_NOTIFY 3 - #define CCW_CMD_SET_VQ 0x13 #define CCW_CMD_VDEV_RESET 0x33 #define CCW_CMD_SET_IND 0x43 -- GitLab From a0f155e9646d5f1c263f6f9aae880151100243bb Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 28 Feb 2013 12:33:18 +0100 Subject: [PATCH 0038/3163] KVM: Initialize irqfd from kvm_init(). Currently, eventfd introduces module_init/module_exit functions to initialize/cleanup the irqfd workqueue. This only works, however, if no other module_init/module_exit functions are built into the same module. Let's just move the initialization and cleanup to kvm_init and kvm_exit. This way, it is also clearer where kvm startup may fail. 
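A condensed sketch of the resulting kvm_init() ordering, with the unrelated setup steps elided (the full diff follows):

	r = kvm_irqfd_init();
	if (r)
		goto out_irqfd;
	r = kvm_arch_init(opaque);
	if (r)
		goto out_fail;	/* must now unwind the irqfd workqueue */
	/* ... remaining setup ... */
out_fail:
	kvm_irqfd_exit();
out_irqfd:
	return r;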
Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 13 +++++++++++++ virt/kvm/eventfd.c | 7 ++----- virt/kvm/kvm_main.c | 6 ++++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ac584cc53581..d50fe173028b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -424,6 +424,19 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); +#ifdef __KVM_HAVE_IOAPIC +int kvm_irqfd_init(void); +void kvm_irqfd_exit(void); +#else +static inline int kvm_irqfd_init(void) +{ + return 0; +} + +static inline void kvm_irqfd_exit(void) +{ +} +#endif int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index adb17f266b28..0b6fe69bb03d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -543,7 +543,7 @@ void kvm_irq_routing_update(struct kvm *kvm, * aggregated from all vm* instances. We need our own isolated single-thread * queue to prevent deadlock against flushing the normal work-queue. */ -static int __init irqfd_module_init(void) +int kvm_irqfd_init(void) { irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); if (!irqfd_cleanup_wq) @@ -552,13 +552,10 @@ static int __init irqfd_module_init(void) return 0; } -static void __exit irqfd_module_exit(void) +void kvm_irqfd_exit(void) { destroy_workqueue(irqfd_cleanup_wq); } - -module_init(irqfd_module_init); -module_exit(irqfd_module_exit); #endif /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0e919a1d4d56..faf05bddd131 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2898,6 +2898,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, int r; int cpu; + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; r = kvm_arch_init(opaque); if (r) goto out_fail; @@ -2978,6 +2981,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, out_free_0: kvm_arch_exit(); out_fail: + kvm_irqfd_exit(); +out_irqfd: return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -2994,6 +2999,7 @@ void kvm_exit(void) on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); + kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled); } EXPORT_SYMBOL_GPL(kvm_exit); -- GitLab From 060f0ce6ff975decd1e0ee318c08e228bccbee1e Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 28 Feb 2013 12:33:19 +0100 Subject: [PATCH 0039/3163] KVM: Introduce KVM_VIRTIO_CCW_NOTIFY_BUS. Add a new bus type for virtio-ccw devices on s390. Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d50fe173028b..9fa13ebc3381 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -149,6 +149,7 @@ struct kvm_io_bus { enum kvm_bus { KVM_MMIO_BUS, KVM_PIO_BUS, + KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_NR_BUSES }; -- GitLab From 2b83451b45d720ca38c03878ce42ff9139cad9e3 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 28 Feb 2013 12:33:20 +0100 Subject: [PATCH 0040/3163] KVM: ioeventfd for virtio-ccw devices. Enhance KVM_IOEVENTFD with a new flag that allows to attach to virtio-ccw devices on s390 via the KVM_VIRTIO_CCW_NOTIFY_BUS. 
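For illustration, a userspace registration could look like the following sketch (vm_fd, schid and vq_index are hypothetical variables; the flag and layout are as documented in the api.txt hunk below):

	struct kvm_ioeventfd kick = {
		.flags     = KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY |
			     KVM_IOEVENTFD_FLAG_DATAMATCH,
		.addr      = schid,		/* subchannel id, not a guest address */
		.len       = 8,			/* matched against a 64-bit gpr */
		.datamatch = vq_index,		/* virtqueue index */
		.fd        = eventfd(0, 0),
	};

	if (ioctl(vm_fd, KVM_IOEVENTFD, &kick) < 0)
		perror("KVM_IOEVENTFD");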
Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 8 ++++++++ include/uapi/linux/kvm.h | 3 +++ virt/kvm/eventfd.c | 17 +++++++++++++---- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 119358dfb742..c16b442556e8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1486,15 +1486,23 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +For the special case of virtio-ccw devices on s390, the ioevent is matched +to a subchannel/virtqueue tuple instead. + The following flags are defined: #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) If datamatch flag is set, the event will be signaled only if the written value to the registered address is equal to datamatch in struct kvm_ioeventfd. +For virtio-ccw devices, addr contains the subchannel id and datamatch the +virtqueue index. + 4.60 KVM_DIRTY_TLB diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3c56ba3d80c1..74d0ff3dfd66 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -449,12 +449,15 @@ enum { kvm_ioeventfd_flag_nr_datamatch, kvm_ioeventfd_flag_nr_pio, kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_virtio_ccw_notify, kvm_ioeventfd_flag_nr_max, }; #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 0b6fe69bb03d..020522ed9094 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -674,15 +674,24 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) return false; } +static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) +{ + if (flags & KVM_IOEVENTFD_FLAG_PIO) + return KVM_PIO_BUS; + if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) + return KVM_VIRTIO_CCW_NOTIFY_BUS; + return KVM_MMIO_BUS; +} + static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p; struct eventfd_ctx *eventfd; int ret; + bus_idx = ioeventfd_bus_from_flags(args->flags); /* must be natural-word sized */ switch (args->len) { case 1: @@ -757,12 +766,12 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? 
KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; + bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); -- GitLab From 10ccaa1e7057d8a9dc3e9ce833af40ec8187b25e Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 28 Feb 2013 12:33:21 +0100 Subject: [PATCH 0041/3163] KVM: s390: Wire up ioeventfd. Enable ioeventfd support on s390 and hook up diagnose 500 virtio-ccw notifications. Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/Kconfig | 1 + arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/diag.c | 26 ++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.c | 1 + 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 60f9f8ae0fc8..70b46eacf8e1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_EVENTFD ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 3975722bb19d..8fe9d65a4585 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a390687feb13..1c01a9912989 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -13,6 +13,7 @@ #include #include +#include #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" @@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EREMOTE; } +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) +{ + int ret, idx; + + /* No virtio-ccw notification? Get out quickly. */ + if (!vcpu->kvm->arch.css_support || + (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) + return -EOPNOTSUPP; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + /* + * The layout is as follows: + * - gpr 2 contains the subchannel id (passed as addr) + * - gpr 3 contains the virtqueue index (passed as datamatch) + */ + ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + return ret < 0 ? 
ret : 0; +} + int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; @@ -118,6 +142,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return __diag_time_slice_end_directed(vcpu); case 0x308: return __diag_ipl_functions(vcpu); + case 0x500: + return __diag_virtio_hypercall(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6cae4ad647a9..33161b4a8280 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -142,6 +142,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: + case KVM_CAP_IOEVENTFD: r = 1; break; case KVM_CAP_NR_VCPUS: -- GitLab From c4627c72e9c9e0fc35af2e9d612888fe4564377d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 3 Mar 2013 20:47:11 +0100 Subject: [PATCH 0042/3163] KVM: nVMX: Reset RFLAGS on VM-exit Ouch, how could this work so well that far? We need to clear RFLAGS to the reset value as specified by the SDM. Particularly, IF must be off after VM-exit! Reviewed-by: Gleb Natapov Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 097f5d662275..aacf6a458ae1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7434,6 +7434,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); + vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); /* * Note that calling vmx_set_cr0 is important, even if cr0 hasn't * actually changed, because it depends on the current state of -- GitLab From ede4d7a5b9835510fd1f724367f68d2fa4128453 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 1 Mar 2013 11:22:47 -0600 Subject: [PATCH 0043/3163] gpio/omap: convert gpio irq domain to linear mapping Currently the OMAP GPIO driver uses a legacy mapping for the GPIO IRQ domain. This is not necessary because we do not need to assign a specific interrupt number to the GPIO IRQ domain. Therefore, convert the OMAP GPIO driver to use a linear mapping instead. Please note that this also allows to simplify the logic in the OMAP gpio_irq_handler() routine, by using irq_find_mapping() to obtain the virtual irq number from the GPIO bank and bank index. 
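Reduced to its essence, the conversion replaces a preallocated descriptor range with on-demand mappings; a sketch using the same kernel APIs as the driver changes below:

	/* before: legacy domain, linux irq = bank->irq_base + hwirq */
	bank->irq_base = irq_alloc_descs(-1, 0, bank->width, 0);
	bank->domain = irq_domain_add_legacy(node, bank->width, bank->irq_base,
					     0, &irq_domain_simple_ops, NULL);

	/* after: linear domain, mappings created and looked up per hwirq */
	bank->domain = irq_domain_add_linear(node, bank->width,
					     &irq_domain_simple_ops, NULL);
	virq = irq_create_mapping(bank->domain, hwirq);	/* at setup time */
	virq = irq_find_mapping(bank->domain, hwirq);	/* in the handler */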
Reported-by: Linus Walleij Signed-off-by: Jon Hunter Reviewed-by: Felipe Balbi Acked-by: Santosh Shilimkar Acked-by: Kevin Hilman Tested-by: Javier Martinez Canillas Signed-off-by: Grant Likely --- drivers/gpio/gpio-omap.c | 72 +++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 159f5c57eb45..c3598d143aa9 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -53,7 +53,6 @@ struct gpio_bank { struct list_head node; void __iomem *base; u16 irq; - int irq_base; struct irq_domain *domain; u32 non_wakeup_gpios; u32 enabled_non_wakeup_gpios; @@ -89,7 +88,14 @@ struct gpio_bank { static int irq_to_gpio(struct gpio_bank *bank, unsigned int gpio_irq) { - return gpio_irq - bank->irq_base + bank->chip.base; + return bank->chip.base + gpio_irq; +} + +static int omap_gpio_to_irq(struct gpio_chip *chip, unsigned offset) +{ + struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip); + + return irq_find_mapping(bank->domain, offset); } static void _set_gpio_direction(struct gpio_bank *bank, int gpio, int is_input) @@ -427,7 +433,7 @@ static int gpio_irq_type(struct irq_data *d, unsigned type) #endif if (!gpio) - gpio = irq_to_gpio(bank, d->irq); + gpio = irq_to_gpio(bank, d->hwirq); if (type & ~IRQ_TYPE_SENSE_MASK) return -EINVAL; @@ -580,7 +586,7 @@ static void _reset_gpio(struct gpio_bank *bank, int gpio) static int gpio_wake_enable(struct irq_data *d, unsigned int enable) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); return _set_gpio_wakeup(bank, gpio, enable); } @@ -680,7 +686,7 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) { void __iomem *isr_reg = NULL; u32 isr; - unsigned int gpio_irq, gpio_index; + unsigned int i; struct gpio_bank *bank; int unmasked = 0; struct irq_chip *chip = irq_desc_get_chip(desc); @@ -721,15 +727,10 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) if (!isr) break; - gpio_irq = bank->irq_base; - for (; isr != 0; isr >>= 1, gpio_irq++) { - int gpio = irq_to_gpio(bank, gpio_irq); - + for (i = 0; isr != 0; isr >>= 1, i++) { if (!(isr & 1)) continue; - gpio_index = GPIO_INDEX(bank, gpio); - /* * Some chips can't respond to both rising and falling * at the same time. If this irq was requested with @@ -737,10 +738,10 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) * to respond to the IRQ for the opposite direction. * This will be indicated in the bank toggle_mask. 
*/ - if (bank->toggle_mask & (1 << gpio_index)) - _toggle_gpio_edge_triggering(bank, gpio_index); + if (bank->toggle_mask & (1 << i)) + _toggle_gpio_edge_triggering(bank, i); - generic_handle_irq(gpio_irq); + generic_handle_irq(irq_find_mapping(bank->domain, i)); } } /* if bank has any level sensitive GPIO pin interrupt @@ -756,7 +757,7 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) static void gpio_irq_shutdown(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned long flags; spin_lock_irqsave(&bank->lock, flags); @@ -767,7 +768,7 @@ static void gpio_irq_shutdown(struct irq_data *d) static void gpio_ack_irq(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); _clear_gpio_irqstatus(bank, gpio); } @@ -775,7 +776,7 @@ static void gpio_ack_irq(struct irq_data *d) static void gpio_mask_irq(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned long flags; spin_lock_irqsave(&bank->lock, flags); @@ -787,7 +788,7 @@ static void gpio_mask_irq(struct irq_data *d) static void gpio_unmask_irq(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned int irq_mask = GPIO_BIT(bank, gpio); u32 trigger = irqd_get_trigger_type(d); unsigned long flags; @@ -953,14 +954,6 @@ static void gpio_set(struct gpio_chip *chip, unsigned offset, int value) spin_unlock_irqrestore(&bank->lock, flags); } -static int gpio_2irq(struct gpio_chip *chip, unsigned offset) -{ - struct gpio_bank *bank; - - bank = container_of(chip, struct gpio_bank, chip); - return bank->irq_base + offset; -} - /*---------------------------------------------------------------------*/ static void __init omap_gpio_show_rev(struct gpio_bank *bank) @@ -1057,7 +1050,7 @@ static void omap_gpio_chip_init(struct gpio_bank *bank) bank->chip.direction_output = gpio_output; bank->chip.set_debounce = gpio_debounce; bank->chip.set = gpio_set; - bank->chip.to_irq = gpio_2irq; + bank->chip.to_irq = omap_gpio_to_irq; if (bank->is_mpuio) { bank->chip.label = "mpuio"; if (bank->regs->wkup_en) @@ -1072,15 +1065,16 @@ static void omap_gpio_chip_init(struct gpio_bank *bank) gpiochip_add(&bank->chip); - for (j = bank->irq_base; j < bank->irq_base + bank->width; j++) { - irq_set_lockdep_class(j, &gpio_lock_class); - irq_set_chip_data(j, bank); + for (j = 0; j < bank->width; j++) { + int irq = irq_create_mapping(bank->domain, j); + irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_chip_data(irq, bank); if (bank->is_mpuio) { - omap_mpuio_alloc_gc(bank, j, bank->width); + omap_mpuio_alloc_gc(bank, irq, bank->width); } else { - irq_set_chip(j, &gpio_irq_chip); - irq_set_handler(j, handle_simple_irq); - set_irq_flags(j, IRQF_VALID); + irq_set_chip_and_handler(irq, &gpio_irq_chip, + handle_simple_irq); + set_irq_flags(irq, IRQF_VALID); } } irq_set_chained_handler(bank->irq, gpio_irq_handler); @@ -1130,14 +1124,10 @@ static int omap_gpio_probe(struct platform_device *pdev) bank->chip.of_node = of_node_get(node); #endif - bank->irq_base = irq_alloc_descs(-1, 0, bank->width, 0); - if (bank->irq_base < 0) { - dev_err(dev, "Couldn't 
allocate IRQ numbers\n"); + bank->domain = irq_domain_add_linear(node, bank->width, + &irq_domain_simple_ops, NULL); + if (!bank->domain) return -ENODEV; - } - - bank->domain = irq_domain_add_legacy(node, bank->width, bank->irq_base, - 0, &irq_domain_simple_ops, NULL); if (bank->regs->set_dataout && bank->regs->clr_dataout) bank->set_dataout = _set_gpio_dataout_reg; -- GitLab From 8d4c277e185c31359cf70573d8b0351fb7dd0dfe Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 1 Mar 2013 11:22:48 -0600 Subject: [PATCH 0044/3163] gpio/omap: warn if bank is not enabled on setting irq type For OMAP devices, if a gpio is being used as an interrupt source but has not been requested by calling gpio_request(), a call to request_irq() may cause the kernel to hang because the gpio bank may be disabled and hence the register access will fail. To prevent such hangs, test for this case and warn if it is detected. Signed-off-by: Jon Hunter Reviewed-by: Felipe Balbi Acked-by: Santosh Shilimkar Acked-by: Kevin Hilman Tested-by: Javier Martinez Canillas Signed-off-by: Grant Likely --- drivers/gpio/gpio-omap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index c3598d143aa9..0d30c7acf0c7 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -427,6 +427,9 @@ static int gpio_irq_type(struct irq_data *d, unsigned type) int retval; unsigned long flags; + if (WARN_ON(!bank->mod_usage)) + return -EINVAL; + #ifdef CONFIG_ARCH_OMAP1 if (d->irq > IH_MPUIO_BASE) gpio = OMAP_MPUIO(d->irq - IH_MPUIO_BASE); -- GitLab From 753c5983ddd38022a680a36f5d66b23b185c9b62 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Feb 2013 22:26:23 +0900 Subject: [PATCH 0045/3163] gpio/em: Add Device Tree support Update the Emma Mobile GPIO driver to add DT support. The patch simply adds a two-cell xlate function and updates the probe code to allow configuration via DT using the "ngpios" property plus an OF alias id, in the same style as gpio-mvebu.c. The code is also adjusted to use postcore_initcall() to force early setup.
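For reference, the two-cell specifier convention that irq_domain_xlate_twocell implements amounts to the following (a simplified sketch of the generic helper, not code from this patch):

	/* cell 0: hw irq number within the domain,
	 * cell 1: IRQ_TYPE_* trigger flags */
	*out_hwirq = intspec[0];
	*out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;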
Signed-off-by: Magnus Damm Signed-off-by: Grant Likely --- drivers/gpio/gpio-em.c | 45 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index deca78f99316..d05369735857 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -231,10 +231,12 @@ static int em_gio_irq_domain_map(struct irq_domain *h, unsigned int virq, static struct irq_domain_ops em_gio_irq_domain_ops = { .map = em_gio_irq_domain_map, + .xlate = irq_domain_xlate_twocell, }; static int em_gio_probe(struct platform_device *pdev) { + struct gpio_em_config pdata_dt; struct gpio_em_config *pdata = pdev->dev.platform_data; struct em_gio_priv *p; struct resource *io[2], *irq[2]; @@ -259,8 +261,8 @@ static int em_gio_probe(struct platform_device *pdev) irq[0] = platform_get_resource(pdev, IORESOURCE_IRQ, 0); irq[1] = platform_get_resource(pdev, IORESOURCE_IRQ, 1); - if (!io[0] || !io[1] || !irq[0] || !irq[1] || !pdata) { - dev_err(&pdev->dev, "missing IRQ, IOMEM or configuration\n"); + if (!io[0] || !io[1] || !irq[0] || !irq[1]) { + dev_err(&pdev->dev, "missing IRQ or IOMEM\n"); ret = -EINVAL; goto err1; } @@ -279,6 +281,25 @@ static int em_gio_probe(struct platform_device *pdev) goto err2; } + if (!pdata) { + memset(&pdata_dt, 0, sizeof(pdata_dt)); + pdata = &pdata_dt; + + if (of_property_read_u32(pdev->dev.of_node, "ngpios", + &pdata->number_of_pins)) { + dev_err(&pdev->dev, "Missing ngpios OF property\n"); + ret = -EINVAL; + goto err3; + } + + ret = of_alias_get_id(pdev->dev.of_node, "gpio"); + if (ret < 0) { + dev_err(&pdev->dev, "Couldn't get OF id\n"); + goto err3; + } + pdata->gpio_base = ret * 32; /* 32 GPIOs per instance */ + } + gpio_chip = &p->gpio_chip; gpio_chip->direction_input = em_gio_direction_input; gpio_chip->get = em_gio_get; @@ -366,15 +387,33 @@ static int em_gio_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id em_gio_dt_ids[] = { + { .compatible = "renesas,em-gio", }, + {}, +}; +MODULE_DEVICE_TABLE(of, em_gio_dt_ids); + static struct platform_driver em_gio_device_driver = { .probe = em_gio_probe, .remove = em_gio_remove, .driver = { .name = "em_gio", + .of_match_table = em_gio_dt_ids, + .owner = THIS_MODULE, } }; -module_platform_driver(em_gio_device_driver); +static int __init em_gio_init(void) +{ + return platform_driver_register(&em_gio_device_driver); +} +postcore_initcall(em_gio_init); + +static void __exit em_gio_exit(void) +{ + platform_driver_unregister(&em_gio_device_driver); +} +module_exit(em_gio_exit); MODULE_AUTHOR("Magnus Damm"); MODULE_DESCRIPTION("Renesas Emma Mobile GIO Driver"); -- GitLab From fc9c6e000f62c2eeda426d4d5f022b3d57729bb0 Mon Sep 17 00:00:00 2001 From: Justin Lecher Date: Wed, 6 Mar 2013 14:02:01 +0100 Subject: [PATCH 0046/3163] menuconfig: optionally use pkg-config to detect ncurses libs When building ncurses with --with-termlib several symbols get moved from libncurses.so to libtinfo.so. Thus when linking with libncurses.so, one additionally needs to link with libtinfo.so. The ncurses pkg-config module will be used to detect the necessary libs for linking. If not available the old heuristic for detection of the ncurses libs will be used. Signed-off-by: Justin Lecher Tested-by: "Yann E. MORIN" Signed-off-by: "Yann E. 
MORIN" --- scripts/kconfig/lxdialog/check-lxdialog.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh index 80788137c670..782d20085e1c 100644 --- a/scripts/kconfig/lxdialog/check-lxdialog.sh +++ b/scripts/kconfig/lxdialog/check-lxdialog.sh @@ -4,6 +4,8 @@ # What library to link ldflags() { + pkg-config --libs ncursesw 2>/dev/null && exit + pkg-config --libs ncurses 2>/dev/null && exit for ext in so a dll.a dylib ; do for lib in ncursesw ncurses curses ; do $cc -print-file-name=lib${lib}.${ext} | grep -q / -- GitLab From 544e7e548fbe75cb93784cc02fc01ce1501286b6 Mon Sep 17 00:00:00 2001 From: Justin Lecher Date: Wed, 6 Mar 2013 14:02:02 +0100 Subject: [PATCH 0047/3163] kconfig: optionally use pkg-config to detect ncurses libs When building ncurses with --with-termlib several symbols get moved from libncurses.so to libtinfo.so. Thus when linking with libncurses.so, one additionally needs to link with libtinfo.so. The ncurses pkg-config module will be used to detect the necessary libs for linking. If not available the old way of directly specifying libs will be used. Signed-off-by: Justin Lecher [yann.morin.1998@free.fr: fix typo: '-ncurses' --> '-lncurses'] Tested-by: "Yann E. MORIN" Signed-off-by: "Yann E. MORIN" --- scripts/kconfig/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 231b4759c714..844bc9da08da 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -219,7 +219,9 @@ HOSTCFLAGS_gconf.o = `pkg-config --cflags gtk+-2.0 gmodule-2.0 libglade-2.0` \ HOSTLOADLIBES_mconf = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ldflags $(HOSTCC)) -HOSTLOADLIBES_nconf = -lmenu -lpanel -lncurses +HOSTLOADLIBES_nconf = $(shell \ + pkg-config --libs menu panel ncurses 2>/dev/null \ + || echo "-lmenu -lpanel -lncurses" ) $(obj)/qconf.o: $(obj)/.tmp_qtcheck ifeq ($(qconf-target),1) -- GitLab From 33fb20c39e98b90813b5ab2d9a0d6faa6300caca Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 6 Mar 2013 15:44:03 +0100 Subject: [PATCH 0048/3163] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS Properly set those bits to 1 that the spec demands in case bit 55 of VMX_BASIC is 0 - like in our case. 
Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 4 ++++ arch/x86/kvm/vmx.c | 13 ++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b6fbf860e398..5fb6e24f0649 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -81,6 +81,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x00200000 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 @@ -89,6 +91,8 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aacf6a458ae1..a9d885353108 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2049,21 +2049,28 @@ static __init void nested_vmx_setup_ctls_msrs(void) PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; - /* exit controls */ - nested_vmx_exit_ctls_low = 0; + /* + * Exit controls + * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and + * 17 must be 1. + */ + nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ #ifdef CONFIG_X86_64 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; #else nested_vmx_exit_ctls_high = 0; #endif + nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - nested_vmx_entry_ctls_low = 0; + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ + nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; + nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, -- GitLab From 1a0d74e66405a795bb37a4a23ece50f8d8e5e81e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 7 Mar 2013 14:08:07 +0100 Subject: [PATCH 0049/3163] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode The logic for calculating the value with which we call kvm_set_cr0/4 was broken (will definitely be visible with nested unrestricted guest mode support). Also, we performed the check regarding CR0_ALWAYSON too early when in guest mode. What really needs to be done on both CR0 and CR4 is to mask out L1-owned bits and merge them in from L1's guest_cr0/4. In contrast, arch.cr0/4 and arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus, are not suited as input. For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and refuse the update if it fails. To be fully consistent, we implement this check now also for CR4. For CR4, we move the check into vmx_set_cr4 while we keep it in handle_set_cr0. This is because the CR0 checks for vmxon vs. guest mode will diverge soon when adding unrestricted guest mode support. Finally, we have to set the shadow to the value L2 wanted to write originally. 
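The corrected merge, pulled out into a stand-alone helper for clarity (no such helper exists in the patch; the expression is the one open-coded in the diff):

	/* a set bit in cr0/4_guest_host_mask is L1-owned and must come from
	 * L1's guest_crX; a clear bit is L2-owned and comes from the value
	 * L2 is writing */
	static inline unsigned long nested_cr_merge(unsigned long val,
						    unsigned long guest_cr,
						    unsigned long mask)
	{
		return (val & ~mask) | (guest_cr & mask);
	}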
Reviewed-by: Gleb Natapov Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 48 +++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a9d885353108..260da9ac1678 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3223,7 +3223,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) */ if (!nested_vmx_allowed(vcpu)) return 1; - } else if (to_vmx(vcpu)->nested.vmxon) + } + if (to_vmx(vcpu)->nested.vmxon && + ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) return 1; vcpu->arch.cr4 = cr4; @@ -4612,34 +4614,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { - if (to_vmx(vcpu)->nested.vmxon && - ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) - return 1; - if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + /* * We get here when L2 changed cr0 in a way that did not change * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. + * but did change L0 shadowed bits. So we first calculate the + * effective cr0 value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1's guest_cr0. */ - if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) | - (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits))) + val = (val & ~vmcs12->cr0_guest_host_mask) | + (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); + + /* TODO: will have to take unrestricted guest mode into + * account */ + if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) return 1; - vmcs_writel(CR0_READ_SHADOW, val); + + if (kvm_set_cr0(vcpu, val)) + return 1; + vmcs_writel(CR0_READ_SHADOW, orig_val); return 0; - } else + } else { + if (to_vmx(vcpu)->nested.vmxon && + ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) + return 1; return kvm_set_cr0(vcpu, val); + } } static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) { if (is_guest_mode(vcpu)) { - if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) | - (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits))) + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + + /* analogously to handle_set_cr0 */ + val = (val & ~vmcs12->cr4_guest_host_mask) | + (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); + if (kvm_set_cr4(vcpu, val)) return 1; - vmcs_writel(CR4_READ_SHADOW, val); + vmcs_writel(CR4_READ_SHADOW, orig_val); return 0; } else return kvm_set_cr4(vcpu, val); -- GitLab From 744b37fb5a63d45e92e590967bae82d8ac62e950 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:40 +0100 Subject: [PATCH 0050/3163] s390/kvm,gaccess: fix guest access return code handling Guest access functions like copy_to/from_guest() call __guestaddr_to_user() which in turn calls gmap_fault() in order to translate a guest address to a user space address. In the error case, __guestaddr_to_user() returns either -EFAULT or -ENOMEM. The copy_to/from_guest functions just pass these return values down to the callers.
The -ENOMEM case, however, is problematic since there are several places which access guest memory like: rc = copy_to_guest(...); if (rc == -EFAULT) error_handling(); So in case of -ENOMEM the code assumes that the guest memory access succeeded even though it failed. This can cause guest data or state corruption. If __guestaddr_to_user() returns -ENOMEM the meaning is that a valid user space mapping exists, but there was not enough memory available when trying to build the guest mapping. In other words an out-of-memory situation occurred. For normal user space accesses an out-of-memory situation causes the page fault handler to map -ENOMEM to -EFAULT (see fixup code in do_no_context()). We need to do exactly the same for the kvm gaccess functions. So __guestaddr_to_user() should just map all error codes to -EFAULT. Signed-off-by: Heiko Carstens Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/gaccess.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 4703f129e95e..84d01dd7a8e4 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -22,13 +22,16 @@ static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, unsigned long guestaddr) { unsigned long prefix = vcpu->arch.sie_block->prefix; + unsigned long uaddress; if (guestaddr < 2 * PAGE_SIZE) guestaddr += prefix; else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) guestaddr -= prefix; - - return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap); + uaddress = gmap_fault(guestaddr, vcpu->arch.gmap); + if (IS_ERR_VALUE(uaddress)) + uaddress = -EFAULT; + return (void __user *)uaddress; } static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, -- GitLab From 9e0fdb4145205bea95c2888a195c3ead2652f120 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:41 +0100 Subject: [PATCH 0051/3163] s390/mm,gmap: implement gmap_translate() Implement a gmap_translate() function which translates a guest absolute address to a user space process address without establishing the guest page table entries. This is useful for kvm guest address translations where no memory access is expected to happen soon (e.g. tprot exception handler).
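A typical caller then looks like the following sketch (modeled on the tprot conversion in the next patch, which uses the __gmap_translate() variant under mmap_sem):

	user_address = gmap_translate(address, vcpu->arch.gmap);
	if (IS_ERR_VALUE(user_address))
		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	/* valid translation, and no page tables were populated for it */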
Signed-off-by: Heiko Carstens Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/include/asm/pgtable.h | 2 + arch/s390/mm/pgtable.c | 107 +++++++++++++++++++++++++------- 2 files changed, 87 insertions(+), 22 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a2930844d43..75b8750a16ff 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -759,6 +759,8 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long length); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(unsigned long address, struct gmap *); +unsigned long gmap_translate(unsigned long address, struct gmap *); unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae44d2a34313..2accf7113d13 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -379,45 +379,108 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, } EXPORT_SYMBOL_GPL(gmap_map_segment); +static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) +{ + unsigned long *table; + + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + return table; +} + +/** + * __gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, vmaddr, segment; + struct gmap_pgtable *mp; + struct page *page; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return PTR_ERR(segment_ptr); + /* Convert the gmap address to an mm address. 
*/ + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INV)) { + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } else if (segment & _SEGMENT_ENTRY_RO) { + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + return vmaddr | (address & ~PMD_MASK); + } + return -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + */ +unsigned long gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + /* * this function is assumed to be called with mmap_sem held */ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) { - unsigned long *table, vmaddr, segment; - struct mm_struct *mm; + unsigned long *segment_ptr, vmaddr, segment; + struct vm_area_struct *vma; struct gmap_pgtable *mp; struct gmap_rmap *rmap; - struct vm_area_struct *vma; + struct mm_struct *mm; struct page *page; pgd_t *pgd; pud_t *pud; pmd_t *pmd; current->thread.gmap_addr = address; - mm = gmap->mm; - /* Walk the gmap address space page table */ - table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) - return -EFAULT; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) return -EFAULT; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) - return -EFAULT; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 20) & 0x7ff); - /* Convert the gmap address to an mm address. */ - segment = *table; - if (likely(!(segment & _SEGMENT_ENTRY_INV))) { + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INV)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); } else if (segment & _SEGMENT_ENTRY_RO) { + mm = gmap->mm; vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; vma = find_vma(mm, vmaddr); if (!vma || vma->vm_start > vmaddr) @@ -441,12 +504,12 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) /* Link gmap segment table entry location to page table. */ page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; - rmap->entry = table; + rmap->entry = segment_ptr; spin_lock(&mm->page_table_lock); list_add(&rmap->list, &mp->mapper); spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. 
*/ - *table = pmd_val(*pmd) & PAGE_MASK; + *segment_ptr = pmd_val(*pmd) & PAGE_MASK; return vmaddr | (address & ~PMD_MASK); } return -EFAULT; -- GitLab From 59a1fa2d80c0d351755cb29273b2b256dc4b3a11 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:42 +0100 Subject: [PATCH 0052/3163] s390/kvm,tprot: use new gmap_translate() function When out-of-memory the tprot code incorrectly injected a program check for the guest which reported an addressing exception even if the guest address was valid. Let's use the new gmap_translate() which translates a guest address to a user space address whithout the chance of running into an out-of-memory situation. Also make it more explicit that for -EFAULT we won't find a vma. Signed-off-by: Heiko Carstens Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/priv.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0ef9894606e5..75ad91e38e8a 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -575,20 +575,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) return -EOPNOTSUPP; - - /* we must resolve the address without holding the mmap semaphore. - * This is ok since the userspace hypervisor is not supposed to change - * the mapping while the guest queries the memory. Otherwise the guest - * might crash or get wrong info anyway. */ - user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); - down_read(¤t->mm->mmap_sem); + user_address = __gmap_translate(address1, vcpu->arch.gmap); + if (IS_ERR_VALUE(user_address)) + goto out_inject; vma = find_vma(current->mm, user_address); - if (!vma) { - up_read(¤t->mm->mmap_sem); - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - + if (!vma) + goto out_inject; vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); @@ -597,6 +590,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu) up_read(¤t->mm->mmap_sem); return 0; + +out_inject: + up_read(¤t->mm->mmap_sem); + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) -- GitLab From dc5008b9bf6adb0c0a5afba6fb376a85451b2697 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:43 +0100 Subject: [PATCH 0053/3163] s390/kvm: remove explicit -EFAULT return code checking on guest access Let's change to the paradigm that every return code from guest memory access functions that is not zero translates to -EFAULT and do not explictly compare. Explictly comparing the return value with -EFAULT has already shown to be a bit fragile. In addition this is closer to the handling of copy_to/from_user functions, which imho is in general a good idea. Also shorten the return code handling in interrupt.c a bit. 
Signed-off-by: Heiko Carstens Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/intercept.c | 4 +- arch/s390/kvm/interrupt.c | 241 +++++++++++--------------------------- arch/s390/kvm/priv.c | 6 +- 3 files changed, 74 insertions(+), 177 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index f26ff1e31bdb..9b2204759445 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -45,7 +45,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) do { rc = get_guest_u64(vcpu, useraddr, &vcpu->arch.sie_block->gcr[reg]); - if (rc == -EFAULT) { + if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); break; } @@ -79,7 +79,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { rc = get_guest_u32(vcpu, useraddr, &val); - if (rc == -EFAULT) { + if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); break; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37116a77cb4b..5afa931aed11 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; - int rc, exception = 0; + int rc = 0; switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -188,74 +188,38 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->emerg.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); + rc |= put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_EXTERNAL_CALL: VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->extcall.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); + rc |= put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); vcpu->stat.deliver_service_signal++; 
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); break; - case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", inti->ext.ext_params, inti->ext.ext_params2); @@ -263,34 +227,16 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, inti->ext.ext_params2); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, - inti->ext.ext_params2); - if (rc == -EFAULT) - exception = 1; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); + rc |= put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + rc |= put_guest_u64(vcpu, __LC_EXT_PARAMS2, + inti->ext.ext_params2); break; - case KVM_S390_SIGP_STOP: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); vcpu->stat.deliver_stop_signal++; @@ -313,18 +259,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_restart_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 0); - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, - restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = copy_to_guest(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), + sizeof(psw_t)); atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; - case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", inti->pgm.code, @@ -332,24 +274,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->pgm.code, 0); - rc = 
put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_PGM_ILC, - table[vcpu->arch.sie_block->ipa >> 14]); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_PGM_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); + rc |= put_guest_u16(vcpu, __LC_PGM_ILC, + table[vcpu->arch.sie_block->ipa >> 14]); + rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_MCHK: @@ -358,24 +289,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->mchk.cr14, inti->mchk.mcic); - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_PREFIXED); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_MCK_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); + rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_MCK_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: @@ -388,67 +308,44 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, param0, param1); - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, - inti->io.subchannel_id); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, - inti->io.subchannel_nr); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, - inti->io.io_int_parm); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, - inti->io.io_int_word); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_IO_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, + inti->io.subchannel_id); + rc |= put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, + inti->io.subchannel_nr); + rc |= put_guest_u32(vcpu, __LC_IO_INT_PARM, + inti->io.io_int_parm); + rc |= put_guest_u32(vcpu, __LC_IO_INT_WORD, + inti->io.io_int_word); + rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_IO_NEW_PSW, sizeof(psw_t)); break; } default: BUG(); } - if (exception) { + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); + "delivery, killing userspace\n"); do_exit(SIGKILL); } } static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { - int rc, exception 
= 0; + int rc; if (psw_extint_disabled(vcpu)) return 0; if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) return 0; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); - if (rc == -EFAULT) - exception = 1; - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - if (exception) { + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " "delivery, killing userspace\n"); do_exit(SIGKILL); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 75ad91e38e8a..34b42dc285ee 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -108,7 +108,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) } rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); - if (rc == -EFAULT) { + if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; } @@ -230,7 +230,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); - if (rc == -EFAULT) + if (rc) kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); else { VCPU_EVENT(vcpu, 5, "store facility list value %x", @@ -348,7 +348,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) } rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); - if (rc == -EFAULT) { + if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; } -- GitLab From 396083a964aa4e86061d0e3449b1e0548a8197a9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:44 +0100 Subject: [PATCH 0054/3163] s390/kvm,gaccess: shorten put/get_guest code The put_guest_u*/get_guest_u* are nothing but wrappers for the regular put_user/get_user uaccess functions. The only difference is that before accessing user space the guest address must be translated to a user space address. Change the order of arguments for the guest access functions so they match their uaccess parts. Also remove the u* suffix, so we simply have put_guest/get_guest which will automatically use the right size dependent on pointer type of the destination/source that now must be correct. In result the same behaviour as put_user/get_user except that accesses must be aligned. 
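The whole trick is that __typeof__ and sizeof(*(gptr)) recover the access size and alignment from the pointer type, so one macro replaces the put_guest_u8/u16/u32/u64 family. A minimal, kernel-independent sketch of that idea in GNU C follows; translate() is a hypothetical stand-in for gmap_fault() and a plain store stands in for put_user():

  #include <stdio.h>
  #include <stdint.h>

  /* Hypothetical stand-in for the guest-to-user translation. */
  static void *translate(void *gptr) { return gptr; }

  /*
   * Access size and alignment mask come from the pointer type, so
   * one macro covers 1-, 2-, 4- and 8-byte guest accesses.
   */
  #define put_guest(x, gptr) ({                                        \
          __typeof__(gptr) __uptr = translate(gptr);                   \
          unsigned long __mask = sizeof(*(gptr)) - 1;                  \
          int __ret = 0;                                               \
                                                                       \
          if ((unsigned long)__uptr & __mask)                          \
                  __ret = -1;            /* unaligned guest address */ \
          else                                                         \
                  *__uptr = (x);         /* kernel: put_user() */      \
          __ret;                                                       \
  })

  int main(void)
  {
          uint16_t code = 0;
          uint64_t parm = 0;

          printf("%d %d\n", put_guest(0x1201, &code),   /* 2-byte store */
                            put_guest(42, &parm));      /* 8-byte store */
          printf("%#x %llu\n", (unsigned)code, (unsigned long long)parm);
          return 0;
  }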
Signed-off-by: Heiko Carstens Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/gaccess.h | 153 ++++++++++---------------------------- arch/s390/kvm/intercept.c | 6 +- arch/s390/kvm/interrupt.c | 52 +++++++------ arch/s390/kvm/priv.c | 22 +++--- 4 files changed, 81 insertions(+), 152 deletions(-) diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 84d01dd7a8e4..82f450ecb585 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,122 +18,47 @@ #include #include "kvm-s390.h" -static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, - unsigned long guestaddr) +static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr) { unsigned long prefix = vcpu->arch.sie_block->prefix; - unsigned long uaddress; - - if (guestaddr < 2 * PAGE_SIZE) - guestaddr += prefix; - else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) - guestaddr -= prefix; - uaddress = gmap_fault(guestaddr, vcpu->arch.gmap); - if (IS_ERR_VALUE(uaddress)) - uaddress = -EFAULT; - return (void __user *)uaddress; -} - -static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (unsigned long __user *) uptr); -} - -static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u32 __user *) uptr); -} - -static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR(uptr)) - return PTR_ERR(uptr); - - return get_user(*result, (u16 __user *) uptr); -} - -static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u8 __user *) uptr); -} - -static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u64 __user *) uptr); -} - -static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u32 __user *) uptr); -} - -static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u16 __user *) uptr); -} - -static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return 
PTR_ERR((void __force *) uptr); - - return put_user(value, (u8 __user *) uptr); + unsigned long gaddr = (unsigned long) gptr; + unsigned long uaddr; + + if (gaddr < 2 * PAGE_SIZE) + gaddr += prefix; + else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) + gaddr -= prefix; + uaddr = gmap_fault(gaddr, vcpu->arch.gmap); + if (IS_ERR_VALUE(uaddr)) + uaddr = -EFAULT; + return (void *)uaddr; } +#define get_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr); \ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET(__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = get_user(x, __uptr); \ + } \ + __ret; \ +}) + +#define put_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr); \ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET(__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = put_user(x, __uptr); \ + } \ + __ret; \ +}) static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, unsigned long guestdest, @@ -144,7 +69,7 @@ static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u8 *data = from; for (i = 0; i < n; i++) { - rc = put_guest_u8(vcpu, guestdest++, *(data++)); + rc = put_guest(vcpu, *(data++), (u8 *)guestdest++); if (rc < 0) return rc; } @@ -270,7 +195,7 @@ static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, u8 *data = to; for (i = 0; i < n; i++) { - rc = get_guest_u8(vcpu, guestsrc++, data++); + rc = get_guest(vcpu, *(data++), (u8 *)guestsrc++); if (rc < 0) return rc; } diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9b2204759445..64744003a66e 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -43,8 +43,8 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); do { - rc = get_guest_u64(vcpu, useraddr, - &vcpu->arch.sie_block->gcr[reg]); + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 *) useraddr); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); break; @@ -78,7 +78,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { - rc = get_guest_u32(vcpu, useraddr, &val); + rc = get_guest(vcpu, val, (u32 *) useraddr); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); break; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5afa931aed11..d78824b18e9d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -188,8 +188,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->emerg.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); - rc |= put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); + rc = put_guest(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, inti->emerg.code, + (u16 *)__LC_EXT_CPU_ADDR); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -200,8 +201,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->extcall.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); - rc |= put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); + rc = put_guest(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 
inti->extcall.code, + (u16 *)__LC_EXT_CPU_ADDR); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -213,12 +215,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_service_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); + rc = put_guest(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, __LC_EXT_NEW_PSW, sizeof(psw_t)); - rc |= put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); break; case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", @@ -227,15 +230,16 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, inti->ext.ext_params2); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); - rc |= put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); + rc = put_guest(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, __LC_EXT_NEW_PSW, sizeof(psw_t)); - rc |= put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - rc |= put_guest_u64(vcpu, __LC_EXT_PARAMS2, - inti->ext.ext_params2); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); break; case KVM_S390_SIGP_STOP: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); @@ -274,9 +278,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->pgm.code, 0); - rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); - rc |= put_guest_u16(vcpu, __LC_PGM_ILC, - table[vcpu->arch.sie_block->ipa >> 14]); + rc = put_guest(vcpu, inti->pgm.code, (u16 *)__LC_PGM_INT_CODE); + rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14], + (u16 *)__LC_PGM_ILC); rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -291,7 +295,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, inti->mchk.mcic); rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); + rc |= put_guest(vcpu, inti->mchk.mcic, (u64 *) __LC_MCCK_CODE); rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -308,14 +312,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, param0, param1); - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, - inti->io.subchannel_id); - rc |= put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, - inti->io.subchannel_nr); - rc |= put_guest_u32(vcpu, __LC_IO_INT_PARM, - inti->io.io_int_parm); - rc |= put_guest_u32(vcpu, __LC_IO_INT_WORD, - inti->io.io_int_word); + rc = put_guest(vcpu, inti->io.subchannel_id, + (u16 *) __LC_SUBCHANNEL_ID); + rc |= put_guest(vcpu, 
inti->io.subchannel_nr, + (u16 *) __LC_SUBCHANNEL_NR); + rc |= put_guest(vcpu, inti->io.io_int_parm, + (u32 *) __LC_IO_INT_PARM); + rc |= put_guest(vcpu, inti->io.io_int_word, + (u32 *) __LC_IO_INT_WORD); rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -340,7 +344,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) return 0; if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) return 0; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); + rc = put_guest(vcpu, 0x1004, (u16 *)__LC_EXT_INT_CODE); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 34b42dc285ee..cb07147cda73 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -41,7 +41,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) } /* get the value */ - if (get_guest_u32(vcpu, operand2, &address)) { + if (get_guest(vcpu, address, (u32 *) operand2)) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; } @@ -82,7 +82,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) address = address & 0x7fffe000u; /* get the value */ - if (put_guest_u32(vcpu, operand2, address)) { + if (put_guest(vcpu, address, (u32 *)operand2)) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; } @@ -107,7 +107,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) goto out; } - rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); + rc = put_guest(vcpu, vcpu->vcpu_id, (u16 *)useraddr); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; @@ -142,18 +142,18 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * Store the two-word I/O interruption code into the * provided area. */ - put_guest_u16(vcpu, addr, inti->io.subchannel_id); - put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); - put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); + put_guest(vcpu, inti->io.subchannel_id, (u16 *) addr); + put_guest(vcpu, inti->io.subchannel_nr, (u16 *) (addr + 2)); + put_guest(vcpu, inti->io.io_int_parm, (u32 *) (addr + 4)); } else { /* * Store the three-word I/O interruption code into * the appropriate lowcore area. */ - put_guest_u16(vcpu, 184, inti->io.subchannel_id); - put_guest_u16(vcpu, 186, inti->io.subchannel_nr); - put_guest_u32(vcpu, 188, inti->io.io_int_parm); - put_guest_u32(vcpu, 192, inti->io.io_int_word); + put_guest(vcpu, inti->io.subchannel_id, (u16 *) 184); + put_guest(vcpu, inti->io.subchannel_nr, (u16 *) 186); + put_guest(vcpu, inti->io.io_int_parm, (u32 *) 188); + put_guest(vcpu, inti->io.io_int_word, (u32 *) 192); } cc = 1; } else @@ -347,7 +347,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) goto out; } - rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); + rc = put_guest(vcpu, vcpu->arch.stidp_data, (u64 *)operand2); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; -- GitLab From f9dc72e82d32cc9fe40d1dea7709d434bba2d4a9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:45 +0100 Subject: [PATCH 0055/3163] s390/kvm,gaccess: shorten copy_to/from_guest code The code can be significantly shortened. There is no functional change, except that for large (> PAGE_SIZE) copies the guest translation would be done more frequently. However, there is not a single user which does this currently. If one gets added later on this functionality can be added easily again. 
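The shortened form works because it never copies across an untranslated page boundary: it re-translates at every page, copies min(len, bytes-to-page-end), and advances. A self-contained sketch of that loop, with a hypothetical translate() in place of __gptr_to_uptr() and memcpy() in place of copy_to_user(), 4 KiB pages assumed:

  #include <string.h>
  #include <stddef.h>

  #define PAGE_SIZE 4096UL

  /* Hypothetical stand-in for __gptr_to_uptr()/gmap_fault(). */
  static void *translate(unsigned long gaddr) { return (void *)gaddr; }

  /* Copy len bytes into guest memory one page chunk at a time. */
  static int copy_to_guest_sketch(unsigned long to, const void *from,
                                  size_t len)
  {
          while (len) {
                  void *uptr = translate(to);
                  size_t chunk;

                  if (!uptr)
                          return -1;             /* kernel: -EFAULT */
                  /* bytes left in the current page of the mapping */
                  chunk = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
                  if (chunk > len)
                          chunk = len;
                  memcpy(uptr, from, chunk);     /* kernel: copy_to_user() */
                  len  -= chunk;
                  from  = (const char *)from + chunk;
                  to   += chunk;
          }
          return 0;
  }

One loop then covers all four copy_to/from_guest[_absolute] variants; direction and prefixing just select which pointer gets translated.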
Signed-off-by: Heiko Carstens Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/gaccess.h | 294 ++++++---------------------------------- 1 file changed, 41 insertions(+), 253 deletions(-) diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 82f450ecb585..8608d7e6a334 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,16 +18,19 @@ #include #include "kvm-s390.h" -static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr) +static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr, + int prefixing) { unsigned long prefix = vcpu->arch.sie_block->prefix; unsigned long gaddr = (unsigned long) gptr; unsigned long uaddr; - if (gaddr < 2 * PAGE_SIZE) - gaddr += prefix; - else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) - gaddr -= prefix; + if (prefixing) { + if (gaddr < 2 * PAGE_SIZE) + gaddr += prefix; + else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) + gaddr -= prefix; + } uaddr = gmap_fault(gaddr, vcpu->arch.gmap); if (IS_ERR_VALUE(uaddr)) uaddr = -EFAULT; @@ -36,7 +39,7 @@ static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr) #define get_guest(vcpu, x, gptr) \ ({ \ - __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr); \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ int __ret = PTR_RET(__uptr); \ \ @@ -49,7 +52,7 @@ static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr) #define put_guest(vcpu, x, gptr) \ ({ \ - __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr); \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ int __ret = PTR_RET(__uptr); \ \ @@ -60,255 +63,40 @@ static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr) __ret; \ }) -static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int rc; - unsigned long i; - u8 *data = from; - - for (i = 0; i < n; i++) { - rc = put_guest(vcpu, *(data++), (u8 *)guestdest++); - if (rc < 0) - return rc; - } - return 0; -} - -static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int r; - void __user *uptr; - unsigned long size; - - if (guestdest + n < guestdest) - return -EFAULT; - - /* simple case: all within one segment table entry? 
*/ - if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - size = PMD_SIZE - (guestdest & ~PMD_MASK); - - r = copy_to_user(uptr, from, size); - - if (r) { - r = -EFAULT; - goto out; - } - from += size; - n -= size; - guestdest += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - from += PMD_SIZE; - n -= PMD_SIZE; - guestdest += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - return __copy_to_guest_fast(vcpu, guestdest, from, n); -} - -static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, - void *from, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) - goto slowpath; - - if ((guestdest < prefix) && (guestdest + n > prefix)) - goto slowpath; - - if ((guestdest < prefix + 2 * PAGE_SIZE) - && (guestdest + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestdest < 2 * PAGE_SIZE) - guestdest += prefix; - else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) - guestdest -= prefix; - - return __copy_to_guest_fast(vcpu, guestdest, from, n); -slowpath: - return __copy_to_guest_slow(vcpu, guestdest, from, n); -} - -static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) +static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, + unsigned long from, unsigned long len, + int to_guest, int prefixing) { - int rc; - unsigned long i; - u8 *data = to; - - for (i = 0; i < n; i++) { - rc = get_guest(vcpu, *(data++), (u8 *)guestsrc++); - if (rc < 0) - return rc; + unsigned long _len, rc; + void *uptr; + + while (len) { + uptr = to_guest ? (void *)to : (void *)from; + uptr = __gptr_to_uptr(vcpu, uptr, prefixing); + if (IS_ERR(uptr)) + return -EFAULT; + _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1)); + _len = min(_len, len); + if (to_guest) + rc = copy_to_user(uptr, (void *)from, _len); + else + rc = copy_from_user((void *)to, uptr, _len); + if (rc) + return -EFAULT; + len -= _len; + from += _len; + to += _len; } return 0; } -static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - int r; - void __user *uptr; - unsigned long size; - - if (guestsrc + n < guestsrc) - return -EFAULT; - - /* simple case: all within one segment table entry? 
*/ - if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - size = PMD_SIZE - (guestsrc & ~PMD_MASK); - - r = copy_from_user(to, uptr, size); - - if (r) { - r = -EFAULT; - goto out; - } - to += size; - n -= size; - guestsrc += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); +#define copy_to_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1) +#define copy_from_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1) +#define copy_to_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0) +#define copy_from_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0) - r = copy_from_user(to, uptr, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - to += PMD_SIZE; - n -= PMD_SIZE; - guestsrc += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -} - -static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) - goto slowpath; - - if ((guestsrc < prefix) && (guestsrc + n > prefix)) - goto slowpath; - - if ((guestsrc < prefix + 2 * PAGE_SIZE) - && (guestsrc + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestsrc < 2 * PAGE_SIZE) - guestsrc += prefix; - else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) - guestsrc -= prefix; - - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -slowpath: - return __copy_from_guest_slow(vcpu, to, guestsrc, n); -} -#endif +#endif /* __KVM_S390_GACCESS_H */ -- GitLab From 7c959e82ac331396d05e7118a48c7c1debbefdf8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:46 +0100 Subject: [PATCH 0056/3163] s390/kvm: cleanup/fix handle_tpi() - add missing specification exception check - remove one level of indentation - use defines instead of magic numbers Signed-off-by: Heiko Carstens Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/priv.c | 54 ++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index cb07147cda73..d64382c1ed61 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -129,39 +130,44 @@ static int handle_skey(struct kvm_vcpu *vcpu) static 
int handle_tpi(struct kvm_vcpu *vcpu) { - u64 addr; struct kvm_s390_interrupt_info *inti; + u64 addr; int cc; addr = kvm_s390_get_base_disp_s(vcpu); - + if (addr & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + cc = 0; inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); - if (inti) { - if (addr) { - /* - * Store the two-word I/O interruption code into the - * provided area. - */ - put_guest(vcpu, inti->io.subchannel_id, (u16 *) addr); - put_guest(vcpu, inti->io.subchannel_nr, (u16 *) (addr + 2)); - put_guest(vcpu, inti->io.io_int_parm, (u32 *) (addr + 4)); - } else { - /* - * Store the three-word I/O interruption code into - * the appropriate lowcore area. - */ - put_guest(vcpu, inti->io.subchannel_id, (u16 *) 184); - put_guest(vcpu, inti->io.subchannel_nr, (u16 *) 186); - put_guest(vcpu, inti->io.io_int_parm, (u32 *) 188); - put_guest(vcpu, inti->io.io_int_word, (u32 *) 192); - } - cc = 1; - } else - cc = 0; + if (!inti) + goto no_interrupt; + cc = 1; + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 *) addr); + put_guest(vcpu, inti->io.subchannel_nr, (u16 *) (addr + 2)); + put_guest(vcpu, inti->io.io_int_parm, (u32 *) (addr + 4)); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 *) __LC_SUBCHANNEL_ID); + put_guest(vcpu, inti->io.subchannel_nr, (u16 *) __LC_SUBCHANNEL_NR); + put_guest(vcpu, inti->io.io_int_parm, (u32 *) __LC_IO_INT_PARM); + put_guest(vcpu, inti->io.io_int_word, (u32 *) __LC_IO_INT_WORD); + } kfree(inti); +no_interrupt: /* Set condition code and we're done. */ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; +out: return 0; } -- GitLab From 0a75ca277c9f1145df37f8bbad10aecf0049a554 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Mar 2013 13:14:47 +0100 Subject: [PATCH 0057/3163] s390/kvm,gaccess: add address space annotations Add missing address space annotations to all put_guest()/get_guest() callers. 
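For context, __user is purely a static-analysis marker: sparse (which defines __CHECKER__) sees a noderef pointer in a separate address space, while gcc sees nothing. A minimal sketch of the definition and the warning it buys; the copy helper stub is hypothetical:

  #ifdef __CHECKER__
  # define __user __attribute__((noderef, address_space(1)))
  #else
  # define __user
  #endif

  /* Hypothetical stub with the usual copy_from_user() shape. */
  int copy_from_user_stub(void *to, const void __user *from, unsigned long n);

  int read_guest_byte(const unsigned char __user *uptr, unsigned char *out)
  {
          /*
           * Writing '*uptr' here would make sparse warn about
           * dereferencing a noderef pointer; the data must go
           * through an accessor that takes a __user pointer.
           */
          return copy_from_user_stub(out, uptr, 1);
  }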
Signed-off-by: Heiko Carstens Acked-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/gaccess.h | 21 +++++++++++---------- arch/s390/kvm/intercept.c | 4 ++-- arch/s390/kvm/interrupt.c | 36 ++++++++++++++++++------------------ arch/s390/kvm/priv.c | 22 +++++++++++----------- 4 files changed, 42 insertions(+), 41 deletions(-) diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 8608d7e6a334..302e0e52b009 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,8 +18,9 @@ #include #include "kvm-s390.h" -static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr, - int prefixing) +static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, + void __user *gptr, + int prefixing) { unsigned long prefix = vcpu->arch.sie_block->prefix; unsigned long gaddr = (unsigned long) gptr; @@ -34,14 +35,14 @@ static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr, uaddr = gmap_fault(gaddr, vcpu->arch.gmap); if (IS_ERR_VALUE(uaddr)) uaddr = -EFAULT; - return (void *)uaddr; + return (void __user *)uaddr; } #define get_guest(vcpu, x, gptr) \ ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET(__uptr); \ + int __ret = PTR_RET((void __force *)__uptr); \ \ if (!__ret) { \ BUG_ON((unsigned long)__uptr & __mask); \ @@ -54,7 +55,7 @@ static inline void *__gptr_to_uptr(struct kvm_vcpu *vcpu, void *gptr, ({ \ __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ int __mask = sizeof(__typeof__(*(gptr))) - 1; \ - int __ret = PTR_RET(__uptr); \ + int __ret = PTR_RET((void __force *)__uptr); \ \ if (!__ret) { \ BUG_ON((unsigned long)__uptr & __mask); \ @@ -68,19 +69,19 @@ static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, int to_guest, int prefixing) { unsigned long _len, rc; - void *uptr; + void __user *uptr; while (len) { - uptr = to_guest ? (void *)to : (void *)from; + uptr = to_guest ? 
(void __user *)to : (void __user *)from; uptr = __gptr_to_uptr(vcpu, uptr, prefixing); - if (IS_ERR(uptr)) + if (IS_ERR((void __force *)uptr)) return -EFAULT; _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1)); _len = min(_len, len); if (to_guest) - rc = copy_to_user(uptr, (void *)from, _len); + rc = copy_to_user((void __user *) uptr, (void *)from, _len); else - rc = copy_from_user((void *)to, uptr, _len); + rc = copy_from_user((void *)to, (void __user *)uptr, _len); if (rc) return -EFAULT; len -= _len; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 64744003a66e..c6ba4dfd7f1e 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -44,7 +44,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) do { rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], - (u64 *) useraddr); + (u64 __user *) useraddr); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); break; @@ -78,7 +78,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { - rc = get_guest(vcpu, val, (u32 *) useraddr); + rc = get_guest(vcpu, val, (u32 __user *) useraddr); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); break; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d78824b18e9d..5c948177529e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -188,9 +188,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->emerg.code, 0); - rc = put_guest(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE); + rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE); rc |= put_guest(vcpu, inti->emerg.code, - (u16 *)__LC_EXT_CPU_ADDR); + (u16 __user *)__LC_EXT_CPU_ADDR); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -201,9 +201,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->extcall.code, 0); - rc = put_guest(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE); + rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE); rc |= put_guest(vcpu, inti->extcall.code, - (u16 *)__LC_EXT_CPU_ADDR); + (u16 __user *)__LC_EXT_CPU_ADDR); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -215,13 +215,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_service_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - rc = put_guest(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE); + rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, __LC_EXT_NEW_PSW, sizeof(psw_t)); rc |= put_guest(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); + (u32 __user *)__LC_EXT_PARAMS); break; case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", @@ -230,16 +230,16 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, inti->ext.ext_params2); - rc = put_guest(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR); + rc = put_guest(vcpu, 0x2603, (u16 __user 
*)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, __LC_EXT_NEW_PSW, sizeof(psw_t)); rc |= put_guest(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); + (u32 __user *)__LC_EXT_PARAMS); rc |= put_guest(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + (u64 __user *)__LC_EXT_PARAMS2); break; case KVM_S390_SIGP_STOP: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); @@ -278,9 +278,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->pgm.code, 0); - rc = put_guest(vcpu, inti->pgm.code, (u16 *)__LC_PGM_INT_CODE); + rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE); rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14], - (u16 *)__LC_PGM_ILC); + (u16 __user *)__LC_PGM_ILC); rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -295,7 +295,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, inti->mchk.mcic); rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest(vcpu, inti->mchk.mcic, (u64 *) __LC_MCCK_CODE); + rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE); rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -313,13 +313,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, param0, param1); rc = put_guest(vcpu, inti->io.subchannel_id, - (u16 *) __LC_SUBCHANNEL_ID); + (u16 __user *) __LC_SUBCHANNEL_ID); rc |= put_guest(vcpu, inti->io.subchannel_nr, - (u16 *) __LC_SUBCHANNEL_NR); + (u16 __user *) __LC_SUBCHANNEL_NR); rc |= put_guest(vcpu, inti->io.io_int_parm, - (u32 *) __LC_IO_INT_PARM); + (u32 __user *) __LC_IO_INT_PARM); rc |= put_guest(vcpu, inti->io.io_int_word, - (u32 *) __LC_IO_INT_WORD); + (u32 __user *) __LC_IO_INT_WORD); rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, @@ -344,7 +344,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) return 0; if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) return 0; - rc = put_guest(vcpu, 0x1004, (u16 *)__LC_EXT_INT_CODE); + rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d64382c1ed61..7db2ad076f31 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -42,7 +42,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) } /* get the value */ - if (get_guest(vcpu, address, (u32 *) operand2)) { + if (get_guest(vcpu, address, (u32 __user *) operand2)) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; } @@ -83,7 +83,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) address = address & 0x7fffe000u; /* get the value */ - if (put_guest(vcpu, address, (u32 *)operand2)) { + if (put_guest(vcpu, address, (u32 __user *)operand2)) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; } @@ -108,7 +108,7 @@ static int handle_store_cpu_address(struct 
kvm_vcpu *vcpu) goto out; } - rc = put_guest(vcpu, vcpu->vcpu_id, (u16 *)useraddr); + rc = put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; @@ -149,18 +149,18 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * Store the two-word I/O interruption code into the * provided area. */ - put_guest(vcpu, inti->io.subchannel_id, (u16 *) addr); - put_guest(vcpu, inti->io.subchannel_nr, (u16 *) (addr + 2)); - put_guest(vcpu, inti->io.io_int_parm, (u32 *) (addr + 4)); + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); } else { /* * Store the three-word I/O interruption code into * the appropriate lowcore area. */ - put_guest(vcpu, inti->io.subchannel_id, (u16 *) __LC_SUBCHANNEL_ID); - put_guest(vcpu, inti->io.subchannel_nr, (u16 *) __LC_SUBCHANNEL_NR); - put_guest(vcpu, inti->io.io_int_parm, (u32 *) __LC_IO_INT_PARM); - put_guest(vcpu, inti->io.io_int_word, (u32 *) __LC_IO_INT_WORD); + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM); + put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD); } kfree(inti); no_interrupt: @@ -353,7 +353,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) goto out; } - rc = put_guest(vcpu, vcpu->arch.stidp_data, (u64 *)operand2); + rc = put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2); if (rc) { kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out; -- GitLab From 1044b0303464788820984a5b01c0a81860dce749 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 6 Mar 2013 16:05:07 +0900 Subject: [PATCH 0058/3163] KVM: MMU: Fix and clean up for_each_gfn_* macros The expression (sp)->gfn should not be expanded using @gfn. Although no user of these macros passes a string other than gfn now, this should be fixed before anyone sees strange errors. 
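The underlying hazard is plain macro-argument substitution: the gfn token after '->' in the replacement list is itself a parameter name, so it is replaced by whatever expression the caller passes. A toy reproduction (struct and macro names are invented):

  #include <stdio.h>

  struct page { unsigned long gfn; };

  /* Buggy: the 'gfn' after '->' is a macro parameter. */
  #define GFN_MATCHES_BAD(sp, gfn)   ((sp)->gfn == (gfn))
  /* Fixed: prefixed parameters cannot capture the member name. */
  #define GFN_MATCHES_OK(_sp, _gfn)  ((_sp)->gfn == (_gfn))

  int main(void)
  {
          struct page p = { .gfn = 42 };
          unsigned long base = 40;

          /*
           * GFN_MATCHES_BAD(&p, base + 2) would expand to
           *         ((&p)->base + 2 == (base + 2))
           * and fail to compile: struct page has no member 'base'.
           */
          printf("%d\n", GFN_MATCHES_OK(&p, base + 2));
          return 0;
  }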
Note: ignored the following checkpatch errors: ERROR: Macros with complex values should be enclosed in parenthesis ERROR: trailing statements should be on next line Reviewed-by: Gleb Natapov Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 956ca358108a..3e4822b449da 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1644,16 +1644,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); -#define for_each_gfn_sp(kvm, sp, gfn) \ - hlist_for_each_entry(sp, \ - &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ - if ((sp)->gfn != (gfn)) {} else - -#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ - hlist_for_each_entry(sp, \ - &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ - if ((sp)->gfn != (gfn) || (sp)->role.direct || \ - (sp)->role.invalid) {} else +#define for_each_gfn_sp(_kvm, _sp, _gfn) \ + hlist_for_each_entry(_sp, \ + &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ + if ((_sp)->gfn != (_gfn)) {} else + +#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ + for_each_gfn_sp(_kvm, _sp, _gfn) \ + if ((_sp)->role.direct || (_sp)->role.invalid) {} else /* @sp->gfn should be write-protected at the call site */ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, -- GitLab From 945315b9dbbe102bb3393a34ea4a10fb2a5ff303 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 6 Mar 2013 16:05:52 +0900 Subject: [PATCH 0059/3163] KVM: MMU: Use list_for_each_entry_safe in kvm_mmu_commit_zap_page() We are traversing the linked list, invalid_list, deleting each entry by kvm_mmu_free_page(). _safe version is there for such a case. Reviewed-by: Gleb Natapov Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3e4822b449da..0f42645a063c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2087,7 +2087,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list) { - struct kvm_mmu_page *sp; + struct kvm_mmu_page *sp, *nsp; if (list_empty(invalid_list)) return; @@ -2104,11 +2104,10 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, */ kvm_flush_remote_tlbs(kvm); - do { - sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); + list_for_each_entry_safe(sp, nsp, invalid_list, link) { WARN_ON(!sp->role.invalid || sp->root_count); kvm_mmu_free_page(sp); - } while (!list_empty(invalid_list)); + } } /* -- GitLab From 5da596078f915a62e39a20e582308eab91b88c9a Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 6 Mar 2013 16:06:58 +0900 Subject: [PATCH 0060/3163] KVM: MMU: Introduce a helper function for FIFO zapping Make the code for zapping the oldest mmu page, placed at the tail of the active list, a separate function. 
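The helper's contract, reduced to a freestanding sketch: pop the tail (oldest) entry of a circular list where new pages are added at the head, report whether anything was zapped, and let callers loop until their goal is met. A plain doubly linked list stands in for list_head and all names are illustrative:

  #include <stdbool.h>
  #include <stdio.h>

  struct node { struct node *prev, *next; int id; };

  /* head->prev is the tail, i.e. the oldest entry. */
  static bool zap_oldest(struct node *head)
  {
          struct node *oldest = head->prev;

          if (oldest == head)             /* empty list */
                  return false;
          oldest->prev->next = head;      /* unlink the tail */
          head->prev = oldest->prev;
          printf("zapping %d\n", oldest->id);
          return true;
  }

  /* Caller pattern mirroring kvm_mmu_change_mmu_pages(). */
  static void shrink_to(struct node *head, int *used, int goal)
  {
          while (*used > goal && zap_oldest(head))
                  (*used)--;
  }

  int main(void)
  {
          struct node head = { &head, &head, 0 };
          struct node n1 = { &head, &head, 1 };
          int used = 1;

          head.next = head.prev = &n1;    /* one-entry list */
          shrink_to(&head, &used, 0);     /* prints "zapping 1" */
          return 0;
  }

Returning bool rather than void is what lets the call sites in the patch bail out of their while loops as soon as the active list runs dry.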
Reviewed-by: Gleb Natapov Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 55 +++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0f42645a063c..fdacabba6f62 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2110,6 +2110,21 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, } } +static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, + struct list_head *invalid_list) +{ + struct kvm_mmu_page *sp; + + if (list_empty(&kvm->arch.active_mmu_pages)) + return false; + + sp = list_entry(kvm->arch.active_mmu_pages.prev, + struct kvm_mmu_page, link); + kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); + + return true; +} + /* * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock @@ -2117,23 +2132,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) { LIST_HEAD(invalid_list); - /* - * If we set the number of mmu pages to be smaller be than the - * number of actived pages , we must to free some mmu pages before we - * change the value - */ spin_lock(&kvm->mmu_lock); if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { - while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && - !list_empty(&kvm->arch.active_mmu_pages)) { - struct kvm_mmu_page *page; + /* Need to free some mmu pages to achieve the goal. */ + while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) + if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list)) + break; - page = container_of(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); - } kvm_mmu_commit_zap_page(kvm, &invalid_list); goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; } @@ -4007,13 +4014,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) { LIST_HEAD(invalid_list); - while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES && - !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { - struct kvm_mmu_page *sp; + while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { + if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) + break; - sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_recycled; } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); @@ -4182,19 +4186,6 @@ void kvm_mmu_zap_all(struct kvm *kvm) spin_unlock(&kvm->mmu_lock); } -static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, - struct list_head *invalid_list) -{ - struct kvm_mmu_page *page; - - if (list_empty(&kvm->arch.active_mmu_pages)) - return; - - page = container_of(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, page, invalid_list); -} - static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; @@ -4229,7 +4220,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); + prepare_zap_oldest_mmu_page(kvm, &invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); spin_unlock(&kvm->mmu_lock); -- GitLab From 12fc760fd632a96f49e96f519c4aed4eb279bb61 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 25 Feb 2013 17:38:02 +0900 Subject: [PATCH 0061/3163] f2fs: fix overflow when 
calculating utilization on 32-bit Use div_u64 to fix overflow when calculating utilization. *long int* is 4-bytes on 32-bit so (user blocks * 100) might be overflow if disk size is over e.g. 512GB. Signed-off-by: Changman Lee Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 552dadbb2327..e399bd4d3af8 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -464,8 +464,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) static inline int utilization(struct f2fs_sb_info *sbi) { - return (long int)valid_user_blocks(sbi) * 100 / - (long int)sbi->user_block_count; + return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); } /* -- GitLab From c791126b93e800c68557483235321b201c082910 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 25 Feb 2013 17:15:47 +0900 Subject: [PATCH 0062/3163] pwm_backlight: remove unnecessary ifdefs When the macro such as SIMPLE_DEV_PM_OPS is used, there is no need to use '#ifdef CONFIG_PM' to prevent build error. Thus, this patch removes unnecessary ifdefs. Signed-off-by: Jingoo Han Cc: Thierry Reding Signed-off-by: Thierry Reding --- drivers/video/backlight/pwm_bl.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index fa00304a63d8..1fea627394d7 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -274,7 +274,7 @@ static int pwm_backlight_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int pwm_backlight_suspend(struct device *dev) { struct backlight_device *bl = dev_get_drvdata(dev); @@ -296,19 +296,16 @@ static int pwm_backlight_resume(struct device *dev) backlight_update_status(bl); return 0; } +#endif static SIMPLE_DEV_PM_OPS(pwm_backlight_pm_ops, pwm_backlight_suspend, pwm_backlight_resume); -#endif - static struct platform_driver pwm_backlight_driver = { .driver = { .name = "pwm-backlight", .owner = THIS_MODULE, -#ifdef CONFIG_PM .pm = &pwm_backlight_pm_ops, -#endif .of_match_table = of_match_ptr(pwm_backlight_of_match), }, .probe = pwm_backlight_probe, -- GitLab From 482467ad97b633b28f57c347440d97c108dc4bfb Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 8 Mar 2013 12:45:58 +0900 Subject: [PATCH 0063/3163] pwm: ab8500: use devm_kzalloc() Use devm_kzalloc() to make cleanup paths more simple. 
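The cleanup-path simplification comes from tying the allocation's lifetime to the device: everything registered against the device is freed when it goes away, so probe error paths and remove() need no kfree(). A toy model of that idea follows; the kernel's devres machinery is far more general and all names here are invented:

  #include <stdlib.h>
  #include <stddef.h>

  #define MAX_RES 16

  struct dev { void *res[MAX_RES]; int nres; };

  /* Zeroed allocation owned by the device, not the caller. */
  static void *devm_zalloc_sketch(struct dev *d, size_t size)
  {
          void *p = d->nres < MAX_RES ? calloc(1, size) : NULL;

          if (p)
                  d->res[d->nres++] = p;
          return p;
  }

  /* Runs once at teardown; replaces every per-error-path free. */
  static void dev_release_all(struct dev *d)
  {
          while (d->nres)
                  free(d->res[--d->nres]);
  }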
Signed-off-by: Jingoo Han Signed-off-by: Thierry Reding --- drivers/pwm/pwm-ab8500.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index 4248d0418273..93af1bbf88ce 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -99,7 +99,7 @@ static int ab8500_pwm_probe(struct platform_device *pdev) * Nothing to be done in probe, this is required to get the * device which is required for ab8500 read and write */ - ab8500 = kzalloc(sizeof(*ab8500), GFP_KERNEL); + ab8500 = devm_kzalloc(&pdev->dev, sizeof(*ab8500), GFP_KERNEL); if (ab8500 == NULL) { dev_err(&pdev->dev, "failed to allocate memory\n"); return -ENOMEM; @@ -111,10 +111,8 @@ static int ab8500_pwm_probe(struct platform_device *pdev) ab8500->chip.npwm = 1; err = pwmchip_add(&ab8500->chip); - if (err < 0) { - kfree(ab8500); + if (err < 0) return err; - } dev_dbg(&pdev->dev, "pwm probe successful\n"); platform_set_drvdata(pdev, ab8500); @@ -132,7 +130,6 @@ static int ab8500_pwm_remove(struct platform_device *pdev) return err; dev_dbg(&pdev->dev, "pwm driver removed\n"); - kfree(ab8500); return 0; } -- GitLab From c509a8e521939ca1fd4ba31feca4718328044230 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 8 Mar 2013 13:03:31 +0900 Subject: [PATCH 0064/3163] pwm: samsung: convert s3c_pwm to dev_pm_ops Instead of using legacy suspend/resume methods, using newer dev_pm_ops structure allows better control over power management. Signed-off-by: Jingoo Han Signed-off-by: Thierry Reding --- drivers/pwm/pwm-samsung.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index 5207e6cd8648..a0ece50d70bb 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -289,10 +289,10 @@ static int s3c_pwm_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int s3c_pwm_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int s3c_pwm_suspend(struct device *dev) { - struct s3c_chip *s3c = platform_get_drvdata(pdev); + struct s3c_chip *s3c = dev_get_drvdata(dev); /* No one preserve these values during suspend so reset them * Otherwise driver leaves PWM unconfigured if same values @@ -304,9 +304,9 @@ static int s3c_pwm_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int s3c_pwm_resume(struct platform_device *pdev) +static int s3c_pwm_resume(struct device *dev) { - struct s3c_chip *s3c = platform_get_drvdata(pdev); + struct s3c_chip *s3c = dev_get_drvdata(dev); unsigned long tcon; /* Restore invertion */ @@ -316,21 +316,19 @@ static int s3c_pwm_resume(struct platform_device *pdev) return 0; } - -#else -#define s3c_pwm_suspend NULL -#define s3c_pwm_resume NULL #endif +static SIMPLE_DEV_PM_OPS(s3c_pwm_pm_ops, s3c_pwm_suspend, + s3c_pwm_resume); + static struct platform_driver s3c_pwm_driver = { .driver = { .name = "s3c24xx-pwm", .owner = THIS_MODULE, + .pm = &s3c_pwm_pm_ops, }, .probe = s3c_pwm_probe, .remove = s3c_pwm_remove, - .suspend = s3c_pwm_suspend, - .resume = s3c_pwm_resume, }; static int __init pwm_init(void) -- GitLab From b78f5fc92a836259b69d49129c6c1cad9b03c322 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Mar 2013 11:12:58 +0900 Subject: [PATCH 0065/3163] pwm: pwm-tiecap: add CONFIG_PM_SLEEP to suspend/resume functions This patch adds CONFIG_PM_SLEEP to suspend/resume functions to fix the following build warning when CONFIG_PM_SLEEP is not selected. 
drivers/pwm/pwm-tiecap.c:314:12: warning: 'ecap_pwm_suspend' defined but not used [-Wunused-function] drivers/pwm/pwm-tiecap.c:328:12: warning: 'ecap_pwm_resume' defined but not used [-Wunused-function] Signed-off-by: Jingoo Han Signed-off-by: Thierry Reding --- drivers/pwm/pwm-tiecap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 22e96e2bffd3..f024db06e6c4 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -311,6 +311,7 @@ void ecap_pwm_restore_context(struct ecap_pwm_chip *pc) writew(pc->ctx.ecctl2, pc->mmio_base + ECCTL2); } +#ifdef CONFIG_PM_SLEEP static int ecap_pwm_suspend(struct device *dev) { struct ecap_pwm_chip *pc = dev_get_drvdata(dev); @@ -337,6 +338,7 @@ static int ecap_pwm_resume(struct device *dev) ecap_pwm_restore_context(pc); return 0; } +#endif static SIMPLE_DEV_PM_OPS(ecap_pwm_pm_ops, ecap_pwm_suspend, ecap_pwm_resume); -- GitLab From c26e9bb4ac72312386dc8ffa4dc8c1b560d9b29b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Mar 2013 11:14:16 +0900 Subject: [PATCH 0066/3163] pwm: pwm-tipwmss: add CONFIG_PM_SLEEP to suspend/resume functions This patch adds CONFIG_PM_SLEEP to suspend/resume functions to fix the following build warning when CONFIG_PM_SLEEP is not selected. drivers/pwm/pwm-tipwmss.c:104:12: warning: 'pwmss_suspend' defined but not used [-Wunused-function] drivers/pwm/pwm-tipwmss.c:113:12: warning: 'pwmss_resume' defined but not used [-Wunused-function] Signed-off-by: Jingoo Han Signed-off-by: Thierry Reding --- drivers/pwm/pwm-tipwmss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/pwm/pwm-tipwmss.c index 17cbc59660ec..c9c3d3a1e0eb 100644 --- a/drivers/pwm/pwm-tipwmss.c +++ b/drivers/pwm/pwm-tipwmss.c @@ -101,6 +101,7 @@ static int pwmss_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP static int pwmss_suspend(struct device *dev) { struct pwmss_info *info = dev_get_drvdata(dev); @@ -118,6 +119,7 @@ static int pwmss_resume(struct device *dev) writew(info->pwmss_clkconfig, info->mmio_base + PWMSS_CLKCONFIG); return 0; } +#endif static SIMPLE_DEV_PM_OPS(pwmss_pm_ops, pwmss_suspend, pwmss_resume); -- GitLab From 29258b215af83dbb4ad143cefe7a1e5d0384a605 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Mar 2013 11:14:41 +0900 Subject: [PATCH 0067/3163] pwm: pwm-tiehrpwm: add CONFIG_PM_SLEEP to suspend/resume functions This patch adds CONFIG_PM_SLEEP to suspend/resume functions to fix the following build warning when CONFIG_PM_SLEEP is not selected. 
drivers/pwm/pwm-tiehrpwm.c:562:12: warning: 'ehrpwm_pwm_suspend' defined but not used [-Wunused-function] drivers/pwm/pwm-tiehrpwm.c:580:12: warning: 'ehrpwm_pwm_resume' defined but not used [-Wunused-function] Signed-off-by: Jingoo Han Signed-off-by: Thierry Reding --- drivers/pwm/pwm-tiehrpwm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 8b4c86fa99c8..d058ba90845b 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -559,6 +559,7 @@ void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc) ehrpwm_write(pc->mmio_base, TBCTL, pc->ctx.tbctl); } +#ifdef CONFIG_PM_SLEEP static int ehrpwm_pwm_suspend(struct device *dev) { struct ehrpwm_pwm_chip *pc = dev_get_drvdata(dev); @@ -594,6 +595,7 @@ static int ehrpwm_pwm_resume(struct device *dev) ehrpwm_pwm_restore_context(pc); return 0; } +#endif static SIMPLE_DEV_PM_OPS(ecap_pwm_pm_ops, ehrpwm_pwm_suspend, ehrpwm_pwm_resume); -- GitLab From 3a08a8f9f0936e182d387afd85fdc5d303381521 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Mon, 4 Mar 2013 23:32:07 +0530 Subject: [PATCH 0068/3163] kvm: Record the preemption status of vcpus using preempt notifiers Note that we mark as preempted only when the vcpu's task state was Running during preemption. Thanks Jiannan, Avi for preemption notifier ideas. Thanks Gleb, PeterZ for their precious suggestions. Thanks Srikar for an idea on avoiding rcu lock while checking task state that improved overcommit numbers. Reviewed-by: Chegu Vinod Reviewed-by: Marcelo Tosatti Signed-off-by: Raghavendra K T Signed-off-by: Gleb Natapov --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9fa13ebc3381..0f4941a9c9c8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -253,6 +253,7 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + bool preempted; struct kvm_vcpu_arch arch; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index faf05bddd131..470f2bc8205a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); + vcpu->preempted = false; r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -2880,6 +2881,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) + vcpu->preempted = false; kvm_arch_vcpu_load(vcpu, cpu); } @@ -2889,6 +2892,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (current->state == TASK_RUNNING) + vcpu->preempted = true; kvm_arch_vcpu_put(vcpu); } -- GitLab From 7bc7ae25b1438bb9fe1f176b951d758789847640 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Mon, 4 Mar 2013 23:32:27 +0530 Subject: [PATCH 0069/3163] kvm: Iterate over only vcpus that are preempted This helps in filtering out the eligible candidates further and thus potentially helps in quickly allowing preempted lockholders to run. Note that if a vcpu was spinning during preemption, we filter it by checking whether it was preempted due to pause loop exit.
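Taken together with the previous patch, the boost-candidate filtering in kvm_vcpu_on_spin() reduces to roughly the following (an illustrative sketch only; the actual change is the two-line ACCESS_ONCE check in the hunk below, and kvm_vcpu_yield_to() is the pre-existing yield helper):

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* only consider vcpus that were preempted while runnable */
		if (!ACCESS_ONCE(vcpu->preempted))
			continue;
		if (vcpu == me)
			continue;
		/* a blocked vcpu cannot be a spinning lock holder */
		if (waitqueue_active(&vcpu->wq))
			continue;
		if (kvm_vcpu_yield_to(vcpu) > 0)
			break;
	}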
Reviewed-by: Chegu Vinod Reviewed-by: Marcelo Tosatti Signed-off-by: Raghavendra K T Signed-off-by: Gleb Natapov --- virt/kvm/kvm_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 470f2bc8205a..ff7154188b5f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1768,6 +1768,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; } else if (pass && i > last_boosted_vcpu) break; + if (!ACCESS_ONCE(vcpu->preempted)) + continue; if (vcpu == me) continue; if (waitqueue_active(&vcpu->wq)) -- GitLab From 0fa24ce3f57144e390a1566774c23434975a52a9 Mon Sep 17 00:00:00 2001 From: Ioan Orghici Date: Sun, 10 Mar 2013 15:46:00 +0200 Subject: [PATCH 0070/3163] kvm: remove cast for kmalloc return value Signed-off-by: Ioan Orghici Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 260da9ac1678..473a5fe7e006 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5286,8 +5286,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) } /* Create a new VMCS */ - item = (struct vmcs02_list *) - kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); + item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); -- GitLab From eeb65d9cb5159752e672b5164110838bb5936a4a Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghiu Date: Sat, 9 Mar 2013 13:57:28 +0200 Subject: [PATCH 0071/3163] fs: pstore: Replaced calls to kmalloc and memcpy with kmemdup Replaced calls to kmalloc and memcpy with a single call to kmemdup. This patch was found using coccicheck. Signed-off-by: Alexandru Gheorghiu Acked-by: Kees Cook Signed-off-by: Tony Luck --- fs/pstore/ram.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 288f068740f6..38babb3a9384 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -156,10 +156,9 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, time->tv_nsec = 0; size = persistent_ram_old_size(prz); - *buf = kmalloc(size, GFP_KERNEL); + *buf = kmemdup(persistent_ram_old(prz), size, GFP_KERNEL); if (*buf == NULL) return -ENOMEM; - memcpy(*buf, persistent_ram_old(prz), size); return size; } -- GitLab From 03ba32cae66e3798d697e582633af2c7dd6907e5 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 11 Mar 2013 23:10:24 -0300 Subject: [PATCH 0072/3163] VMX: x86: handle host TSC calibration failure If the host TSC calibration fails, tsc_khz is zero (see tsc_init.c). Handle such case properly in KVM (instead of dividing by zero). 
https://bugzilla.redhat.com/show_bug.cgi?id=859282 Signed-off-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/x86.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 35b491229c3a..b67985af1753 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1079,6 +1079,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) u32 thresh_lo, thresh_hi; int use_scaling = 0; + /* tsc_khz can be zero if TSC calibration fails */ + if (this_tsc_khz == 0) + return; + /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, &vcpu->arch.virtual_tsc_shift, @@ -1156,20 +1160,23 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; - /* n.b - signed multiplication and division required */ - usdiff = data - kvm->arch.last_tsc_write; + if (vcpu->arch.virtual_tsc_khz) { + /* n.b - signed multiplication and division required */ + usdiff = data - kvm->arch.last_tsc_write; #ifdef CONFIG_X86_64 - usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; + usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; #else - /* do_div() only does unsigned */ - asm("idivl %2; xor %%edx, %%edx" - : "=A"(usdiff) - : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); + /* do_div() only does unsigned */ + asm("idivl %2; xor %%edx, %%edx" + : "=A"(usdiff) + : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); #endif - do_div(elapsed, 1000); - usdiff -= elapsed; - if (usdiff < 0) - usdiff = -usdiff; + do_div(elapsed, 1000); + usdiff -= elapsed; + if (usdiff < 0) + usdiff = -usdiff; + } else + usdiff = USEC_PER_SEC; /* disable TSC match window below */ /* * Special case: TSC write with a small delta (1 second) of virtual -- GitLab From 57f252f22908535e04d520f3833a6e3116eb159d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 12 Mar 2013 10:20:24 +0100 Subject: [PATCH 0073/3163] KVM: x86: Drop unused return code from VCPU reset callback Neither vmx nor svm nor the common part may generate an error on kvm_vcpu_reset. So drop the return code. 
Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 4 +--- arch/x86/kvm/vmx.c | 7 +------ arch/x86/kvm/x86.c | 15 ++++++--------- 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 635a74d22409..348d85965ead 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -643,7 +643,7 @@ struct kvm_x86_ops { /* Create, but do not attach this VCPU */ struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); - int (*vcpu_reset)(struct kvm_vcpu *vcpu); + void (*vcpu_reset)(struct kvm_vcpu *vcpu); void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e1b1ce21bc00..907e4280116d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1191,7 +1191,7 @@ static void init_vmcb(struct vcpu_svm *svm) enable_gif(svm); } -static int svm_vcpu_reset(struct kvm_vcpu *vcpu) +static void svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u32 dummy; @@ -1207,8 +1207,6 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); - - return 0; } static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 473a5fe7e006..f588171be177 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4100,11 +4100,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } -static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 msr; - int ret; vmx->rmode.vm86_active = 0; @@ -4195,10 +4194,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) update_exception_bitmap(&vmx->vcpu); vpid_sync_context(vmx); - - ret = 0; - - return ret; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b67985af1753..fadd5a750476 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -162,7 +162,7 @@ u64 __read_mostly host_xcr0; static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); +static void kvm_vcpu_reset(struct kvm_vcpu *vcpu); static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { @@ -5858,9 +5858,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_vcpu_reset(vcpu); - if (r) - return r; + kvm_vcpu_reset(vcpu); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } @@ -6486,9 +6484,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); if (r) return r; - r = kvm_vcpu_reset(vcpu); - if (r == 0) - r = kvm_mmu_setup(vcpu); + kvm_vcpu_reset(vcpu); + r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); return r; @@ -6525,7 +6522,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) +static void kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; @@ -6552,7 +6549,7 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; - return kvm_x86_ops->vcpu_reset(vcpu); + kvm_x86_ops->vcpu_reset(vcpu); } int 
kvm_arch_hardware_enable(void *garbage) -- GitLab From dc715452e9145156840b09df765ea2ede4851eda Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Jan 2013 16:24:37 +0000 Subject: [PATCH 0074/3163] spi: pl022: use generic DMA slave configuration if possible With the new OF DMA binding, it is possible to completely avoid the need for platform_data for configuring a DMA channel. In cases where the platform has already been converted, calling dma_request_slave_channel should get all the necessary information from the device tree. Like the patch that converts the dw_dma controller, this is completely untested and is looking for someone to try it out. Signed-off-by: Arnd Bergmann Acked-by: Grant Likely Acked-by: Mark Brown Acked-by: Linus Walleij Cc: spi-devel-general@lists.sourceforge.net Cc: Viresh Kumar Cc: Andy Shevchenko Cc: Vinod Koul Cc: devicetree-discuss@lists.ozlabs.org Cc: linux-arm-kernel@lists.infradead.org --- .../devicetree/bindings/spi/spi_pl022.txt | 36 ++++++++++++++++ drivers/spi/spi-pl022.c | 43 ++++++++++++++++++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt index f158fd31cfda..22ed6797216d 100644 --- a/Documentation/devicetree/bindings/spi/spi_pl022.txt +++ b/Documentation/devicetree/bindings/spi/spi_pl022.txt @@ -16,6 +16,11 @@ Optional properties: device will be suspended immediately - pl022,rt : indicates the controller should run the message pump with realtime priority to minimise the transfer latency on the bus (boolean) +- dmas : Two or more DMA channel specifiers following the convention outlined + in bindings/dma/dma.txt +- dma-names: Names for the dma channels, if present. There must be at + least one channel named "tx" for transmit and named "rx" for + receive. SPI slave nodes must be children of the SPI master node and can @@ -32,3 +37,34 @@ contain the following properties. 
- pl022,wait-state : Microwire interface: Wait state - pl022,duplex : Microwire interface: Full/Half duplex + +Example: + + spi@e0100000 { + compatible = "arm,pl022", "arm,primecell"; + reg = <0xe0100000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <0 31 0x4>; + dmas = <&dma-controller 23 1>, + <&dma-controller 24 0>; + dma-names = "rx", "tx"; + + m25p80@1 { + compatible = "st,m25p80"; + reg = <1>; + spi-max-frequency = <12000000>; + spi-cpol; + spi-cpha; + pl022,hierarchy = <0>; + pl022,interface = <0>; + pl022,slave-tx-disable; + pl022,com-mode = <0x2>; + pl022,rx-level-trig = <0>; + pl022,tx-level-trig = <0>; + pl022,ctrl-len = <0x11>; + pl022,wait-state = <0>; + pl022,duplex = <0>; + }; + }; + diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index b0fe393c882c..371cc66f1a0e 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1139,6 +1139,35 @@ static int pl022_dma_probe(struct pl022 *pl022) return -ENODEV; } +static int pl022_dma_autoprobe(struct pl022 *pl022) +{ + struct device *dev = &pl022->adev->dev; + + /* automatically configure DMA channels from platform, normally using DT */ + pl022->dma_rx_channel = dma_request_slave_channel(dev, "rx"); + if (!pl022->dma_rx_channel) + goto err_no_rxchan; + + pl022->dma_tx_channel = dma_request_slave_channel(dev, "tx"); + if (!pl022->dma_tx_channel) + goto err_no_txchan; + + pl022->dummypage = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!pl022->dummypage) + goto err_no_dummypage; + + return 0; + +err_no_dummypage: + dma_release_channel(pl022->dma_tx_channel); + pl022->dma_tx_channel = NULL; +err_no_txchan: + dma_release_channel(pl022->dma_rx_channel); + pl022->dma_rx_channel = NULL; +err_no_rxchan: + return -ENODEV; +} + static void terminate_dma(struct pl022 *pl022) { struct dma_chan *rxchan = pl022->dma_rx_channel; @@ -1167,6 +1196,11 @@ static inline int configure_dma(struct pl022 *pl022) return -ENODEV; } +static inline int pl022_dma_autoprobe(struct pl022 *pl022) +{ + return 0; +} + static inline int pl022_dma_probe(struct pl022 *pl022) { return 0; @@ -2226,8 +2260,13 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id) goto err_no_irq; } - /* Get DMA channels */ - if (platform_info->enable_dma) { + /* Get DMA channels, try autoconfiguration first */ + status = pl022_dma_autoprobe(pl022); + + /* If that failed, use channels from platform_info */ + if (status == 0) + platform_info->enable_dma = 1; + else if (platform_info->enable_dma) { status = pl022_dma_probe(pl022); if (status != 0) platform_info->enable_dma = 0; -- GitLab From 787b0c1f8e1975157fe73104e67cac18f955281b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Jan 2013 16:24:37 +0000 Subject: [PATCH 0075/3163] serial: pl011: use generic DMA slave configuration if possible With the new OF DMA binding, it is possible to completely avoid the need for platform_data for configuring a DMA channel. In cases where the platform has already been converted, calling dma_request_slave_channel should get all the necessary information from the device tree. This also adds a binding document specific to the pl011 controller, and extends the generic primecell binding to mention "dmas" and other common properties. Like the patch that converts the dw_dma controller, this is completely untested and is looking for someone to try it out. 
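The probe-time pattern is the same as in the pl022 patch above: try the generic device-tree lookup first, then fall back to the legacy platform data. A condensed sketch (illustrative, not the exact driver code):

	/* "dmas"/"dma-names" lookup via the generic DMA binding */
	chan = dma_request_slave_channel(dev, "tx");
	if (!chan) {
		/* legacy fallback: platform data supplies a filter function */
		dma_cap_zero(mask);
		dma_cap_set(DMA_SLAVE, mask);
		chan = dma_request_channel(mask, plat->dma_filter,
					   plat->dma_tx_param);
	}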
Signed-off-by: Arnd Bergmann Acked-by: Grant Likely Acked-by: Greg Kroah-Hartman Cc: Russell King Cc: Jiri Slaby Cc: Viresh Kumar Cc: devicetree-discuss@lists.ozlabs.org Cc: linux-arm-kernel@lists.infradead.org --- .../devicetree/bindings/arm/primecell.txt | 19 +++++- .../devicetree/bindings/serial/pl011.txt | 17 +++++ drivers/tty/serial/amba-pl011.c | 62 +++++++++++-------- 3 files changed, 72 insertions(+), 26 deletions(-) create mode 100644 Documentation/devicetree/bindings/serial/pl011.txt diff --git a/Documentation/devicetree/bindings/arm/primecell.txt b/Documentation/devicetree/bindings/arm/primecell.txt index 64fc82bc8928..0df6acacfaea 100644 --- a/Documentation/devicetree/bindings/arm/primecell.txt +++ b/Documentation/devicetree/bindings/arm/primecell.txt @@ -16,14 +16,31 @@ Optional properties: - clocks : From common clock binding. First clock is phandle to clock for apb pclk. Additional clocks are optional and specific to those peripherals. - clock-names : From common clock binding. Shall be "apb_pclk" for first clock. +- dmas : From common DMA binding. If present, refers to one or more dma channels. +- dma-names : From common DMA binding, needs to match the 'dmas' property. + Devices with exactly one receive and transmit channel shall name + these "rx" and "tx", respectively. +- pinctrl- : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt +- pinctrl-names : Names corresponding to the numbered pinctrl states +- interrupts : one or more interrupt specifiers +- interrupt-names : names corresponding to the interrupts properties Example: serial@fff36000 { compatible = "arm,pl011", "arm,primecell"; arm,primecell-periphid = <0x00341011>; + clocks = <&pclk>; clock-names = "apb_pclk"; - + + dmas = <&dma-controller 4>, <&dma-controller 5>; + dma-names = "rx", "tx"; + + pinctrl-0 = <&uart0_default_mux>, <&uart0_default_mode>; + pinctrl-1 = <&uart0_sleep_mode>; + pinctrl-names = "default","sleep"; + + interrupts = <0 11 0x4>; }; diff --git a/Documentation/devicetree/bindings/serial/pl011.txt b/Documentation/devicetree/bindings/serial/pl011.txt new file mode 100644 index 000000000000..5d2e840ae65c --- /dev/null +++ b/Documentation/devicetree/bindings/serial/pl011.txt @@ -0,0 +1,17 @@ +* ARM AMBA Primecell PL011 serial UART + +Required properties: +- compatible: must be "arm,primecell", "arm,pl011" +- reg: exactly one register range with length 0x1000 +- interrupts: exactly one interrupt specifier + +Optional properties: +- pinctrl: When present, must have one state named "sleep" + and one state named "default" +- clocks: When present, must refer to exactly one clock named + "apb_pclk" +- dmas: When present, may have one or two dma channels. + The first one must be named "rx", the second one + must be named "tx". 
+ +See also bindings/arm/primecell.txt diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 3ea5408fcbeb..c25b00ef9dbb 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -245,7 +245,7 @@ static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, } } -static void pl011_dma_probe_initcall(struct uart_amba_port *uap) +static void pl011_dma_probe_initcall(struct device *dev, struct uart_amba_port *uap) { /* DMA is the sole user of the platform data right now */ struct amba_pl011_data *plat = uap->port.dev->platform_data; @@ -259,20 +259,25 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap) struct dma_chan *chan; dma_cap_mask_t mask; - /* We need platform data */ - if (!plat || !plat->dma_filter) { - dev_info(uap->port.dev, "no DMA platform data\n"); - return; - } + chan = dma_request_slave_channel(dev, "tx"); - /* Try to acquire a generic DMA engine slave TX channel */ - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - - chan = dma_request_channel(mask, plat->dma_filter, plat->dma_tx_param); if (!chan) { - dev_err(uap->port.dev, "no TX DMA channel!\n"); - return; + /* We need platform data */ + if (!plat || !plat->dma_filter) { + dev_info(uap->port.dev, "no DMA platform data\n"); + return; + } + + /* Try to acquire a generic DMA engine slave TX channel */ + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + chan = dma_request_channel(mask, plat->dma_filter, + plat->dma_tx_param); + if (!chan) { + dev_err(uap->port.dev, "no TX DMA channel!\n"); + return; + } } dmaengine_slave_config(chan, &tx_conf); @@ -282,7 +287,18 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap) dma_chan_name(uap->dmatx.chan)); /* Optionally make use of an RX channel as well */ - if (plat->dma_rx_param) { + chan = dma_request_slave_channel(dev, "rx"); + + if (!chan && plat->dma_rx_param) { + chan = dma_request_channel(mask, plat->dma_filter, plat->dma_rx_param); + + if (!chan) { + dev_err(uap->port.dev, "no RX DMA channel!\n"); + return; + } + } + + if (chan) { struct dma_slave_config rx_conf = { .src_addr = uap->port.mapbase + UART01x_DR, .src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE, @@ -291,12 +307,6 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap) .device_fc = false, }; - chan = dma_request_channel(mask, plat->dma_filter, plat->dma_rx_param); - if (!chan) { - dev_err(uap->port.dev, "no RX DMA channel!\n"); - return; - } - dmaengine_slave_config(chan, &rx_conf); uap->dmarx.chan = chan; @@ -315,6 +325,7 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap) struct dma_uap { struct list_head node; struct uart_amba_port *uap; + struct device *dev; }; static LIST_HEAD(pl011_dma_uarts); @@ -325,7 +336,7 @@ static int __init pl011_dma_initcall(void) list_for_each_safe(node, tmp, &pl011_dma_uarts) { struct dma_uap *dmau = list_entry(node, struct dma_uap, node); - pl011_dma_probe_initcall(dmau->uap); + pl011_dma_probe_initcall(dmau->dev, dmau->uap); list_del(node); kfree(dmau); } @@ -334,18 +345,19 @@ static int __init pl011_dma_initcall(void) device_initcall(pl011_dma_initcall); -static void pl011_dma_probe(struct uart_amba_port *uap) +static void pl011_dma_probe(struct device *dev, struct uart_amba_port *uap) { struct dma_uap *dmau = kzalloc(sizeof(struct dma_uap), GFP_KERNEL); if (dmau) { dmau->uap = uap; + dmau->dev = dev; list_add_tail(&dmau->node, &pl011_dma_uarts); } } #else -static void pl011_dma_probe(struct uart_amba_port *uap) +static void pl011_dma_probe(struct 
device *dev, struct uart_amba_port *uap) { - pl011_dma_probe_initcall(uap); + pl011_dma_probe_initcall(dev, uap); } #endif @@ -2020,7 +2032,7 @@ static int pl011_probe(struct amba_device *dev, const struct amba_id *id) uap->port.ops = &amba_pl011_pops; uap->port.flags = UPF_BOOT_AUTOCONF; uap->port.line = i; - pl011_dma_probe(uap); + pl011_dma_probe(&dev->dev, uap); /* Ensure interrupts from this UART are masked and cleared */ writew(0, uap->port.membase + UART011_IMSC); -- GitLab From 6e8887f60f6038e822462ff815b30074af62b847 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Jan 2013 21:48:47 +0000 Subject: [PATCH 0076/3163] ARM: SPEAr13xx: Pass generic DW DMAC platform data from DT This replaces an earlier patch from Viresh Kumar to move the spear platform over to the generic DMA binding. This version is now based on the merged multiplatform capable spear platform, rather than the separate spear13xx/3xx/6xx directories. Signed-off-by: Arnd Bergmann Cc: Viresh Kumar Cc: Vinod Koul Cc: devicetree-discuss@lists.ozlabs.org Cc: Shiraz Hashim Cc: spear-devel@list.st.com --- arch/arm/boot/dts/spear1340.dtsi | 3 + arch/arm/boot/dts/spear13xx.dtsi | 25 ++++- arch/arm/mach-spear/generic.h | 5 - arch/arm/mach-spear/include/mach/spear.h | 2 - arch/arm/mach-spear/spear1310.c | 30 +----- arch/arm/mach-spear/spear1340.c | 32 +----- arch/arm/mach-spear/spear13xx-dma.h | 128 ----------------------- arch/arm/mach-spear/spear13xx.c | 58 ---------- 8 files changed, 29 insertions(+), 254 deletions(-) delete mode 100644 arch/arm/mach-spear/spear13xx-dma.h diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 34da11aa6795..e1786a0b2fcd 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -113,6 +113,9 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; + dmas = <&dwdma0 0x600 0 0 1>, /* 0xC << 11 */ + <&dwdma0 0x680 0 1 0>; /* 0xD << 7 */ + dma-names = "tx", "rx"; }; thermal@e07008c4 { diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index b4ca60f4eb42..45597fd91050 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -98,13 +98,24 @@ reg = <0xb2800000 0x1000>; interrupts = <0 29 0x4>; status = "disabled"; + dmas = <&dwdma0 0 0 0 0>; + dma-names = "data"; }; - dma@ea800000 { + dwdma0: dma@ea800000 { compatible = "snps,dma-spear1340"; reg = <0xea800000 0x1000>; interrupts = <0 19 0x4>; status = "disabled"; + + dma-channels = <8>; + #dma-cells = <3>; + dma-requests = <32>; + chan_allocation_order = <1>; + chan_priority = <1>; + block_size = <0xfff>; + dma-masters = <2>; + data_width = <3 3 0 0>; }; dma@eb000000 { @@ -112,6 +123,15 @@ reg = <0xeb000000 0x1000>; interrupts = <0 59 0x4>; status = "disabled"; + + dma-requests = <32>; + dma-channels = <8>; + dma-masters = <2>; + #dma-cells = <3>; + chan_allocation_order = <1>; + chan_priority = <1>; + block_size = <0xfff>; + data_width = <3 3 0 0>; }; fsmc: flash@b0000000 { @@ -261,6 +281,9 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; + dmas = <&dwdma0 0x2000 0 0 0>, /* 0x4 << 11 */ + <&dwdma0 0x0280 0 0 0>; /* 0x5 << 7 */ + dma-names = "tx", "rx"; }; rtc@e0580000 { diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h index 8ba7e75b648d..a9fd45362fee 100644 --- a/arch/arm/mach-spear/generic.h +++ b/arch/arm/mach-spear/generic.h @@ -22,11 +22,6 @@ extern void spear13xx_timer_init(void); extern void spear3xx_timer_init(void); extern struct pl022_ssp_controller 
pl022_plat_data; extern struct pl08x_platform_data pl080_plat_data; -extern struct dw_dma_platform_data dmac_plat_data; -extern struct dw_dma_slave cf_dma_priv; -extern struct dw_dma_slave nand_read_dma_priv; -extern struct dw_dma_slave nand_write_dma_priv; -bool dw_dma_filter(struct dma_chan *chan, void *slave); void __init spear_setup_of_timer(void); void __init spear3xx_clk_init(void __iomem *misc_base, diff --git a/arch/arm/mach-spear/include/mach/spear.h b/arch/arm/mach-spear/include/mach/spear.h index 374ddc393df1..cf3a5369eeca 100644 --- a/arch/arm/mach-spear/include/mach/spear.h +++ b/arch/arm/mach-spear/include/mach/spear.h @@ -82,8 +82,6 @@ #define VA_L2CC_BASE IOMEM(UL(0xFB000000)) /* others */ -#define DMAC0_BASE UL(0xEA800000) -#define DMAC1_BASE UL(0xEB000000) #define MCIF_CF_BASE UL(0xB2800000) /* Debug uart for linux, will be used for debug and uncompress messages */ diff --git a/arch/arm/mach-spear/spear1310.c b/arch/arm/mach-spear/spear1310.c index ed3b5c287a7b..9eaac2c881ea 100644 --- a/arch/arm/mach-spear/spear1310.c +++ b/arch/arm/mach-spear/spear1310.c @@ -23,40 +23,12 @@ #include /* Base addresses */ -#define SPEAR1310_SSP1_BASE UL(0x5D400000) -#define SPEAR1310_SATA0_BASE UL(0xB1000000) -#define SPEAR1310_SATA1_BASE UL(0xB1800000) -#define SPEAR1310_SATA2_BASE UL(0xB4000000) - #define SPEAR1310_RAS_GRP1_BASE UL(0xD8000000) #define VA_SPEAR1310_RAS_GRP1_BASE UL(0xFA000000) -static struct arasan_cf_pdata cf_pdata = { - .cf_if_clk = CF_IF_CLK_166M, - .quirk = CF_BROKEN_UDMA, - .dma_priv = &cf_dma_priv, -}; - -/* ssp device registration */ -static struct pl022_ssp_controller ssp1_plat_data = { - .enable_dma = 0, -}; - -/* Add SPEAr1310 auxdata to pass platform data */ -static struct of_dev_auxdata spear1310_auxdata_lookup[] __initdata = { - OF_DEV_AUXDATA("arasan,cf-spear1340", MCIF_CF_BASE, NULL, &cf_pdata), - OF_DEV_AUXDATA("snps,dma-spear1340", DMAC0_BASE, NULL, &dmac_plat_data), - OF_DEV_AUXDATA("snps,dma-spear1340", DMAC1_BASE, NULL, &dmac_plat_data), - OF_DEV_AUXDATA("arm,pl022", SSP_BASE, NULL, &pl022_plat_data), - - OF_DEV_AUXDATA("arm,pl022", SPEAR1310_SSP1_BASE, NULL, &ssp1_plat_data), - {} -}; - static void __init spear1310_dt_init(void) { - of_platform_populate(NULL, of_default_bus_match_table, - spear1310_auxdata_lookup, NULL); + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } static const char * const spear1310_dt_board_compat[] = { diff --git a/arch/arm/mach-spear/spear1340.c b/arch/arm/mach-spear/spear1340.c index 75e38644bbfb..a04a7fe76f71 100644 --- a/arch/arm/mach-spear/spear1340.c +++ b/arch/arm/mach-spear/spear1340.c @@ -16,18 +16,16 @@ #include #include #include -#include #include #include #include #include "generic.h" #include -#include "spear13xx-dma.h" +/* FIXME: Move SATA PHY code into a standalone driver */ /* Base addresses */ #define SPEAR1340_SATA_BASE UL(0xB1000000) -#define SPEAR1340_UART1_BASE UL(0xB4100000) /* Power Management Registers */ #define SPEAR1340_PCM_CFG (VA_MISC_BASE + 0x100) @@ -79,28 +77,6 @@ (SPEAR1340_MIPHY_OSC_BYPASS_EXT | \ SPEAR1340_MIPHY_PLL_RATIO_TOP(25)) -static struct dw_dma_slave uart1_dma_param[] = { - { - /* Tx */ - .cfg_hi = DWC_CFGH_DST_PER(SPEAR1340_DMA_REQ_UART1_TX), - .cfg_lo = 0, - .src_master = DMA_MASTER_MEMORY, - .dst_master = SPEAR1340_DMA_MASTER_UART1, - }, { - /* Rx */ - .cfg_hi = DWC_CFGH_SRC_PER(SPEAR1340_DMA_REQ_UART1_RX), - .cfg_lo = 0, - .src_master = SPEAR1340_DMA_MASTER_UART1, - .dst_master = DMA_MASTER_MEMORY, - } -}; - -static struct amba_pl011_data uart1_data = { - 
.dma_filter = dw_dma_filter, - .dma_tx_param = &uart1_dma_param[0], - .dma_rx_param = &uart1_dma_param[1], -}; - /* SATA device registration */ static int sata_miphy_init(struct device *dev, void __iomem *addr) { @@ -159,14 +135,8 @@ static struct ahci_platform_data sata_pdata = { /* Add SPEAr1340 auxdata to pass platform data */ static struct of_dev_auxdata spear1340_auxdata_lookup[] __initdata = { - OF_DEV_AUXDATA("arasan,cf-spear1340", MCIF_CF_BASE, NULL, &cf_dma_priv), - OF_DEV_AUXDATA("snps,dma-spear1340", DMAC0_BASE, NULL, &dmac_plat_data), - OF_DEV_AUXDATA("snps,dma-spear1340", DMAC1_BASE, NULL, &dmac_plat_data), - OF_DEV_AUXDATA("arm,pl022", SSP_BASE, NULL, &pl022_plat_data), - OF_DEV_AUXDATA("snps,spear-ahci", SPEAR1340_SATA_BASE, NULL, &sata_pdata), - OF_DEV_AUXDATA("arm,pl011", SPEAR1340_UART1_BASE, NULL, &uart1_data), {} }; diff --git a/arch/arm/mach-spear/spear13xx-dma.h b/arch/arm/mach-spear/spear13xx-dma.h deleted file mode 100644 index d50bdb605925..000000000000 --- a/arch/arm/mach-spear/spear13xx-dma.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * arch/arm/mach-spear13xx/include/mach/dma.h - * - * DMA information for SPEAr13xx machine family - * - * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __MACH_DMA_H -#define __MACH_DMA_H - -/* request id of all the peripherals */ -enum dma_master_info { - /* Accessible from only one master */ - DMA_MASTER_MCIF = 0, - DMA_MASTER_FSMC = 1, - /* Accessible from both 0 & 1 */ - DMA_MASTER_MEMORY = 0, - DMA_MASTER_ADC = 0, - DMA_MASTER_UART0 = 0, - DMA_MASTER_SSP0 = 0, - DMA_MASTER_I2C0 = 0, - -#ifdef CONFIG_MACH_SPEAR1310 - /* Accessible from only one master */ - SPEAR1310_DMA_MASTER_JPEG = 1, - - /* Accessible from both 0 & 1 */ - SPEAR1310_DMA_MASTER_I2S = 0, - SPEAR1310_DMA_MASTER_UART1 = 0, - SPEAR1310_DMA_MASTER_UART2 = 0, - SPEAR1310_DMA_MASTER_UART3 = 0, - SPEAR1310_DMA_MASTER_UART4 = 0, - SPEAR1310_DMA_MASTER_UART5 = 0, - SPEAR1310_DMA_MASTER_I2C1 = 0, - SPEAR1310_DMA_MASTER_I2C2 = 0, - SPEAR1310_DMA_MASTER_I2C3 = 0, - SPEAR1310_DMA_MASTER_I2C4 = 0, - SPEAR1310_DMA_MASTER_I2C5 = 0, - SPEAR1310_DMA_MASTER_I2C6 = 0, - SPEAR1310_DMA_MASTER_I2C7 = 0, - SPEAR1310_DMA_MASTER_SSP1 = 0, -#endif - -#ifdef CONFIG_MACH_SPEAR1340 - /* Accessible from only one master */ - SPEAR1340_DMA_MASTER_I2S_PLAY = 1, - SPEAR1340_DMA_MASTER_I2S_REC = 1, - SPEAR1340_DMA_MASTER_I2C1 = 1, - SPEAR1340_DMA_MASTER_UART1 = 1, - - /* following are accessible from both master 0 & 1 */ - SPEAR1340_DMA_MASTER_SPDIF = 0, - SPEAR1340_DMA_MASTER_CAM = 1, - SPEAR1340_DMA_MASTER_VIDEO_IN = 0, - SPEAR1340_DMA_MASTER_MALI = 0, -#endif -}; - -enum request_id { - DMA_REQ_ADC = 0, - DMA_REQ_SSP0_TX = 4, - DMA_REQ_SSP0_RX = 5, - DMA_REQ_UART0_TX = 6, - DMA_REQ_UART0_RX = 7, - DMA_REQ_I2C0_TX = 8, - DMA_REQ_I2C0_RX = 9, - -#ifdef CONFIG_MACH_SPEAR1310 - SPEAR1310_DMA_REQ_FROM_JPEG = 2, - SPEAR1310_DMA_REQ_TO_JPEG = 3, - SPEAR1310_DMA_REQ_I2S_TX = 10, - SPEAR1310_DMA_REQ_I2S_RX = 11, - - SPEAR1310_DMA_REQ_I2C1_RX = 0, - SPEAR1310_DMA_REQ_I2C1_TX = 1, - SPEAR1310_DMA_REQ_I2C2_RX = 2, - SPEAR1310_DMA_REQ_I2C2_TX = 3, - SPEAR1310_DMA_REQ_I2C3_RX = 4, - SPEAR1310_DMA_REQ_I2C3_TX = 5, - SPEAR1310_DMA_REQ_I2C4_RX = 6, - SPEAR1310_DMA_REQ_I2C4_TX = 7, - SPEAR1310_DMA_REQ_I2C5_RX = 8, - SPEAR1310_DMA_REQ_I2C5_TX = 9, - SPEAR1310_DMA_REQ_I2C6_RX = 10, - 
SPEAR1310_DMA_REQ_I2C6_TX = 11, - SPEAR1310_DMA_REQ_UART1_RX = 12, - SPEAR1310_DMA_REQ_UART1_TX = 13, - SPEAR1310_DMA_REQ_UART2_RX = 14, - SPEAR1310_DMA_REQ_UART2_TX = 15, - SPEAR1310_DMA_REQ_UART5_RX = 16, - SPEAR1310_DMA_REQ_UART5_TX = 17, - SPEAR1310_DMA_REQ_SSP1_RX = 18, - SPEAR1310_DMA_REQ_SSP1_TX = 19, - SPEAR1310_DMA_REQ_I2C7_RX = 20, - SPEAR1310_DMA_REQ_I2C7_TX = 21, - SPEAR1310_DMA_REQ_UART3_RX = 28, - SPEAR1310_DMA_REQ_UART3_TX = 29, - SPEAR1310_DMA_REQ_UART4_RX = 30, - SPEAR1310_DMA_REQ_UART4_TX = 31, -#endif - -#ifdef CONFIG_MACH_SPEAR1340 - SPEAR1340_DMA_REQ_SPDIF_TX = 2, - SPEAR1340_DMA_REQ_SPDIF_RX = 3, - SPEAR1340_DMA_REQ_I2S_TX = 10, - SPEAR1340_DMA_REQ_I2S_RX = 11, - SPEAR1340_DMA_REQ_UART1_TX = 12, - SPEAR1340_DMA_REQ_UART1_RX = 13, - SPEAR1340_DMA_REQ_I2C1_TX = 14, - SPEAR1340_DMA_REQ_I2C1_RX = 15, - SPEAR1340_DMA_REQ_CAM0_EVEN = 0, - SPEAR1340_DMA_REQ_CAM0_ODD = 1, - SPEAR1340_DMA_REQ_CAM1_EVEN = 2, - SPEAR1340_DMA_REQ_CAM1_ODD = 3, - SPEAR1340_DMA_REQ_CAM2_EVEN = 4, - SPEAR1340_DMA_REQ_CAM2_ODD = 5, - SPEAR1340_DMA_REQ_CAM3_EVEN = 6, - SPEAR1340_DMA_REQ_CAM3_ODD = 7, -#endif -}; - -#endif /* __MACH_DMA_H */ diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c index 1b97e8623472..3975916666a0 100644 --- a/arch/arm/mach-spear/spear13xx.c +++ b/arch/arm/mach-spear/spear13xx.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -24,63 +23,6 @@ #include "generic.h" #include -#include "spear13xx-dma.h" - -/* common dw_dma filter routine to be used by peripherals */ -bool dw_dma_filter(struct dma_chan *chan, void *slave) -{ - struct dw_dma_slave *dws = (struct dw_dma_slave *)slave; - - if (chan->device->dev == dws->dma_dev) { - chan->private = slave; - return true; - } else { - return false; - } -} - -/* ssp device registration */ -static struct dw_dma_slave ssp_dma_param[] = { - { - /* Tx */ - .cfg_hi = DWC_CFGH_DST_PER(DMA_REQ_SSP0_TX), - .cfg_lo = 0, - .src_master = DMA_MASTER_MEMORY, - .dst_master = DMA_MASTER_SSP0, - }, { - /* Rx */ - .cfg_hi = DWC_CFGH_SRC_PER(DMA_REQ_SSP0_RX), - .cfg_lo = 0, - .src_master = DMA_MASTER_SSP0, - .dst_master = DMA_MASTER_MEMORY, - } -}; - -struct pl022_ssp_controller pl022_plat_data = { - .enable_dma = 1, - .dma_filter = dw_dma_filter, - .dma_rx_param = &ssp_dma_param[1], - .dma_tx_param = &ssp_dma_param[0], -}; - -/* CF device registration */ -struct dw_dma_slave cf_dma_priv = { - .cfg_hi = 0, - .cfg_lo = 0, - .src_master = 0, - .dst_master = 0, -}; - -/* dmac device registeration */ -struct dw_dma_platform_data dmac_plat_data = { - .nr_channels = 8, - .chan_allocation_order = CHAN_ALLOCATION_DESCENDING, - .chan_priority = CHAN_PRIORITY_DESCENDING, - .block_size = 4095U, - .nr_masters = 2, - .data_width = { 3, 3, 0, 0 }, -}; - void __init spear13xx_l2x0_init(void) { /* -- GitLab From 5d218814328da91a27e982748443e7e375e11396 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 12 Mar 2013 22:36:43 -0300 Subject: [PATCH 0077/3163] KVM: MMU: make kvm_mmu_available_pages robust against n_used_mmu_pages > n_max_mmu_pages As noticed by Ulrich Obergfell , the mmu counters are for beancounting purposes only - so n_used_mmu_pages and n_max_mmu_pages could be relaxed (example: before f0f5933a1626c8df7b), resulting in n_used_mmu_pages > n_max_mmu_pages. Make code robust against n_used_mmu_pages > n_max_mmu_pages. 
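The hazard here is unsigned arithmetic: kvm_mmu_available_pages() returns an unsigned int, so once n_used_mmu_pages exceeds n_max_mmu_pages the plain subtraction wraps around to a huge positive count. The guarded form in the hunk below is the usual fix for this kind of underflow, in essence:

	/* illustrative pattern: never let an unsigned difference wrap */
	if (max > used)
		return max - used;
	return 0;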
Reviewed-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/mmu.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 69871080e866..3b1ad0049ea4 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -57,8 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { - return kvm->arch.n_max_mmu_pages - - kvm->arch.n_used_mmu_pages; + if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) + return kvm->arch.n_max_mmu_pages - + kvm->arch.n_used_mmu_pages; + + return 0; } static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) -- GitLab From 66450a21f99636af4fafac2afd33f1a40631bc3a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 12:42:34 +0100 Subject: [PATCH 0078/3163] KVM: x86: Rework INIT and SIPI handling A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. This introduces APIC events for those two signals, keeping them in kvm_apic until kvm_apic_accept_events is run over the target vcpu context. kvm_apic_has_events reports to kvm_arch_vcpu_runnable if there are pending events, and thus whether vcpu blocking should end. The patch comes with the side effect of effectively obsoleting KVM_MP_STATE_SIPI_RECEIVED. We still accept it from user space, but immediately translate it to KVM_MP_STATE_INIT_RECEIVED + KVM_APIC_SIPI. The vcpu itself will no longer enter the KVM_MP_STATE_SIPI_RECEIVED state. That also means we no longer exit to user space after receiving a SIPI event. Furthermore, we already reset the VCPU on INIT, only fixing up the code segment later on when SIPI arrives. Moreover, we fix INIT handling for the BSP: it never enters wait-for-SIPI but directly starts over on INIT.
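The core of the new scheme is a pending_events bitmask paired with memory barriers, so the payload (the SIPI vector) is always published before the event bit and read after it. The pattern, extracted from the hunks below:

	/* sender (another vcpu delivering SIPI) */
	apic->sipi_vector = vector;
	smp_wmb();		/* publish the vector before the event bit */
	set_bit(KVM_APIC_SIPI, &apic->pending_events);
	kvm_make_request(KVM_REQ_EVENT, vcpu);

	/* receiver (target vcpu context, kvm_apic_accept_events) */
	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
		smp_rmb();	/* event bit seen; the vector is now valid */
		sipi_vector = apic->sipi_vector;
	}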
Tested-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 3 +- arch/x86/kvm/lapic.c | 48 ++++++++++++++++++++++----- arch/x86/kvm/lapic.h | 11 +++++++ arch/x86/kvm/svm.c | 6 ---- arch/x86/kvm/vmx.c | 12 ++----- arch/x86/kvm/x86.c | 58 +++++++++++++++++++++------------ 6 files changed, 93 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 348d85965ead..ef7f4a5cf8c7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -345,7 +345,6 @@ struct kvm_vcpu_arch { unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; - int sipi_vector; u64 ia32_misc_enable_msr; bool tpr_access_reporting; @@ -819,6 +818,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -1002,6 +1002,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd4e4ad..a8e9369f41c5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,7 +731,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + /* assumes that there are only KVM_APIC_INIT/SIPI */ + apic->pending_events = (1UL << KVM_APIC_INIT); + /* make sure pending_events is visible before sending + * the request */ + smp_wmb(); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } else { @@ -743,13 +747,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { - result = 1; - vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); - } + result = 1; + apic->sipi_vector = vector; + /* make sure sipi_vector is visible for the receiver */ + smp_wmb(); + set_bit(KVM_APIC_SIPI, &apic->pending_events); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_EXTINT: @@ -1860,6 +1864,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) addr); } +void kvm_apic_accept_events(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + unsigned int sipi_vector; + + if (!kvm_vcpu_has_lapic(vcpu)) + return; + + if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { + kvm_lapic_reset(vcpu); + kvm_vcpu_reset(vcpu); + if (kvm_vcpu_is_bsp(apic->vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + else + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + } + if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + /* evaluate pending_events before reading the 
vector */ + smp_rmb(); + sipi_vector = apic->sipi_vector; + pr_debug("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, sipi_vector); + kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + } +} + void kvm_lapic_init(void) { /* do not patch jump label more than once per second */ diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34ddb4e..2c721b986eec 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -5,6 +5,9 @@ #include +#define KVM_APIC_INIT 0 +#define KVM_APIC_SIPI 1 + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ @@ -32,6 +35,8 @@ struct kvm_lapic { void *regs; gpa_t vapic_addr; struct page *vapic_page; + unsigned long pending_events; + unsigned int sipi_vector; }; int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); @@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); +void kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -158,4 +164,9 @@ void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, u64 *eoi_bitmap); +static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.apic->pending_events; +} + #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 907e4280116d..7219a4012a0e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1199,12 +1199,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu) init_vmcb(svm); - if (!kvm_vcpu_is_bsp(vcpu)) { - kvm_rip_write(vcpu, 0); - svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; - svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; - } - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f588171be177..af1ffaf20892 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4119,12 +4119,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - else { - vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); - vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); - } + vmcs_write16(GUEST_CS_SELECTOR, 0xf000); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4147,10 +4142,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_SYSENTER_EIP, 0); vmcs_writel(GUEST_RFLAGS, 0x02); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - kvm_rip_write(vcpu, 0xfff0); - else - kvm_rip_write(vcpu, 0); + kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fadd5a750476..61a5bb60af86 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); -static void kvm_vcpu_reset(struct kvm_vcpu *vcpu); - static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -2830,10 +2828,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); events->nmi.pad = 0; - 
events->sipi_vector = vcpu->arch.sipi_vector; + events->sipi_vector = 0; /* never valid when reporting to user space */ events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW); memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -2864,8 +2861,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && + kvm_vcpu_has_lapic(vcpu)) + vcpu->arch.apic->sipi_vector = events->sipi_vector; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5720,6 +5718,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { + kvm_apic_accept_events(vcpu); + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + r = 1; + goto out; + } + inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ @@ -5854,14 +5858,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) int r; struct kvm *kvm = vcpu->kvm; - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { - pr_debug("vcpu %d received sipi with vector # %x\n", - vcpu->vcpu_id, vcpu->arch.sipi_vector); - kvm_lapic_reset(vcpu); - kvm_vcpu_reset(vcpu); - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - } - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); r = vapic_enter(vcpu); if (r) { @@ -5878,8 +5874,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) - { + if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { + kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: vcpu->arch.mp_state = @@ -5887,7 +5883,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) case KVM_MP_STATE_RUNNABLE: vcpu->arch.apf.halted = false; break; - case KVM_MP_STATE_SIPI_RECEIVED: + case KVM_MP_STATE_INIT_RECEIVED: + break; default: r = -EINTR; break; @@ -6022,6 +6019,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); + kvm_apic_accept_events(vcpu); clear_bit(KVM_REQ_UNHALT, &vcpu->requests); r = -EAGAIN; goto out; @@ -6178,6 +6176,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + kvm_apic_accept_events(vcpu); mp_state->mp_state = vcpu->arch.mp_state; return 0; } @@ -6185,7 +6184,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu->arch.mp_state = mp_state->mp_state; + if (!kvm_vcpu_has_lapic(vcpu) && + mp_state->mp_state != KVM_MP_STATE_RUNNABLE) + return -EINVAL; + + if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); + } else + vcpu->arch.mp_state = mp_state->mp_state; kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } @@ -6522,7 +6529,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -static void kvm_vcpu_reset(struct kvm_vcpu *vcpu) +void kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); 
vcpu->arch.nmi_pending = 0; @@ -6552,6 +6559,17 @@ static void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_reset(vcpu); } +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) +{ + struct kvm_segment cs; + + kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); + cs.selector = vector << 8; + cs.base = vector << 12; + kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_rip_write(vcpu, 0); +} + int kvm_arch_hardware_enable(void *garbage) { struct kvm *kvm; @@ -6995,7 +7013,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED + || kvm_apic_has_events(vcpu) || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); -- GitLab From eabeaaccfca0ed61b8e00a09b8cfa703c4f11b59 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 11:30:50 +0100 Subject: [PATCH 0079/3163] KVM: nVMX: Clean up and fix pin-based execution controls Only interrupt and NMI exiting are mandatory for KVM to work and can thus be exposed to the guest unconditionally; virtual NMI exiting is optional. So we must not advertise it unless the host supports it. Introduce the symbolic constant PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR while at it. Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/vmx.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5fb6e24f0649..3c9f455bacee 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,8 @@ #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af1ffaf20892..8eaabfb20232 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2040,14 +2040,16 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ /* pin-based controls */ + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, + nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); /* * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. */ - nested_vmx_pinbased_ctls_low = 0x16 ; - nested_vmx_pinbased_ctls_high = 0x16 | - PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | - PIN_BASED_VIRTUAL_NMIS; + nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; + nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | + PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; + nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; /* * Exit controls -- GitLab From c18911a23ce1dec27fa3325b50587de2569d26f8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 16:06:41 +0100 Subject: [PATCH 0080/3163] KVM: nVMX: Provide EFER.LMA saving support We will need EFER.LMA saving to provide unrestricted guest mode. All that is missing for this is picking up EFER.LMA from VM_ENTRY_CONTROLS on L2->L1 switches. If the host does not support EFER.LMA saving, no change is performed; otherwise we properly emulate for L1 what the hardware does for L0. Advertise the support, depending on the host feature.
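In effect, on the L2->L1 switch the IA32e-mode bit of the saved VM-entry controls is refreshed from the live VMCS while all other bits are preserved; the key hunk in prepare_vmcs12() below is:

	vmcs12->vm_entry_controls =
		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);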
Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/vmx.c | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 3c9f455bacee..056bda586a45 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -95,6 +95,8 @@ #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8eaabfb20232..02f8c32b9b08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2022,6 +2022,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; +static u32 nested_vmx_misc_low, nested_vmx_misc_high; static __init void nested_vmx_setup_ctls_msrs(void) { /* @@ -2106,6 +2107,11 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_WBINVD_EXITING; + + /* miscellaneous data */ + rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); + nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_high = 0; } static inline bool vmx_control_verify(u32 control, u32 low, u32 high) @@ -2176,7 +2182,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_entry_ctls_high); break; case MSR_IA32_VMX_MISC: - *pdata = 0; + *pdata = vmx_control_msr(nested_vmx_misc_low, + nested_vmx_misc_high); break; /* * These MSRs specify bits which the guest must keep fixed (on or off) @@ -7398,6 +7405,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + vmcs12->vm_entry_controls = + (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | + (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); + /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); -- GitLab From 0238ea913c21a89387f93097acfbdfeebc9c9257 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 13 Mar 2013 11:31:24 +0100 Subject: [PATCH 0081/3163] KVM: nVMX: Add preemption timer support Provided the host has this feature, it's straightforward to offer it to the guest as well. We just need to load the timer value on L2 entry if the feature was enabled by L1 and watch out for the corresponding exit reason.
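Concretely, on L2 entry the timer value programmed by L1 is forwarded to the hardware VMCS only when L1 enabled the control, and the new exit reason is reflected back to L1 in nested_vmx_exit_handled(); the core of the change is:

	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
			     vmcs12->vmx_preemption_timer_value);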
Reviewed-by: Paolo Bonzini Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 3 +++ arch/x86/include/uapi/asm/vmx.h | 5 +++-- arch/x86/kvm/vmx.c | 17 ++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 056bda586a45..fc1c3134473b 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -70,6 +70,7 @@ #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -95,6 +96,7 @@ #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 /* VMCS Encodings */ @@ -217,6 +219,7 @@ enum vmcs_field { GUEST_INTERRUPTIBILITY_INFO = 0x00004824, GUEST_ACTIVITY_STATE = 0X00004826, GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, CR0_GUEST_HOST_MASK = 0x00006000, CR4_GUEST_HOST_MASK = 0x00006002, diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 2871fccfee68..d651082c7cf7 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,7 @@ #define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 @@ -110,7 +111,7 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } - + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 02f8c32b9b08..17a693868458 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -298,7 +298,8 @@ struct __packed vmcs12 { u32 guest_activity_state; u32 guest_sysenter_cs; u32 host_ia32_sysenter_cs; - u32 padding32[8]; /* room for future expansion */ + u32 vmx_preemption_timer_value; + u32 padding32[7]; /* room for future expansion */ u16 virtual_processor_id; u16 guest_es_selector; u16 guest_cs_selector; @@ -537,6 +538,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), + FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), FIELD(CR0_READ_SHADOW, cr0_read_shadow), @@ -2049,7 +2051,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | - PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; + PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | + PIN_BASED_VMX_PREEMPTION_TIMER; nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; /* @@ -2110,7 +2113,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); - nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | + VMX_MISC_SAVE_EFER_LMA; 
nested_vmx_misc_high = 0; } @@ -6190,6 +6194,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_EPT_VIOLATION: case EXIT_REASON_EPT_MISCONFIG: return 0; + case EXIT_REASON_PREEMPTION_TIMER: + return vmcs12->pin_based_vm_exec_control & + PIN_BASED_VMX_PREEMPTION_TIMER; case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: @@ -7011,6 +7018,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs_config.pin_based_exec_ctrl | vmcs12->pin_based_vm_exec_control)); + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, + vmcs12->vmx_preemption_timer_value); + /* * Whether page-faults are trapped is determined by a combination of * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. -- GitLab From 95b0430d1a53541076ffbaf453f8b49a547cceba Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 12 Mar 2013 17:44:40 +0900 Subject: [PATCH 0082/3163] KVM: MMU: Mark sp mmio cached when creating mmio spte This will be used not to zap unrelated mmu pages when creating/moving a memory slot later. Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ef7f4a5cf8c7..9b75cae83d10 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -230,6 +230,7 @@ struct kvm_mmu_page { #endif int write_flooding_count; + bool mmio_cached; }; struct kvm_pio_request { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fdacabba6f62..de45ec195346 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + access &= ACC_WRITE_MASK | ACC_USER_MASK; + sp->mmio_cached = true; trace_mark_mmio_spte(sptep, gfn, access); mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); } -- GitLab From 982b3394dd23eec6e5a2f7871238435a167b63cc Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 12 Mar 2013 17:45:30 +0900 Subject: [PATCH 0083/3163] KVM: x86: Optimize mmio spte zapping when creating/moving memslot When we create or move a memory slot, we need to zap mmio sptes. Currently, zap_all() is used for this and this is causing two problems: - extra page faults after zapping mmu pages - long mmu_lock hold time during zapping mmu pages For the latter, Marcelo reported a disastrous mmu_lock hold time during hot-plug, which made the guest unresponsive for a long time. This patch takes a simple way to fix these problems: do not zap mmu pages unless they are marked mmio cached. On our test box, this took only 50us for the 4GB guest and we did not see ms of mmu_lock hold time any more. Note that we still need to do zap_all() for other cases. So another work is also needed: Xiao's work may be the one. 
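One subtlety in the implementation below is worth spelling out: the zap loop must restart from the list head whenever kvm_mmu_prepare_zap_page() reports that it zapped anything, because zapping one shadow page can unlink further pages (its children) from active_mmu_pages and so invalidate even the safe-iteration cursor. A minimal sketch of the idiom, hedged, using the kvm-internal names from the patch:

	/*
	 * Sketch only. kvm_mmu_prepare_zap_page() returns the number of
	 * pages it queued for zapping; a nonzero return means the list
	 * may have changed beyond the cursor that
	 * list_for_each_entry_safe() holds, so the walk must restart.
	 */
	restart:
		list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
			if (!sp->mmio_cached)
				continue;	/* leave unrelated pages alone */
			if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
				goto restart;	/* list changed under us */
		}
		kvm_mmu_commit_zap_page(kvm, &invalid_list);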
Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 18 ++++++++++++++++++ arch/x86/kvm/x86.c | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9b75cae83d10..3f205c6cde59 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -767,6 +767,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index de45ec195346..c1a9b7b08ab7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4189,6 +4189,24 @@ void kvm_mmu_zap_all(struct kvm *kvm) spin_unlock(&kvm->mmu_lock); } +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) +{ + struct kvm_mmu_page *sp, *node; + LIST_HEAD(invalid_list); + + spin_lock(&kvm->mmu_lock); +restart: + list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { + if (!sp->mmio_cached) + continue; + if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) + goto restart; + } + + kvm_mmu_commit_zap_page(kvm, &invalid_list); + spin_unlock(&kvm->mmu_lock); +} + static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 61a5bb60af86..d3c478742e2c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6991,7 +6991,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * mmio sptes. */ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - kvm_mmu_zap_all(kvm); + kvm_mmu_zap_mmio_sptes(kvm); kvm_reload_remote_mmus(kvm); } } -- GitLab From fe9ab00f8354a4c388e30301859c5741590c3809 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 14 Mar 2013 13:21:00 +0000 Subject: [PATCH 0084/3163] dell-laptop: Fix krealloc() misuse in parse_da_table() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If krealloc() returns NULL, it *doesn't* free the original. So any code of the form 'foo = krealloc(foo, …);' is almost certainly a bug. 
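To make the failure mode concrete, here is a minimal sketch of the broken pattern and its fix (illustrative variable names, not code from the driver):

	/* Buggy: on failure krealloc() returns NULL but does NOT free buf,
	 * so the only reference to the old allocation is overwritten and
	 * the memory leaks. */
	buf = krealloc(buf, new_len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;		/* old buffer leaked here */

	/* Correct: keep the old pointer until the reallocation succeeds. */
	tmp = krealloc(buf, new_len, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;		/* buf is still valid and still owned */
	buf = tmp;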
Signed-off-by: David Woodhouse --- drivers/platform/x86/dell-laptop.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index fa3ee6209572..1134119521ac 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -284,6 +284,7 @@ static void __init parse_da_table(const struct dmi_header *dm) { /* Final token is a terminator, so we don't want to copy it */ int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1; + struct calling_interface_token *new_da_tokens; struct calling_interface_structure *table = container_of(dm, struct calling_interface_structure, header); @@ -296,12 +297,13 @@ static void __init parse_da_table(const struct dmi_header *dm) da_command_address = table->cmdIOAddress; da_command_code = table->cmdIOCode; - da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) * - sizeof(struct calling_interface_token), - GFP_KERNEL); + new_da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) * + sizeof(struct calling_interface_token), + GFP_KERNEL); - if (!da_tokens) + if (!new_da_tokens) return; + da_tokens = new_da_tokens; memcpy(da_tokens+da_num_tokens, table->tokens, sizeof(struct calling_interface_token) * tokens); -- GitLab From 63662139e519ce06090b2759cf4a1d291b9cc0e2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 14 Mar 2013 13:23:11 +0000 Subject: [PATCH 0085/3163] params: Fix potential memory leak in add_sysfs_param() On allocation failure, it would fail to free the old attrs array which was no longer referenced by anything (since it would free the old module_param_attrs struct on the way out). Comment the suspicious-looking krealloc() usage to explain why it *isn't* actually buggy, despite looking like a classic realloc() usage bug. Signed-off-by: David Woodhouse --- kernel/params.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index ed35345be536..53b958fcd639 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -613,10 +613,13 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1), GFP_KERNEL); if (!new) { - kfree(mk->mp); + kfree(attrs); err = -ENOMEM; goto fail; } + /* Despite looking like the typical realloc() bug, this is safe. + * We *want* the old 'attrs' to be freed either way, and we'll store + * the new one in the success case. */ attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL); if (!attrs) { err = -ENOMEM; -- GitLab From 12faafe45477244bc32e2c58f74f7305cc7e84fa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Mar 2013 17:45:15 +0900 Subject: [PATCH 0086/3163] f2fs: fix to unlock node page when it was truncated If the node page was truncated, its block address became zero. This means that we don't need to write the node page, but have to unlock NODE_WRITE, decrease the number of dirty node pages, and then unlock_page before returning the f2fs_write_node_page with zero. 
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e275218904ed..3dc63f4cf2b1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1106,15 +1106,15 @@ static int f2fs_write_node_page(struct page *page, /* This page is already truncated */ if (ni.blk_addr == NULL_ADDR) - return 0; + goto out; set_page_writeback(page); /* insert node offset */ write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr); +out: dec_page_count(sbi, F2FS_DIRTY_NODES); - mutex_unlock_op(sbi, NODE_WRITE); unlock_page(page); return 0; -- GitLab From 66d36a2944df461448159be5af13049dd2689e77 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 26 Feb 2013 12:43:46 +0900 Subject: [PATCH 0087/3163] f2fs: read with READ_SYNC when getting dnode page The get_node_page_ra tries to: 1. grab or read a target node page for the given nid, 2. then, call ra_node_page to read other adjacent node pages in advance. So, when we try to read a target node page by #1, we should submit bio with READ_SYNC instead of READA. And, in #2, READA should be used. Signed-off-by: Jaegeuk Kim Reviewed-by: Namjae Jeon --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3dc63f4cf2b1..efcada7becd5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -930,7 +930,7 @@ struct page *get_node_page_ra(struct page *parent, int start) if (!page) return ERR_PTR(-ENOMEM); - err = read_node_page(page, READA); + err = read_node_page(page, READ_SYNC); if (err) { f2fs_put_page(page, 1); return ERR_PTR(err); -- GitLab From 266e97a81cf73d1a0dac5f68391da382630a80b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 26 Feb 2013 13:10:46 +0900 Subject: [PATCH 0088/3163] f2fs: introduce readahead mode of node pages Previously, f2fs reads several node pages ahead when get_dnode_of_data is called with RDONLY_NODE flag. And, this flag is set by the following functions. - get_data_block_ro - get_lock_data_page - do_write_data_page - truncate_blocks - truncate_hole However, this readahead mechanism is initially introduced for the use of get_data_block_ro to enhance the sequential read performance. So, let's clarify all the cases with the additional modes as follows. enum { ALLOC_NODE, /* allocate a new node page if needed */ LOOKUP_NODE, /* look up a node without readahead */ LOOKUP_NODE_RA, /* * look up a node with readahead called * by get_datablock_ro. 
*/ } Signed-off-by: Jaegeuk Kim Reviewed-by: Namjae Jeon --- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/f2fs.h | 12 ++++++++---- fs/f2fs/file.c | 8 ++++---- fs/f2fs/node.c | 6 +++--- fs/f2fs/recovery.c | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7bd22a201125..277966a8547a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -183,7 +183,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) f2fs_put_page(page, 0); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, RDONLY_NODE); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) return ERR_PTR(err); f2fs_put_dnode(&dn); @@ -222,7 +222,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, RDONLY_NODE); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) return ERR_PTR(err); f2fs_put_dnode(&dn); @@ -262,7 +262,7 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, int err; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) return ERR_PTR(err); @@ -392,7 +392,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); if (err) return (err == -ENOENT) ? 0 : err; @@ -443,7 +443,7 @@ int do_write_data_page(struct page *page) int err = 0; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, RDONLY_NODE); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) return err; @@ -607,7 +607,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, mutex_lock_op(sbi, DATA_NEW); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) { mutex_unlock_op(sbi, DATA_NEW); f2fs_put_page(page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cc2213afdcc7..be7ae70b0b1d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -125,11 +125,15 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) * file keeping -1 as its node offset to * distinguish from index node blocks. */ -#define RDONLY_NODE 1 /* - * specify a read-only mode when getting - * a node block. 0 is read-write mode. - * used by get_dnode_of_data(). +enum { + ALLOC_NODE, /* allocate a new node page if needed */ + LOOKUP_NODE, /* look up a node without readahead */ + LOOKUP_NODE_RA, /* + * look up a node with readahead called + * by get_datablock_ro. 
*/ +}; + #define F2FS_LINK_MAX 32000 /* maximum link count per file */ /* for in-memory extent cache entry */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 958a46da19ae..269645e23519 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -43,7 +43,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, /* block allocation */ set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, 0); + err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); if (err) { mutex_unlock_op(sbi, DATA_NEW); goto out; @@ -258,7 +258,7 @@ static int truncate_blocks(struct inode *inode, u64 from) mutex_lock_op(sbi, DATA_TRUNC); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, free_from, RDONLY_NODE); + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; @@ -420,7 +420,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) mutex_lock_op(sbi, DATA_TRUNC); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, RDONLY_NODE); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { mutex_unlock_op(sbi, DATA_TRUNC); if (err == -ENOENT) @@ -504,7 +504,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, mutex_lock_op(sbi, DATA_NEW); set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, index, 0); + ret = get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { mutex_unlock_op(sbi, DATA_NEW); break; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index efcada7becd5..65ec2eabb392 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -384,7 +384,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) /* * Caller should call f2fs_put_dnode(dn). */ -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct page *npage[4]; @@ -411,7 +411,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) for (i = 1; i <= level; i++) { bool done = false; - if (!nids[i] && !ro) { + if (!nids[i] && mode == ALLOC_NODE) { mutex_lock_op(sbi, NODE_NEW); /* alloc new node */ @@ -434,7 +434,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) alloc_nid_done(sbi, nids[i]); mutex_unlock_op(sbi, NODE_NEW); done = true; - } else if (ro && i == level && level > 1) { + } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); if (IS_ERR(npage[i])) { err = PTR_ERR(npage[i]); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b235215ac138..6b82e2034cfd 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -247,7 +247,7 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, end = start + ADDRS_PER_BLOCK; set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, start, 0)) + if (get_dnode_of_data(&dn, start, ALLOC_NODE)) return; wait_on_page_writeback(dn.node_page); -- GitLab From 52c2db3f95ff8e8d9650885d6d66b8258ded1e38 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Wed, 20 Feb 2013 07:47:06 +0900 Subject: [PATCH 0089/3163] f2fs: check the level before calling get_nid function The caller of get_nid should be careful not to put lower value than NODE_DIR1_BLOCK in case of level is zero. 
Signed-off-by: Changman Lee
Reviewed-by: Namjae Jeon
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/node.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 65ec2eabb392..d408e69294c8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -403,7 +403,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 		return PTR_ERR(npage[0]);

 	parent = npage[0];
-	nids[1] = get_nid(parent, offset[0], true);
+	if (level != 0)
+		nids[1] = get_nid(parent, offset[0], true);
 	dn->inode_page = npage[0];
 	dn->inode_page_locked = true;
-- 
GitLab

From e0f56cb44b05abacb6aa8fa8695c28431e84b7a0 Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sat, 2 Feb 2013 23:51:51 +0900
Subject: [PATCH 0090/3163] f2fs: optimize get node page readahead part

We can remove the find_get_page call that fetched a page from the cache
and checked it for up-to-date, since grab_cache_page itself fetches the
page from the cache. So, remove that call, move the PageUptodate check
to the proper place, and relocate the lock_page call relative to the
page_hit label.

Signed-off-by: Namjae Jeon
Signed-off-by: Amit Sahrawat
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/node.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d408e69294c8..58f7216993c7 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -921,19 +921,17 @@ struct page *get_node_page_ra(struct page *parent, int start)
 	if (!nid)
 		return ERR_PTR(-ENOENT);

-	page = find_get_page(mapping, nid);
-	if (page && PageUptodate(page))
-		goto page_hit;
-	f2fs_put_page(page, 0);
 repeat:
 	page = grab_cache_page(mapping, nid);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
+	else if (PageUptodate(page))
+		goto page_hit;

 	err = read_node_page(page, READ_SYNC);
+	unlock_page(page);
 	if (err) {
-		f2fs_put_page(page, 1);
+		f2fs_put_page(page, 0);
 		return ERR_PTR(err);
 	}
@@ -947,8 +945,9 @@ struct page *get_node_page_ra(struct page *parent, int start)
 		ra_node_page(sbi, nid);
 	}

-page_hit:
 	lock_page(page);
+
+page_hit:
 	if (PageError(page)) {
 		f2fs_put_page(page, 1);
 		return ERR_PTR(-EIO);
-- 
GitLab

From 3aa770a9c9d077283b1aa07e8549a4fdc41fc5ed Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sat, 2 Mar 2013 12:40:50 +0900
Subject: [PATCH 0091/3163] f2fs: optimize and change return path in
 lookup_free_nid_list

Return the matching entry directly from the list walk instead of
tracking it in a local variable, which simplifies the return path of
lookup_free_nid_list.

Signed-off-by: Namjae Jeon
Signed-off-by: Amit Sahrawat
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/node.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 58f7216993c7..6006e8e8a5f3 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1195,14 +1195,13 @@ const struct address_space_operations f2fs_node_aops = {
 static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
 {
 	struct list_head *this;
-	struct free_nid *i = NULL;
+	struct free_nid *i;
 	list_for_each(this, head) {
 		i = list_entry(this, struct free_nid, list);
 		if (i->nid == n)
-			break;
-		i = NULL;
+			return i;
 	}
-	return i;
+	return NULL;
 }

 static void __del_from_free_nid_list(struct free_nid *i)
-- 
GitLab

From 5a20d339c785d98d8b050b9afc098e4184a6098c Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Sun, 3 Mar 2013 13:58:05 +0900
Subject: [PATCH 0092/3163] f2fs: align f2fs maximum name length to linux
 based filesystem

The maximum filename length supported in Linux is 255 characters. So
let's follow that.
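A quick worked example of the slot arithmetic that the renamed macros below make explicit (values taken from the diff):

	/* One dentry slot covers F2FS_SLOT_LEN = 8 bytes of name, and
	 * GET_DENTRY_SLOTS() rounds a name length up to whole slots:
	 *   GET_DENTRY_SLOTS(x)   == (x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS
	 *   GET_DENTRY_SLOTS(9)   == (9 + 7) >> 3   == 2 slots
	 *   GET_DENTRY_SLOTS(255) == (255 + 7) >> 3 == 32 slots
	 * so a maximum-length name still fits the on-disk 8-byte slot grid.
	 */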
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 +++ fs/f2fs/namei.c | 2 +- fs/f2fs/super.c | 2 +- include/linux/f2fs_fs.h | 17 +++++++++-------- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a1f38443ecee..2851ae6948a1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -189,6 +189,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + if (namelen > F2FS_NAME_LEN) + return NULL; + if (npages == 0) return NULL; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1a49b881bac0..d4a171b1a68b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -197,7 +197,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_dir_entry *de; struct page *page; - if (dentry->d_name.len > F2FS_MAX_NAME_LEN) + if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); de = f2fs_find_entry(dir, &dentry->d_name, &page); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c117649a035..1c7f595ca47c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -180,7 +180,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = sbi->total_node_count; buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); - buf->f_namelen = F2FS_MAX_NAME_LEN; + buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f9a12f6243a5..df6fab82f87e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -139,7 +139,7 @@ struct f2fs_extent { __le32 len; /* lengh of the extent */ } __packed; -#define F2FS_MAX_NAME_LEN 256 +#define F2FS_NAME_LEN 255 #define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ @@ -165,7 +165,8 @@ struct f2fs_inode { __le32 i_flags; /* file attributes */ __le32 i_pino; /* parent inode number */ __le32 i_namelen; /* file name length */ - __u8 i_name[F2FS_MAX_NAME_LEN]; /* file name for SPOR */ + __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ + __u8 i_reserved2; /* for backward compatibility */ struct f2fs_extent i_ext; /* caching a largest extent */ @@ -362,10 +363,10 @@ struct f2fs_summary_block { typedef __le32 f2fs_hash_t; /* One directory entry slot covers 8bytes-long file name */ -#define F2FS_NAME_LEN 8 -#define F2FS_NAME_LEN_BITS 3 +#define F2FS_SLOT_LEN 8 +#define F2FS_SLOT_LEN_BITS 3 -#define GET_DENTRY_SLOTS(x) ((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS) +#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) /* the number of dentry in a block */ #define NR_DENTRY_IN_BLOCK 214 @@ -377,10 +378,10 @@ typedef __le32 f2fs_hash_t; #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) #define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ - F2FS_NAME_LEN) * \ + F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) -/* One directory entry slot representing F2FS_NAME_LEN-sized file name */ +/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ struct f2fs_dir_entry { __le32 hash_code; /* hash code of file name */ __le32 ino; /* inode number */ @@ -394,7 +395,7 @@ struct f2fs_dentry_block { __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; __u8 reserved[SIZE_OF_RESERVED]; struct f2fs_dir_entry 
 	dentry[NR_DENTRY_IN_BLOCK];
-	__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN];
+	__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN];
 } __packed;

 /* file types used in inode_info->flags */
-- 
GitLab

From 25c0a6e529b56ca010e1f46239edd07c1b484b63 Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sat, 2 Mar 2013 12:41:31 +0900
Subject: [PATCH 0093/3163] f2fs: avoid extra ++ while returning from
 get_node_path

In get_node_path, 'n' tracks the index into the offset[] array, but
every path that breaks out to the 'got' label also performs a final
n++ whose result is never used. So, remove the ++ from those exit
paths. Also, avoid the redundant 'level = 0' assignment in the first
case.

Signed-off-by: Namjae Jeon
Signed-off-by: Amit Sahrawat
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/node.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 6006e8e8a5f3..a3cb1ff34f8e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -320,15 +320,14 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
 	noffset[0] = 0;

 	if (block < direct_index) {
-		offset[n++] = block;
-		level = 0;
+		offset[n] = block;
 		goto got;
 	}
 	block -= direct_index;
 	if (block < direct_blks) {
 		offset[n++] = NODE_DIR1_BLOCK;
 		noffset[n] = 1;
-		offset[n++] = block;
+		offset[n] = block;
 		level = 1;
 		goto got;
 	}
@@ -336,7 +335,7 @@
 	if (block < direct_blks) {
 		offset[n++] = NODE_DIR2_BLOCK;
 		noffset[n] = 2;
-		offset[n++] = block;
+		offset[n] = block;
 		level = 1;
 		goto got;
 	}
@@ -346,7 +345,7 @@
 		noffset[n] = 3;
 		offset[n++] = block / direct_blks;
 		noffset[n] = 4 + offset[n - 1];
-		offset[n++] = block % direct_blks;
+		offset[n] = block % direct_blks;
 		level = 2;
 		goto got;
 	}
@@ -356,7 +355,7 @@
 		noffset[n] = 4 + dptrs_per_blk;
 		offset[n++] = block / direct_blks;
 		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
-		offset[n++] = block % direct_blks;
+		offset[n] = block % direct_blks;
 		level = 2;
 		goto got;
 	}
@@ -371,7 +370,7 @@
 		noffset[n] = 7 + (dptrs_per_blk * 2) +
 			offset[n - 2] * (dptrs_per_blk + 1) +
 			offset[n - 1];
-		offset[n++] = block % direct_blks;
+		offset[n] = block % direct_blks;
 		level = 3;
 		goto got;
 	} else {
-- 
GitLab

From 4918c6ca6838658b71d3ce75e1a4373195ac8d40 Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Fri, 15 Mar 2013 08:38:56 +0100
Subject: [PATCH 0094/3163] KVM: VMX: Require KVM_SET_TSS_ADDR to be called
 prior to running a VCPU

Very old user space (namely qemu-kvm before kvm-49) didn't set the TSS
base before running the VCPU. We always warned about this bug, but no
reports about users actually seeing this are known. Time to finally
remove the workaround that effectively prevented vmx_vcpu_reset from
being called while already holding the KVM srcu lock.
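With the fallback gone, user space must do what the API documentation has long required. A minimal hedged sketch of the ioctl ordering (error handling elided; 0xfffbd000 is merely the conventional address just below the BIOS region that qemu-kvm uses -- any unused 3-page guest-physical range works):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int kvm = open("/dev/kvm", O_RDWR);
	int vm  = ioctl(kvm, KVM_CREATE_VM, 0);

	/* Reserve 3 pages of guest-physical space for the real-mode TSS.
	 * On Intel hosts without "unrestricted guest" support this must
	 * happen before the first KVM_RUN. */
	ioctl(vm, KVM_SET_TSS_ADDR, 0xfffbd000UL);

	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	/* ... set up memory and registers, then KVM_RUN ... */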
Reviewed-by: Gleb Natapov
Signed-off-by: Jan Kiszka
Signed-off-by: Marcelo Tosatti
---
 arch/x86/kvm/vmx.c | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 17a693868458..ad978a6c282e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2898,22 +2898,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmx->cpl = 0;
 }

-static gva_t rmode_tss_base(struct kvm *kvm)
-{
-	if (!kvm->arch.tss_addr) {
-		struct kvm_memslots *slots;
-		struct kvm_memory_slot *slot;
-		gfn_t base_gfn;
-
-		slots = kvm_memslots(kvm);
-		slot = id_to_memslot(slots, 0);
-		base_gfn = slot->base_gfn + slot->npages - 3;
-
-		return base_gfn << PAGE_SHIFT;
-	}
-	return kvm->arch.tss_addr;
-}
-
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
 	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -2964,19 +2948,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)

 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-	 * vcpu. Call it here with phys address pointing 16M below 4G.
+	 * vcpu. Warn the user that an update is overdue.
 	 */
-	if (!vcpu->kvm->arch.tss_addr) {
+	if (!vcpu->kvm->arch.tss_addr)
 		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
 			     "called before entering vcpu\n");
-		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-		vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
-		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	}

 	vmx_segment_cache_clear(vmx);

-	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
+	vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
@@ -3623,7 +3603,7 @@ static int init_rmode_tss(struct kvm *kvm)
 	int r, idx, ret = 0;

 	idx = srcu_read_lock(&kvm->srcu);
-	fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+	fn = kvm->arch.tss_addr >> PAGE_SHIFT;
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -4190,9 +4170,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);

 	vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	vmx_set_cr4(&vmx->vcpu, 0);
 	vmx_set_efer(&vmx->vcpu, 0);
 	vmx_fpu_activate(&vmx->vcpu);
-- 
GitLab

From 393ff91f57c87d48ffed30878be6e3e486d3a00a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Fri, 8 Mar 2013 21:29:23 +0900
Subject: [PATCH 0095/3163] f2fs: reduce unnecessary page locking during read

This patch reduces redundant locking and unlocking of pages during read
operations. In f2fs_readpage, let's use wait_on_page_locked() instead
of lock_page. And then, when we need to modify any data finally, let's
lock the page so that we can avoid lock contention.

[readpage rule]
- f2fs_readpage returns an unlocked page, or a released page in error
  cases.
- Its caller should handle the read error, -EIO, after locking the
  page, since acquiring the lock indicates read completion.
- Its caller should check PageUptodate after grab_cache_page.
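A hedged sketch of a caller obeying the rule above (it mirrors the reworked get_node_page() in the diff below):

	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);
	if (PageUptodate(page))		/* rule: re-check after grab_cache_page */
		goto done;

	err = f2fs_readpage(sbi, page, blk_addr, READ_SYNC);
	if (err)			/* the page was already released on error */
		return ERR_PTR(err);

	lock_page(page);		/* acquiring the lock implies read completion */
	if (!PageUptodate(page)) {	/* rule: detect -EIO only after locking */
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	done:
		/* the page is held (and locked) here */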
Signed-off-by: Changman Lee Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 +++++---- fs/f2fs/data.c | 58 +++++++++++++++++++++++--------------------- fs/f2fs/node.c | 58 ++++++++++++++++++++++++++------------------ fs/f2fs/recovery.c | 31 ++++++++++++++--------- 4 files changed, 91 insertions(+), 68 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2b6fc131e2ce..d947e66ee8a8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -57,13 +57,15 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) cond_resched(); goto repeat; } - if (f2fs_readpage(sbi, page, index, READ_SYNC)) { - f2fs_put_page(page, 1); + if (PageUptodate(page)) + goto out; + + if (f2fs_readpage(sbi, page, index, READ_SYNC)) goto repeat; - } - mark_page_accessed(page); - /* We do not allow returning an errorneous page */ + lock_page(page); +out: + mark_page_accessed(page); return page; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 277966a8547a..c8e20b618913 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -199,12 +199,17 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) if (!page) return ERR_PTR(-ENOMEM); + if (PageUptodate(page)) { + unlock_page(page); + return page; + } + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); - return ERR_PTR(err); + wait_on_page_locked(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); } - unlock_page(page); return page; } @@ -241,9 +246,13 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) BUG_ON(dn.data_blkaddr == NULL_ADDR); err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return ERR_PTR(err); + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); } return page; } @@ -283,14 +292,17 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); } else { err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return ERR_PTR(err); + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); } } - SetPageUptodate(page); if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { @@ -325,22 +337,14 @@ static void read_end_io(struct bio *bio, int err) /* * Fill the locked page with data located in the block address. - * Read operation is synchronous, and caller must unlock the page. + * Return unlocked page. 
*/ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, block_t blk_addr, int type) { struct block_device *bdev = sbi->sb->s_bdev; - bool sync = (type == READ_SYNC); struct bio *bio; - /* This page can be already read by other threads */ - if (PageUptodate(page)) { - if (!sync) - unlock_page(page); - return 0; - } - down_read(&sbi->bio_sem); /* Allocate a new bio */ @@ -354,18 +358,12 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, kfree(bio->bi_private); bio_put(bio); up_read(&sbi->bio_sem); + f2fs_put_page(page, 1); return -EFAULT; } submit_bio(type, bio); up_read(&sbi->bio_sem); - - /* wait for read completion if sync */ - if (sync) { - lock_page(page); - if (PageError(page)) - return -EIO; - } return 0; } @@ -636,18 +634,22 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, /* Reading beyond i_size is simple: memset to zero */ zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); - return 0; + goto out; } if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return err; + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return -EIO; } } +out: SetPageUptodate(page); clear_cold_data(page); return 0; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a3cb1ff34f8e..9e6ed6708fa8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -100,10 +100,13 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) page = grab_cache_page(mapping, index); if (!page) continue; - if (f2fs_readpage(sbi, page, index, READ)) { + if (PageUptodate(page)) { f2fs_put_page(page, 1); continue; } + if (f2fs_readpage(sbi, page, index, READ)) + continue; + f2fs_put_page(page, 0); } } @@ -851,8 +854,16 @@ static int read_node_page(struct page *page, int type) get_node_info(sbi, page->index, &ni); - if (ni.blk_addr == NULL_ADDR) + if (ni.blk_addr == NULL_ADDR) { + f2fs_put_page(page, 1); return -ENOENT; + } + + if (PageUptodate(page)) { + unlock_page(page); + return 0; + } + return f2fs_readpage(sbi, page, ni.blk_addr, type); } @@ -865,19 +876,18 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) struct page *apage; apage = find_get_page(mapping, nid); - if (apage && PageUptodate(apage)) - goto release_out; + if (apage && PageUptodate(apage)) { + f2fs_put_page(apage, 0); + return; + } f2fs_put_page(apage, 0); apage = grab_cache_page(mapping, nid); if (!apage) return; - if (read_node_page(apage, READA)) - unlock_page(apage); - -release_out: - f2fs_put_page(apage, 0); + if (read_node_page(apage, READA) == 0) + f2fs_put_page(apage, 0); return; } @@ -892,11 +902,14 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) return ERR_PTR(-ENOMEM); err = read_node_page(page, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return ERR_PTR(err); - } + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } BUG_ON(nid != nid_of_node(page)); mark_page_accessed(page); return page; @@ -928,11 +941,8 @@ struct page *get_node_page_ra(struct page *parent, int start) goto page_hit; err = read_node_page(page, READ_SYNC); - unlock_page(page); - if (err) { - f2fs_put_page(page, 0); + if (err) return ERR_PTR(err); - } /* Then, try readahead for siblings of the desired node */ end = start + MAX_RA_NODE; @@ -957,6 +967,7 @@ struct page *get_node_page_ra(struct page *parent, int start) f2fs_put_page(page, 1); goto repeat; } + 
mark_page_accessed(page); return page; } @@ -1473,23 +1484,24 @@ int restore_node_summary(struct f2fs_sb_info *sbi, sum_entry = &sum->entries[0]; for (i = 0; i < last_offset; i++, sum_entry++) { + /* + * In order to read next node page, + * we must clear PageUptodate flag. + */ + ClearPageUptodate(page); + if (f2fs_readpage(sbi, page, addr, READ_SYNC)) goto out; + lock_page(page); rn = (struct f2fs_node *)page_address(page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; addr++; - - /* - * In order to read next node page, - * we must clear PageUptodate flag. - */ - ClearPageUptodate(page); } -out: unlock_page(page); +out: __free_pages(page, 0); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6b82e2034cfd..2d86eb26c493 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -112,11 +112,16 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) while (1) { struct fsync_inode_entry *entry; - if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) goto out; - if (cp_ver != cpver_of_node(page)) - goto out; + lock_page(page); + + if (cp_ver != cpver_of_node(page)) { + err = -EINVAL; + goto unlock_out; + } if (!is_fsync_dnode(page)) goto next; @@ -131,7 +136,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_INODE(page) && is_dent_dnode(page)) { if (recover_inode_page(sbi, page)) { err = -ENOMEM; - goto out; + goto unlock_out; } } @@ -139,14 +144,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); if (!entry) { err = -ENOMEM; - goto out; + goto unlock_out; } entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - goto out; + goto unlock_out; } list_add_tail(&entry->list, head); @@ -155,15 +160,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_INODE(page)) { err = recover_inode(entry->inode, page); if (err) - goto out; + goto unlock_out; } next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); - ClearPageUptodate(page); } -out: +unlock_out: unlock_page(page); +out: __free_pages(page, 0); return err; } @@ -319,8 +324,10 @@ static void recover_data(struct f2fs_sb_info *sbi, if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) goto out; + lock_page(page); + if (cp_ver != cpver_of_node(page)) - goto out; + goto unlock_out; entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) @@ -336,10 +343,10 @@ static void recover_data(struct f2fs_sb_info *sbi, next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); - ClearPageUptodate(page); } -out: +unlock_out: unlock_page(page); +out: __free_pages(page, 0); allocate_new_segments(sbi); -- GitLab From 08d8058be6d11bd81f2ed75fc0ecdf55b7685655 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Mar 2013 17:49:22 +0900 Subject: [PATCH 0096/3163] f2fs: should check the node page was truncated first Currently, f2fs doesn't reclaim any node pages. However, if we found that a node page was truncated by checking its block address with zero during f2fs_write_node_page, we should not skip that node page and return zero to reclaim it. 
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9e6ed6708fa8..90221cc7247c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1097,13 +1097,6 @@ static int f2fs_write_node_page(struct page *page, block_t new_addr; struct node_info ni; - if (wbc->for_reclaim) { - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; - } - wait_on_page_writeback(page); mutex_lock_op(sbi, NODE_WRITE); @@ -1118,6 +1111,14 @@ static int f2fs_write_node_page(struct page *page, if (ni.blk_addr == NULL_ADDR) goto out; + if (wbc->for_reclaim) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + mutex_unlock_op(sbi, NODE_WRITE); + return AOP_WRITEPAGE_ACTIVATE; + } + set_page_writeback(page); /* insert node offset */ -- GitLab From 48cb76c7be7056810cdcdcdcd8d90d3fdc4e250f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 14 Mar 2013 08:49:58 +0900 Subject: [PATCH 0097/3163] f2fs: scan next nat page to reuse free nids in there When we build new free nids, let's scan the just next NAT page instead of skipping a couple of previously scanned pages in order to reuse free nids in there. Otherwise, we can use too much wide range of nids even though several nids were deallocated, and also their node pages can be cached in the node_inode's address space. This means that we can retain lots of clean pages in the main memory, which induces mm's reclaiming overhead. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 90221cc7247c..94951d9aff4c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1317,7 +1317,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi) break; } - nm_i->next_scan_nid = nid; + /* go to the next nat page in order to reuse free nids first */ + nm_i->next_scan_nid = nm_i->init_scan_nid + NAT_ENTRY_PER_BLOCK; /* find free nids from current sum_pages */ mutex_lock(&curseg->curseg_mutex); -- GitLab From c3850aa1cb25872fddacd7abd8dfb021411e92ee Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 14 Mar 2013 09:24:32 +0900 Subject: [PATCH 0098/3163] f2fs: fix return value of releasepage for node and data If the return value of releasepage is equal to zero, the page cannot be reclaimed. Instead, we should return 1 in order to reclaim clean pages. 
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c8e20b618913..ea8be6fc38f1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -683,7 +683,7 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) static int f2fs_release_data_page(struct page *page, gfp_t wait) { ClearPagePrivate(page); - return 0; + return 1; } static int f2fs_set_data_page_dirty(struct page *page) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94951d9aff4c..bf9172bbbb00 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -933,7 +933,6 @@ struct page *get_node_page_ra(struct page *parent, int start) if (!nid) return ERR_PTR(-ENOENT); -repeat: page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); @@ -961,12 +960,6 @@ struct page *get_node_page_ra(struct page *parent, int start) f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - - /* Has the page been truncated? */ - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } mark_page_accessed(page); return page; } @@ -1189,7 +1182,7 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) static int f2fs_release_node_page(struct page *page, gfp_t wait) { ClearPagePrivate(page); - return 0; + return 1; } /* @@ -1630,8 +1623,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); - - /* We can reuse this freed nid at this point */ add_free_nid(NM_I(sbi), nid); } else { write_lock(&nm_i->nat_tree_lock); -- GitLab From 04431c44e55613a91ced16c523f749c08dff91bf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 16 Mar 2013 08:34:37 +0900 Subject: [PATCH 0099/3163] f2fs: fix not to allocate max_nid The build_free_nid should not add free nids over nm_i->max_nid. But, there was a hole that invalid free nid was added by the following scenario. Let's suppose nm_i->max_nid = 150 and the last NAT page has 100 ~ 200 nids. build_free_nids - get_current_nat_page loads the last NAT page - scan_nat_page can add 100 ~ 200 nids -> Bug here! So, when scanning an NAT page, we should check each candidate whether it is over max_nid or not. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bf9172bbbb00..f7b03ba9c0d7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1268,6 +1268,8 @@ static int scan_nat_page(struct f2fs_nm_info *nm_i, i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + if (start_nid >= nm_i->max_nid) + break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); BUG_ON(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) -- GitLab From ae51fb31b8c3eb0cedc223782832be393e53623b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 16 Mar 2013 11:13:04 +0900 Subject: [PATCH 0100/3163] f2fs: fix to call WRITE_FLUSH at the end of fsync The fsync call should be ended after flushing the in-device caches. 
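The missing step matters because filemap_fdatawait_range() only guarantees that writeback I/O completed, i.e. that the data reached the device; if the device has a volatile write cache, the data may still be lost on power failure. A hedged sketch of the ordering the fix establishes:

	/* 1) dirty data/node pages have been submitted earlier in
	 *    f2fs_sync_file()
	 * 2) wait until the device has acknowledged all of them */
	filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
	/* 3) flush the device write cache so acknowledged writes are
	 *    durable before fsync() reports success */
	ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);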
Reviewed-by: Namjae Jeon
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/file.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 269645e23519..ff018a42e435 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include <linux/blkdev.h>
 #include
 #include
 #include
@@ -178,6 +179,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 		}
 		filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
+		ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 	}
 out:
 	mutex_unlock(&inode->i_mutex);
-- 
GitLab

From d3ee456dfbed1992bcaa0096d9bc76a691b0e700 Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sun, 17 Mar 2013 17:26:14 +0900
Subject: [PATCH 0101/3163] f2fs: notify when discard is not supported

Change f2fs so that a warning is emitted when an attempt is made to
mount a filesystem with the unsupported discard option.

Signed-off-by: Namjae Jeon
Signed-off-by: Amit Sahrawat
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/super.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1c7f595ca47c..022b32a14f34 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <linux/blkdev.h>
 #include

 #include "f2fs.h"
@@ -650,6 +651,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto fail;

+	if (test_opt(sbi, DISCARD)) {
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
+		if (!blk_queue_discard(q))
+			f2fs_msg(sb, KERN_WARNING,
+					"mounting with \"discard\" option, but "
+					"the device does not support discard");
+	}
+
 	return 0;
 fail:
 	stop_gc_thread(sbi);
-- 
GitLab

From 7c909772f1222dd82098659da4d0c41d8a051790 Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sun, 17 Mar 2013 17:26:39 +0900
Subject: [PATCH 0102/3163] f2fs: reorganize f2fs_setxattr

Make use of F2FS_NAME_LEN for name length checking, and change the
return conditions in a few places by storing the error value in
'error' and using a common exit path.

Signed-off-by: Namjae Jeon
Signed-off-by: Amit Sahrawat
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/xattr.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8038c0496504..3bfea80610ff 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -310,12 +310,13 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
 	if (name == NULL)
 		return -EINVAL;
-	name_len = strlen(name);

 	if (value == NULL)
 		value_len = 0;

-	if (name_len > 255 || value_len > MAX_VALUE_LEN)
+	name_len = strlen(name);
+
+	if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN)
 		return -ERANGE;

 	f2fs_balance_fs(sbi);
@@ -326,8 +327,8 @@
 		struct dnode_of_data dn;

 		if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
-			mutex_unlock_op(sbi, NODE_NEW);
-			return -ENOSPC;
+			error = -ENOSPC;
+			goto exit;
 		}
 		set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
 		mark_inode_dirty(inode);
@@ -336,8 +337,8 @@
 		if (IS_ERR(page)) {
 			alloc_nid_failed(sbi, fi->i_xattr_nid);
 			fi->i_xattr_nid = 0;
-			mutex_unlock_op(sbi, NODE_NEW);
-			return PTR_ERR(page);
+			error = PTR_ERR(page);
+			goto exit;
 		}
 		alloc_nid_done(sbi, fi->i_xattr_nid);
@@ -349,8 +350,8 @@
 		/* The inode already has an extended attribute block.
 		 */
 		page = get_node_page(sbi, fi->i_xattr_nid);
 		if (IS_ERR(page)) {
-			mutex_unlock_op(sbi, NODE_NEW);
-			return PTR_ERR(page);
+			error = PTR_ERR(page);
+			goto exit;
 		}

 		base_addr = page_address(page);
@@ -438,6 +439,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
 	return 0;
 cleanup:
 	f2fs_put_page(page, 1);
+exit:
 	mutex_unlock_op(sbi, NODE_NEW);
 	return error;
 }
-- 
GitLab

From c0d39e65ba324390eb0ffb60661ab12104e5fcc7 Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sun, 17 Mar 2013 17:26:53 +0900
Subject: [PATCH 0103/3163] f2fs: fix return values from validate superblock

validate_superblock is not returning proper values. On a failure from
sb_bread it should report -EIO; otherwise it should return -EINVAL.
Returning '1' does not convey a proper error code as the return value.

Signed-off-by: Namjae Jeon
Signed-off-by: Amit Sahrawat
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/super.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 022b32a14f34..1db5ebe6692e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -474,7 +474,7 @@ static int validate_superblock(struct super_block *sb,
 	if (!*raw_super_buf) {
 		f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", super);
-		return 1;
+		return -EIO;
 	}

 	*raw_super = (struct f2fs_super_block *)
@@ -486,7 +486,7 @@
 	f2fs_msg(sb, KERN_ERR,
 		"Can't find a valid F2FS filesystem "
		"in %s superblock", super);
-	return 1;
+	return -EINVAL;
 }

 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -509,9 +509,12 @@
 		goto free_sbi;
 	}

-	if (validate_superblock(sb, &raw_super, &raw_super_buf, 0)) {
+	err = validate_superblock(sb, &raw_super, &raw_super_buf, 0);
+	if (err) {
 		brelse(raw_super_buf);
-		if (validate_superblock(sb, &raw_super, &raw_super_buf, 1))
+		/* check secondary superblock when primary failed */
+		err = validate_superblock(sb, &raw_super, &raw_super_buf, 1);
+		if (err)
 			goto free_sb_buf;
 	}

 	/* init some FS parameters */
-- 
GitLab

From 064e0823285a41f5ccb92f26a661df5f44cac3eb Mon Sep 17 00:00:00 2001
From: Namjae Jeon
Date: Sun, 17 Mar 2013 17:27:20 +0900
Subject: [PATCH 0104/3163] f2fs: avoid BUG_ON from check_nid_range and update
 return path in do_read_inode

In check_nid_range, there is no need to trigger a BUG_ON and stop the
kernel. Instead, it can simply check the range and report an invalid
inode number with -EINVAL. Update the return path in do_read_inode to
use the return value of check_nid_range.

Signed-off-by: Namjae Jeon
Signed-off-by: Amit Sahrawat
[Jaegeuk: replace BUG_ON with WARN_ON]
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/f2fs.h  | 7 +++++-- 
 fs/f2fs/inode.c | 6 +++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index be7ae70b0b1d..06ff6a51c700 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -515,9 +515,12 @@ static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t)
 /*
  * Check whether the given nid is within node id range.
*/ -static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { - BUG_ON((nid >= NM_I(sbi)->max_nid)); + WARN_ON((nid >= NM_I(sbi)->max_nid)); + if (nid >= NM_I(sbi)->max_nid) + return -EINVAL; + return 0; } #define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ddae412d30c8..e0e8308594a5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -44,7 +44,11 @@ static int do_read_inode(struct inode *inode) struct f2fs_inode *ri; /* Check if ino is within scope */ - check_nid_range(sbi, inode->i_ino); + if (check_nid_range(sbi, inode->i_ino)) { + f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", + (unsigned long) inode->i_ino); + return -EINVAL; + } node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) -- GitLab From 111d2495a8a8fbd8e3bb0f1c1c60f977b1386249 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 19 Mar 2013 08:03:35 +0900 Subject: [PATCH 0105/3163] f2fs: fix typo in comments Correct spelling typo in comments Signed-off-by: Masanari Iida Acked-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 06ff6a51c700..5bb87e0216f5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -141,7 +141,7 @@ struct extent_info { rwlock_t ext_lock; /* rwlock for consistency */ unsigned int fofs; /* start offset in a file */ u32 blk_addr; /* start block address of the extent */ - unsigned int len; /* lenth of the extent */ + unsigned int len; /* length of the extent */ }; /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 94b8a0c48453..2e3eb2d4fc30 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -222,7 +222,7 @@ static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, } /* - * This function is called from two pathes. + * This function is called from two paths. * One is garbage collection and the other is SSR segment selection. * When it is called during GC, it just gets a victim segment * and it does not remove it from dirty seglist. 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1db5ebe6692e..c9ef88da0723 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -83,7 +83,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); - /* Initilize f2fs-specific inode info */ + /* Initialize f2fs-specific inode info */ fi->vfs_inode.i_version = 1; atomic_set(&fi->dirty_dents, 0); fi->i_current_depth = 1; -- GitLab From 65e967324200a3cca9b1e903ed624b2a962e7b2b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 9 Mar 2013 16:47:55 +0900 Subject: [PATCH 0106/3163] mips: remove redundant GENERIC_GPIO select Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/mips/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cd2e21ff562a..1179b7e0e29e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1201,7 +1201,6 @@ config CPU_LOONGSON2F bool "Loongson 2F" depends on SYS_HAS_CPU_LOONGSON2F select CPU_LOONGSON2 - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB help The Loongson 2F processor implements the MIPS III instruction set -- GitLab From 1b920175062bf5baaa94f50b0b309ceaf47ad019 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 10 Mar 2013 21:22:21 +0900 Subject: [PATCH 0107/3163] mips: loongson: use GPIO driver on CONFIG_GPIOLIB The GPIO driver uses gpiolib, thus it should be compiled when CONFIG_GPIOLIB is defined and not only CONFIG_GENERIC_GPIO. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/mips/loongson/common/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile index e526488df655..4c57b3e5743f 100644 --- a/arch/mips/loongson/common/Makefile +++ b/arch/mips/loongson/common/Makefile @@ -4,7 +4,7 @@ obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ pci.o bonito-irq.o mem.o machtype.o platform.o -obj-$(CONFIG_GENERIC_GPIO) += gpio.o +obj-$(CONFIG_GPIOLIB) += gpio.o # # Serial port support -- GitLab From f9612a65ec0a0457ee62c94e42f89abf2f8fbe21 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 11 Mar 2013 11:45:00 +0900 Subject: [PATCH 0108/3163] mips: txx9: change GENERIC_GPIO to GPIOLIB The GPIO driver for txx9 requires gpiolib, so this is more accurate and let us get rid of one mention of GENERIC_GPIO which is due to disappear. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/mips/txx9/generic/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 5524f2c7b05c..5364aabc2102 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -118,7 +118,7 @@ EXPORT_SYMBOL(clk_put); /* GPIO support */ -#ifdef CONFIG_GENERIC_GPIO +#ifdef CONFIG_GPIOLIB int gpio_to_irq(unsigned gpio) { return -EINVAL; -- GitLab From efb1243690c50874345756015c5d15e76bd74eae Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 11 Mar 2013 19:17:52 +0900 Subject: [PATCH 0109/3163] mips: alchemy: require gpiolib Require gpiolib instead of just GENERIC_GPIO. 
Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/mips/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1179b7e0e29e..983503899478 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -61,8 +61,7 @@ config MIPS_ALCHEMY select SYS_HAS_CPU_MIPS32_R1 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_APM_EMULATION - select GENERIC_GPIO - select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_REQUIRE_GPIOLIB select SYS_SUPPORTS_ZBOOT select USB_ARCH_HAS_OHCI select USB_ARCH_HAS_EHCI -- GitLab From ab294726107639ca835cf1ed1cfc5092fcf53903 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 9 Mar 2013 16:28:07 +0900 Subject: [PATCH 0110/3163] arm: remove redundant GENERIC_GPIO selection GENERIC_GPIO was selected next to ARCH_REQUIRE_GPIOLIB, which itself selects GENERIC_GPIO. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/mips/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 983503899478..50cded3714af 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -224,7 +224,6 @@ config MACH_JZ4740 select SYS_SUPPORTS_ZBOOT_UART16550 select DMA_NONCOHERENT select IRQ_CPU - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB select SYS_HAS_EARLY_PRINTK select HAVE_PWM @@ -934,7 +933,6 @@ config CSRC_SB1250 bool config GPIO_TXX9 - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB bool -- GitLab From 43b426d6fae80722c7566e73ed27ef553a91e2d6 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 10 Mar 2013 21:13:04 +0900 Subject: [PATCH 0111/3163] arm: plat-orion: use GPIO driver on CONFIG_GPIOLIB The GPIO driver uses gpiolib, thus it should be compiled when CONFIG_GPIOLIB is defined and not only CONFIG_GENERIC_GPIO Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/arm/plat-orion/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-orion/Makefile b/arch/arm/plat-orion/Makefile index a82cecb84948..bbc0d6b6aac4 100644 --- a/arch/arm/plat-orion/Makefile +++ b/arch/arm/plat-orion/Makefile @@ -5,6 +5,6 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include obj-y += addr-map.o -orion-gpio-$(CONFIG_GENERIC_GPIO) += gpio.o +orion-gpio-$(CONFIG_GPIOLIB) += gpio.o obj-$(CONFIG_PLAT_ORION_LEGACY) += irq.o pcie.o time.o common.o mpp.o obj-$(CONFIG_PLAT_ORION_LEGACY) += $(orion-gpio-y) -- GitLab From d80a357809e9d0860919cfcd563807a253e7ac0e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 8 Mar 2013 18:32:01 +0900 Subject: [PATCH 0112/3163] unicore32: remove unneeded select GENERIC_GPIO PUV3_GPIO depends on ARCH_PUV3 which requires GPIOLIB and thus already selects GENERIC_GPIO Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/unicore32/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 2943e3acdf0c..70f8c8ea8912 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -219,7 +219,6 @@ if ARCH_PUV3 config PUV3_GPIO bool depends on !ARCH_FPGA - select GENERIC_GPIO select GPIO_SYSFS default y -- GitLab From aef1552bc192c2cb1742d7684e0fbd23775f3dac Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 12 Mar 2013 18:08:41 +0900 Subject: [PATCH 0113/3163] unicore32: default GENERIC_GPIO to false GPIO implementation for unicore32 uses gpiolib, which sets GENERIC_GPIO when selected. 
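A common thread in these conversions: the gated drivers are gpiolib providers, i.e. they register a struct gpio_chip, an interface that exists only under CONFIG_GPIOLIB, whereas CONFIG_GENERIC_GPIO merely says that some GPIO API is present. A minimal provider sketch under that assumption (label and geometry are illustrative; the callbacks are omitted):

	#include <linux/gpio.h>

	static struct gpio_chip demo_chip = {
		.label = "demo-gpio",	/* illustrative name */
		.base  = 0,
		.ngpio = 32,
		/* .direction_input/.direction_output/.get/.set omitted */
	};

	static int demo_gpio_init(void)
	{
		/* gpiochip_add() is a gpiolib symbol, which is why the
		 * Makefiles in this series now use
		 * obj-$(CONFIG_GPIOLIB) += gpio.o */
		return gpiochip_add(&demo_chip);
	}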
Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/unicore32/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 70f8c8ea8912..63521dc6c9d5 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -24,7 +24,7 @@ config UNICORE32 Please see web page at . config GENERIC_GPIO - def_bool y + bool config GENERIC_CSUM def_bool y -- GitLab From a76eb09ae38b73aa3cc2b664c903e0188b46b2e6 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 9 Mar 2013 16:30:07 +0900 Subject: [PATCH 0114/3163] powerpc: remove redundant GENERIC_GPIO selection ARCH_REQUIRE_GPIOLIB selects GENERIC_GPIO, so there is no need to select it explicitly. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/powerpc/platforms/40x/Kconfig | 1 - arch/powerpc/platforms/44x/Kconfig | 1 - arch/powerpc/platforms/85xx/Kconfig | 1 - arch/powerpc/platforms/86xx/Kconfig | 3 --- arch/powerpc/platforms/8xx/Kconfig | 1 - arch/powerpc/platforms/Kconfig | 4 ---- 6 files changed, 11 deletions(-) diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index a392d12dd21f..d0b46a2fc85f 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -145,7 +145,6 @@ config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 40x select ARCH_REQUIRE_GPIOLIB - select GENERIC_GPIO help Enable gpiolib support for ppc40x based boards diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 0effe9f5a1ea..50306b8e7e6c 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -248,7 +248,6 @@ config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 44x select ARCH_REQUIRE_GPIOLIB - select GENERIC_GPIO help Enable gpiolib support for ppc440 based boards diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index a0dcd577fb0d..78549b6ebf30 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -203,7 +203,6 @@ config GE_IMP3A select DEFAULT_UIMAGE select SWIOTLB select MMIO_NVRAM - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB select GE_FPGA help diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 7a6279e38213..1afd1e4a2dd2 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -37,7 +37,6 @@ config GEF_PPC9A bool "GE PPC9A" select DEFAULT_UIMAGE select MMIO_NVRAM - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB select GE_FPGA help @@ -47,7 +46,6 @@ config GEF_SBC310 bool "GE SBC310" select DEFAULT_UIMAGE select MMIO_NVRAM - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB select GE_FPGA help @@ -57,7 +55,6 @@ config GEF_SBC610 bool "GE SBC610" select DEFAULT_UIMAGE select MMIO_NVRAM - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB select GE_FPGA select HAS_RAPIDIO diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 1fb0b3cddeb3..8dec3c0911ad 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -114,7 +114,6 @@ config 8xx_COPYBACK config 8xx_GPIO bool "GPIO API Support" - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB help Saying Y here will cause the ports on an MPC8xx processor to be used diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 52de8bccfb30..15095d58bc97 100644 --- a/arch/powerpc/platforms/Kconfig 
+++ b/arch/powerpc/platforms/Kconfig @@ -303,7 +303,6 @@ config QUICC_ENGINE config QE_GPIO bool "QE GPIO support" depends on QUICC_ENGINE - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB help Say Y here if you're going to use hardware that connects to the @@ -316,7 +315,6 @@ config CPM2 select PPC_LIB_RHEAP select PPC_PCI_CHOICE select ARCH_REQUIRE_GPIOLIB - select GENERIC_GPIO help The CPM2 (Communications Processor Module) is a coprocessor on embedded CPUs made by Freescale. Selecting this option means that @@ -355,7 +353,6 @@ config OF_RTC config SIMPLE_GPIO bool "Support for simple, memory-mapped GPIO controllers" depends on PPC - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB help Say Y here to support simple, memory-mapped GPIO controllers. @@ -366,7 +363,6 @@ config SIMPLE_GPIO config MCU_MPC8349EMITX bool "MPC8349E-mITX MCU driver" depends on I2C=y && PPC_83xx - select GENERIC_GPIO select ARCH_REQUIRE_GPIOLIB help Say Y here to enable soft power-off functionality on the Freescale -- GitLab From 95188aaf9fc81e9539606cad5c9579bd27604f92 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 10 Mar 2013 21:33:13 +0900 Subject: [PATCH 0115/3163] sh: replace CONFIG_GENERIC_GPIO by CONFIG_GPIOLIB SH GPIO drivers all use gpiolib and CONFIG_GENERIC_GPIO is only selected through CONFIG_GPIOLIB, yet some compilation units depended on CONFIG_GENERIC_GPIO. Make them depend on CONFIG_GPIOLIB instead since it is more accurate and prepares us for the future removal of CONFIG_GENERIC_GPIO. Signed-off-by: Alexandre Courbot Acked-by: Paul Mundt Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/sh/boards/mach-sdk7786/Makefile | 2 +- arch/sh/boards/mach-x3proto/Makefile | 2 +- arch/sh/kernel/cpu/sh2a/Makefile | 2 +- arch/sh/kernel/cpu/sh3/Makefile | 2 +- arch/sh/kernel/cpu/sh4a/Makefile | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sh/boards/mach-sdk7786/Makefile b/arch/sh/boards/mach-sdk7786/Makefile index 8ae56e9560ac..45d32e3590b9 100644 --- a/arch/sh/boards/mach-sdk7786/Makefile +++ b/arch/sh/boards/mach-sdk7786/Makefile @@ -1,4 +1,4 @@ obj-y := fpga.o irq.o nmi.o setup.o -obj-$(CONFIG_GENERIC_GPIO) += gpio.o +obj-$(CONFIG_GPIOLIB) += gpio.o obj-$(CONFIG_HAVE_SRAM_POOL) += sram.o diff --git a/arch/sh/boards/mach-x3proto/Makefile b/arch/sh/boards/mach-x3proto/Makefile index 708c21c919ff..0cbe3d02dea3 100644 --- a/arch/sh/boards/mach-x3proto/Makefile +++ b/arch/sh/boards/mach-x3proto/Makefile @@ -1,3 +1,3 @@ obj-y += setup.o ilsel.o -obj-$(CONFIG_GENERIC_GPIO) += gpio.o +obj-$(CONFIG_GPIOLIB) += gpio.o diff --git a/arch/sh/kernel/cpu/sh2a/Makefile b/arch/sh/kernel/cpu/sh2a/Makefile index 7fdc102d0dd6..990195d98456 100644 --- a/arch/sh/kernel/cpu/sh2a/Makefile +++ b/arch/sh/kernel/cpu/sh2a/Makefile @@ -21,4 +21,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7203) := pinmux-sh7203.o pinmux-$(CONFIG_CPU_SUBTYPE_SH7264) := pinmux-sh7264.o pinmux-$(CONFIG_CPU_SUBTYPE_SH7269) := pinmux-sh7269.o -obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) +obj-$(CONFIG_GPIOLIB) += $(pinmux-y) diff --git a/arch/sh/kernel/cpu/sh3/Makefile b/arch/sh/kernel/cpu/sh3/Makefile index 6f13f33a35ff..d3634ae7b71a 100644 --- a/arch/sh/kernel/cpu/sh3/Makefile +++ b/arch/sh/kernel/cpu/sh3/Makefile @@ -30,4 +30,4 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7712) := clock-sh7712.o pinmux-$(CONFIG_CPU_SUBTYPE_SH7720) := pinmux-sh7720.o obj-y += $(clock-y) -obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) +obj-$(CONFIG_GPIOLIB) += $(pinmux-y) diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile index 
8fc6ec2be2fa..0705df775208 100644 --- a/arch/sh/kernel/cpu/sh4a/Makefile +++ b/arch/sh/kernel/cpu/sh4a/Makefile @@ -47,6 +47,6 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SHX3) := pinmux-shx3.o obj-y += $(clock-y) obj-$(CONFIG_SMP) += $(smp-y) -obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) +obj-$(CONFIG_GPIOLIB) += $(pinmux-y) obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += ubc.o -- GitLab From 28d6ecfc2bc2de72609c23dc31e3f1f2e6cdd22f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 10 Mar 2013 21:48:09 +0900 Subject: [PATCH 0116/3163] xtensa: remove explicit selection of GENERIC_GPIO The Xtensa GPIO driver uses gpiolib and cannot compile with only GENERIC_GPIO. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/xtensa/configs/iss_defconfig | 1 - arch/xtensa/configs/s6105_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index ddab37b24741..77c52f80187a 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -10,7 +10,6 @@ CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_GPIO=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_NO_IOPORT=y diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index eaf1b8fc6556..4799c6a526b5 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -10,7 +10,6 @@ CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_GPIO=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_NO_IOPORT=y -- GitLab From 85b0987ee263e5317d13727ad85b881b91636be5 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 12 Mar 2013 16:50:39 +0900 Subject: [PATCH 0117/3163] avr32: default GENERIC_GPIO to false This will help removing the GENERIC_GPIO option. GPIO access is done through gpiolib, which selects GENERIC_GPIO anyway. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/avr32/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index c1a868d398bd..d22c6b46601c 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -27,7 +27,7 @@ config AVR32 http://avr32linux.org/. config GENERIC_GPIO - def_bool y + def_bool n config STACKTRACE_SUPPORT def_bool y -- GitLab From 5fa0a8cb45257c165e4e8ee7dfccfc605bffae90 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 12 Mar 2013 18:06:51 +0900 Subject: [PATCH 0118/3163] openrisc: default GENERIC_GPIO to false This is one step towards the removal of the GENERIC_GPIO option. OpenRISC mandates the use of GPIOLIB, which enables GENERIC_GPIO anyway, so this patch should be a no-op. 
Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij Acked-by: Jonas Bonn --- arch/openrisc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 9ab3bf2eca8d..e32440268899 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -45,7 +45,7 @@ config NO_IOPORT def_bool y config GENERIC_GPIO - def_bool y + def_bool n config TRACE_IRQFLAGS_SUPPORT def_bool y -- GitLab From eb04eb268d337db274bb675f31f690ba3ae5a346 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 11 Mar 2013 19:18:25 +0900 Subject: [PATCH 0119/3163] mips: pnx833x: remove requirement for GENERIC_GPIO pnx833x does not seem to use the generic gpio API. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij --- arch/mips/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 50cded3714af..ff0e563206cf 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1107,7 +1107,6 @@ config SOC_PNX833X select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_BIG_ENDIAN - select GENERIC_GPIO select CPU_MIPSR2_IRQ_VI config SOC_PNX8335 -- GitLab From 04b66839d312d3bdaff77f265eb7305347fa1fb7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 Mar 2013 16:30:26 +0100 Subject: [PATCH 0120/3163] KVM: x86: correctly initialize the CS base on reset The CS base was initialized to 0 on VMX (wrong, but usually overridden by userspace before starting) or 0xf0000 on SVM. The correct value is 0xffff0000, and VMX is able to emulate it now, so use it. Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 8 +------- arch/x86/kvm/vmx.c | 1 + 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7219a4012a0e..7a46c1f46861 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm) init_seg(&save->gs); save->cs.selector = 0xf000; + save->cs.base = 0xffff0000; /* Executable/Readable Code Segment */ save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; - /* - * cs.base should really be 0xffff0000, but vmx can't handle that, so - * be consistent with it. - * - * Replace when we have real mode working for vmx. - */ - save->cs.base = 0xf0000; save->gdtr.limit = 0xffff; save->idtr.limit = 0xffff; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad978a6c282e..03f574641852 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4113,6 +4113,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) seg_setup(VCPU_SREG_CS); vmcs_write16(GUEST_CS_SELECTOR, 0xf000); + vmcs_write32(GUEST_CS_BASE, 0xffff0000); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); -- GitLab From 765278b7d5e7d0f55b5060716ad45991be5cceb5 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 12 Mar 2013 17:54:50 +0900 Subject: [PATCH 0121/3163] m68k: coldfire: use gpiolib Force use of gpiolib for Coldfire, as a step towards the deprecation of GENERIC_GPIO. 
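Back to the CS-base fix in patch 0120 above: at RESET, x86 defines CS.selector = 0xf000 but CS.base = 0xffff0000 (the base comes from the hidden descriptor cache, not from selector << 4, until CS is first reloaded), so the first fetch is at base + RIP = 0xffff0000 + 0xfff0 = 0xfffffff0, just below 4 GiB. The old SVM base of 0xf0000 fetched from 0xffff0 instead. A self-contained check of that arithmetic:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t rip = 0xfff0;	/* architectural RIP value after RESET */

		/* correct base: first fetch lands on the reset vector below 4 GiB */
		assert(0xffff0000ULL + rip == 0xfffffff0ULL);

		/* the old SVM base of 0xf0000 landed just below 1 MiB instead */
		assert(0xf0000ULL + rip == 0xffff0ULL);
		return 0;
	}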
Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij Acked-by: Greg Ungerer --- arch/m68k/Kconfig.cpu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index b1cfff832fb5..d266787725b4 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -22,8 +22,7 @@ config M68KCLASSIC config COLDFIRE bool "Coldfire CPU family support" - select GENERIC_GPIO - select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_REQUIRE_GPIOLIB select ARCH_HAVE_CUSTOM_GPIO_H select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_MULDIV64 -- GitLab From a2523d3cdab76fbda28414fd82743815f3cc3df7 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 12 Mar 2013 18:04:08 +0900 Subject: [PATCH 0122/3163] blackfin: force use of gpiolib Set the GENERIC_GPIO option to false by default and force the use of gpiolib instead of making it optional, to prepare for the removal of GENERIC_GPIO. Signed-off-by: Alexandre Courbot Acked-by: Arnd Bergmann Acked-by: Linus Walleij Acked-by: Steven Miao --- arch/blackfin/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index c3f2e0bc644a..20e203aef0fe 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -31,7 +31,7 @@ config BLACKFIN select HAVE_OPROFILE select HAVE_PERF_EVENTS select ARCH_HAVE_CUSTOM_GPIO_H - select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_REQUIRE_GPIOLIB select HAVE_UID16 select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION @@ -56,7 +56,7 @@ config ZONE_DMA def_bool y config GENERIC_GPIO - def_bool y + def_bool n config FORCE_MAX_ZONEORDER int -- GitLab From 7ddca7e43c8f28f9419da81a0e7730b66aa60fe9 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 21 Mar 2013 19:33:43 +0900 Subject: [PATCH 0123/3163] KVM: MMU: Move kvm_mmu_free_some_pages() into kvm_mmu_alloc_page() What this function is doing is to ensure that the number of shadow pages does not exceed the maximum limit stored in n_max_mmu_pages: so this is placed at every code path that can reach kvm_mmu_alloc_page(). Although it might have some sense to spread this function in each such code path when it could be called before taking mmu_lock, the rule was changed not to do so. Taking this background into account, this patch moves it into kvm_mmu_alloc_page() and simplifies the code. Note: the unlikely hint in kvm_mmu_free_some_pages() guarantees that the overhead of this function is almost zero except when we actually need to allocate some shadow pages, so we do not need to care about calling it multiple times in one path by doing kvm_mmu_get_page() a few times. 
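Seen as a pattern, the patch above hoists a precondition into the allocator: instead of every fault and root-allocation path remembering to call kvm_mmu_free_some_pages() under mmu_lock before allocating, the allocator guarantees its own headroom. A condensed sketch (names as in the patch; caches, locking and the rest of the body are elided):

	static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
						       u64 *parent_pte, int direct)
	{
		struct kvm_mmu_page *sp;

		/* evict old shadow pages until we are under n_max_mmu_pages;
		 * nearly free when nothing needs zapping */
		kvm_mmu_free_some_pages(vcpu);

		sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
		/* ... rest of the initialization is unchanged ... */
		return sp;
	}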
Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 9 +++------ arch/x86/kvm/paging_tmpl.h | 1 - 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c1a9b7b08ab7..38f34c5361f4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1505,6 +1505,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte, int direct) { struct kvm_mmu_page *sp; + + kvm_mmu_free_some_pages(vcpu); + sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); if (!direct) @@ -2842,7 +2845,6 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - kvm_mmu_free_some_pages(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, @@ -2920,7 +2922,6 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL, NULL); ++sp->root_count; @@ -2932,7 +2933,6 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL, @@ -2971,7 +2971,6 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); root = __pa(sp->spt); @@ -3005,7 +3004,6 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) return 1; } spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL, NULL); @@ -3311,7 +3309,6 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - kvm_mmu_free_some_pages(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, gpa, write, map_writable, diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 105dd5bd550e..af143f065532 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -627,7 +627,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); - kvm_mmu_free_some_pages(vcpu); if (!force_pt_level) transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, -- GitLab From 81f4f76bbc712a2dff8bb020057c554e285370e1 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 21 Mar 2013 19:34:27 +0900 Subject: [PATCH 0124/3163] KVM: MMU: Rename kvm_mmu_free_some_pages() to make_mmu_pages_available() The current name "kvm_mmu_free_some_pages" should be used for something that actually frees some shadow pages, as we expect from the name, but what the function is doing is to make some, KVM_MIN_FREE_MMU_PAGES, shadow pages available: it does nothing when there are enough. 
This patch changes the name to reflect this meaning better; while doing this renaming, the code in the wrapper function is inlined into the main body since the whole function will be inlined into the only caller now. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 9 +++++++-- arch/x86/kvm/mmu.h | 6 ------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 38f34c5361f4..633e30cfbd63 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1501,12 +1501,14 @@ static void drop_parent_pte(struct kvm_mmu_page *sp, mmu_spte_clear_no_track(parent_pte); } +static void make_mmu_pages_available(struct kvm_vcpu *vcpu); + static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte, int direct) { struct kvm_mmu_page *sp; - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); @@ -4010,10 +4012,13 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); -void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) +static void make_mmu_pages_available(struct kvm_vcpu *vcpu) { LIST_HEAD(invalid_list); + if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) + return; + while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) break; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 3b1ad0049ea4..2adcbc2cac6d 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -64,12 +64,6 @@ static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) return 0; } -static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) -{ - if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES)) - __kvm_mmu_free_some_pages(vcpu); -} - static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE)) -- GitLab From d26f22c9cdfa935e674b2ff747dbcfaf9fa048f8 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Sun, 24 Feb 2013 18:57:11 +0000 Subject: [PATCH 0125/3163] KVM: PPC: move tsr update in a separate function This is done so that same function can be called from SREGS and ONE_REG interface (follow up patch). 
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index eb88fa621073..f2fd47d35ab5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1148,6 +1148,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } +static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr) +{ + u32 old_tsr = vcpu->arch.tsr; + + vcpu->arch.tsr = new_tsr; + + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + + update_timer_ints(vcpu); +} + /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { @@ -1287,16 +1299,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, kvmppc_emulate_dec(vcpu); } - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { - u32 old_tsr = vcpu->arch.tsr; - - vcpu->arch.tsr = sregs->u.e.tsr; - - if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) - arm_next_watchdog(vcpu); - - update_timer_ints(vcpu); - } + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) + kvmppc_set_tsr(vcpu, sregs->u.e.tsr); return 0; } -- GitLab From 78accda4f888c77122cf3da6185f905d4677eb07 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Sun, 24 Feb 2013 18:57:12 +0000 Subject: [PATCH 0126/3163] KVM: PPC: Added one_reg interface for timer registers If userspace wants to change some specific bits of TSR (timer status register) it uses the GET/SET_SREGS ioctl interface. So the steps will be: i) user-space will make a get ioctl, ii) change TSR in userspace, iii) then make a set ioctl. It can happen that TSR gets changed by the kernel after step i) and before step iii). To avoid this we have added the one_reg ioctls below for ORing and clearing specific bits in TSR. This patch adds a one_reg interface for: 1) setting specific bits in TSR (timer status register) 2) clearing specific bits in TSR (timer status register) 3) setting/getting the TCR register. There are cases where we want to only change TCR and not TSR. Although we could use SREGS without the KVM_SREGS_E_UPDATE_TSR flag, I think one_reg is better. I am open if someone feels we should use SREGS only here. 4) getting/setting the TSR register Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 4 ++++ arch/powerpc/include/uapi/asm/kvm.h | 5 +++++ arch/powerpc/kvm/booke.c | 30 +++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c16b442556e8..976eb650e7ef 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1788,6 +1788,10 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 PPC | KVM_REG_PPC_EPR | 32 + PPC | KVM_REG_PPC_TCR | 32 + PPC | KVM_REG_PPC_TSR | 32 + PPC | KVM_REG_PPC_OR_TSR | 32 + PPC | KVM_REG_PPC_CLEAR_TSR | 32 ARM registers are mapped using the lower 32 bits.
The upper 16 of that is the register group type, or coprocessor number: diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 16064d00adb9..ef072b1a6e3f 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -417,4 +417,9 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) +/* Timer Status Register OR/CLEAR interface */ +#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87) +#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88) +#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89) +#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f2fd47d35ab5..11825539e2b1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1442,6 +1442,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); break; #endif + case KVM_REG_PPC_TCR: + r = put_user(vcpu->arch.tcr, (u32 __user *)(long)reg->addr); + break; + case KVM_REG_PPC_TSR: + r = put_user(vcpu->arch.tsr, (u32 __user *)(long)reg->addr); + break; default: break; } @@ -1485,6 +1491,30 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; } #endif + case KVM_REG_PPC_OR_TSR: { + u32 tsr_bits; + r = get_user(tsr_bits, (u32 __user *)(long)reg->addr); + kvmppc_set_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_CLEAR_TSR: { + u32 tsr_bits; + r = get_user(tsr_bits, (u32 __user *)(long)reg->addr); + kvmppc_clr_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_TSR: { + u32 tsr; + r = get_user(tsr, (u32 __user *)(long)reg->addr); + kvmppc_set_tsr(vcpu, tsr); + break; + } + case KVM_REG_PPC_TCR: { + u32 tcr; + r = get_user(tcr, (u32 __user *)(long)reg->addr); + kvmppc_set_tcr(vcpu, tcr); + break; + } default: break; } -- GitLab From 15b708beee6841e0a59ded702c8bfe3042a5b5a4 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 27 Feb 2013 18:13:10 +0000 Subject: [PATCH 0127/3163] KVM: PPC: booke: Added debug handler Installed debug handler will be used for guest debug support and debug facility emulation features (patches for these features will follow this patch). 
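What the new TSR one_reg handles buy userspace (patch 0126 above) is an atomic read-modify-write inside the kernel, closing the get/set race described in the commit message. A hedged sketch of the intended usage, assuming an open vcpu file descriptor and the uapi definitions added by the patch (the bit mask passed in is illustrative):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* atomically OR bits into the guest's TSR; no get/modify/set window */
	static int vcpu_or_tsr(int vcpu_fd, uint32_t bits)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_OR_TSR,	/* added by this patch */
			.addr = (uintptr_t)&bits,
		};
		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}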
Signed-off-by: Liu Yu [bharat.bhushan@freescale.com: Substantial changes] Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/booke_interrupts.S | 42 ++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d1bb86074721..e34f8fee9080 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -504,6 +504,7 @@ struct kvm_vcpu_arch { u32 tlbcfg[4]; u32 mmucfg; u32 epr; + u32 crit_save; struct kvmppc_booke_debug_reg dbg_reg; #endif gpa_t paddr_accessed; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b6c17ec9b169..d87c90886c75 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -596,6 +596,7 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); + DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); #endif /* CONFIG_PPC_BOOK3S */ #endif /* CONFIG_KVM */ diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index f4bb55c96517..2c6deb5ef2fe 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -54,8 +54,7 @@ -- GitLab From: Scott Wood Date: Wed, 13 Feb 2013 19:37:48 +0000 Subject: [PATCH 0128/3163] kvm/ppc/e500: h2g_tlb1_rmap: esel 0 is valid Add one to esel values in h2g_tlb1_rmap, so that "no mapping" can be distinguished from "esel 0". Note that we're not saved by the fact that host esel 0 is reserved for non-KVM use, because KVM host esel numbering is not the raw host numbering (see to_htlb1_esel). Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index a222edfb9a9b..35fb80ec1f57 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -511,10 +511,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; if (vcpu_e500->h2g_tlb1_rmap[sesel]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1; vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; return sesel; } -- GitLab From 36ada4f4317e27bf52f52aef5c72f553eef08f4a Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 13 Feb 2013 19:37:49 +0000 Subject: [PATCH 0129/3163] kvm/ppc/e500: g2h_tlb1_map: clear old bit before setting new bit It's possible that we're using the same host TLB1 slot to map (a presumably different portion of) the same guest TLB1 entry. Clear the bit in the map before setting it, so that if the esels are the same the bit will remain set.
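The h2g_tlb1_rmap fix above is the standard "store index + 1 so that zero can mean empty" encoding; every reader then subtracts the one back, as the - 1 in the diff does. The invariant, as a sketch (array and types are illustrative):

	/* rmap[s] == 0     : host slot s maps no guest entry
	 * rmap[s] == e + 1 : host slot s maps guest esel e */
	static inline void rmap_set(unsigned int *rmap, int sesel, int esel)
	{
		rmap[sesel] = esel + 1;
	}

	static inline int rmap_get(const unsigned int *rmap, int sesel)
	{
		return rmap[sesel] ? (int)rmap[sesel] - 1 : -1;	/* -1: none */
	}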
Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 35fb80ec1f57..8e72b2124f63 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -507,13 +507,14 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) vcpu_e500->host_tlb1_nv = 0; - vcpu_e500->tlb_refs[1][sesel] = *ref; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; if (vcpu_e500->h2g_tlb1_rmap[sesel]) { unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1; vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } + + vcpu_e500->tlb_refs[1][sesel] = *ref; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; return sesel; -- GitLab From 47bf379742bf1baad9624e203912b72c3fa9c80a Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 6 Mar 2013 16:02:49 +0000 Subject: [PATCH 0130/3163] kvm/ppc/e500: eliminate tlb_refs Commit 523f0e5421c12610527c620b983b443f329e3a32 ("KVM: PPC: E500: Explicitly mark shadow maps invalid") began using E500_TLB_VALID for guest TLB1 entries, and skipping invalidations if it's not set. However, when E500_TLB_VALID was set for such entries, it was on a fake local ref, and so the invalidations never happen. gtlb_privs is documented as being only for guest TLB0, though we already violate that with E500_TLB_BITMAP. Now that we have MMU notifiers, and thus don't need to actually retain a reference to the mapped pages, get rid of tlb_refs, and use gtlb_privs for E500_TLB_VALID in TLB1. Since we can have more than one host TLB entry for a given tlbe_ref, be careful not to clear existing flags that are relevant to other host TLB entries when preparing a new host TLB entry. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h | 24 ++++------ arch/powerpc/kvm/e500_mmu_host.c | 75 ++++++++++---------------------- 2 files changed, 30 insertions(+), 69 deletions(-) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 41cefd43655f..33db48a8ce24 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -26,17 +26,20 @@ #define E500_PID_NUM 3 #define E500_TLB_NUM 2 -#define E500_TLB_VALID 1 -#define E500_TLB_BITMAP 2 +/* entry is mapped somewhere in host TLB */ +#define E500_TLB_VALID (1 << 0) +/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */ +#define E500_TLB_BITMAP (1 << 1) +/* TLB1 entry is mapped by host TLB0 */ #define E500_TLB_TLB0 (1 << 2) struct tlbe_ref { - pfn_t pfn; - unsigned int flags; /* E500_TLB_* */ + pfn_t pfn; /* valid only for TLB0, except briefly */ + unsigned int flags; /* E500_TLB_* */ }; struct tlbe_priv { - struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ + struct tlbe_ref ref; }; #ifdef CONFIG_KVM_E500V2 @@ -63,17 +66,6 @@ struct kvmppc_vcpu_e500 { unsigned int gtlb_nv[E500_TLB_NUM]; - /* - * information associated with each host TLB entry -- - * TLB1 only for now. If/when guest TLB1 entries can be - * mapped with host TLB0, this will be used for that too. - * - * We don't want to use this for guest TLB0 because then we'd - * have the overhead of doing the translation again even if - * the entry is still in the guest TLB (e.g. 
we swapped out - * and back, and our host TLB entries got evicted). - */ - struct tlbe_ref *tlb_refs[E500_TLB_NUM]; unsigned int host_tlb1_nv; u32 svr; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 8e72b2124f63..1c6a9d729df4 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -193,8 +193,11 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; /* Don't bother with unmapped entries */ - if (!(ref->flags & E500_TLB_VALID)) - return; + if (!(ref->flags & E500_TLB_VALID)) { + WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), + "%s: flags %x\n", __func__, ref->flags); + WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]); + } if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; @@ -248,7 +251,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, pfn_t pfn) { ref->pfn = pfn; - ref->flags = E500_TLB_VALID; + ref->flags |= E500_TLB_VALID; if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); @@ -257,6 +260,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { + /* FIXME: don't log bogus pfn for TLB1 */ trace_kvm_booke206_ref_release(ref->pfn, ref->flags); ref->flags = 0; } @@ -274,36 +278,23 @@ static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) { - int tlbsel = 0; - int i; - - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); - } -} - -static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int stlbsel = 1; + int tlbsel; int i; - kvmppc_e500_tlbil_all(vcpu_e500); - - for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->tlb_refs[stlbsel][i]; - kvmppc_e500_ref_release(ref); + for (tlbsel = 0; tlbsel <= 1; tlbsel++) { + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->gtlb_priv[tlbsel][i].ref; + kvmppc_e500_ref_release(ref); + } } - - clear_tlb_privs(vcpu_e500); } void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - clear_tlb_refs(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); + clear_tlb_privs(vcpu_e500); clear_tlb1_bitmap(vcpu_e500); } @@ -458,8 +449,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); } - /* Drop old ref and setup new one. 
*/ - kvmppc_e500_ref_release(ref); kvmppc_e500_ref_setup(ref, gtlbe, pfn); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, @@ -512,10 +501,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->tlb_refs[1][sesel] = *ref; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; + WARN_ON(!(ref->flags & E500_TLB_VALID)); return sesel; } @@ -527,13 +516,12 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref ref; + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref; int sesel; int r; - ref.flags = 0; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - &ref); + ref); if (r) return r; @@ -545,7 +533,7 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, } /* Otherwise map into TLB1 */ - sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel); write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; @@ -566,7 +554,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, case 0: priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - /* Triggers after clear_tlb_refs or on initial mapping */ + /* Triggers after clear_tlb_privs or on initial mapping */ if (!(priv->ref.flags & E500_TLB_VALID)) { kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); } else { @@ -666,35 +654,16 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) host_tlb_params[0].entries / host_tlb_params[0].ways; host_tlb_params[1].sets = 1; - vcpu_e500->tlb_refs[0] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[0]) - goto err; - - vcpu_e500->tlb_refs[1] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[1]) - goto err; - vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * host_tlb_params[1].entries, GFP_KERNEL); if (!vcpu_e500->h2g_tlb1_rmap) - goto err; + return -EINVAL; return 0; - -err: - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); - return -EINVAL; } void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { kfree(vcpu_e500->h2g_tlb1_rmap); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); } -- GitLab From 4fe27d2addda8af7714546a69369fb92dddcf9a3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 14 Feb 2013 14:00:25 +0000 Subject: [PATCH 0131/3163] KVM: PPC: Remove unused argument to kvmppc_core_dequeue_external Currently kvmppc_core_dequeue_external() takes a struct kvm_interrupt * argument and does nothing with it, in any of its implementations. This removes it in order to make things easier for forthcoming in-kernel interrupt controller emulation code. 
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 3 +-- arch/powerpc/kvm/book3s.c | 3 +-- arch/powerpc/kvm/booke.c | 3 +-- arch/powerpc/kvm/powerpc.c | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44fa9ad1d62c..f58930779ae8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a4b645285240..6548445fd823 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 11825539e2b1..58057d6f146d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, prio); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7b5d4d20cdc5..16b45954511c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { - kvmppc_core_dequeue_external(vcpu, irq); + kvmppc_core_dequeue_external(vcpu); return 0; } -- GitLab From 1bf0f20bcc153f4f97bbcc52cd7f0596b427ef34 Mon Sep 17 00:00:00 2001 From: Philip Avinash Date: Wed, 20 Mar 2013 12:09:24 +0530 Subject: [PATCH 0132/3163] pwm: davinci: Add Kconfig support for ECAP & EHRPWM devices Add EHRPWM and ECAP build support for DAVINCI_DA8XX platforms. Also, since DAVINCI platforms don't support the TI-PWM-Subsystem module, remove the select option for CONFIG_PWM_TIPWMSS. Also, update the CONFIG_PWM_TIPWMSS compiler directive appropriately in pwm-tipwmss.h to fix the below compiler error upon removal of CONFIG_PWM_TIPWMSS for DAVINCI platforms.
drivers/pwm/pwm-tiecap.c: In function 'ecap_pwm_probe': drivers/pwm/pwm-tiecap.c:263:4: error: 'PWMSS_ECAPCLK_EN' undeclared (first use in this function) drivers/pwm/pwm-tiecap.c:263:4: note: each undeclared identifier is reported only once for each function it appears in drivers/pwm/pwm-tiecap.c:264:17: error: 'PWMSS_ECAPCLK_EN_ACK' undeclared (first use in this function) drivers/pwm/pwm-tiecap.c: In function 'ecap_pwm_remove': drivers/pwm/pwm-tiecap.c:291:49: error: 'PWMSS_ECAPCLK_STOP_REQ' undeclared (first use in this function) make[2]: *** [drivers/pwm/pwm-tiecap.o] Error 1 make[1]: *** [drivers/pwm] Error 2 make: *** [drivers] Error 2 Signed-off-by: Philip Avinash Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 8 +++----- drivers/pwm/pwm-tipwmss.h | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 0e0bfa035083..115b64453493 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -147,8 +147,7 @@ config PWM_TEGRA config PWM_TIECAP tristate "ECAP PWM support" - depends on SOC_AM33XX - select PWM_TIPWMSS + depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX help PWM driver support for the ECAP APWM controller found on AM33XX TI SOC @@ -158,8 +157,7 @@ config PWM_TIECAP config PWM_TIEHRPWM tristate "EHRPWM PWM support" - depends on SOC_AM33XX - select PWM_TIPWMSS + depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX help PWM driver support for the EHRPWM controller found on AM33XX TI SOC @@ -169,7 +167,7 @@ config PWM_TIEHRPWM config PWM_TIPWMSS bool - depends on SOC_AM33XX && (PWM_TIEHRPWM || PWM_TIECAP) + default y if SOC_AM33XX && (PWM_TIECAP || PWM_TIEHRPWM) help PWM Subsystem driver support for AM33xx SOC. diff --git a/drivers/pwm/pwm-tipwmss.h b/drivers/pwm/pwm-tipwmss.h index 11f76a1e266b..10ad8040408b 100644 --- a/drivers/pwm/pwm-tipwmss.h +++ b/drivers/pwm/pwm-tipwmss.h @@ -18,7 +18,6 @@ #ifndef __TIPWMSS_H #define __TIPWMSS_H -#ifdef CONFIG_PWM_TIPWMSS /* PWM substem clock gating */ #define PWMSS_ECAPCLK_EN BIT(0) #define PWMSS_ECAPCLK_STOP_REQ BIT(1) @@ -28,6 +27,7 @@ #define PWMSS_ECAPCLK_EN_ACK BIT(0) #define PWMSS_EPWMCLK_EN_ACK BIT(8) +#ifdef CONFIG_PWM_TIPWMSS extern u16 pwmss_submodule_state_change(struct device *dev, int set); #else static inline u16 pwmss_submodule_state_change(struct device *dev, int set) -- GitLab From 9114d79569a3fb858a7ecb1f21cb1dec93dc2f21 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:42 +0100 Subject: [PATCH 0133/3163] drbd: cleanup bogus assert message This fixes ASSERT( mdev->state.disk == D_FAILED ) in drivers/block/drbd/drbd_main.c When we detach from local disk, we let the local refcount hit zero twice. First, we transition to D_FAILED, so we won't give out new references to incoming requests; we still may give out *internal* references, though. Once the refcount hits zero [1] while in D_FAILED, we queue a transition to D_DISKLESS to our worker. We need to queue it, because we may be in atomic context when putting the reference. Once the transition to D_DISKLESS actually happened [2] from worker context, we don't give out new internal references either. Between hitting zero the first time [1] and actually transition to D_DISKLESS [2], there may be a few very short lived internal get/put, so we may hit zero more than once while being in D_FAILED, or even see a race where a an internal get_ldev() happened while D_FAILED, but the corresponding put_ldev() happens just after the transition to D_DISKLESS. 
That's why we have the additional test_and_set_bit(GO_DISKLESS,); and that's why the assert was placed wrong. Since there was exactly one code path left to drbd_go_diskless(), and that checks already for D_FAILED, drop that assert, and fold in the drbd_queue_work(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 7 ++++--- drivers/block/drbd/drbd_main.c | 7 ------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6b51afa1aae1..db504d021a6e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1148,7 +1148,6 @@ extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, char *why, enum bm_flag flags); extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); -extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); /* Meta data layout @@ -2053,9 +2052,11 @@ static inline void put_ldev(struct drbd_conf *mdev) if (mdev->state.disk == D_DISKLESS) /* even internal references gone, safe to destroy */ drbd_ldev_destroy(mdev); - if (mdev->state.disk == D_FAILED) + if (mdev->state.disk == D_FAILED) { /* all application IO references gone. */ - drbd_go_diskless(mdev); + if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) + drbd_queue_work(&mdev->tconn->sender_work, &mdev->go_diskless); + } wake_up(&mdev->misc_wait); } } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e98da675f0c1..731a28eedc56 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3252,13 +3252,6 @@ static int w_go_diskless(struct drbd_work *w, int unused) return 0; } -void drbd_go_diskless(struct drbd_conf *mdev) -{ - D_ASSERT(mdev->state.disk == D_FAILED); - if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) - drbd_queue_work(&mdev->tconn->sender_work, &mdev->go_diskless); -} - /** * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap * @mdev: DRBD device. -- GitLab From ae8bf312e97d554b6aa32e7b2ceb993812ad0835 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:43 +0100 Subject: [PATCH 0134/3163] drbd: cleanup ondisk meta data layout calculations and defines Add a comment about our meta data layout variants, and rename a few defines (e.g. MD_RESERVED_SECT -> MD_128MB_SECT) to make it clear that they are short hand for fixed constants, and not arbitrarily to be redefined as one may see fit. Properly pad struct meta_data_on_disk to 4kB, and initialize to zero not only the first 512 Byte, but all of it in drbd_md_sync(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 28 ++++++++--- drivers/block/drbd/drbd_bitmap.c | 13 ++++- drivers/block/drbd/drbd_int.h | 86 +++++++++++++++++++------------- drivers/block/drbd/drbd_main.c | 11 ++-- drivers/block/drbd/drbd_nl.c | 42 ++++++++++++---- 5 files changed, 123 insertions(+), 57 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 92510f8ad013..b230d91ec430 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -209,7 +209,8 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ"); - err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); + /* we do all our meta data IO in aligned 4k blocks. */ + err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, 4096); if (err) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); @@ -350,6 +351,24 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); } +static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev) +{ + const unsigned int stripes = 1; + const unsigned int stripe_size_4kB = MD_32kB_SECT/MD_4kB_SECT; + + /* transaction number, modulo on-disk ring buffer wrap around */ + unsigned int t = mdev->al_tr_number % (stripe_size_4kB * stripes); + + /* ... to aligned 4k on disk block */ + t = ((t % stripes) * stripe_size_4kB) + t/stripes; + + /* ... to 512 byte sector in activity log */ + t *= 8; + + /* ... plus offset to the on disk position */ + return mdev->ldev->md.md_offset + mdev->ldev->md.al_offset + t; +} + static int _al_write_transaction(struct drbd_conf *mdev) { @@ -432,13 +451,12 @@ _al_write_transaction(struct drbd_conf *mdev) if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset - + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); + sector = al_tr_number_to_on_disk_sector(mdev); crc = crc32c(0, buffer, 4096); buffer->crc32c = cpu_to_be32(crc); + /* normal execution path goes through all three branches */ if (drbd_bm_write_hinted(mdev)) err = -EIO; /* drbd_chk_io_error done already */ @@ -446,8 +464,6 @@ _al_write_transaction(struct drbd_conf *mdev) err = -EIO; drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); } else { - /* advance ringbuffer position and transaction counter */ - mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); mdev->al_tr_number++; } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 8dc29502dc08..64fbb8385cdc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -612,6 +612,17 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) } } +/* For the layout, see comment above drbd_md_set_sector_offsets(). */ +static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev) +{ + u64 bitmap_sectors; + if (ldev->md.al_offset == 8) + bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset; + else + bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset; + return bitmap_sectors << (9 + 3); +} + /* * make sure the bitmap has enough room for the attached storage, * if necessary, resize. 
@@ -668,7 +679,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) words = ALIGN(bits, 64) >> LN2_BPL; if (get_ldev(mdev)) { - u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12; + u64 bits_on_disk = drbd_md_on_disk_bits(mdev->ldev); put_ldev(mdev); if (bits > bits_on_disk) { dev_info(DEV, "bits = %lu\n", bits); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index db504d021a6e..60c89e5b298c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -753,13 +753,8 @@ struct drbd_md { u32 flags; u32 md_size_sect; - s32 al_offset; /* signed relative sector offset to al area */ + s32 al_offset; /* signed relative sector offset to activity log */ s32 bm_offset; /* signed relative sector offset to bitmap */ - - /* u32 al_nr_extents; important for restoring the AL - * is stored into ldev->dc.al_extents, which in turn - * gets applied to act_log->nr_elements - */ }; struct drbd_backing_dev { @@ -1009,7 +1004,6 @@ struct drbd_conf { struct lru_cache *act_log; /* activity log */ unsigned int al_tr_number; int al_tr_cycle; - int al_tr_pos; /* position of the next transaction in the journal */ wait_queue_head_t seq_wait; atomic_t packet_seq; unsigned int peer_seq; @@ -1151,21 +1145,41 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); /* Meta data layout - We reserve a 128MB Block (4k aligned) * either at the end of the backing device * or on a separate meta data device. */ + * + * We currently have two possible layouts. + * Offsets in (512 byte) sectors. + * external: + * |----------- md_size_sect ------------------| + * [ 4k superblock ][ activity log ][ Bitmap ] + * | al_offset == 8 | + * | bm_offset = al_offset + X | + * ==> bitmap sectors = md_size_sect - bm_offset + * + * Variants: + * old, indexed fixed size meta data: + * + * internal: + * |----------- md_size_sect ------------------| + * [data.....][ Bitmap ][ activity log ][ 4k superblock ][padding*] + * | al_offset < 0 | + * | bm_offset = al_offset - Y | + * ==> bitmap sectors = Y = al_offset - bm_offset + * + * [padding*] are zero or up to 7 unused 512 Byte sectors to the + * end of the device, so that the [4k superblock] will be 4k aligned. + * + * The activity log consists of 4k transaction blocks, + * which are written in a ring-buffer, or striped ring-buffer like fashion. + * The activity log size used to be fixed 32kB, + * but is about to become configurable. + */ -/* The following numbers are sectors */ -/* Allows up to about 3.8TB, so if you want more, +/* Our old fixed size meta data layout + * allows up to about 3.8TB, so if you want more, * you need to use the "flexible" meta data format.
-#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */
-#define MD_AL_OFFSET	8	/* 8 Sectors after start of meta area */
-#define MD_AL_SECTORS	64	/* = 32 kB on disk activity log ring buffer */
-#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS)
-
-/* we do all meta data IO in 4k blocks */
-#define MD_BLOCK_SHIFT	12
-#define MD_BLOCK_SIZE	(1<<MD_BLOCK_SHIFT)
+#define MD_128MB_SECT (128LU << 11)
+#define MD_4kB_SECT	 8
+#define MD_32kB_SECT	64
@@ ... @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
 	switch (meta_dev_idx) {
 	case DRBD_MD_INDEX_INTERNAL:
 	case DRBD_MD_INDEX_FLEX_INT:
-		return bdev->md.md_offset + MD_AL_OFFSET - 1;
+		return bdev->md.md_offset + MD_4kB_SECT -1;
 	case DRBD_MD_INDEX_FLEX_EXT:
 	default:
-		return bdev->md.md_offset + bdev->md.md_size_sect;
+		return bdev->md.md_offset + bdev->md.md_size_sect -1;
 	}
 }
@@ -1861,13 +1876,11 @@ static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
 	rcu_read_unlock();

 	switch (meta_dev_idx) {
-	default: /* external, some index */
-		return MD_RESERVED_SECT * meta_dev_idx;
+	default: /* external, some index; this is the old fixed size layout */
+		return MD_128MB_SECT * meta_dev_idx;
 	case DRBD_MD_INDEX_INTERNAL:
 		/* with drbd08, internal meta data is always "flexible" */
 	case DRBD_MD_INDEX_FLEX_INT:
-		/* sizeof(struct md_on_disk_07) == 4k
-		 * position: last 4k aligned block of 4k size */
 		if (!bdev->backing_bdev) {
 			if (__ratelimit(&drbd_ratelimit_state)) {
 				dev_err(DEV, "bdev->backing_bdev==NULL\n");
 				dump_stack();
 			}
 			return 0;
 		}
-		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL)
-			- MD_AL_OFFSET;
+		/* sizeof(struct md_on_disk_07) == 4k
+		 * position: last 4k aligned block of 4k size */
+		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8;
 	case DRBD_MD_INDEX_FLEX_EXT:
 		return 0;
 	}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 731a28eedc56..76faeab40c8f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2834,6 +2834,7 @@ void conn_md_sync(struct drbd_tconn *tconn)
 	rcu_read_unlock();
 }

+/* aligned 4kByte */
 struct meta_data_on_disk {
 	u64 la_size;           /* last agreed size. */
 	u64 uuid[UI_SIZE];   /* UUIDs. */
 	u64 device_uuid;
 	u64 reserved_u64_1;
@@ -2843,13 +2844,13 @@ struct meta_data_on_disk {
 	u32 magic;
 	u32 md_size_sect;
 	u32 al_offset;         /* offset to this block */
-	u32 al_nr_extents;     /* important for restoring the AL */
+	u32 al_nr_extents;     /* important for restoring the AL (userspace) */
+	      /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
 	u32 bm_offset;         /* offset to the bitmap, from here */
 	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
 	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
-	u32 reserved_u32[3];
+	u8 reserved_u8[4096 - (7*8 + 8*4)];

 } __packed;

 /**
@@ -2862,6 +2863,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	sector_t sector;
 	int i;

+	/* Don't accidentally change the DRBD meta data layout. */
+	BUILD_BUG_ON(UI_SIZE != 4);
+	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
 	del_timer(&mdev->md_sync_timer);
 	/* timer may be rearmed by drbd_md_mark_dirty() now.
*/ if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) @@ -2876,7 +2881,7 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!buffer) goto out; - memset(buffer, 0, 512); + memset(buffer, 0, sizeof(*buffer)); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); for (i = UI_CURRENT; i < UI_SIZE; i++) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 2af26fc95280..581f6800cc30 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -696,12 +696,32 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) return 0; } -/* initializes the md.*_offset members, so we are able to find - * the on disk meta data */ +/* Initializes the md.*_offset members, so we are able to find + * the on disk meta data. + * + * We currently have two possible layouts: + * external: + * |----------- md_size_sect ------------------| + * [ 4k superblock ][ activity log ][ Bitmap ] + * | al_offset == 8 | + * | bm_offset = al_offset + X | + * ==> bitmap sectors = md_size_sect - bm_offset + * + * internal: + * |----------- md_size_sect ------------------| + * [data.....][ Bitmap ][ activity log ][ 4k superblock ] + * | al_offset < 0 | + * | bm_offset = al_offset - Y | + * ==> bitmap sectors = Y = al_offset - bm_offset + * + * Activity log size used to be fixed 32kB, + * but is about to become configurable. + */ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { sector_t md_size_sect = 0; + unsigned int al_size_sect = MD_32kB_SECT; int meta_dev_idx; rcu_read_lock(); @@ -710,23 +730,23 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, switch (meta_dev_idx) { default: /* v07 style fixed size indexed meta data */ - bdev->md.md_size_sect = MD_RESERVED_SECT; + bdev->md.md_size_sect = MD_128MB_SECT; bdev->md.md_offset = drbd_md_ss__(mdev, bdev); - bdev->md.al_offset = MD_AL_OFFSET; - bdev->md.bm_offset = MD_BM_OFFSET; + bdev->md.al_offset = MD_4kB_SECT; + bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; break; case DRBD_MD_INDEX_FLEX_EXT: /* just occupy the full device; unit: sectors */ bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); bdev->md.md_offset = 0; - bdev->md.al_offset = MD_AL_OFFSET; - bdev->md.bm_offset = MD_BM_OFFSET; + bdev->md.al_offset = MD_4kB_SECT; + bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; break; case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still fixed */ - bdev->md.al_offset = -MD_AL_SECTORS; + bdev->md.al_offset = -al_size_sect; /* we need (slightly less than) ~ this much bitmap sectors: */ md_size_sect = drbd_get_capacity(bdev->backing_bdev); md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); @@ -735,11 +755,11 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, /* plus the "drbd meta data super block", * and the activity log; */ - md_size_sect += MD_BM_OFFSET; + md_size_sect += MD_4kB_SECT + al_size_sect; bdev->md.md_size_sect = md_size_sect; /* bitmap offset is adjusted by 'super' block size */ - bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET; + bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT; break; } rcu_read_unlock(); @@ -1416,7 +1436,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) min_md_device_sectors = (2<<10); } else { max_possible_sectors = DRBD_MAX_SECTORS; - min_md_device_sectors = MD_RESERVED_SECT * (new_disk_conf->meta_dev_idx + 1); + min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1); } if 
(drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
--
GitLab

From 3a4d4eb3cb03fbc66696fc8cd472701d56f3aee7 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Tue, 19 Mar 2013 18:16:44 +0100
Subject: [PATCH 0135/3163] drbd: prepare for new striped layout of activity log

Introduce two new on-disk meta data fields: al_stripes and al_stripe_size_4k.
The intended use case is activity log on RAID 0 or similar.
Logically consecutive transactions will advance their on-disk position
by al_stripe_size_4k 4kB (transaction sized) blocks.

Right now, these are still asserted to be the backward compatible values
al_stripes = 1, al_stripe_size_4k = 8 (which amounts to 32kB).

Also introduce a caching member for meta_dev_idx in the in-core
structure: even though it is initially passed in via the rcu-protected
disk_conf structure, it cannot change without a detach/attach cycle.

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
Signed-off-by: Jens Axboe
---
 drivers/block/drbd/drbd_actlog.c |  6 +--
 drivers/block/drbd/drbd_int.h    | 46 +++++++++----------
 drivers/block/drbd/drbd_main.c   | 77 ++++++++++++++++++++++++++++----
 drivers/block/drbd/drbd_nl.c     |  5 +--
 4 files changed, 94 insertions(+), 40 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index b230d91ec430..7e7680e8da6c 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -353,11 +353,11 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)

 static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev)
 {
-	const unsigned int stripes = 1;
-	const unsigned int stripe_size_4kB = MD_32kB_SECT/MD_4kB_SECT;
+	const unsigned int stripes = mdev->ldev->md.al_stripes;
+	const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k;

 	/* transaction number, modulo on-disk ring buffer wrap around */
-	unsigned int t = mdev->al_tr_number % (stripe_size_4kB * stripes);
+	unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k);

 	/* ... to aligned 4k on disk block */
 	t = ((t % stripes) * stripe_size_4kB) + t/stripes;

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 60c89e5b298c..ee19ba28b59a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -755,6 +755,14 @@ struct drbd_md {

 	s32 al_offset;	/* signed relative sector offset to activity log */
 	s32 bm_offset;	/* signed relative sector offset to bitmap */
+
+	/* cached value of bdev->disk_conf->meta_dev_idx (see below) */
+	s32 meta_dev_idx;
+
+	/* see al_tr_number_to_on_disk_sector() */
+	u32 al_stripes;
+	u32 al_stripe_size_4k;
+	u32 al_size_4k; /* cached product of the above */
 };

 struct drbd_backing_dev {
@@ -1862,38 +1870,24 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
 }

 /**
- * drbd_md_ss__() - Return the sector number of our meta data super block
- * @mdev: DRBD device.
+ * drbd_md_ss() - Return the sector number of our meta data super block
  * @bdev: Meta data block device.
*/ -static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, - struct drbd_backing_dev *bdev) +static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev) { - int meta_dev_idx; + const int meta_dev_idx = bdev->md.meta_dev_idx; - rcu_read_lock(); - meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; - rcu_read_unlock(); + if (meta_dev_idx == DRBD_MD_INDEX_FLEX_EXT) + return 0; - switch (meta_dev_idx) { - default: /* external, some index; this is the old fixed size layout */ - return MD_128MB_SECT * meta_dev_idx; - case DRBD_MD_INDEX_INTERNAL: - /* with drbd08, internal meta data is always "flexible" */ - case DRBD_MD_INDEX_FLEX_INT: - if (!bdev->backing_bdev) { - if (__ratelimit(&drbd_ratelimit_state)) { - dev_err(DEV, "bdev->backing_bdev==NULL\n"); - dump_stack(); - } - return 0; - } - /* sizeof(struct md_on_disk_07) == 4k - * position: last 4k aligned block of 4k size */ + /* Since drbd08, internal meta data is always "flexible". + * position: last 4k aligned block of 4k size */ + if (meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + meta_dev_idx == DRBD_MD_INDEX_FLEX_INT) return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8; - case DRBD_MD_INDEX_FLEX_EXT: - return 0; - } + + /* external, some index; this is the old fixed size layout */ + return MD_128MB_SECT * bdev->md.meta_dev_idx; } static inline void diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 76faeab40c8f..7a2e07b45ecf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2850,7 +2850,11 @@ struct meta_data_on_disk { u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ u32 la_peer_max_bio_size; /* last peer max_bio_size */ - u8 reserved_u8[4096 - (7*8 + 8*4)]; + /* see al_tr_number_to_on_disk_sector() */ + u32 al_stripes; + u32 al_stripe_size_4k; + + u8 reserved_u8[4096 - (7*8 + 10*4)]; } __packed; /** @@ -2898,7 +2902,10 @@ void drbd_md_sync(struct drbd_conf *mdev) buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); - D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); + buffer->al_stripes = cpu_to_be32(mdev->ldev->md.al_stripes); + buffer->al_stripe_size_4k = cpu_to_be32(mdev->ldev->md.al_stripe_size_4k); + + D_ASSERT(drbd_md_ss(mdev->ldev) == mdev->ldev->md.md_offset); sector = mdev->ldev->md.md_offset; if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { @@ -2916,13 +2923,60 @@ void drbd_md_sync(struct drbd_conf *mdev) put_ldev(mdev); } +static int check_activity_log_stripe_size(struct drbd_conf *mdev, + struct meta_data_on_disk *on_disk, + struct drbd_md *in_core) +{ + u32 al_stripes = be32_to_cpu(on_disk->al_stripes); + u32 al_stripe_size_4k = be32_to_cpu(on_disk->al_stripe_size_4k); + u64 al_size_4k; + + /* both not set: default to old fixed size activity log */ + if (al_stripes == 0 && al_stripe_size_4k == 0) { + al_stripes = 1; + al_stripe_size_4k = MD_32kB_SECT/8; + } + + /* some paranoia plausibility checks */ + + /* we need both values to be set */ + if (al_stripes == 0 || al_stripe_size_4k == 0) + goto err; + + al_size_4k = (u64)al_stripes * al_stripe_size_4k; + + /* Upper limit of activity log area, to avoid potential overflow + * problems in al_tr_number_to_on_disk_sector(). 
As right now, more + * than 72 * 4k blocks total only increases the amount of history, + * limiting this arbitrarily to 16 GB is not a real limitation ;-) */ + if (al_size_4k > (16 * 1024 * 1024/4)) + goto err; + + /* Lower limit: we need at least 8 transaction slots (32kB) + * to not break existing setups */ + if (al_size_4k < MD_32kB_SECT/8) + goto err; + + in_core->al_stripe_size_4k = al_stripe_size_4k; + in_core->al_stripes = al_stripes; + in_core->al_size_4k = al_size_4k; + + return 0; +err: + dev_err(DEV, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n", + al_stripes, al_stripe_size_4k); + return -EINVAL; +} + /** * drbd_md_read() - Reads in the meta data super block * @mdev: DRBD device. * @bdev: Device from which the meta data should be read in. * - * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case + * Return NO_ERROR on success, and an enum drbd_ret_code in case * something goes wrong. + * + * Called exactly once during drbd_adm_attach() */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { @@ -2937,6 +2991,10 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!buffer) goto out; + /* First, figure out where our meta data superblock is located. */ + bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; + bdev->md.md_offset = drbd_md_ss(bdev); + if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is called BEFORE disk is attached */ @@ -2954,40 +3012,43 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) rv = ERR_MD_UNCLEAN; goto err; } + + rv = ERR_MD_INVALID; if (magic != DRBD_MD_MAGIC_08) { if (magic == DRBD_MD_MAGIC_07) dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); else dev_err(DEV, "Meta data magic not found. 
Did you \"drbdadm create-md\"?\n"); - rv = ERR_MD_INVALID; goto err; } + + if (check_activity_log_stripe_size(mdev, buffer, &bdev->md)) + goto err; + if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", be32_to_cpu(buffer->al_offset), bdev->md.al_offset); - rv = ERR_MD_INVALID; goto err; } if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); - rv = ERR_MD_INVALID; goto err; } if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { dev_err(DEV, "unexpected md_size: %u (expected %u)\n", be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); - rv = ERR_MD_INVALID; goto err; } if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); - rv = ERR_MD_INVALID; goto err; } + rv = NO_ERROR; + bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 581f6800cc30..104b7cea691e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -727,24 +727,23 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, rcu_read_lock(); meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + bdev->md.md_offset = drbd_md_ss(bdev); + switch (meta_dev_idx) { default: /* v07 style fixed size indexed meta data */ bdev->md.md_size_sect = MD_128MB_SECT; - bdev->md.md_offset = drbd_md_ss__(mdev, bdev); bdev->md.al_offset = MD_4kB_SECT; bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; break; case DRBD_MD_INDEX_FLEX_EXT: /* just occupy the full device; unit: sectors */ bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); - bdev->md.md_offset = 0; bdev->md.al_offset = MD_4kB_SECT; bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; break; case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: - bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still fixed */ bdev->md.al_offset = -al_size_sect; /* we need (slightly less than) ~ this much bitmap sectors: */ -- GitLab From 68e41a43f18b681f83329c8ad83123571bb8db0d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:45 +0100 Subject: [PATCH 0136/3163] drbd: use the cached meta_dev_idx Now we have the cached meta_dev_idx member, we can get rid of a few rcu_read_lock() sections and rcu_dereference(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 32 +++++--------------------------- drivers/block/drbd/drbd_nl.c | 7 +------ 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ee19ba28b59a..6eecdec9da2b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1777,9 +1777,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf *mdev, * BTW, for internal meta data, this happens to be the maximum capacity * we could agree upon with our peer node. 
*/ -static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct drbd_backing_dev *bdev) +static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) { - switch (meta_dev_idx) { + switch (bdev->md.meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: return bdev->md.md_offset + bdev->md.bm_offset; @@ -1789,30 +1789,13 @@ static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct drbd_backi } } -static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) -{ - int meta_dev_idx; - - rcu_read_lock(); - meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; - rcu_read_unlock(); - - return _drbd_md_first_sector(meta_dev_idx, bdev); -} - /** * drbd_md_last_sector() - Return the last sector number of the meta data area * @bdev: Meta data block device. */ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) { - int meta_dev_idx; - - rcu_read_lock(); - meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; - rcu_read_unlock(); - - switch (meta_dev_idx) { + switch (bdev->md.meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: return bdev->md.md_offset + MD_4kB_SECT -1; @@ -1840,18 +1823,13 @@ static inline sector_t drbd_get_capacity(struct block_device *bdev) static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) { sector_t s; - int meta_dev_idx; - - rcu_read_lock(); - meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; - rcu_read_unlock(); - switch (meta_dev_idx) { + switch (bdev->md.meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: s = drbd_get_capacity(bdev->backing_bdev) ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, - _drbd_md_first_sector(meta_dev_idx, bdev)) + drbd_md_first_sector(bdev)) : 0; break; case DRBD_MD_INDEX_FLEX_EXT: diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 104b7cea691e..5621df86967a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -722,14 +722,10 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, { sector_t md_size_sect = 0; unsigned int al_size_sect = MD_32kB_SECT; - int meta_dev_idx; - - rcu_read_lock(); - meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; bdev->md.md_offset = drbd_md_ss(bdev); - switch (meta_dev_idx) { + switch (bdev->md.meta_dev_idx) { default: /* v07 style fixed size indexed meta data */ bdev->md.md_size_sect = MD_128MB_SECT; @@ -761,7 +757,6 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT; break; } - rcu_read_unlock(); } /* input size is expected to be in KB */ -- GitLab From cccac9857d624dab74b23bafe0482fcdd91df7d8 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:46 +0100 Subject: [PATCH 0137/3163] drbd: mechanically rename la_size to la_size_sect Make it obvious that this value is in units of 512 Byte sectors. 
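[Editorial example] Encoding the unit in the suffix (_sect) makes mismatches visible at the call site. A trivial pair of helpers illustrates the conversions such a naming convention guards against; this is illustrative only, not part of the patch, and the example size is hypothetical.

#include <stdio.h>

typedef unsigned long long sector_t;    /* 512 byte sectors, as in the kernel */

static unsigned long long sect_to_bytes(sector_t s) { return s << 9; }
static sector_t bytes_to_sect(unsigned long long b) { return b >> 9; }

int main(void)
{
        sector_t la_size_sect = 2097152ULL;     /* hypothetical last agreed size */

        /* 2097152 sectors x 512 bytes = 1 GiB */
        printf("%llu sectors = %llu bytes\n",
               (unsigned long long)la_size_sect, sect_to_bytes(la_size_sect));
        printf("4096 bytes = %llu sectors\n",
               (unsigned long long)bytes_to_sect(4096));
        return 0;
}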
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_nl.c | 16 ++++++++-------- drivers/block/drbd/drbd_receiver.c | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7a2e07b45ecf..6b956fc04dc8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2836,7 +2836,7 @@ void conn_md_sync(struct drbd_tconn *tconn) /* aligned 4kByte */ struct meta_data_on_disk { - u64 la_size; /* last agreed size. */ + u64 la_size_sect; /* last agreed size. */ u64 uuid[UI_SIZE]; /* UUIDs. */ u64 device_uuid; u64 reserved_u64_1; @@ -2887,7 +2887,7 @@ void drbd_md_sync(struct drbd_conf *mdev) memset(buffer, 0, sizeof(*buffer)); - buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); + buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); for (i = UI_CURRENT; i < UI_SIZE; i++) buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); buffer->flags = cpu_to_be32(mdev->ldev->md.flags); @@ -3049,7 +3049,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) rv = NO_ERROR; - bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); + bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect); for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5621df86967a..d5211b06df45 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -819,7 +819,7 @@ void drbd_resume_io(struct drbd_conf *mdev) enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ - sector_t la_size, u_size; + sector_t la_size_sect, u_size; sector_t size; char ppb[10]; @@ -842,7 +842,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds prev_first_sect = drbd_md_first_sector(mdev->ldev); prev_size = mdev->ldev->md.md_size_sect; - la_size = mdev->ldev->md.la_size_sect; + la_size_sect = mdev->ldev->md.la_size_sect; /* TODO: should only be some assert here, not (re)init... */ drbd_md_set_sector_offsets(mdev, mdev->ldev); @@ -878,7 +878,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds if (rv == dev_size_error) goto out; - la_size_changed = (la_size != mdev->ldev->md.la_size_sect); + la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect); md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev) || prev_size != mdev->ldev->md.md_size_sect; @@ -900,9 +900,9 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds drbd_md_mark_dirty(mdev); } - if (size > la_size) + if (size > la_size_sect) rv = grew; - if (size < la_size) + if (size < la_size_sect) rv = shrunk; out: lc_unlock(mdev->act_log); @@ -917,7 +917,7 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t u_size, int assume_peer_has_space) { sector_t p_size = mdev->p_size; /* partner's disk size. */ - sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ + sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. 
*/ sector_t m_size; /* my size */ sector_t size = 0; @@ -931,8 +931,8 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, if (p_size && m_size) { size = min_t(sector_t, p_size, m_size); } else { - if (la_size) { - size = la_size; + if (la_size_sect) { + size = la_size_sect; if (m_size && m_size < size) size = m_size; if (p_size && p_size < size) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a9eccfc6079b..8172a2cfdead 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3992,7 +3992,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) clear_bit(DISCARD_MY_DATA, &mdev->flags); - drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ + drbd_md_sync(mdev); /* update connected indicator, la_size_sect, ... */ return 0; } -- GitLab From c04ccaa669e147ffb66e4e74d82c7dbfc100ec5e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:47 +0100 Subject: [PATCH 0138/3163] drbd: read meta data early, base on-disk offsets on super block We used to calculate all on-disk meta data offsets, and then compare the stored offsets, basically treating them as magic numbers. Now with the activity log striping, the activity log size is no longer fixed. We need to first read the super block, then base the activity log and bitmap offsets on the stored offsets/al stripe settings. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 11 ++- drivers/block/drbd/drbd_main.c | 131 +++++++++++++++++++++++++------ drivers/block/drbd/drbd_nl.c | 15 ++-- drivers/block/drbd/drbd_worker.c | 3 +- 4 files changed, 123 insertions(+), 37 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 7e7680e8da6c..c79625aa8cf2 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -168,7 +168,11 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; - if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + if (!(rw & WRITE) && mdev->state.disk == D_DISKLESS && mdev->ldev == NULL) + /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */ + ; + else if (!get_ldev_if_state(mdev, D_ATTACHING)) { + /* Corresponding put_ldev in drbd_md_io_complete() */ dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); err = -ENODEV; goto out; @@ -199,9 +203,10 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, BUG_ON(!bdev->md_bdev); - dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", current->comm, current->pid, __func__, - (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); + (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ", + (void*)_RET_IP_ ); if (sector < drbd_md_first_sector(bdev) || sector + 7 > drbd_md_last_sector(bdev)) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6b956fc04dc8..e55271d6e7f6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2968,6 +2968,86 @@ static int check_activity_log_stripe_size(struct drbd_conf *mdev, return -EINVAL; } +static int check_offsets_and_sizes(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +{ + sector_t capacity = drbd_get_capacity(bdev->md_bdev); + struct drbd_md *in_core = &bdev->md; + s32 on_disk_al_sect; + s32 on_disk_bm_sect; + + /* The on-disk size of the activity log, calculated from offsets, and + * the size of the activity log calculated from the stripe settings, + * should match. + * Though we could relax this a bit: it is ok, if the striped activity log + * fits in the available on-disk activity log size. + * Right now, that would break how resize is implemented. + * TODO: make drbd_determine_dev_size() (and the drbdmeta tool) aware + * of possible unused padding space in the on disk layout. */ + if (in_core->al_offset < 0) { + if (in_core->bm_offset > in_core->al_offset) + goto err; + on_disk_al_sect = -in_core->al_offset; + on_disk_bm_sect = in_core->al_offset - in_core->bm_offset; + } else { + if (in_core->al_offset != MD_4kB_SECT) + goto err; + if (in_core->bm_offset < in_core->al_offset + in_core->al_size_4k * MD_4kB_SECT) + goto err; + + on_disk_al_sect = in_core->bm_offset - MD_4kB_SECT; + on_disk_bm_sect = in_core->md_size_sect - in_core->bm_offset; + } + + /* old fixed size meta data is exactly that: fixed. */ + if (in_core->meta_dev_idx >= 0) { + if (in_core->md_size_sect != MD_128MB_SECT + || in_core->al_offset != MD_4kB_SECT + || in_core->bm_offset != MD_4kB_SECT + MD_32kB_SECT + || in_core->al_stripes != 1 + || in_core->al_stripe_size_4k != MD_32kB_SECT/8) + goto err; + } + + if (capacity < in_core->md_size_sect) + goto err; + if (capacity - in_core->md_size_sect < drbd_md_first_sector(bdev)) + goto err; + + /* should be aligned, and at least 32k */ + if ((on_disk_al_sect & 7) || (on_disk_al_sect < MD_32kB_SECT)) + goto err; + + /* should fit (for now: exactly) into the available on-disk space; + * overflow prevention is in check_activity_log_stripe_size() above. */ + if (on_disk_al_sect != in_core->al_size_4k * MD_4kB_SECT) + goto err; + + /* again, should be aligned */ + if (in_core->bm_offset & 7) + goto err; + + /* FIXME check for device grow with flex external meta data? */ + + /* can the available bitmap space cover the last agreed device size? */ + if (on_disk_bm_sect < (in_core->la_size_sect+7)/MD_4kB_SECT/8/512) + goto err; + + return 0; + +err: + dev_err(DEV, "meta data offsets don't make sense: idx=%d " + "al_s=%u, al_sz4k=%u, al_offset=%d, bm_offset=%d, " + "md_size_sect=%u, la_size=%llu, md_capacity=%llu\n", + in_core->meta_dev_idx, + in_core->al_stripes, in_core->al_stripe_size_4k, + in_core->al_offset, in_core->bm_offset, in_core->md_size_sect, + (unsigned long long)in_core->la_size_sect, + (unsigned long long)capacity); + + return -EINVAL; +} + + /** * drbd_md_read() - Reads in the meta data super block * @mdev: DRBD device. @@ -2976,7 +3056,8 @@ static int check_activity_log_stripe_size(struct drbd_conf *mdev, * Return NO_ERROR on success, and an enum drbd_ret_code in case * something goes wrong. 
* - * Called exactly once during drbd_adm_attach() + * Called exactly once during drbd_adm_attach(), while still being D_DISKLESS, + * even before @bdev is assigned to @mdev->ldev. */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { @@ -2984,14 +3065,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) u32 magic, flags; int i, rv = NO_ERROR; - if (!get_ldev_if_state(mdev, D_ATTACHING)) - return ERR_IO_MD_DISK; + if (mdev->state.disk != D_DISKLESS) + return ERR_DISK_CONFIGURED; buffer = drbd_md_get_buffer(mdev); if (!buffer) - goto out; + return ERR_NOMEM; - /* First, figure out where our meta data superblock is located. */ + /* First, figure out where our meta data superblock is located, + * and read it. */ bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; bdev->md.md_offset = drbd_md_ss(bdev); @@ -3022,14 +3104,29 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } - if (check_activity_log_stripe_size(mdev, buffer, &bdev->md)) + if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { + dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", + be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); goto err; + } - if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { - dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", - be32_to_cpu(buffer->al_offset), bdev->md.al_offset); + + /* convert to in_core endian */ + bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect); + for (i = UI_CURRENT; i < UI_SIZE; i++) + bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); + bdev->md.flags = be32_to_cpu(buffer->flags); + bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); + + bdev->md.md_size_sect = be32_to_cpu(buffer->md_size_sect); + bdev->md.al_offset = be32_to_cpu(buffer->al_offset); + bdev->md.bm_offset = be32_to_cpu(buffer->bm_offset); + + if (check_activity_log_stripe_size(mdev, buffer, &bdev->md)) goto err; - } + if (check_offsets_and_sizes(mdev, bdev)) + goto err; + if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); @@ -3041,20 +3138,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } - if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { - dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", - be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); - goto err; - } - rv = NO_ERROR; - bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect); - for (i = UI_CURRENT; i < UI_SIZE; i++) - bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); - bdev->md.flags = be32_to_cpu(buffer->flags); - bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); - spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) { unsigned int peer; @@ -3066,8 +3151,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) err: drbd_md_put_buffer(mdev); - out: - put_ldev(mdev); return rv; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d5211b06df45..974ea47a656a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -721,7 +721,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { sector_t md_size_sect = 0; - unsigned int al_size_sect = MD_32kB_SECT; + unsigned int al_size_sect = bdev->md.al_size_4k * 8; bdev->md.md_offset = drbd_md_ss(bdev); @@ -1413,8 +1413,11 @@ int drbd_adm_attach(struct sk_buff 
*skb, struct genl_info *info) goto fail; } - /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ - drbd_md_set_sector_offsets(mdev, nbc); + /* Read our meta data super block early. + * This also sets other on-disk offsets. */ + retcode = drbd_md_read(mdev, nbc); + if (retcode != NO_ERROR) + goto fail; if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", @@ -1481,8 +1484,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (!get_ldev_if_state(mdev, D_ATTACHING)) goto force_diskless; - drbd_md_set_sector_offsets(mdev, nbc); - if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = ERR_NOMEM; @@ -1490,10 +1491,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - retcode = drbd_md_read(mdev, nbc); - if (retcode != NO_ERROR) - goto force_diskless_dec; - if (mdev->state.conn < C_CONNECTED && mdev->state.role == R_PRIMARY && (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 424dc7bdf9b7..34b5d5d23ac4 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -89,7 +89,8 @@ void drbd_md_io_complete(struct bio *bio, int error) md_io->done = 1; wake_up(&mdev->misc_wait); bio_put(bio); - put_ldev(mdev); + if (mdev->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ + put_ldev(mdev); } /* reads on behalf of the partner, -- GitLab From 56392d2f40aac4b520fc50bc356f40e07f7e1c7d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:48 +0100 Subject: [PATCH 0139/3163] drbd: Clarify when activity log I/O is delegated to the worker thread Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 49 ++++++++++++++++-------------- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- 5 files changed, 31 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c79625aa8cf2..82199d9a9a61 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -104,7 +104,7 @@ struct update_al_work { int err; }; -static int al_write_transaction(struct drbd_conf *mdev); +static int al_write_transaction(struct drbd_conf *mdev, bool delegate); void *drbd_md_get_buffer(struct drbd_conf *mdev) { @@ -246,7 +246,10 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) return al_ext; } -void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) +/* + * @delegate: delegate activity log I/O to the worker thread + */ +void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate) { /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ @@ -255,6 +258,17 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) unsigned enr; bool locked = false; + /* When called through generic_make_request(), we must delegate + * activity log I/O to the worker thread: a further request + * submitted via generic_make_request() within the same task + * would be queued on current->bio_list, and would only start + * after this function returns (see generic_make_request()). + * + * However, if we *are* the worker, we must not delegate to ourselves. 
+ */ + + if (delegate) + BUG_ON(current == mdev->tconn->worker.task); D_ASSERT(first <= last); D_ASSERT(atomic_read(&mdev->local_cnt) > 0); @@ -270,13 +284,6 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) (locked = lc_try_lock_for_transaction(mdev->act_log))); if (locked) { - /* drbd_al_write_transaction(mdev,al_ext,enr); - * recurses into generic_make_request(), which - * disallows recursion, bios being serialized on the - * current->bio_tail list now. - * we have to delegate updates to the activity log - * to the worker thread. */ - /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. */ if (mdev->act_log->pending_changes) { @@ -287,7 +294,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) rcu_read_unlock(); if (write_al_updates) { - al_write_transaction(mdev); + al_write_transaction(mdev, delegate); mdev->al_writ_cnt++; } @@ -495,20 +502,18 @@ static int w_al_write_transaction(struct drbd_work *w, int unused) /* Calls from worker context (see w_restart_disk_io()) need to write the transaction directly. Others came through generic_make_request(), those need to delegate it to the worker. */ -static int al_write_transaction(struct drbd_conf *mdev) +static int al_write_transaction(struct drbd_conf *mdev, bool delegate) { - struct update_al_work al_work; - - if (current == mdev->tconn->worker.task) + if (delegate) { + struct update_al_work al_work; + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w); + wait_for_completion(&al_work.event); + return al_work.err; + } else return _al_write_transaction(mdev); - - init_completion(&al_work.event); - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w); - wait_for_completion(&al_work.event); - - return al_work.err; } static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6eecdec9da2b..453fccfc440c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1598,7 +1598,7 @@ extern const char *drbd_conn_str(enum drbd_conns s); extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ -extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i); +extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate); extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i); extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8172a2cfdead..1921871ca9a8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2265,7 +2265,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size); peer_req->flags |= EE_CALL_AL_COMPLETE_IO; peer_req->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(mdev, &peer_req->i); + drbd_al_begin_io(mdev, &peer_req->i, true); } err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 2b8303ad63c9..7d1ff1aaeb71 100644 --- a/drivers/block/drbd/drbd_req.c +++ 
b/drivers/block/drbd/drbd_req.c @@ -1054,7 +1054,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &mdev->flags)) { req->rq_state |= RQ_IN_ACT_LOG; - drbd_al_begin_io(mdev, &req->i); + drbd_al_begin_io(mdev, &req->i, true); } spin_lock_irq(&mdev->tconn->req_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 34b5d5d23ac4..f41e224caa7c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1411,7 +1411,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) struct drbd_conf *mdev = w->mdev; if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) - drbd_al_begin_io(mdev, &req->i); + drbd_al_begin_io(mdev, &req->i, false); drbd_req_make_private_bio(req, req->master_bio); req->private_bio->bi_bdev = mdev->ldev->backing_bdev; -- GitLab From ebfd5d8f715167b886c9401e6b123847187f137b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:49 +0100 Subject: [PATCH 0140/3163] drbd: drbd_al_being_io: short circuit to reduce latency A request hitting an already "hot" extent should proceed right away, even if some other requests need to wait for pending transactions. Without that short-circuit, several simultaneous make_request contexts race for committing the transaction, possibly penalizing the innocent. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 82199d9a9a61..1d7244d2a910 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -256,6 +256,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool dele unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); unsigned enr; + bool need_transaction = false; bool locked = false; /* When called through generic_make_request(), we must delegate @@ -273,8 +274,17 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool dele D_ASSERT(first <= last); D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - for (enr = first; enr <= last; enr++) - wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL); + for (enr = first; enr <= last; enr++) { + struct lc_element *al_ext; + wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)) != NULL); + if (al_ext->lc_number != enr) + need_transaction = true; + } + + /* If *this* request was to an already active extent, + * we're done, even if there are pending changes. */ + if (!need_transaction) + return; /* Serialize multiple transactions. * This uses test_and_set_bit, memory barrier is implicit. -- GitLab From 6d9febe237146156947f0da8407c620b5c33c1df Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:50 +0100 Subject: [PATCH 0141/3163] drbd: split __drbd_make_request in before and after drbd_al_begin_io This is in preparation to be able to defer requests that need to wait for an activity log transaction to a submitter workqueue. 
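[Editorial example] As the hunks below show, this split gives drbd_request_prepare() a three-way return: a request the caller should submit directly, NULL when the request was queued elsewhere, and an encoded error. The error case relies on the kernel's ERR_PTR convention, in which a small negative errno is stored in the pointer value itself. A stand-alone sketch of that idiom, simplified from include/linux/err.h and compilable in user space:

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

/* encode an errno in a pointer, and decode/test it again */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}
static inline int IS_ERR_OR_NULL(const void *ptr)
{
        return !ptr || IS_ERR(ptr);
}

int main(void)
{
        void *req = ERR_PTR(-ENOMEM);   /* "allocation failed" case */

        if (IS_ERR_OR_NULL(req))
                printf("queued or failed, errno %ld\n",
                       IS_ERR(req) ? PTR_ERR(req) : 0L);
        return 0;
}

This is why the caller below can write a single IS_ERR_OR_NULL(req) check and return: both "already handed off" and "failed" mean there is nothing left to submit.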
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 40 ++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 7d1ff1aaeb71..96d5968fc1e4 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -34,14 +34,14 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size); /* Update disk stats at start of I/O request */ -static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) +static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req) { - const int rw = bio_data_dir(bio); + const int rw = bio_data_dir(req->master_bio); int cpu; cpu = part_stat_lock(); part_round_stats(cpu, &mdev->vdisk->part0); part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); - part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], req->i.size >> 9); (void) cpu; /* The macro invocations above want the cpu argument, I do not like the compiler warning about cpu only assigned but never used... */ part_inc_in_flight(&mdev->vdisk->part0, rw); @@ -1020,12 +1020,16 @@ drbd_submit_req_private_bio(struct drbd_request *req) bio_endio(bio, -EIO); } -void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +/* returns the new drbd_request pointer, if the caller is expected to + * drbd_send_and_submit() it (to save latency), or NULL if we queued the + * request on the submitter thread. + * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. + */ +struct drbd_request * +drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { - const int rw = bio_rw(bio); - struct bio_and_error m = { NULL, }; + const int rw = bio_data_dir(bio); struct drbd_request *req; - bool no_remote = false; /* allocate outside of all locks; */ req = drbd_req_new(mdev, bio); @@ -1035,7 +1039,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long * if user cannot handle io errors, that's not our business. */ dev_err(DEV, "could not kmalloc() req\n"); bio_endio(bio, -ENOMEM); - return; + return ERR_PTR(-ENOMEM); } req->start_time = start_time; @@ -1057,6 +1061,15 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long drbd_al_begin_io(mdev, &req->i, true); } + return req; +} + +static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *req) +{ + const int rw = bio_rw(req->master_bio); + struct bio_and_error m = { NULL, }; + bool no_remote = false; + spin_lock_irq(&mdev->tconn->req_lock); if (rw == WRITE) { /* This may temporarily give up the req_lock, @@ -1079,7 +1092,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long } /* Update disk stats */ - _drbd_start_io_acct(mdev, req, bio); + _drbd_start_io_acct(mdev, req); /* We fail READ/READA early, if we can not serve it. * We must do this before req is registered on any lists. 
@@ -1137,7 +1150,14 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long

 	if (m.bio)
 		complete_master_bio(mdev, &m);
-	return;
+}
+
+void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+{
+	struct drbd_request *req = drbd_request_prepare(mdev, bio, start_time);
+	if (IS_ERR_OR_NULL(req))
+		return;
+	drbd_send_and_submit(mdev, req);
 }

 void drbd_make_request(struct request_queue *q, struct bio *bio)
--
GitLab

From 113fef9e20e0d614b3f5940b67c96e719c559eea Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Fri, 22 Mar 2013 18:14:40 -0600
Subject: [PATCH 0142/3163] drbd: prepare to queue write requests on a submit worker

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
Signed-off-by: Jens Axboe
---
 drivers/block/drbd/drbd_int.h  | 13 +++++++++++++
 drivers/block/drbd/drbd_main.c | 26 +++++++++++++++++++++++++-
 drivers/block/drbd/drbd_nl.c   |  1 +
 drivers/block/drbd/drbd_req.c  | 29 +++++++++++++++++++++++++++++
 4 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 453fccfc440c..a6b71b6076b5 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -894,6 +894,14 @@ struct drbd_tconn {			/* is a resource from the config file */
 	} send;
 };

+struct submit_worker {
+	struct workqueue_struct *wq;
+	struct work_struct worker;
+
+	spinlock_t lock;
+	struct list_head writes;
+};
+
 struct drbd_conf {
 	struct drbd_tconn *tconn;
 	int vnr;			/* volume number within the connection */
@@ -1034,6 +1042,10 @@ struct drbd_conf {
 	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
 	unsigned int peer_max_bio_size;
 	unsigned int local_max_bio_size;
+
+	/* any requests that would block in drbd_make_request()
+	 * are deferred to this single-threaded work queue */
+	struct submit_worker submit;
 };

 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1440,6 +1452,7 @@ extern void conn_free_crypto(struct drbd_tconn *tconn);
 extern int proc_details;

 /* drbd_req */
+extern void do_submit(struct work_struct *ws);
 extern void __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long);
 extern void drbd_make_request(struct request_queue *q, struct bio *bio);
 extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e55271d6e7f6..a150b59897a0 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -45,7 +45,7 @@
 #include <linux/reboot.h>
 #include <linux/notifier.h>
 #include <linux/kthread.h>
-
+#include <linux/workqueue.h>
 #define __KERNEL_SYSCALLS__
 #include <linux/unistd.h>
 #include <linux/vmalloc.h>
@@ -2300,6 +2300,7 @@ static void drbd_cleanup(void)
 	idr_for_each_entry(&minors, mdev, i) {
 		idr_remove(&minors, mdev_to_minor(mdev));
 		idr_remove(&mdev->tconn->volumes, mdev->vnr);
+		destroy_workqueue(mdev->submit.wq);
 		del_gendisk(mdev->vdisk);
 		/* synchronize_rcu(); No other threads running at this point */
 		kref_put(&mdev->kref, &drbd_minor_destroy);
@@ -2589,6 +2590,21 @@ void conn_destroy(struct kref *kref)
 	kfree(tconn);
 }

+int init_submitter(struct drbd_conf *mdev)
+{
+	/* opencoded create_singlethread_workqueue(),
+	 * to be able to say "drbd%d", ..., minor */
+	mdev->submit.wq = alloc_workqueue("drbd%u_submit",
+			WQ_UNBOUND | WQ_MEM_RECLAIM, 1, mdev->minor);
+	if (!mdev->submit.wq)
+		return -ENOMEM;
+
+	INIT_WORK(&mdev->submit.worker, do_submit);
+	spin_lock_init(&mdev->submit.lock);
+	INIT_LIST_HEAD(&mdev->submit.writes);
+	return 0;
+}
+
 enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
 {
 	struct drbd_conf *mdev;
@@ -2678,6 +2694,12 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 		goto out_idr_remove_minor;
 	}
+
+	if (init_submitter(mdev)) {
+		err = ERR_NOMEM;
+		drbd_msg_put_info("unable to create submit workqueue");
+		goto out_idr_remove_vol;
+	}
+
 	add_disk(disk);
 	kref_init(&mdev->kref); /* one ref for both idrs and the add_disk */
@@ -2688,6 +2710,8 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,

 	return NO_ERROR;

+out_idr_remove_vol:
+	idr_remove(&tconn->volumes, vnr_got);
 out_idr_remove_minor:
 	idr_remove(&minors, minor_got);
 	synchronize_rcu();
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 974ea47a656a..bcf900bcd142 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -3173,6 +3173,7 @@ static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
 			    CS_VERBOSE + CS_WAIT_COMPLETE);
 		idr_remove(&mdev->tconn->volumes, mdev->vnr);
 		idr_remove(&minors, mdev_to_minor(mdev));
+		destroy_workqueue(mdev->submit.wq);
 		del_gendisk(mdev->vdisk);
 		synchronize_rcu();
 		kref_put(&mdev->kref, &drbd_minor_destroy);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 96d5968fc1e4..4af709e0aae5 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1160,6 +1160,35 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long
 	drbd_send_and_submit(mdev, req);
 }

+void __drbd_make_request_from_worker(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	const int rw = bio_rw(req->master_bio);
+
+	if (rw == WRITE && req->private_bio && req->i.size
+	&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
+		drbd_al_begin_io(mdev, &req->i, false);
+		req->rq_state |= RQ_IN_ACT_LOG;
+	}
+	drbd_send_and_submit(mdev, req);
+}
+
+
+void do_submit(struct work_struct *ws)
+{
+	struct drbd_conf *mdev = container_of(ws, struct drbd_conf, submit.worker);
+	LIST_HEAD(writes);
+	struct drbd_request *req, *tmp;
+
+	spin_lock(&mdev->submit.lock);
+	list_splice_init(&mdev->submit.writes, &writes);
+	spin_unlock(&mdev->submit.lock);
+
+	list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
+		list_del_init(&req->tl_requests);
+		__drbd_make_request_from_worker(mdev, req);
+	}
+}
+
 void drbd_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
--
GitLab

From b5bc8e08641805391f2c7834c40d0f647e8563c6 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg
Date: Tue, 19 Mar 2013 18:16:52 +0100
Subject: [PATCH 0143/3163] drbd: split drbd_al_begin_io into fastpath, prepare, and commit

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
Signed-off-by: Jens Axboe
---
 drivers/block/drbd/drbd_actlog.c | 104 +++++++++++++++++++++----------
 drivers/block/drbd/drbd_int.h    |   1 +
 2 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 1d7244d2a910..e4f1231c2ef2 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -104,7 +104,6 @@ struct update_al_work {
 	int err;
 };

-static int al_write_transaction(struct drbd_conf *mdev, bool delegate);

 void *drbd_md_get_buffer(struct drbd_conf *mdev)
 {
@@ -246,30 +245,37 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 	return al_ext;
 }

-/*
- * @delegate: delegate activity log I/O to the worker thread
- */
-void drbd_al_begin_io(struct drbd_conf *mdev, struct
drbd_interval *i, bool delegate) +bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - unsigned enr; - bool need_transaction = false; - bool locked = false; + bool fastpath_ok = true; - /* When called through generic_make_request(), we must delegate - * activity log I/O to the worker thread: a further request - * submitted via generic_make_request() within the same task - * would be queued on current->bio_list, and would only start - * after this function returns (see generic_make_request()). - * - * However, if we *are* the worker, we must not delegate to ourselves. - */ + D_ASSERT((unsigned)(last - first) <= 1); + D_ASSERT(atomic_read(&mdev->local_cnt) > 0); + + /* FIXME figure out a fast path for bios crossing AL extent boundaries */ + if (first != last) + return false; + + spin_lock_irq(&mdev->al_lock); + fastpath_ok = + lc_find(mdev->resync, first/AL_EXT_PER_BM_SECT) == NULL && + lc_try_get(mdev->act_log, first) != NULL; + spin_unlock_irq(&mdev->al_lock); + return fastpath_ok; +} - if (delegate) - BUG_ON(current == mdev->tconn->worker.task); +bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i) +{ + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned enr; + bool need_transaction = false; D_ASSERT(first <= last); D_ASSERT(atomic_read(&mdev->local_cnt) > 0); @@ -280,11 +286,28 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool dele if (al_ext->lc_number != enr) need_transaction = true; } + return need_transaction; +} - /* If *this* request was to an already active extent, - * we're done, even if there are pending changes. */ - if (!need_transaction) - return; +static int al_write_transaction(struct drbd_conf *mdev, bool delegate); + +/* When called through generic_make_request(), we must delegate + * activity log I/O to the worker thread: a further request + * submitted via generic_make_request() within the same task + * would be queued on current->bio_list, and would only start + * after this function returns (see generic_make_request()). + * + * However, if we *are* the worker, we must not delegate to ourselves. + */ + +/* + * @delegate: delegate activity log I/O to the worker thread + */ +void drbd_al_begin_io_commit(struct drbd_conf *mdev, bool delegate) +{ + bool locked = false; + + BUG_ON(delegate && current == mdev->tconn->worker.task); /* Serialize multiple transactions. * This uses test_and_set_bit, memory barrier is implicit. 
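[Editorial example] The "serialize multiple transactions" context above refers to a bit-lock: test_and_set_bit() acts as a trylock whose implied memory barrier orders the transaction contents, and contenders sleep on al_wait until the holder clears the bit. The same shape can be modelled in user space with a C11 atomic_flag; this is a rough analogy to, not a rendition of, the kernel primitive.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag tr_lock = ATOMIC_FLAG_INIT;

/* analogous to !test_and_set_bit(): true means we own the transaction */
static bool tr_trylock(void)
{
        return !atomic_flag_test_and_set_explicit(&tr_lock,
                                                  memory_order_acquire);
}

static void tr_unlock(void)
{
        atomic_flag_clear_explicit(&tr_lock, memory_order_release);
        /* in the kernel, a wake_up(&mdev->al_wait) would follow here */
}

int main(void)
{
        if (tr_trylock()) {
                printf("committing transaction\n");
                tr_unlock();
        } else {
                printf("someone else is committing; would wait on al_wait\n");
        }
        return 0;
}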
@@ -303,11 +326,8 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool dele write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; rcu_read_unlock(); - if (write_al_updates) { + if (write_al_updates) al_write_transaction(mdev, delegate); - mdev->al_writ_cnt++; - } - spin_lock_irq(&mdev->al_lock); /* FIXME if (err) @@ -321,6 +341,17 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool dele } } +/* + * @delegate: delegate activity log I/O to the worker thread + */ +void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate) +{ + BUG_ON(delegate && current == mdev->tconn->worker.task); + + if (drbd_al_begin_io_prepare(mdev, i)) + drbd_al_begin_io_commit(mdev, delegate); +} + void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, @@ -478,15 +509,22 @@ _al_write_transaction(struct drbd_conf *mdev) crc = crc32c(0, buffer, 4096); buffer->crc32c = cpu_to_be32(crc); - /* normal execution path goes through all three branches */ if (drbd_bm_write_hinted(mdev)) err = -EIO; - /* drbd_chk_io_error done already */ - else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { - err = -EIO; - drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); - } else { - mdev->al_tr_number++; + else { + bool write_al_updates; + rcu_read_lock(); + write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + rcu_read_unlock(); + if (write_al_updates) { + if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + err = -EIO; + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); + } else { + mdev->al_tr_number++; + mdev->al_writ_cnt++; + } + } } drbd_md_put_buffer(mdev); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a6b71b6076b5..b7b52dd42325 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1611,6 +1611,7 @@ extern const char *drbd_conn_str(enum drbd_conns s); extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ +extern bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i); extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate); extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i); extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); -- GitLab From 6c3c4355d6bfa418db828684e67910c559402264 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:53 +0100 Subject: [PATCH 0144/3163] drbd: split out some helper functions to drbd_al_begin_io To make the code easier to follow, use an explicit find_active_resync_extent(), and add a "nonblock" parameter to _al_get(). 
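(For orientation, an editorial caller-side sketch, based only on the hunks below, of the two lookup modes the new parameter enables; "al_ext" and "enr" are as used in drbd_al_begin_io_prepare():

	/* fastpath: probe once, never sleep */
	al_ext = _al_get(mdev, enr, true);

	/* slowpath: sleep until the extent can be activated */
	wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr, false)) != NULL);

Both modes bail out early if a resync extent with BME_NO_WRITES still covers the area.)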
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 49 ++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e4f1231c2ef2..ff03f9053316 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -222,25 +222,37 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, return err; } -static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) +static struct bm_extent *find_active_resync_extent(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; struct lc_element *tmp; - int wake; - - spin_lock_irq(&mdev->al_lock); tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); if (unlikely(tmp != NULL)) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); - if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { - wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); - spin_unlock_irq(&mdev->al_lock); - if (wake) - wake_up(&mdev->al_wait); - return NULL; - } + if (test_bit(BME_NO_WRITES, &bm_ext->flags)) + return bm_ext; } - al_ext = lc_get(mdev->act_log, enr); + return NULL; +} + +static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr, bool nonblock) +{ + struct lc_element *al_ext; + struct bm_extent *bm_ext; + int wake; + + spin_lock_irq(&mdev->al_lock); + bm_ext = find_active_resync_extent(mdev, enr); + if (bm_ext) { + wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); + spin_unlock_irq(&mdev->al_lock); + if (wake) + wake_up(&mdev->al_wait); + return NULL; + } + if (nonblock) + al_ext = lc_try_get(mdev->act_log, enr); + else + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); return al_ext; } @@ -251,7 +263,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i) * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? 
first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - bool fastpath_ok = true; D_ASSERT((unsigned)(last - first) <= 1); D_ASSERT(atomic_read(&mdev->local_cnt) > 0); @@ -260,12 +271,7 @@ bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i) if (first != last) return false; - spin_lock_irq(&mdev->al_lock); - fastpath_ok = - lc_find(mdev->resync, first/AL_EXT_PER_BM_SECT) == NULL && - lc_try_get(mdev->act_log, first) != NULL; - spin_unlock_irq(&mdev->al_lock); - return fastpath_ok; + return _al_get(mdev, first, true); } bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i) @@ -282,7 +288,8 @@ bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i) for (enr = first; enr <= last; enr++) { struct lc_element *al_ext; - wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)) != NULL); + wait_event(mdev->al_wait, + (al_ext = _al_get(mdev, enr, false)) != NULL); if (al_ext->lc_number != enr) need_transaction = true; } -- GitLab From 779b3fe4c0e9dea19ae3ddef0b5fd1a663b63ee6 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:54 +0100 Subject: [PATCH 0145/3163] drbd: queue writes on submitter thread, unless they pass the activity log fastpath Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 4af709e0aae5..43bc1d064bc7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1020,6 +1020,14 @@ drbd_submit_req_private_bio(struct drbd_request *req) bio_endio(bio, -EIO); } +static void drbd_queue_write(struct drbd_conf *mdev, struct drbd_request *req) +{ + spin_lock(&mdev->submit.lock); + list_add_tail(&req->tl_requests, &mdev->submit.writes); + spin_unlock(&mdev->submit.lock); + queue_work(mdev->submit.wq, &mdev->submit.worker); +} + /* returns the new drbd_request pointer, if the caller is expected to * drbd_send_and_submit() it (to save latency), or NULL if we queued the * request on the submitter thread. @@ -1048,17 +1056,13 @@ drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long star req->private_bio = NULL; } - /* For WRITES going to the local disk, grab a reference on the target - * extent. This waits for any resync activity in the corresponding - * resync extent to finish, and, if necessary, pulls in the target - * extent into the activity log, which involves further disk io because - * of transactional on-disk meta data updates. - * Empty flushes don't need to go into the activity log, they can only - * flush data for pending writes which are already in there. */ if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &mdev->flags)) { + if (!drbd_al_begin_io_fastpath(mdev, &req->i)) { + drbd_queue_write(mdev, req); + return NULL; + } req->rq_state |= RQ_IN_ACT_LOG; - drbd_al_begin_io(mdev, &req->i, true); } return req; -- GitLab From cbe5e6109538ddab57764a88d9f0c2accd0c7d48 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 22 Mar 2013 22:17:36 -0600 Subject: [PATCH 0146/3163] lru_cache: introduce lc_get_cumulative() New helper to be able to consolidate more updates into a single transaction. Without this, we can only grab a single refcount on an updated element while preparing a transaction. 
lc_get_cumulative - like lc_get; also finds to-be-changed elements @lc: the lru cache to operate on @enr: the label to look up Unlike lc_get this also returns the element for @enr, if it belongs to a pending transaction, so the return values are like for lc_get(), plus: pointer to an element already on the "to_be_changed" list. In this case, the cache was already marked %LC_DIRTY. Caller needs to make sure that the pending transaction is completed, before proceeding to actually use this element. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Fixed up by Jens to export lc_get_cumulative(). Signed-off-by: Jens Axboe --- include/linux/lru_cache.h | 1 + lib/lru_cache.c | 56 ++++++++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 4019013c6593..46262284de47 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -256,6 +256,7 @@ extern void lc_destroy(struct lru_cache *lc); extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); extern void lc_del(struct lru_cache *lc, struct lc_element *element); +extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 8335d39d2ccd..4a83ecd03650 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -365,7 +365,13 @@ static int lc_unused_element_available(struct lru_cache *lc) return 0; } -static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) +/* used as internal flags to __lc_get */ +enum { + LC_GET_MAY_CHANGE = 1, + LC_GET_MAY_USE_UNCOMMITTED = 2, +}; + +static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags) { struct lc_element *e; @@ -380,22 +386,31 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool * this enr is currently being pulled in already, * and will be available once the pending transaction * has been committed. */ - if (e && e->lc_new_number == e->lc_number) { + if (e) { + if (e->lc_new_number != e->lc_number) { + /* It has been found above, but on the "to_be_changed" + * list, not yet committed. Don't pull it in twice, + * wait for the transaction, then try again... + */ + if (!(flags & LC_GET_MAY_USE_UNCOMMITTED)) + RETURN(NULL); + /* ... unless the caller is aware of the implications, + * probably preparing a cumulative transaction. */ + ++e->refcnt; + ++lc->hits; + RETURN(e); + } + /* else: lc_new_number == lc_number; a real hit. */ ++lc->hits; if (e->refcnt++ == 0) lc->used++; list_move(&e->list, &lc->in_use); /* Not evictable... */ RETURN(e); } + /* e == NULL */ ++lc->misses; - if (!may_change) - RETURN(NULL); - - /* It has been found above, but on the "to_be_changed" list, not yet - * committed.
Don't pull it in twice, wait for the transaction, then - * try again */ - if (e) RETURN(NULL); /* To avoid races with lc_try_lock(), first, mark us dirty @@ -477,7 +492,27 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool */ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) { - return __lc_get(lc, enr, 1); + return __lc_get(lc, enr, LC_GET_MAY_CHANGE); +} + +/** + * lc_get_cumulative - like lc_get; also finds to-be-changed elements + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Unlike lc_get this also returns the element for @enr, if it belongs to + * a pending transaction, so the return values are like for lc_get(), + * plus: + * + * pointer to an element already on the "to_be_changed" list. + * In this case, the cache was already marked %LC_DIRTY. + * + * Caller needs to make sure that the pending transaction is completed, + * before proceeding to actually use this element. + */ +struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr) +{ + return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED); } /** @@ -648,3 +683,4 @@ EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); EXPORT_SYMBOL(lc_try_lock); EXPORT_SYMBOL(lc_is_used); +EXPORT_SYMBOL(lc_get_cumulative); -- GitLab From 08a1ddab6df7d3c7b6341774cb1cf4b21b96a214 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:56 +0100 Subject: [PATCH 0147/3163] drbd: consolidate as many updates as possible into one AL transaction Depending on current IO depth, try to consolidate as many updates as possible into one activity log transaction. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 49 ++++++++++++++++++++++ drivers/block/drbd/drbd_int.h | 2 + drivers/block/drbd/drbd_req.c | 70 +++++++++++++++++++++++++------- 3 files changed, 107 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ff03f9053316..6afe173d5c2b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -359,6 +359,55 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool dele drbd_al_begin_io_commit(mdev, delegate); } +int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i) +{ + struct lru_cache *al = mdev->act_log; + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned nr_al_extents; + unsigned available_update_slots; + unsigned enr; + + D_ASSERT(first <= last); + + nr_al_extents = 1 + last - first; /* worst case: all touched extents are cold. */ + available_update_slots = min(al->nr_elements - al->used, + al->max_pending_changes - al->pending_changes); + + /* We want all necessary updates for a given request within the same transaction. + * We could first check how many updates are *actually* needed, + * and use that instead of the worst-case nr_al_extents */ + if (available_update_slots < nr_al_extents) + return -EWOULDBLOCK; + + /* Is resync active in this area?
*/ + for (enr = first; enr <= last; enr++) { + struct lc_element *tmp; + tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); + if (unlikely(tmp != NULL)) { + struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); + if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { + if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags)) + return -EBUSY; + return -EWOULDBLOCK; + } + } + } + + /* Check out the refcounts. + * Given that we checked for available elements and update slots above, + * this has to be successful. */ + for (enr = first; enr <= last; enr++) { + struct lc_element *al_ext; + al_ext = lc_get_cumulative(mdev->act_log, enr); + if (!al_ext) + dev_info(DEV, "LOGIC BUG for enr=%u\n", enr); + } + return 0; +} + void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b7b52dd42325..f943aacfdad8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1611,6 +1611,8 @@ extern const char *drbd_conn_str(enum drbd_conns s); extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ +extern int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i); +extern void drbd_al_begin_io_commit(struct drbd_conf *mdev, bool delegate); extern bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i); extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate); extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 43bc1d064bc7..b923d41678e1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1164,32 +1164,74 @@ void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long drbd_send_and_submit(mdev, req); } -void __drbd_make_request_from_worker(struct drbd_conf *mdev, struct drbd_request *req) +static void submit_fast_path(struct drbd_conf *mdev, struct list_head *incoming) { - const int rw = bio_rw(req->master_bio); + struct drbd_request *req, *tmp; + list_for_each_entry_safe(req, tmp, incoming, tl_requests) { + const int rw = bio_data_dir(req->master_bio); - if (rw == WRITE && req->private_bio && req->i.size - && !test_bit(AL_SUSPENDED, &mdev->flags)) { - drbd_al_begin_io(mdev, &req->i, false); - req->rq_state |= RQ_IN_ACT_LOG; + if (rw == WRITE /* rw != WRITE should not even end up here!
*/ + && req->private_bio && req->i.size + && !test_bit(AL_SUSPENDED, &mdev->flags)) { + if (!drbd_al_begin_io_fastpath(mdev, &req->i)) + continue; + + req->rq_state |= RQ_IN_ACT_LOG; + } + + list_del_init(&req->tl_requests); + drbd_send_and_submit(mdev, req); } - drbd_send_and_submit(mdev, req); } +static bool prepare_al_transaction_nonblock(struct drbd_conf *mdev, + struct list_head *incoming, + struct list_head *pending) +{ + struct drbd_request *req, *tmp; + int wake = 0; + int err; + + spin_lock_irq(&mdev->al_lock); + list_for_each_entry_safe(req, tmp, incoming, tl_requests) { + err = drbd_al_begin_io_nonblock(mdev, &req->i); + if (err == -EBUSY) + wake = 1; + if (err) + continue; + req->rq_state |= RQ_IN_ACT_LOG; + list_move_tail(&req->tl_requests, pending); + } + spin_unlock_irq(&mdev->al_lock); + if (wake) + wake_up(&mdev->al_wait); + + return !list_empty(pending); +} void do_submit(struct work_struct *ws) { struct drbd_conf *mdev = container_of(ws, struct drbd_conf, submit.worker); - LIST_HEAD(writes); + LIST_HEAD(incoming); + LIST_HEAD(pending); struct drbd_request *req, *tmp; - spin_lock(&mdev->submit.lock); - list_splice_init(&mdev->submit.writes, &writes); - spin_unlock(&mdev->submit.lock); + for (;;) { + spin_lock(&mdev->submit.lock); + list_splice_tail_init(&mdev->submit.writes, &incoming); + spin_unlock(&mdev->submit.lock); - list_for_each_entry_safe(req, tmp, &writes, tl_requests) { - list_del_init(&req->tl_requests); - __drbd_make_request_from_worker(mdev, req); + submit_fast_path(mdev, &incoming); + if (list_empty(&incoming)) + break; + + wait_event(mdev->al_wait, prepare_al_transaction_nonblock(mdev, &incoming, &pending)); + drbd_al_begin_io_commit(mdev, false); + + list_for_each_entry_safe(req, tmp, &pending, tl_requests) { + list_del_init(&req->tl_requests); + drbd_send_and_submit(mdev, req); + } } } -- GitLab From 7e8c288f6cde950a6ca001ec06a32c8c2cf4180e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:57 +0100 Subject: [PATCH 0148/3163] drbd: move start io accounting before activity log transaction The IO accounting of the drbd "queue depth" was misleading. We only started IO accounting once we already wrote the activity log. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b923d41678e1..d72f2fef1cba 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1056,6 +1056,9 @@ drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long star req->private_bio = NULL; } + /* Update disk stats */ + _drbd_start_io_acct(mdev, req); + if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &mdev->flags)) { if (!drbd_al_begin_io_fastpath(mdev, &req->i)) { @@ -1095,9 +1098,6 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *re goto out; } - /* Update disk stats */ - _drbd_start_io_acct(mdev, req); - /* We fail READ/READA early, if we can not serve it. * We must do this before req is registered on any lists. * Otherwise, drbd_req_complete() will queue failed READ for retry. 
*/ -- GitLab From 45ad07b3ac1e3062188fb760fe71cafb4a100215 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:58 +0100 Subject: [PATCH 0149/3163] drbd: try hard to max out the updates per AL transaction There may have been more incoming requests while we were preparing the current transaction. Try to consolidate more updates into this transaction until we make no more progress. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d72f2fef1cba..9f7ff1cb46ff 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1226,6 +1226,37 @@ void do_submit(struct work_struct *ws) break; wait_event(mdev->al_wait, prepare_al_transaction_nonblock(mdev, &incoming, &pending)); + /* Maybe more was queued, while we prepared the transaction? + * Try to stuff them into this transaction as well. + * Be strictly non-blocking here, no wait_event, we already + * have something to commit. + * Stop if we don't make any more progress. + */ + for (;;) { + LIST_HEAD(more_pending); + LIST_HEAD(more_incoming); + bool made_progress; + + /* It is ok to look outside the lock, + * it's only an optimization anyways */ + if (list_empty(&mdev->submit.writes)) + break; + + spin_lock(&mdev->submit.lock); + list_splice_tail_init(&mdev->submit.writes, &more_incoming); + spin_unlock(&mdev->submit.lock); + + if (list_empty(&more_incoming)) + break; + + made_progress = prepare_al_transaction_nonblock(mdev, &more_incoming, &more_pending); + + list_splice_tail_init(&more_pending, &pending); + list_splice_tail_init(&more_incoming, &incoming); + + if (!made_progress) + break; + } drbd_al_begin_io_commit(mdev, false); list_for_each_entry_safe(req, tmp, &pending, tl_requests) { -- GitLab From 5bbcf5e6abe97485748b51ea0713cc3012b4a8f0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 19 Mar 2013 18:16:59 +0100 Subject: [PATCH 0150/3163] drbd: adjust upper limit for activity log extents Now that the on-disk activity-log ring buffer size is adjustable, the maximum active set can become larger, and is now limited by the use of 16-bit "labels". This increases the maximum working set from 6433 to 65534 extents, each of which covers an area of 4 MiB. This means that if you use the maximum, you'd have to resync more than 250 GiB after an unclean Primary shutdown. With capable backend storage and replication links, this is entirely feasible.
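(Editorial arithmetic for scale: the old limit allowed 6433 * 4 MiB ≈ 25 GiB of potentially-dirty area, while the new one allows 65534 * 4 MiB = 262136 MiB ≈ 256 GiB, which is where the "more than 250 GiB" figure comes from.)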
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 47 +++++++++++++++++++++++++++++------- include/linux/drbd_limits.h | 11 ++++----- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bcf900bcd142..42fda4ae2f87 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1141,15 +1141,32 @@ static bool should_set_defaults(struct genl_info *info) return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); } -static void enforce_disk_conf_limits(struct disk_conf *dc) +static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) { - if (dc->al_extents < DRBD_AL_EXTENTS_MIN) - dc->al_extents = DRBD_AL_EXTENTS_MIN; - if (dc->al_extents > DRBD_AL_EXTENTS_MAX) - dc->al_extents = DRBD_AL_EXTENTS_MAX; + /* This is limited by 16 bit "slot" numbers, + * and by available on-disk context storage. + * + * Also (u16)~0 is special (denotes a "free" extent). + * + * One transaction occupies one 4kB on-disk block, + * we have n such blocks in the on-disk ring buffer, + * the "current" transaction may fail (n-1), + * and there are 919 slots of context information per transaction. + * + * 72 transaction blocks amount to more than 2**16 context slots, + * so cap there first. + */ + const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX; + const unsigned int sufficient_on_disk = + (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1) + /AL_CONTEXT_PER_TRANSACTION; - if (dc->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) - dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; + unsigned int al_size_4k = bdev->md.al_size_4k; + + if (al_size_4k > sufficient_on_disk) + return max_al_nr; + + return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION; } int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) @@ -1196,7 +1213,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (!expect(new_disk_conf->resync_rate >= 1)) new_disk_conf->resync_rate = 1; - enforce_disk_conf_limits(new_disk_conf); + if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) + new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; + if (new_disk_conf->al_extents > drbd_al_extents_max(mdev->ldev)) + new_disk_conf->al_extents = drbd_al_extents_max(mdev->ldev); + + if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) + new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s->size) { @@ -1344,7 +1367,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - enforce_disk_conf_limits(new_disk_conf); + if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) + new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ); if (!new_plan) { @@ -1419,6 +1443,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; + if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) + new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; + if (new_disk_conf->al_extents > drbd_al_extents_max(nbc)) + new_disk_conf->al_extents = drbd_al_extents_max(nbc); + if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 1fa19c5f5e64..1fedf2b17cc8 100644 --- a/include/linux/drbd_limits.h +++
b/include/linux/drbd_limits.h @@ -126,13 +126,12 @@ #define DRBD_RESYNC_RATE_DEF 250 #define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ - /* less than 7 would hit performance unnecessarily. - * 919 slots context information per transaction, - * 32k activity log, 4k transaction size, - * one transaction in flight: - * 919 * 7 = 6433 */ + /* less than 7 would hit performance unnecessarily. */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 6433 + /* we use u16 as "slot number", (u16)~0 is "FREE". + * If you use >= 292 kB on-disk ring buffer, + * this is the maximum you can use: */ +#define DRBD_AL_EXTENTS_MAX 0xfffe #define DRBD_AL_EXTENTS_DEF 1237 #define DRBD_AL_EXTENTS_SCALE '1' -- GitLab From 66311274691ec65972cad3626057fa8d00c146d8 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 23 Mar 2013 11:42:24 +0800 Subject: [PATCH 0151/3163] block: add a flag to identify PM request Add a flag REQ_PM to identify a request as PM related; such requests will not change the device request queue's runtime status. It is intended to be used in a driver's runtime PM callback, so that the driver can perform some IO to the device there with the queue's runtime status unaffected. e.g. in SCSI disk's runtime suspend callback, the disk will be put into stopped power state, and this requires sending a command to the device. Such command processing should not change the disk's runtime status. Signed-off-by: Lin Ming Signed-off-by: Aaron Lu Acked-by: Alan Stern Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cdf11191e645..fcc1ce28d5ca 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -175,6 +175,7 @@ enum rq_flag_bits { __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_KERNEL, /* direct IO to kernel pages */ + __REQ_PM, /* runtime pm request */ __REQ_NR_BITS, /* stops here */ }; @@ -223,5 +224,6 @@ enum rq_flag_bits { #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_SECURE (1 << __REQ_SECURE) #define REQ_KERNEL (1 << __REQ_KERNEL) +#define REQ_PM (1 << __REQ_PM) #endif /* __LINUX_BLK_TYPES_H */ -- GitLab From 6c9546675864f51506af69eca388e5d922942c56 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 23 Mar 2013 11:42:26 +0800 Subject: [PATCH 0152/3163] block: add runtime pm helpers Add runtime pm helper functions: void blk_pm_runtime_init(struct request_queue *q, struct device *dev) - Initialization function for drivers to call. int blk_pre_runtime_suspend(struct request_queue *q) - If any requests are in the queue, mark last busy and return -EBUSY. Otherwise set q->rpm_status to RPM_SUSPENDING and return 0. void blk_post_runtime_suspend(struct request_queue *q, int err) - If the suspend succeeded then set q->rpm_status to RPM_SUSPENDED. Otherwise set it to RPM_ACTIVE and mark last busy. void blk_pre_runtime_resume(struct request_queue *q) - Set q->rpm_status to RPM_RESUMING. void blk_post_runtime_resume(struct request_queue *q, int err) - If the resume succeeded then set q->rpm_status to RPM_ACTIVE and call __blk_run_queue, then mark last busy and autosuspend. Otherwise set q->rpm_status to RPM_SUSPENDED.
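As a rough usage sketch (editorial, not from this patch; "mydev", its queue member, and the power_down/power_up helpers are hypothetical stand-ins for a real driver's private data and hardware operations, while the blk_* calls are the helpers introduced below):

	static int mydev_runtime_suspend(struct device *dev)
	{
		struct mydev *md = dev_get_drvdata(dev);	/* hypothetical driver data */
		int err;

		err = blk_pre_runtime_suspend(md->queue);
		if (err)
			return err;			/* -EBUSY: requests still pending */
		err = mydev_power_down(md);		/* hypothetical hardware op */
		blk_post_runtime_suspend(md->queue, err);
		return err;
	}

	static int mydev_runtime_resume(struct device *dev)
	{
		struct mydev *md = dev_get_drvdata(dev);
		int err;

		blk_pre_runtime_resume(md->queue);
		err = mydev_power_up(md);		/* hypothetical hardware op */
		blk_post_runtime_resume(md->queue, err);
		return err;
	}

with blk_pm_runtime_init(md->queue, dev) called once at probe time.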
The idea and API is designed by Alan Stern and described here: http://marc.info/?l=linux-scsi&m=133727953625963&w=2 Signed-off-by: Lin Ming Signed-off-by: Aaron Lu Acked-by: Alan Stern Signed-off-by: Jens Axboe --- block/blk-core.c | 144 +++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 27 ++++++++ 2 files changed, 171 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 074b758efc42..123d240132bf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -30,6 +30,7 @@ #include #include #include +#include <linux/pm_runtime.h> #define CREATE_TRACE_POINTS #include @@ -3045,6 +3046,149 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); +#ifdef CONFIG_PM_RUNTIME +/** + * blk_pm_runtime_init - Block layer runtime PM initialization routine + * @q: the queue of the device + * @dev: the device the queue belongs to + * + * Description: + * Initialize runtime-PM-related fields for @q and start auto suspend for + * @dev. Drivers that want to take advantage of request-based runtime PM + * should call this function after @dev has been initialized, and its + * request queue @q has been allocated, and runtime PM for it can not happen + * yet (either due to disabled/forbidden or its usage_count > 0). In most + * cases, the driver should call this function before any I/O has taken place. + * + * This function takes care of setting up using auto suspend for the device, + * the autosuspend delay is set to -1 to make runtime suspend impossible + * until an updated value is either set by user or by driver. Drivers do + * not need to touch other autosuspend settings. + * + * The block layer runtime PM is request based, so it only works for drivers + * that use requests as their IO unit instead of those that directly use bios. + */ +void blk_pm_runtime_init(struct request_queue *q, struct device *dev) +{ + q->dev = dev; + q->rpm_status = RPM_ACTIVE; + pm_runtime_set_autosuspend_delay(q->dev, -1); + pm_runtime_use_autosuspend(q->dev); +} +EXPORT_SYMBOL(blk_pm_runtime_init); + +/** + * blk_pre_runtime_suspend - Pre runtime suspend check + * @q: the queue of the device + * + * Description: + * This function will check if runtime suspend is allowed for the device + * by examining if there are any requests pending in the queue. If there + * are requests pending, the device can not be runtime suspended; otherwise, + * the queue's status will be updated to SUSPENDING and the driver can + * proceed to suspend the device. + * + * For the not allowed case, we mark last busy for the device so that + * runtime PM core will try to autosuspend it some time later. + * + * This function should be called near the start of the device's + * runtime_suspend callback. + * + * Return: + * 0 - OK to runtime suspend the device + * -EBUSY - Device should not be runtime suspended + */ +int blk_pre_runtime_suspend(struct request_queue *q) +{ + int ret = 0; + + spin_lock_irq(q->queue_lock); + if (q->nr_pending) { + ret = -EBUSY; + pm_runtime_mark_last_busy(q->dev); + } else { + q->rpm_status = RPM_SUSPENDING; + } + spin_unlock_irq(q->queue_lock); + return ret; +} +EXPORT_SYMBOL(blk_pre_runtime_suspend); + +/** + * blk_post_runtime_suspend - Post runtime suspend processing + * @q: the queue of the device + * @err: return value of the device's runtime_suspend function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime suspend function and mark last busy for the device so + * that PM core will try to auto suspend the device at a later time.
+ * + * This function should be called near the end of the device's + * runtime_suspend callback. + */ +void blk_post_runtime_suspend(struct request_queue *q, int err) +{ + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_SUSPENDED; + } else { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_suspend); + +/** + * blk_pre_runtime_resume - Pre runtime resume processing + * @q: the queue of the device + * + * Description: + * Update the queue's runtime status to RESUMING in preparation for the + * runtime resume of the device. + * + * This function should be called near the start of the device's + * runtime_resume callback. + */ +void blk_pre_runtime_resume(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_RESUMING; + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_pre_runtime_resume); + +/** + * blk_post_runtime_resume - Post runtime resume processing + * @q: the queue of the device + * @err: return value of the device's runtime_resume function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime_resume function. If it is successfully resumed, process + * the requests that are queued into the device's queue when it is resuming + * and then mark last busy and initiate autosuspend for it. + * + * This function should be called near the end of the device's + * runtime_resume callback. + */ +void blk_post_runtime_resume(struct request_queue *q, int err) +{ + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_ACTIVE; + __blk_run_queue(q); + pm_runtime_mark_last_busy(q->dev); + pm_runtime_autosuspend(q->dev); + } else { + q->rpm_status = RPM_SUSPENDED; + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_resume); +#endif + int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 78feda9bbae2..89d89c7162aa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -361,6 +361,12 @@ struct request_queue { */ struct kobject kobj; +#ifdef CONFIG_PM_RUNTIME + struct device *dev; + int rpm_status; + unsigned int nr_pending; +#endif + /* * queue settings */ @@ -960,6 +966,27 @@ struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +/* + * block layer runtime pm functions + */ +#ifdef CONFIG_PM_RUNTIME +extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); +extern int blk_pre_runtime_suspend(struct request_queue *q); +extern void blk_post_runtime_suspend(struct request_queue *q, int err); +extern void blk_pre_runtime_resume(struct request_queue *q); +extern void blk_post_runtime_resume(struct request_queue *q, int err); +#else +static inline void blk_pm_runtime_init(struct request_queue *q, + struct device *dev) {} +static inline int blk_pre_runtime_suspend(struct request_queue *q) +{ + return -ENOSYS; +} +static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} +static inline void blk_pre_runtime_resume(struct request_queue *q) {} +static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} +#endif + /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. 
This allows merging of sequential requests -- GitLab From c8158819d506a8aedeca53c52dfb709a0aabe011 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 23 Mar 2013 11:42:27 +0800 Subject: [PATCH 0153/3163] block: implement runtime pm strategy When a request is added: If device is suspended or is suspending and the request is not a PM request, resume the device. When the last request finishes: Call pm_runtime_mark_last_busy(). When picking a request: If device is resuming/suspending, then only a PM request is allowed to go. The idea and API is designed by Alan Stern and described here: http://marc.info/?l=linux-scsi&m=133727953625963&w=2 Signed-off-by: Lin Ming Signed-off-by: Aaron Lu Acked-by: Alan Stern Signed-off-by: Jens Axboe --- block/blk-core.c | 39 +++++++++++++++++++++++++++++++++++++++ block/elevator.c | 26 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 123d240132bf..441f3488a766 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1264,6 +1264,16 @@ void part_round_stats(int cpu, struct hd_struct *part) } EXPORT_SYMBOL_GPL(part_round_stats); +#ifdef CONFIG_PM_RUNTIME +static void blk_pm_put_request(struct request *rq) +{ + if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending) + pm_runtime_mark_last_busy(rq->q->dev); +} +#else +static inline void blk_pm_put_request(struct request *rq) {} +#endif + /* * queue lock must be held */ @@ -1274,6 +1284,8 @@ void __blk_put_request(struct request_queue *q, struct request *req) if (unlikely(--req->ref_count)) return; + blk_pm_put_request(req); + elv_completed_request(q, req); /* this is a bio leak */ @@ -2053,6 +2065,28 @@ static void blk_account_io_done(struct request *req) } } +#ifdef CONFIG_PM_RUNTIME +/* + * Don't process normal requests when queue is suspended + * or in the process of suspending/resuming + */ +static struct request *blk_pm_peek_request(struct request_queue *q, + struct request *rq) +{ + if (q->dev && (q->rpm_status == RPM_SUSPENDED || + (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM)))) + return NULL; + else + return rq; +} +#else +static inline struct request *blk_pm_peek_request(struct request_queue *q, + struct request *rq) +{ + return rq; +} +#endif + /** * blk_peek_request - peek at the top of a request queue + * @q: request queue to peek at @@ -2075,6 +2109,11 @@ struct request *blk_peek_request(struct request_queue *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { + + rq = blk_pm_peek_request(q, rq); + if (!rq) + break; + if (!(rq->cmd_flags & REQ_STARTED)) { /* * This is the first time the device driver diff --git a/block/elevator.c b/block/elevator.c index a0ffdd943c98..eba5b04c29b1 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -34,6 +34,7 @@ #include #include #include +#include <linux/pm_runtime.h> #include @@ -536,6 +537,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, e->type->ops.elevator_bio_merged_fn(q, rq, bio); } +#ifdef CONFIG_PM_RUNTIME +static void blk_pm_requeue_request(struct request *rq) +{ + if (rq->q->dev && !(rq->cmd_flags & REQ_PM)) + rq->q->nr_pending--; +} + +static void blk_pm_add_request(struct request_queue *q, struct request *rq) +{ + if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 && + (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) + pm_request_resume(q->dev); +} +#else +static inline void blk_pm_requeue_request(struct request *rq) {} +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ +} +#endif + void
elv_requeue_request(struct request_queue *q, struct request *rq) { /* @@ -550,6 +572,8 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) rq->cmd_flags &= ~REQ_STARTED; + blk_pm_requeue_request(rq); + __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE); } @@ -572,6 +596,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) { trace_block_rq_insert(q, rq); + blk_pm_add_request(q, rq); + rq->q = q; if (rq->cmd_flags & REQ_SOFTBARRIER) { -- GitLab From 57fb233f078beb5d0437a4ae575fbd4d9eb9c738 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 24 Aug 2012 04:56:11 -0700 Subject: [PATCH 0154/3163] block: Reorder struct bio_set This is prep work for the next patch, which embeds a struct bio_list in struct bio_set. Signed-off-by: Kent Overstreet CC: Jens Axboe --- include/linux/bio.h | 66 ++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 820e7aaad4fd..93d3d17a300d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -298,39 +298,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } static inline void bio_disassociate_task(struct bio *bio) { } #endif /* CONFIG_BLK_CGROUP */ -/* - * bio_set is used to allow other portions of the IO system to - * allocate their own private memory pools for bio and iovec structures. - * These memory pools in turn all allocate from the bio_slab - * and the bvec_slabs[]. - */ -#define BIO_POOL_SIZE 2 -#define BIOVEC_NR_POOLS 6 -#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) - -struct bio_set { - struct kmem_cache *bio_slab; - unsigned int front_pad; - - mempool_t *bio_pool; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - mempool_t *bio_integrity_pool; -#endif - mempool_t *bvec_pool; -}; - -struct biovec_slab { - int nr_vecs; - char *name; - struct kmem_cache *slab; -}; - -/* - * a small number of entries is fine, not going to be performance critical. - * basically we just need to survive - */ -#define BIO_SPLIT_ENTRIES 2 - #ifdef CONFIG_HIGHMEM /* * remember never ever reenable interrupts between a bvec_kmap_irq and @@ -527,6 +494,39 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } +/* + * bio_set is used to allow other portions of the IO system to + * allocate their own private memory pools for bio and iovec structures. + * These memory pools in turn all allocate from the bio_slab + * and the bvec_slabs[]. + */ +#define BIO_POOL_SIZE 2 +#define BIOVEC_NR_POOLS 6 +#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) + +struct bio_set { + struct kmem_cache *bio_slab; + unsigned int front_pad; + + mempool_t *bio_pool; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + mempool_t *bio_integrity_pool; +#endif + mempool_t *bvec_pool; +}; + +struct biovec_slab { + int nr_vecs; + char *name; + struct kmem_cache *slab; +}; + +/* + * a small number of entries is fine, not going to be performance critical. + * basically we just need to survive + */ +#define BIO_SPLIT_ENTRIES 2 + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) -- GitLab From df2cb6daa4cbc34406bc4b1ac9b9335df1083a72 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:33:46 -0700 Subject: [PATCH 0155/3163] block: Avoid deadlocks with bio allocation by stacking drivers Previously, if we ever try to allocate more than once from the same bio set while running under generic_make_request() (i.e. a stacking block driver), we risk deadlock. 
This is because of the code in generic_make_request() that converts recursion to iteration; any bios we submit won't actually be submitted (so they can complete and eventually be freed) until after we return - this means if we allocate a second bio, we're blocking the first one from ever being freed. Thus if enough threads call into a stacking block driver at the same time with bios that need multiple splits, and the bio_set's reserve gets used up, we deadlock. This can be worked around in the driver code - we could check if we're running under generic_make_request(), then mask out __GFP_WAIT when we go to allocate a bio, and if the allocation fails punt to workqueue and retry the allocation. But this is tricky and not a generic solution. This patch solves it for all users by inverting the previously described technique. We allocate a rescuer workqueue for each bio_set, and then in the allocation code if there are bios on current->bio_list we would be blocking, we punt them to the rescuer workqueue to be submitted. This guarantees forward progress for bio allocations under generic_make_request() provided each bio is submitted before allocating the next, and provided the bios are freed after they complete. Note that this doesn't do anything for allocation from other mempools. Instead of allocating per bio data structures from a mempool, code should use bio_set's front_pad. Tested it by forcing the rescue codepath to be taken (by disabling the first GFP_NOWAIT attempt), and then ran it with bcache (which does a lot of arbitrary bio splitting) and verified that the rescuer was being invoked. Signed-off-by: Kent Overstreet CC: Jens Axboe Acked-by: Tejun Heo Reviewed-by: Muthukumar Ratty --- fs/bio.c | 116 +++++++++++++++++++++++++++++++++++++++- include/linux/bio.h | 9 ++++ 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index bb5768f59b32..73b544709945 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -297,6 +297,54 @@ void bio_reset(struct bio *bio) } EXPORT_SYMBOL(bio_reset); +static void bio_alloc_rescue(struct work_struct *work) +{ + struct bio_set *bs = container_of(work, struct bio_set, rescue_work); + struct bio *bio; + + while (1) { + spin_lock(&bs->rescue_lock); + bio = bio_list_pop(&bs->rescue_list); + spin_unlock(&bs->rescue_lock); + + if (!bio) + break; + + generic_make_request(bio); + } +} + +static void punt_bios_to_rescuer(struct bio_set *bs) +{ + struct bio_list punt, nopunt; + struct bio *bio; + + /* + * In order to guarantee forward progress we must punt only bios that + * were allocated from this bio_set; otherwise, if there was a bio on + * there for a stacking driver higher up in the stack, processing it + * could require allocating bios from this bio_set, and doing that from + * our own rescuer would be bad. + * + * Since bio lists are singly linked, pop them all instead of trying to + * remove from the middle of the list: + */ + + bio_list_init(&punt); + bio_list_init(&nopunt); + + while ((bio = bio_list_pop(current->bio_list))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + + *current->bio_list = nopunt; + + spin_lock(&bs->rescue_lock); + bio_list_merge(&bs->rescue_list, &punt); + spin_unlock(&bs->rescue_lock); + + queue_work(bs->rescue_workqueue, &bs->rescue_work); +} + /** * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator @@ -314,11 +362,27 @@ EXPORT_SYMBOL(bio_reset); * previously allocated bio for IO before attempting to allocate a new one.
* Failure to do so can cause deadlocks under memory pressure. * + * Note that when running under generic_make_request() (i.e. any block + * driver), bios are not submitted until after you return - see the code in + * generic_make_request() that converts recursion into iteration, to prevent + * stack overflows. + * + * This would normally mean allocating multiple bios under + * generic_make_request() would be susceptible to deadlocks, but we have + * deadlock avoidance code that resubmits any blocked bios from a rescuer + * thread. + * + * However, we do not guarantee forward progress for allocations from other + * mempools. Doing multiple allocations from the same mempool under + * generic_make_request() should be avoided - instead, use bio_set's front_pad + * for per bio allocations. + * * RETURNS: * Pointer to new bio on success, NULL on failure. */ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + gfp_t saved_gfp = gfp_mask; unsigned front_pad; unsigned inline_vecs; unsigned long idx = BIO_POOL_NONE; @@ -336,7 +400,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) front_pad = 0; inline_vecs = nr_iovecs; } else { + /* + * generic_make_request() converts recursion to iteration; this + * means if we're running beneath it, any bios we allocate and + * submit will not be submitted (and thus freed) until after we + * return. + * + * This exposes us to a potential deadlock if we allocate + * multiple bios from the same bio_set() while running + * underneath generic_make_request(). If we were to allocate + * multiple bios (say a stacking block driver that was splitting + * bios), we would deadlock if we exhausted the mempool's + * reserve. + * + * We solve this, and guarantee forward progress, with a rescuer + * workqueue per bio_set. If we go to allocate and there are + * bios on current->bio_list, we first try the allocation + * without __GFP_WAIT; if that fails, we punt those bios we + * would be blocking to the rescuer workqueue before we retry + * with the original gfp_flags. 
+ */ + + if (current->bio_list && !bio_list_empty(current->bio_list)) + gfp_mask &= ~__GFP_WAIT; + p = mempool_alloc(bs->bio_pool, gfp_mask); + if (!p && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + p = mempool_alloc(bs->bio_pool, gfp_mask); + } + front_pad = bs->front_pad; inline_vecs = BIO_INLINE_VECS; } @@ -349,6 +443,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (nr_iovecs > inline_vecs) { bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + if (!bvl && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + } + if (unlikely(!bvl)) goto err_free; } else if (nr_iovecs) { @@ -1579,6 +1679,9 @@ static void biovec_free_pools(struct bio_set *bs) void bioset_free(struct bio_set *bs) { + if (bs->rescue_workqueue) + destroy_workqueue(bs->rescue_workqueue); + if (bs->bio_pool) mempool_destroy(bs->bio_pool); @@ -1614,6 +1717,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) bs->front_pad = front_pad; + spin_lock_init(&bs->rescue_lock); + bio_list_init(&bs->rescue_list); + INIT_WORK(&bs->rescue_work, bio_alloc_rescue); + bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); if (!bs->bio_slab) { kfree(bs); @@ -1624,9 +1731,14 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - if (!biovec_create_pools(bs, pool_size)) - return bs; + if (biovec_create_pools(bs, pool_size)) + goto bad; + + bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); + if (!bs->rescue_workqueue) + goto bad; + return bs; bad: bioset_free(bs); return NULL; diff --git a/include/linux/bio.h b/include/linux/bio.h index 93d3d17a300d..b31036ff779f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -513,6 +513,15 @@ struct bio_set { mempool_t *bio_integrity_pool; #endif mempool_t *bvec_pool; + + /* + * Deadlock avoidance for stacking block drivers: see comments in + * bio_alloc_bioset() for details + */ + spinlock_t rescue_lock; + struct bio_list rescue_list; + struct work_struct rescue_work; + struct workqueue_struct *rescue_workqueue; }; struct biovec_slab { -- GitLab From 6fda981cafbf908acd11e1e636fec50e99d56a47 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Oct 2012 13:18:27 -0700 Subject: [PATCH 0156/3163] block: Fix a buffer overrun in bio_integrity_split() bio_integrity_split() seemed to be confusing pointers and arrays - bip_vec in bio_integrity_payload was an array appended to the end of the payload, so the bio_vecs in struct bio_pair should have come after the bio_integrity_payload they're for. Fix it by making bip_vec a pointer to the inline vecs - a later patch is going to make more use of this pointer. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. 
Petersen --- fs/bio-integrity.c | 5 +++-- include/linux/bio.h | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index a3f28f331b2b..94fa1c562c0e 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -112,6 +112,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bip->bip_slab = idx; bip->bip_bio = bio; + bip->bip_vec = bip->bip_inline_vecs; bio->bi_integrity = bip; return bip; @@ -697,8 +698,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv1 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0]; - bp->bip1.bip_vec[0] = bp->iv1; - bp->bip2.bip_vec[0] = bp->iv2; + bp->bip1.bip_vec = &bp->iv1; + bp->bip2.bip_vec = &bp->iv2; bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size; diff --git a/include/linux/bio.h b/include/linux/bio.h index b31036ff779f..81004fdcc277 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -182,7 +182,9 @@ struct bio_integrity_payload { unsigned short bip_idx; /* current bip_vec index */ struct work_struct bip_work; /* I/O completion */ - struct bio_vec bip_vec[0]; /* embedded bvec array */ + + struct bio_vec *bip_vec; + struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- GitLab From 9f060e2231ca96ca94f2ffcff730acd72606b280 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Oct 2012 15:29:33 -0700 Subject: [PATCH 0157/3163] block: Convert integrity to bvec_alloc_bs() This adds a pointer to the bvec array to struct bio_integrity_payload, instead of the bvecs always being inline; then the bvecs are allocated with bvec_alloc_bs(). Changed bvec_alloc_bs() and bvec_free_bs() to take a pointer to a mempool instead of the bioset, so that bio integrity can use a different mempool for its bvecs, and thus avoid a potential deadlock. This is eventually for immutable bio vecs - immutable bvecs aren't useful if we still have to copy them, hence the need for the pointer. Less code is always nice too, though. Also, bio_integrity_alloc() was using fs_bio_set if no bio_set was specified. This was wrong - using the bio_set doesn't protect us from memory allocation failures, because we just used kmalloc for the bio_integrity_payload. But it does introduce the possibility of deadlock, if for some reason we weren't supposed to be using fs_bio_set. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. Petersen --- fs/bio-integrity.c | 132 ++++++++++++++++---------------------------- fs/bio.c | 36 +++++------- include/linux/bio.h | 8 ++- 3 files changed, 68 insertions(+), 108 deletions(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 94fa1c562c0e..8c4c604c840d 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -27,48 +27,11 @@ #include #include -struct integrity_slab { - struct kmem_cache *slab; - unsigned short nr_vecs; - char name[8]; -}; - -#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } -struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { - IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), -}; -#undef IS +#define BIP_INLINE_VECS 4 +static struct kmem_cache *bip_slab; static struct workqueue_struct *kintegrityd_wq; -static inline unsigned int vecs_to_idx(unsigned int nr) -{ - switch (nr) { - case 1: - return 0; - case 2 ... 4: - return 1; - case 5 ... 16: - return 2; - case 17 ... 64: - return 3; - case 65 ... 128: - return 4; - case 129 ... 
BIO_MAX_PAGES: - return 5; - default: - BUG(); - } -} - -static inline int use_bip_pool(unsigned int idx) -{ - if (idx == BIOVEC_MAX_IDX) - return 1; - - return 0; -} - /** * bio_integrity_alloc - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to @@ -84,38 +47,41 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned int nr_vecs) { struct bio_integrity_payload *bip; - unsigned int idx = vecs_to_idx(nr_vecs); struct bio_set *bs = bio->bi_pool; - - if (!bs) - bs = fs_bio_set; - - BUG_ON(bio == NULL); - bip = NULL; - - /* Lower order allocations come straight from slab */ - if (!use_bip_pool(idx)) - bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); - - /* Use mempool if lower order alloc failed or max vecs were requested */ - if (bip == NULL) { - idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */ + unsigned long idx = BIO_POOL_NONE; + unsigned inline_vecs; + + if (!bs) { + bip = kmalloc(sizeof(struct bio_integrity_payload) + + sizeof(struct bio_vec) * nr_vecs, gfp_mask); + inline_vecs = nr_vecs; + } else { bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); - - if (unlikely(bip == NULL)) { - printk(KERN_ERR "%s: could not alloc bip\n", __func__); - return NULL; - } + inline_vecs = BIP_INLINE_VECS; } + if (unlikely(!bip)) + return NULL; + memset(bip, 0, sizeof(*bip)); + if (nr_vecs > inline_vecs) { + bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, + bs->bvec_integrity_pool); + if (!bip->bip_vec) + goto err; + } else { + bip->bip_vec = bip->bip_inline_vecs; + } + bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; return bip; +err: + mempool_free(bip, bs->bio_integrity_pool); + return NULL; } EXPORT_SYMBOL(bio_integrity_alloc); @@ -131,20 +97,20 @@ void bio_integrity_free(struct bio *bio) struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_set *bs = bio->bi_pool; - if (!bs) - bs = fs_bio_set; - - BUG_ON(bip == NULL); - /* A cloned bio doesn't own the integrity metadata */ if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) && bip->bip_buf != NULL) kfree(bip->bip_buf); - if (use_bip_pool(bip->bip_slab)) + if (bs) { + if (bip->bip_slab != BIO_POOL_NONE) + bvec_free(bs->bvec_integrity_pool, bip->bip_vec, + bip->bip_slab); + mempool_free(bip, bs->bio_integrity_pool); - else - kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); + } else { + kfree(bip); + } bio->bi_integrity = NULL; } @@ -747,13 +713,14 @@ EXPORT_SYMBOL(bio_integrity_clone); int bioset_integrity_create(struct bio_set *bs, int pool_size) { - unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); - if (bs->bio_integrity_pool) return 0; - bs->bio_integrity_pool = - mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); + bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); + + bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_integrity_pool) + return -1; if (!bs->bio_integrity_pool) return -1; @@ -766,13 +733,14 @@ void bioset_integrity_free(struct bio_set *bs) { if (bs->bio_integrity_pool) mempool_destroy(bs->bio_integrity_pool); + + if (bs->bvec_integrity_pool) + mempool_destroy(bs->bvec_integrity_pool); } EXPORT_SYMBOL(bioset_integrity_free); void __init bio_integrity_init(void) { - unsigned int i; - /* * kintegrityd won't block much but may burn a lot of CPU cycles. * Make it highpri CPU intensive wq with max concurrency of 1.
@@ -782,14 +750,10 @@ void __init bio_integrity_init(void) if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); - for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { - unsigned int size; - - size = sizeof(struct bio_integrity_payload) - + bip_slab[i].nr_vecs * sizeof(struct bio_vec); - - bip_slab[i].slab = - kmem_cache_create(bip_slab[i].name, size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - } + bip_slab = kmem_cache_create("bio_integrity_payload", + sizeof(struct bio_integrity_payload) + + sizeof(struct bio_vec) * BIP_INLINE_VECS, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + if (!bip_slab) + panic("Failed to create slab\n"); } diff --git a/fs/bio.c b/fs/bio.c index 73b544709945..40aa96eae99f 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -160,12 +160,12 @@ unsigned int bvec_nr_vecs(unsigned short idx) return bvec_slabs[idx].nr_vecs; } -void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) +void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); if (idx == BIOVEC_MAX_IDX) - mempool_free(bv, bs->bvec_pool); + mempool_free(bv, pool); else { struct biovec_slab *bvs = bvec_slabs + idx; @@ -173,8 +173,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) } } -struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, - struct bio_set *bs) +struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, + mempool_t *pool) { struct bio_vec *bvl; @@ -210,7 +210,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, */ if (*idx == BIOVEC_MAX_IDX) { fallback: - bvl = mempool_alloc(bs->bvec_pool, gfp_mask); + bvl = mempool_alloc(pool, gfp_mask); } else { struct biovec_slab *bvs = bvec_slabs + *idx; gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); @@ -253,7 +253,7 @@ static void bio_free(struct bio *bio) if (bs) { if (bio_has_allocated_vec(bio)) - bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -442,11 +442,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (nr_iovecs > inline_vecs) { - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); gfp_mask = saved_gfp; - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); } if (unlikely(!bvl)) @@ -1661,20 +1661,11 @@ EXPORT_SYMBOL(bio_sector_offset); * create memory pools for biovec's in a bio_set. * use the global biovec slabs created for general use. 
*/ -static int biovec_create_pools(struct bio_set *bs, int pool_entries) +mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries) { struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; - bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); - if (!bs->bvec_pool) - return -ENOMEM; - - return 0; -} - -static void biovec_free_pools(struct bio_set *bs) -{ - mempool_destroy(bs->bvec_pool); + return mempool_create_slab_pool(pool_entries, bp->slab); } void bioset_free(struct bio_set *bs) @@ -1685,8 +1676,10 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + if (bs->bvec_pool) + mempool_destroy(bs->bvec_pool); + bioset_integrity_free(bs); - biovec_free_pools(bs); bio_put_slab(bs); kfree(bs); @@ -1731,7 +1724,8 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - if (biovec_create_pools(bs, pool_size)) + bs->bvec_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_pool) goto bad; bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); diff --git a/include/linux/bio.h b/include/linux/bio.h index 81004fdcc277..669b1cb18fee 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -213,6 +213,7 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); +extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); @@ -288,8 +289,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); -extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); -extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); +extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); +extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); #ifdef CONFIG_BLK_CGROUP @@ -511,10 +512,11 @@ struct bio_set { unsigned int front_pad; mempool_t *bio_pool; + mempool_t *bvec_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t *bio_integrity_pool; + mempool_t *bvec_integrity_pool; #endif - mempool_t *bvec_pool; /* * Deadlock avoidance for stacking block drivers: see comments in -- GitLab From 054bdf646e36c2f7dc1bf6bc6209dbbb5909164b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Sep 2012 13:17:55 -0700 Subject: [PATCH 0158/3163] block: Add bio_advance() This is prep work for immutable bio vecs; we first want to centralize where bvecs are modified. Next two patches convert some existing code to use this function. 
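To make the intended calling convention concrete, here is a rough caller-side sketch (the function example_complete_bytes() is hypothetical and not part of this patch; it assumes the current two-argument bio_endio()):

	/*
	 * Sketch only: complete 'done' bytes of 'bio' without touching
	 * bi_idx/bv_len/bv_offset by hand - bio_advance() does all of that.
	 */
	static void example_complete_bytes(struct bio *bio, unsigned done, int error)
	{
		bio_advance(bio, done);
		if (!bio->bi_size)
			bio_endio(bio, error);
	}
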
Signed-off-by: Kent Overstreet CC: Jens Axboe --- fs/bio.c | 41 +++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 2 ++ include/linux/blk_types.h | 2 ++ 3 files changed, 45 insertions(+) diff --git a/fs/bio.c b/fs/bio.c index 40aa96eae99f..7edc08d2246c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -752,6 +752,47 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, } EXPORT_SYMBOL(bio_add_page); +/** + * bio_advance - increment/complete a bio by some number of bytes + * @bio: bio to advance + * @bytes: number of bytes to complete + * + * This updates bi_sector, bi_size and bi_idx; if the number of bytes to + * complete doesn't align with a bvec boundary, then bv_len and bv_offset will + * be updated on the last bvec as well. + * + * @bio will then represent the remaining, uncompleted portion of the io. + */ +void bio_advance(struct bio *bio, unsigned bytes) +{ + if (bio_integrity(bio)) + bio_integrity_advance(bio, bytes); + + bio->bi_sector += bytes >> 9; + bio->bi_size -= bytes; + + if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + return; + + while (bytes) { + if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { + WARN_ONCE(1, "bio idx %d >= vcnt %d\n", + bio->bi_idx, bio->bi_vcnt); + break; + } + + if (bytes >= bio_iovec(bio)->bv_len) { + bytes -= bio_iovec(bio)->bv_len; + bio->bi_idx++; + } else { + bio_iovec(bio)->bv_len -= bytes; + bio_iovec(bio)->bv_offset += bytes; + bytes = 0; + } + } +} +EXPORT_SYMBOL(bio_advance); + struct bio_map_data { struct bio_vec *iovecs; struct sg_iovec *sgvecs; diff --git a/include/linux/bio.h b/include/linux/bio.h index 669b1cb18fee..fcb4dba2d8ea 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -248,6 +248,8 @@ extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); +extern void bio_advance(struct bio *, unsigned); + extern void bio_init(struct bio *); extern void bio_reset(struct bio *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cdf11191e645..c178d25e588b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -197,6 +197,8 @@ enum rq_flag_bits { REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK +#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) + /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) -- GitLab From f79ea4161434b31e351658283b24e92c3e570142 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Sep 2012 16:38:30 -0700 Subject: [PATCH 0159/3163] block: Refactor blk_update_request() Converts it to use bio_advance(), simplifying it quite a bit in the process. Note that req_bio_endio() now always calls bio_advance() - which means it always loops over the biovec, not just on partial completions. Don't expect it to affect performance, but worth noting. Tested it by forcing partial updates, and dumping before and after on various bio/bvec fields when doing a partial update. 
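For intuition, a worked pass through the new loop (illustrative numbers, not from the testing above): with two 4096-byte bios on the request and nr_bytes = 6144,

	bio_bytes = min(bio->bi_size, nr_bytes);
		/* 1st bio: min(4096, 6144) = 4096 -> bio fully ended, nr_bytes = 2048 */
		/* 2nd bio: min(4096, 2048) = 2048 -> req_bio_endio() advances it      */

so partial completions fall out of the same req_bio_endio() path instead of the old next_idx/bio_nbytes bookkeeping.
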
Signed-off-by: Kent Overstreet CC: Jens Axboe --- block/blk-core.c | 80 ++++++++---------------------------------------- 1 file changed, 12 insertions(+), 68 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 074b758efc42..86a1afeef606 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -158,20 +158,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio, else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; - if (unlikely(nbytes > bio->bi_size)) { - printk(KERN_ERR "%s: want %u bytes done, %u left\n", - __func__, nbytes, bio->bi_size); - nbytes = bio->bi_size; - } - if (unlikely(rq->cmd_flags & REQ_QUIET)) set_bit(BIO_QUIET, &bio->bi_flags); - bio->bi_size -= nbytes; - bio->bi_sector += (nbytes >> 9); - - if (bio_integrity(bio)) - bio_integrity_advance(bio, nbytes); + bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) @@ -2252,8 +2242,7 @@ EXPORT_SYMBOL(blk_fetch_request); **/ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) { - int total_bytes, bio_nbytes, next_idx = 0; - struct bio *bio; + int total_bytes; if (!req->bio) return false; @@ -2299,56 +2288,21 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) blk_account_io_completion(req, nr_bytes); - total_bytes = bio_nbytes = 0; - while ((bio = req->bio) != NULL) { - int nbytes; + total_bytes = 0; + while (req->bio) { + struct bio *bio = req->bio; + unsigned bio_bytes = min(bio->bi_size, nr_bytes); - if (nr_bytes >= bio->bi_size) { + if (bio_bytes == bio->bi_size) req->bio = bio->bi_next; - nbytes = bio->bi_size; - req_bio_endio(req, bio, nbytes, error); - next_idx = 0; - bio_nbytes = 0; - } else { - int idx = bio->bi_idx + next_idx; - - if (unlikely(idx >= bio->bi_vcnt)) { - blk_dump_rq_flags(req, "__end_that"); - printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", - __func__, idx, bio->bi_vcnt); - break; - } - - nbytes = bio_iovec_idx(bio, idx)->bv_len; - BIO_BUG_ON(nbytes > bio->bi_size); - - /* - * not a complete bvec done - */ - if (unlikely(nbytes > nr_bytes)) { - bio_nbytes += nr_bytes; - total_bytes += nr_bytes; - break; - } - /* - * advance to the next vector - */ - next_idx++; - bio_nbytes += nbytes; - } + req_bio_endio(req, bio, bio_bytes, error); - total_bytes += nbytes; - nr_bytes -= nbytes; + total_bytes += bio_bytes; + nr_bytes -= bio_bytes; - bio = req->bio; - if (bio) { - /* - * end more in this run, or just return 'not-done' - */ - if (unlikely(nr_bytes <= 0)) - break; - } + if (!nr_bytes) + break; } /* @@ -2364,16 +2318,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) return false; } - /* - * if the request wasn't completed, update state - */ - if (bio_nbytes) { - req_bio_endio(req, bio, bio_nbytes, error); - bio->bi_idx += next_idx; - bio_iovec(bio)->bv_offset += nr_bytes; - bio_iovec(bio)->bv_len -= nr_bytes; - } - req->__data_len -= total_bytes; req->buffer = bio_data(req->bio); -- GitLab From fb9e3534768c1daa0bd09dd00d1a8828e15079c8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Sep 2012 21:41:20 -0700 Subject: [PATCH 0160/3163] md: Convert md_trim_bio() to use bio_advance() Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown Acked-by: NeilBrown --- drivers/md/md.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index fcb878f88796..d323676580a9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ 
-194,21 +194,12 @@ void md_trim_bio(struct bio *bio, int offset, int size) if (offset == 0 && size == bio->bi_size) return; - bio->bi_sector += offset; - bio->bi_size = size; - offset <<= 9; clear_bit(BIO_SEG_VALID, &bio->bi_flags); - while (bio->bi_idx < bio->bi_vcnt && - bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { - /* remove this whole bio_vec */ - offset -= bio->bi_io_vec[bio->bi_idx].bv_len; - bio->bi_idx++; - } - if (bio->bi_idx < bio->bi_vcnt) { - bio->bi_io_vec[bio->bi_idx].bv_offset += offset; - bio->bi_io_vec[bio->bi_idx].bv_len -= offset; - } + bio_advance(bio, offset << 9); + + bio->bi_size = size; + /* avoid any complications with bi_idx being non-zero*/ if (bio->bi_idx) { memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, -- GitLab From f73a1c7d117d07a96d89475066188a2b79e53c48 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 25 Sep 2012 15:05:12 -0700 Subject: [PATCH 0161/3163] block: Add bio_end_sector() Just a little convenience macro - main reason to add it now is preparing for immutable bio vecs, it'll reduce the size of the patch that puts bi_sector/bi_size/bi_idx into a struct bvec_iter. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Lars Ellenberg CC: Jiri Kosina CC: Alasdair Kergon CC: dm-devel@redhat.com CC: Neil Brown CC: Martin Schwidefsky CC: Heiko Carstens CC: linux-s390@vger.kernel.org CC: Chris Mason CC: Steven Whitehouse Acked-by: Steven Whitehouse --- block/blk-core.c | 2 +- block/cfq-iosched.c | 7 ++----- block/deadline-iosched.c | 2 +- drivers/block/brd.c | 3 +-- drivers/block/pktcdvd.c | 6 +++--- drivers/md/dm-stripe.c | 2 +- drivers/md/dm-verity.c | 2 +- drivers/md/faulty.c | 6 ++---- drivers/md/linear.c | 3 +-- drivers/md/raid1.c | 4 ++-- drivers/md/raid5.c | 14 +++++++------- drivers/s390/block/dcssblk.c | 3 +-- fs/btrfs/extent_io.c | 3 +-- fs/gfs2/lops.c | 2 +- include/linux/bio.h | 1 + 15 files changed, 26 insertions(+), 34 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 86a1afeef606..7236b826f4a1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1586,7 +1586,7 @@ static void handle_bad_sector(struct bio *bio) printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", bdevname(bio->bi_bdev, b), bio->bi_rw, - (unsigned long long)bio->bi_sector + bio_sectors(bio), + (unsigned long long)bio_end_sector(bio), (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); set_bit(BIO_EOF, &bio->bi_flags); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4f0ade74cfd0..d5cd3131c57a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) return NULL; cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); - if (cfqq) { - sector_t sector = bio->bi_sector + bio_sectors(bio); - - return elv_rb_find(&cfqq->sort_list, sector); - } + if (cfqq) + return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio)); return NULL; } diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 90037b5eb17f..ba19a3afab79 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) * check for front merge */ if (dd->front_merges) { - sector_t sector = bio->bi_sector + bio_sectors(bio); + sector_t sector = bio_end_sector(bio); __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); if (__rq) { diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 531ceb31d0ff..f1a29f8e9d33 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -334,8 
+334,7 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) int err = -EIO; sector = bio->bi_sector; - if (sector + (bio->bi_size >> SECTOR_SHIFT) > - get_capacity(bdev->bd_disk)) + if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) goto out; if (unlikely(bio->bi_rw & REQ_DISCARD)) { diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 2e7de7a59bfc..26938e8e2fc3 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -901,7 +901,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) pd->iosched.successive_reads += bio->bi_size >> 10; else { pd->iosched.successive_reads = 0; - pd->iosched.last_write = bio->bi_sector + bio_sectors(bio); + pd->iosched.last_write = bio_end_sector(bio); } if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) { if (pd->read_speed == pd->write_speed) { @@ -2454,7 +2454,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) zone = ZONE(bio->bi_sector, pd); VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n", (unsigned long long)bio->bi_sector, - (unsigned long long)(bio->bi_sector + bio_sectors(bio))); + (unsigned long long)bio_end_sector(bio)); /* Check if we have to split the bio */ { @@ -2462,7 +2462,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) sector_t last_zone; int first_sectors; - last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd); + last_zone = ZONE(bio_end_sector(bio) - 1, pd); if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index d8837d313f54..ea5e878a30b9 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -258,7 +258,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio, sector_t begin, end; stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin); - stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio), + stripe_map_range_sector(sc, bio_end_sector(bio), target_stripe, &end); if (begin < end) { bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 6ad538375c3c..923115d08baa 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -472,7 +472,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) return -EIO; } - if ((bio->bi_sector + bio_sectors(bio)) >> + if (bio_end_sector(bio) >> (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { DMERR_LIMIT("io out of range"); return -EIO; diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 5e7dc772f5de..3193aefe982b 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -185,8 +185,7 @@ static void make_request(struct mddev *mddev, struct bio *bio) return; } - if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9), - WRITE)) + if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE)) failit = 1; if (check_mode(conf, WritePersistent)) { add_sector(conf, bio->bi_sector, WritePersistent); @@ -196,8 +195,7 @@ static void make_request(struct mddev *mddev, struct bio *bio) failit = 1; } else { /* read request */ - if (check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9), - READ)) + if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ)) failit = 1; if (check_mode(conf, ReadTransient)) failit = 1; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 21014836bdbf..f03fabd2b37b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ 
-317,8 +317,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) bio_io_error(bio); return; } - if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - tmp_dev->end_sector)) { + if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) { /* This bio crosses a device boundary, so we have to * split it. */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fd86b372692d..4d8c2e0a6bad 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1018,7 +1018,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) md_write_start(mddev, bio); /* wait on superblock update early */ if (bio_data_dir(bio) == WRITE && - bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo && + bio_end_sector(bio) > mddev->suspend_lo && bio->bi_sector < mddev->suspend_hi) { /* As the suspend_* range is controlled by * userspace, we want an interruptible @@ -1029,7 +1029,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) flush_signals(current); prepare_to_wait(&conf->wait_barrier, &w, TASK_INTERRUPTIBLE); - if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo || + if (bio_end_sector(bio) <= mddev->suspend_lo || bio->bi_sector >= mddev->suspend_hi) break; schedule(); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3ee2912889e7..68706970d217 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2384,11 +2384,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in } else bip = &sh->dev[dd_idx].toread; while (*bip && (*bip)->bi_sector < bi->bi_sector) { - if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) + if (bio_end_sector(*bip) > bi->bi_sector) goto overlap; bip = & (*bip)->bi_next; } - if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9)) + if (*bip && (*bip)->bi_sector < bio_end_sector(bi)) goto overlap; BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); @@ -2404,8 +2404,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && bi && bi->bi_sector <= sector; bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { - if (bi->bi_sector + (bi->bi_size>>9) >= sector) - sector = bi->bi_sector + (bi->bi_size>>9); + if (bio_end_sector(bi) >= sector) + sector = bio_end_sector(bi); } if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); @@ -3941,7 +3941,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) 0, &dd_idx, NULL); - end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9); + end_sector = bio_end_sector(align_bi); rcu_read_lock(); rdev = rcu_dereference(conf->disks[dd_idx].replacement); if (!rdev || test_bit(Faulty, &rdev->flags) || @@ -4216,7 +4216,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) } logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); - last_sector = bi->bi_sector + (bi->bi_size>>9); + last_sector = bio_end_sector(bi); bi->bi_next = NULL; bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ @@ -4679,7 +4679,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); sector = raid5_compute_sector(conf, logical_sector, 0, &dd_idx, NULL); - last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); + last_sector = bio_end_sector(raid_bio); for (; logical_sector < last_sector; logical_sector += STRIPE_SECTORS, diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 
b6ad0de07930..12d08b4529e9 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -826,8 +826,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0) /* Request is not page-aligned. */ goto fail; - if (((bio->bi_size >> 9) + bio->bi_sector) - > get_capacity(bio->bi_bdev->bd_disk)) { + if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) { /* Request beyond end of DCSS segment. */ goto fail; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f173c5af6461..bed072aa461f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2527,8 +2527,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (old_compressed) contig = bio->bi_sector == sector; else - contig = bio->bi_sector + (bio->bi_size >> 9) == - sector; + contig = bio_end_sector(bio) == sector; if (prev_bio_flags != bio_flags || !contig || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index a5055977a214..5c37ef982390 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -300,7 +300,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno) u64 nblk; if (bio) { - nblk = bio->bi_sector + bio_sectors(bio); + nblk = bio_end_sector(bio); nblk >>= sdp->sd_fsb2bb_shift; if (blkno == nblk) return bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index fcb4dba2d8ea..20507eb7c979 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -67,6 +67,7 @@ #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) +#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio))) static inline unsigned int bio_cur_bytes(struct bio *bio) { -- GitLab From aa8b57aa3d1c06ca53312294ee6dfc767ee3ddb3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 5 Feb 2013 15:19:29 -0800 Subject: [PATCH 0162/3163] block: Use bio_sectors() more consistently Bunch of places in the code weren't using it where they could be - this'll reduce the size of the patch that puts bi_sector/bi_size/bi_idx into a struct bvec_iter. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: "Ed L. 
Cashin" CC: Nick Piggin CC: Jiri Kosina CC: Jim Paris CC: Geoff Levand CC: Alasdair Kergon CC: dm-devel@redhat.com CC: Neil Brown CC: Steven Rostedt Acked-by: Ed Cashin --- drivers/block/pktcdvd.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/raid0.c | 6 +++--- drivers/md/raid1.c | 17 ++++++++--------- drivers/md/raid10.c | 24 +++++++++++------------- drivers/md/raid5.c | 8 ++++---- fs/btrfs/volumes.c | 2 +- include/trace/events/block.h | 12 ++++++------ 8 files changed, 35 insertions(+), 38 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 26938e8e2fc3..2c27744b9ca6 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2433,7 +2433,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) cloned_bio->bi_bdev = pd->bdev; cloned_bio->bi_private = psd; cloned_bio->bi_end_io = pkt_end_io_read_cloned; - pd->stats.secs_r += bio->bi_size >> 9; + pd->stats.secs_r += bio_sectors(bio); pkt_queue_bio(pd, cloned_bio); return; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d053098c6a91..699b5be68d31 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -458,7 +458,7 @@ static void map_region(struct dm_io_region *io, struct mirror *m, { io->bdev = m->dev->bdev; io->sector = map_sector(m, bio); - io->count = bio->bi_size >> 9; + io->count = bio_sectors(bio); } static void hold_bio(struct mirror_set *ms, struct bio *bio) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 0505452de8d6..23a38afec351 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -502,11 +502,11 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev, { if (likely(is_power_of_2(chunk_sects))) { return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) - + (bio->bi_size >> 9)); + + bio_sectors(bio)); } else{ sector_t sector = bio->bi_sector; return chunk_sects >= (sector_div(sector, chunk_sects) - + (bio->bi_size >> 9)); + + bio_sectors(bio)); } } @@ -567,7 +567,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) printk("md/raid0:%s: make_request bug: can't convert block across chunks" " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects / 2, - (unsigned long long)bio->bi_sector, bio->bi_size >> 10); + (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2); bio_io_error(bio); return; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 4d8c2e0a6bad..f741c9fe25c8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -267,7 +267,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio) (bio_data_dir(bio) == WRITE) ? 
"write" : "read", (unsigned long long) bio->bi_sector, (unsigned long long) bio->bi_sector + - (bio->bi_size >> 9) - 1); + bio_sectors(bio) - 1); call_bio_endio(r1_bio); } @@ -458,7 +458,7 @@ static void raid1_end_write_request(struct bio *bio, int error) " %llu-%llu\n", (unsigned long long) mbio->bi_sector, (unsigned long long) mbio->bi_sector + - (mbio->bi_size >> 9) - 1); + bio_sectors(mbio) - 1); call_bio_endio(r1_bio); } } @@ -1049,7 +1049,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); r1_bio->master_bio = bio; - r1_bio->sectors = bio->bi_size >> 9; + r1_bio->sectors = bio_sectors(bio); r1_bio->state = 0; r1_bio->mddev = mddev; r1_bio->sector = bio->bi_sector; @@ -1127,7 +1127,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); r1_bio->master_bio = bio; - r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + r1_bio->sectors = bio_sectors(bio) - sectors_handled; r1_bio->state = 0; r1_bio->mddev = mddev; r1_bio->sector = bio->bi_sector + sectors_handled; @@ -1329,14 +1329,14 @@ static void make_request(struct mddev *mddev, struct bio * bio) /* Mustn't call r1_bio_write_done before this next test, * as it could result in the bio being freed. */ - if (sectors_handled < (bio->bi_size >> 9)) { + if (sectors_handled < bio_sectors(bio)) { r1_bio_write_done(r1_bio); /* We need another r1_bio. It has already been counted * in bio->bi_phys_segments */ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); r1_bio->master_bio = bio; - r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + r1_bio->sectors = bio_sectors(bio) - sectors_handled; r1_bio->state = 0; r1_bio->mddev = mddev; r1_bio->sector = bio->bi_sector + sectors_handled; @@ -1947,7 +1947,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) wbio->bi_rw = WRITE; wbio->bi_end_io = end_sync_write; atomic_inc(&r1_bio->remaining); - md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); + md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio)); generic_make_request(wbio); } @@ -2284,8 +2284,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); r1_bio->master_bio = mbio; - r1_bio->sectors = (mbio->bi_size >> 9) - - sectors_handled; + r1_bio->sectors = bio_sectors(mbio) - sectors_handled; r1_bio->state = 0; set_bit(R1BIO_ReadError, &r1_bio->state); r1_bio->mddev = mddev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 77b562d18a90..5ee14ab16a05 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1169,7 +1169,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) /* If this request crosses a chunk boundary, we need to * split it. This will only happen for 1 PAGE (or less) requests. 
*/ - if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9) + if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio) > chunk_sects && (conf->geo.near_copies < conf->geo.raid_disks || conf->prev.near_copies < conf->prev.raid_disks))) { @@ -1209,7 +1209,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) bad_map: printk("md/raid10:%s: make_request bug: can't convert block across chunks" " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, - (unsigned long long)bio->bi_sector, bio->bi_size >> 10); + (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2); bio_io_error(bio); return; @@ -1224,7 +1224,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) */ wait_barrier(conf); - sectors = bio->bi_size >> 9; + sectors = bio_sectors(bio); while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && bio->bi_sector < conf->reshape_progress && bio->bi_sector + sectors > conf->reshape_progress) { @@ -1326,8 +1326,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); r10_bio->master_bio = bio; - r10_bio->sectors = ((bio->bi_size >> 9) - - sectors_handled); + r10_bio->sectors = bio_sectors(bio) - sectors_handled; r10_bio->state = 0; r10_bio->mddev = mddev; r10_bio->sector = bio->bi_sector + sectors_handled; @@ -1569,7 +1568,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) * after checking if we need to go around again. */ - if (sectors_handled < (bio->bi_size >> 9)) { + if (sectors_handled < bio_sectors(bio)) { one_write_done(r10_bio); /* We need another r10_bio. It has already been counted * in bio->bi_phys_segments. @@ -1577,7 +1576,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); r10_bio->master_bio = bio; - r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + r10_bio->sectors = bio_sectors(bio) - sectors_handled; r10_bio->mddev = mddev; r10_bio->sector = bio->bi_sector + sectors_handled; @@ -2103,7 +2102,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) d = r10_bio->devs[i].devnum; atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); - md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9); + md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio)); tbio->bi_sector += conf->mirrors[d].rdev->data_offset; tbio->bi_bdev = conf->mirrors[d].rdev->bdev; @@ -2128,7 +2127,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) d = r10_bio->devs[i].devnum; atomic_inc(&r10_bio->remaining); md_sync_acct(conf->mirrors[d].replacement->bdev, - tbio->bi_size >> 9); + bio_sectors(tbio)); generic_make_request(tbio); } @@ -2254,13 +2253,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) wbio2 = r10_bio->devs[1].repl_bio; if (wbio->bi_end_io) { atomic_inc(&conf->mirrors[d].rdev->nr_pending); - md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); + md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); generic_make_request(wbio); } if (wbio2 && wbio2->bi_end_io) { atomic_inc(&conf->mirrors[d].replacement->nr_pending); md_sync_acct(conf->mirrors[d].replacement->bdev, - wbio2->bi_size >> 9); + bio_sectors(wbio2)); generic_make_request(wbio2); } } @@ -2690,8 +2689,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); r10_bio->master_bio = mbio; - r10_bio->sectors = 
(mbio->bi_size >> 9) - - sectors_handled; + r10_bio->sectors = bio_sectors(mbio) - sectors_handled; r10_bio->state = 0; set_bit(R10BIO_ReadError, &r10_bio->state); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 68706970d217..4e0f87e462ce 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -90,7 +90,7 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) */ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) { - int sectors = bio->bi_size >> 9; + int sectors = bio_sectors(bio); if (bio->bi_sector + sectors < sector + STRIPE_SECTORS) return bio->bi_next; else @@ -3804,7 +3804,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) { sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); unsigned int chunk_sectors = mddev->chunk_sectors; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bio_sectors(bio); if (mddev->new_chunk_sectors < mddev->chunk_sectors) chunk_sectors = mddev->new_chunk_sectors; @@ -3894,7 +3894,7 @@ static int bio_fits_rdev(struct bio *bi) { struct request_queue *q = bdev_get_queue(bi->bi_bdev); - if ((bi->bi_size>>9) > queue_max_sectors(q)) + if (bio_sectors(bi) > queue_max_sectors(q)) return 0; blk_recount_segments(q, bi); if (bi->bi_phys_segments > queue_max_segments(q)) @@ -3964,7 +3964,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); if (!bio_fits_rdev(align_bi) || - is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, + is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi), &first_bad, &bad_sectors)) { /* too big in some way, or has a known bad block */ bio_put(align_bi); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5989a92236f7..d90e0485e01b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5166,7 +5166,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, } prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; - if ((bio->bi_size >> 9) > max_sectors) + if (bio_sectors(bio) > max_sectors) return 0; if (!q->merge_bvec_fn) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 9961726523d0..5a28843725df 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce, __entry->dev = bio->bi_bdev ? bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete, __entry->dev = bio->bi_bdev ? 
bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); __entry->error = error; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); ), @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq, TP_fast_assign( __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; __entry->sector = bio ? bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, bio ? bio->bi_rw : 0, __entry->nr_sector); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); @@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); -- GitLab From 5b83636ae3c3b4f87d02a5929ad4dee831534db0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Sep 2012 15:20:38 -0700 Subject: [PATCH 0163/3163] block: Change bio_split() to respect the current value of bi_idx In the current code bio_split() won't be seeing partially completed bios so this doesn't change any behaviour, but this makes the code a bit clearer as to what bio_split() actually requires. The immediate purpose of the patch is removing unnecessary bi_idx references, but the end goal is to allow partially completed bios to be submitted, which along with immutable biovecs enables efficient bio splitting. Some of the callers were (double) checking that bios could be split, so update their checks too. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Lars Ellenberg CC: Neil Brown CC: Martin K. Petersen --- drivers/md/raid0.c | 3 +-- drivers/md/raid10.c | 3 +-- fs/bio-integrity.c | 4 ++-- fs/bio.c | 7 +++---- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 23a38afec351..fcf65e512cf5 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -527,8 +527,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) sector_t sector = bio->bi_sector; struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ - if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) || - bio->bi_idx != 0) + if (bio_segments(bio) > 1) goto bad_map; /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5ee14ab16a05..2e29df960bf5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1175,8 +1175,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) || conf->prev.near_copies < conf->prev.raid_disks))) { struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ - if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) || - bio->bi_idx != 0) + if (bio_segments(bio) > 1) goto bad_map; /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8c4c604c840d..ca7b02dbf09d 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -661,8 +661,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->bio1.bi_integrity = &bp->bip1; bp->bio2.bi_integrity = &bp->bip2; - bp->iv1 = bip->bip_vec[0]; - bp->iv2 = bip->bip_vec[0]; + bp->iv1 = bip->bip_vec[bip->bip_idx]; + bp->iv2 = bip->bip_vec[bip->bip_idx]; bp->bip1.bip_vec = &bp->iv1; bp->bip2.bip_vec = &bp->iv2; diff --git a/fs/bio.c b/fs/bio.c index 7edc08d2246c..f1b4c1651089 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1620,8 +1620,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) trace_block_split(bdev_get_queue(bi->bi_bdev), bi, bi->bi_sector + first_sectors); - BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0); - BUG_ON(bi->bi_idx != 0); + BUG_ON(bio_segments(bi) > 1); atomic_set(&bp->cnt, 3); bp->error = 0; bp->bio1 = *bi; @@ -1631,8 +1630,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) bp->bio1.bi_size = first_sectors << 9; if (bi->bi_vcnt != 0) { - bp->bv1 = bi->bi_io_vec[0]; - bp->bv2 = bi->bi_io_vec[0]; + bp->bv1 = *bio_iovec(bi); + bp->bv2 = *bio_iovec(bi); if (bio_is_rw(bi)) { bp->bv2.bv_offset += first_sectors << 9; -- GitLab From 4f2ac93c175c4922bdddbfec6cad94b32cea0070 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Sep 2012 15:23:14 -0700 Subject: [PATCH 0164/3163] block: Remove bi_idx references For immutable bvecs, all bi_idx usage needs to be audited - so here we're removing all the unnecessary uses. Most of these are places where it was being initialized on a bio that was just allocated, a few others are conversions to standard macros. 
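Schematically, the conversions all follow the same before/after shape (sketch only, using the existing macros from include/linux/bio.h):

	bv    = &bio->bi_io_vec[bio->bi_idx];	/* before */
	bv    = bio_iovec(bio);			/* after  */

	nsegs = bio->bi_vcnt - bio->bi_idx;	/* before */
	nsegs = bio_segments(bio);		/* after  */

The dropped "bi_idx = 0" assignments were redundant because bio_init() zeroes the whole bio, so a freshly allocated bio already has bi_idx == 0.
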
Signed-off-by: Kent Overstreet CC: Jens Axboe --- drivers/block/aoe/aoecmd.c | 2 +- drivers/block/floppy.c | 1 - drivers/md/dm-verity.c | 2 +- drivers/md/raid10.c | 1 - fs/buffer.c | 1 - fs/jfs/jfs_logmgr.c | 2 -- fs/logfs/dev_bdev.c | 5 ----- mm/page_io.c | 1 - 8 files changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 25ef5c014fca..8188ad131b97 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -927,7 +927,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio) buf->resid = bio->bi_size; buf->sector = bio->bi_sector; bio_pageinc(bio); - buf->bv = bv = &bio->bi_io_vec[bio->bi_idx]; + buf->bv = bio_iovec(bio); buf->bv_resid = bv->bv_len; WARN_ON(buf->bv_resid == 0); } diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2ddd64a9ffde..83232639034e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3777,7 +3777,6 @@ static int __floppy_read_block_0(struct block_device *bdev) bio_vec.bv_len = size; bio_vec.bv_offset = 0; bio.bi_vcnt = 1; - bio.bi_idx = 0; bio.bi_size = size; bio.bi_bdev = bdev; bio.bi_sector = 0; diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 923115d08baa..4f06d9adf1ed 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -490,7 +490,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) bio->bi_end_io = verity_end_io; bio->bi_private = io; - io->io_vec_size = bio->bi_vcnt - bio->bi_idx; + io->io_vec_size = bio_segments(bio); if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE) io->io_vec = io->io_vec_inline; else diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 2e29df960bf5..6ffb6c08aec5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4386,7 +4386,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); read_bio->bi_flags |= 1 << BIO_UPTODATE; read_bio->bi_vcnt = 0; - read_bio->bi_idx = 0; read_bio->bi_size = 0; r10_bio->master_bio = read_bio; r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; diff --git a/fs/buffer.c b/fs/buffer.c index b4dcb34c9635..ecd3792ae0e9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2979,7 +2979,6 @@ int submit_bh(int rw, struct buffer_head * bh) bio->bi_io_vec[0].bv_offset = bh_offset(bh); bio->bi_vcnt = 1; - bio->bi_idx = 0; bio->bi_size = bh->b_size; bio->bi_end_io = end_bio_bh_io_sync; diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 2eb952c41a69..8ae5e350da43 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2004,7 +2004,6 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; - bio->bi_idx = 0; bio->bi_size = LOGPSIZE; bio->bi_end_io = lbmIODone; @@ -2145,7 +2144,6 @@ static void lbmStartIO(struct lbuf * bp) bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; - bio->bi_idx = 0; bio->bi_size = LOGPSIZE; bio->bi_end_io = lbmIODone; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index e784a217b500..550475ca6a0e 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -32,7 +32,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) bio_vec.bv_len = PAGE_SIZE; bio_vec.bv_offset = 0; bio.bi_vcnt = 1; - bio.bi_idx = 0; bio.bi_size = PAGE_SIZE; bio.bi_bdev = bdev; bio.bi_sector = page->index * (PAGE_SIZE >> 9); @@ -108,7 +107,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, if (i >= max_pages) { 
/* Block layer cannot split bios :( */ bio->bi_vcnt = i; - bio->bi_idx = 0; bio->bi_size = i * PAGE_SIZE; bio->bi_bdev = super->s_bdev; bio->bi_sector = ofs >> 9; @@ -136,7 +134,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, unlock_page(page); } bio->bi_vcnt = nr_pages; - bio->bi_idx = 0; bio->bi_size = nr_pages * PAGE_SIZE; bio->bi_bdev = super->s_bdev; bio->bi_sector = ofs >> 9; @@ -202,7 +199,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, if (i >= max_pages) { /* Block layer cannot split bios :( */ bio->bi_vcnt = i; - bio->bi_idx = 0; bio->bi_size = i * PAGE_SIZE; bio->bi_bdev = super->s_bdev; bio->bi_sector = ofs >> 9; @@ -224,7 +220,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, bio->bi_io_vec[i].bv_offset = 0; } bio->bi_vcnt = nr_pages; - bio->bi_idx = 0; bio->bi_size = nr_pages * PAGE_SIZE; bio->bi_bdev = super->s_bdev; bio->bi_sector = ofs >> 9; diff --git a/mm/page_io.c b/mm/page_io.c index 78eee32ee486..8d3c0c088105 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -35,7 +35,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio->bi_io_vec[0].bv_len = PAGE_SIZE; bio->bi_io_vec[0].bv_offset = 0; bio->bi_vcnt = 1; - bio->bi_idx = 0; bio->bi_size = PAGE_SIZE; bio->bi_end_io = end_io; } -- GitLab From 2f477877f8c4be18f054aeb7c4be8cc748cfe932 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Sep 2012 17:31:42 -0700 Subject: [PATCH 0165/3163] block: Remove some unnecessary bi_vcnt usage More prep work for immutable bvecs/efficient bio splitting - usage of bi_vcnt has to be audited, so getting rid of all the unnecessary usage makes that easier. Plus, bio_segments() is really what this code wanted, as it respects the current value of bi_idx. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Eric Moore CC: "James E.J. Bottomley" CC: linux-scsi@vger.kernel.org --- drivers/message/fusion/mptsas.c | 6 +++--- drivers/scsi/libsas/sas_expander.c | 6 +++--- drivers/scsi/mpt2sas/mpt2sas_transport.c | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index fa43c391c8ed..2bb01546df0b 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, } /* do we need to support multiple segments? 
*/ - if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) { + if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) { printk("%s: multiple segments req %u %u, rsp %u %u\n", - __func__, req->bio->bi_vcnt, blk_rq_bytes(req), - rsp->bio->bi_vcnt, blk_rq_bytes(rsp)); + __func__, bio_segments(req->bio), blk_rq_bytes(req), + bio_segments(rsp->bio), blk_rq_bytes(rsp)); return -EINVAL; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index 8c2ffbe6af0f..193e7ae90c3b 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -1939,7 +1939,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, ioc->transport_cmds.status = MPT2_CMD_PENDING; /* Check if the request is split across multiple segments */ - if (req->bio->bi_vcnt > 1) { + if (bio_segments(req->bio) > 1) { u32 offset = 0; /* Allocate memory and copy the request */ @@ -1971,7 +1971,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, /* Check if the response needs to be populated across * multiple segments */ - if (rsp->bio->bi_vcnt > 1) { + if (bio_segments(rsp->bio) > 1) { pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), &pci_dma_in); if (!pci_addr_in) { @@ -2038,7 +2038,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC); sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; - if (req->bio->bi_vcnt > 1) { + if (bio_segments(req->bio) > 1) { ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(req) - 4), pci_dma_out); } else { @@ -2054,7 +2054,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_END_OF_LIST); sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; - if (rsp->bio->bi_vcnt > 1) { + if (bio_segments(rsp->bio) > 1) { ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(rsp) + 4), pci_dma_in); } else { @@ -2099,7 +2099,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, le16_to_cpu(mpi_reply->ResponseDataLength); /* check if the resp needs to be copied from the allocated * pci mem */ - if (rsp->bio->bi_vcnt > 1) { + if (bio_segments(rsp->bio) > 1) { u32 offset = 0; u32 bytes_to_copy = le16_to_cpu(mpi_reply->ResponseDataLength); -- GitLab From 9e882242c6193ae6f416f2d8d8db0d9126bd996b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:41:12 -0700 Subject: [PATCH 0166/3163] block: Add submit_bio_wait(), remove from md Random cleanup - this code was duplicated and it's not really specific to md. Also added the ability to return the actual error code. 
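A minimal usage sketch (bdev, sector and page are placeholders for whatever the caller already has; the return convention is the one added by this patch, 0 on success or the error handed to bio_endio()):

	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
	int err;

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	err = submit_bio_wait(WRITE, bio);	/* REQ_SYNC is ORed in internally */
	if (err)
		pr_err("sync write failed: %d\n", err);
	bio_put(bio);
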
Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown Acked-by: Tejun Heo --- drivers/md/raid1.c | 19 ------------------- drivers/md/raid10.c | 19 ------------------- fs/bio.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 1 + 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f741c9fe25c8..800748d585ca 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2059,25 +2059,6 @@ static void fix_read_error(struct r1conf *conf, int read_disk, } } -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion *)bio->bi_private); -} - -static int submit_bio_wait(int rw, struct bio *bio) -{ - struct completion event; - rw |= REQ_SYNC; - - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); - - return test_bit(BIO_UPTODATE, &bio->bi_flags); -} - static int narrow_write_error(struct r1bio *r1_bio, int i) { struct mddev *mddev = r1_bio->mddev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6ffb6c08aec5..434586d43115 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2529,25 +2529,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 } } -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion *)bio->bi_private); -} - -static int submit_bio_wait(int rw, struct bio *bio) -{ - struct completion event; - rw |= REQ_SYNC; - - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); - - return test_bit(BIO_UPTODATE, &bio->bi_flags); -} - static int narrow_write_error(struct r10bio *r10_bio, int i) { struct bio *bio = r10_bio->master_bio; diff --git a/fs/bio.c b/fs/bio.c index f1b4c1651089..4ce24ee5dcd0 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -752,6 +752,42 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, } EXPORT_SYMBOL(bio_add_page); +struct submit_bio_ret { + struct completion event; + int error; +}; + +static void submit_bio_wait_endio(struct bio *bio, int error) +{ + struct submit_bio_ret *ret = bio->bi_private; + + ret->error = error; + complete(&ret->event); +} + +/** + * submit_bio_wait - submit a bio, and wait until it completes + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bio: The &struct bio which describes the I/O + * + * Simple wrapper around submit_bio(). Returns 0 on success, or the error from + * bio_endio() on failure. 
+ */ +int submit_bio_wait(int rw, struct bio *bio) +{ + struct submit_bio_ret ret; + + rw |= REQ_SYNC; + init_completion(&ret.event); + bio->bi_private = &ret; + bio->bi_end_io = submit_bio_wait_endio; + submit_bio(rw, bio); + wait_for_completion(&ret.event); + + return ret.error; +} +EXPORT_SYMBOL(submit_bio_wait); + /** * bio_advance - increment/complete a bio by some number of bytes * @bio: bio to advance diff --git a/include/linux/bio.h b/include/linux/bio.h index 20507eb7c979..b20a9cd776dd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -249,6 +249,7 @@ extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); +extern int submit_bio_wait(int rw, struct bio *bio); extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *); -- GitLab From 8be185f2c9d54d6bc0bac1445227b67cb14c0b13 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Sep 2012 14:14:43 -0700 Subject: [PATCH 0167/3163] raid10: Use bio_reset() More prep work for immutable bio vecs, mainly getting rid of references to bi_idx. bio_reset was being open coded in a few places. The one in sync_request was a bit nontrivial to convert, so could use some extra eyeballs. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown Acked-by: NeilBrown --- drivers/md/raid10.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 434586d43115..e32e8b1042f8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2077,13 +2077,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * First we need to fixup bv_offset, bv_len and * bi_vecs, as the read request might have corrupted these */ + bio_reset(tbio); + tbio->bi_vcnt = vcnt; tbio->bi_size = r10_bio->sectors << 9; - tbio->bi_idx = 0; - tbio->bi_phys_segments = 0; - tbio->bi_flags &= ~(BIO_POOL_MASK - 1); - tbio->bi_flags |= 1 << BIO_UPTODATE; - tbio->bi_next = NULL; tbio->bi_rw = WRITE; tbio->bi_private = r10_bio; tbio->bi_sector = r10_bio->devs[i].addr; @@ -3090,6 +3087,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, } } bio = r10_bio->devs[0].bio; + bio_reset(bio); bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; @@ -3114,6 +3112,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, rdev = mirror->rdev; if (!test_bit(In_sync, &rdev->flags)) { bio = r10_bio->devs[1].bio; + bio_reset(bio); bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; @@ -3142,6 +3141,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (rdev == NULL || bio == NULL || test_bit(Faulty, &rdev->flags)) break; + bio_reset(bio); bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; @@ -3240,7 +3240,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->devs[i].repl_bio->bi_end_io = NULL; bio = r10_bio->devs[i].bio; - bio->bi_end_io = NULL; + bio_reset(bio); clear_bit(BIO_UPTODATE, &bio->bi_flags); if (conf->mirrors[d].rdev == NULL || test_bit(Faulty, &conf->mirrors[d].rdev->flags)) @@ -3277,6 +3277,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* Need to set up for writing to the replacement */ bio = r10_bio->devs[i].repl_bio; + bio_reset(bio); clear_bit(BIO_UPTODATE, &bio->bi_flags); sector = r10_bio->devs[i].addr; @@ -3310,17 +3311,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, 
} } - for (bio = biolist; bio ; bio=bio->bi_next) { - - bio->bi_flags &= ~(BIO_POOL_MASK - 1); - if (bio->bi_end_io) - bio->bi_flags |= 1 << BIO_UPTODATE; - bio->bi_vcnt = 0; - bio->bi_idx = 0; - bio->bi_phys_segments = 0; - bio->bi_size = 0; - } - nr_sectors = 0; if (sector_nr + max_sync < max_sector) max_sector = sector_nr + max_sync; @@ -4390,17 +4380,14 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, } if (!rdev2 || test_bit(Faulty, &rdev2->flags)) continue; + + bio_reset(b); b->bi_bdev = rdev2->bdev; b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; b->bi_private = r10_bio; b->bi_end_io = end_reshape_write; b->bi_rw = WRITE; - b->bi_flags &= ~(BIO_POOL_MASK - 1); - b->bi_flags |= 1 << BIO_UPTODATE; b->bi_next = blist; - b->bi_vcnt = 0; - b->bi_idx = 0; - b->bi_size = 0; blist = b; } -- GitLab From 2aabaa65ad147bf8238a8a261b15a0c95e9e2afa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Sep 2012 11:26:12 -0700 Subject: [PATCH 0168/3163] raid1: use bio_reset() Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown --- drivers/md/raid1.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 800748d585ca..bb5ec7710c00 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1862,7 +1862,7 @@ static int process_checks(struct r1bio *r1_bio) struct bio *sbio = r1_bio->bios[i]; int size; - if (r1_bio->bios[i]->bi_end_io != end_sync_read) + if (sbio->bi_end_io != end_sync_read) continue; if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { @@ -1887,16 +1887,15 @@ static int process_checks(struct r1bio *r1_bio) continue; } /* fixup the bio for reuse */ + bio_reset(sbio); sbio->bi_vcnt = vcnt; sbio->bi_size = r1_bio->sectors << 9; - sbio->bi_idx = 0; - sbio->bi_phys_segments = 0; - sbio->bi_flags &= ~(BIO_POOL_MASK - 1); - sbio->bi_flags |= 1 << BIO_UPTODATE; - sbio->bi_next = NULL; sbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; sbio->bi_bdev = conf->mirrors[i].rdev->bdev; + sbio->bi_end_io = end_sync_read; + sbio->bi_private = r1_bio; + size = sbio->bi_size; for (j = 0; j < vcnt ; j++) { struct bio_vec *bi; @@ -2439,18 +2438,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp for (i = 0; i < conf->raid_disks * 2; i++) { struct md_rdev *rdev; bio = r1_bio->bios[i]; - - /* take from bio_init */ - bio->bi_next = NULL; - bio->bi_flags &= ~(BIO_POOL_MASK-1); - bio->bi_flags |= 1 << BIO_UPTODATE; - bio->bi_rw = READ; - bio->bi_vcnt = 0; - bio->bi_idx = 0; - bio->bi_phys_segments = 0; - bio->bi_size = 0; - bio->bi_end_io = NULL; - bio->bi_private = NULL; + bio_reset(bio); rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev == NULL || -- GitLab From 2f6db2a7073452b123726e2baab1f37d511bf86d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Sep 2012 12:26:38 -0700 Subject: [PATCH 0169/3163] raid5: use bio_reset() Had to shuffle the code around a bit (where bi_rw and bi_end_io were set), but shouldn't really be anything tricky here Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown --- drivers/md/raid5.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4e0f87e462ce..7bbd28546214 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -567,14 +567,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi = &sh->dev[i].req; rbi = &sh->dev[i].rreq; /* For writing to 
replacement */ - bi->bi_rw = rw; - rbi->bi_rw = rw; - if (rw & WRITE) { - bi->bi_end_io = raid5_end_write_request; - rbi->bi_end_io = raid5_end_write_request; - } else - bi->bi_end_io = raid5_end_read_request; - rcu_read_lock(); rrdev = rcu_dereference(conf->disks[i].replacement); smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */ @@ -649,7 +641,14 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) set_bit(STRIPE_IO_STARTED, &sh->state); + bio_reset(bi); bi->bi_bdev = rdev->bdev; + bi->bi_rw = rw; + bi->bi_end_io = (rw & WRITE) + ? raid5_end_write_request + : raid5_end_read_request; + bi->bi_private = sh; + pr_debug("%s: for %llu schedule op %ld on disc %d\n", __func__, (unsigned long long)sh->sector, bi->bi_rw, i); @@ -663,12 +662,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) bi->bi_rw |= REQ_FLUSH; - bi->bi_flags = 1 << BIO_UPTODATE; - bi->bi_idx = 0; bi->bi_io_vec[0].bv_len = STRIPE_SIZE; bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; - bi->bi_next = NULL; if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), @@ -683,7 +679,13 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) set_bit(STRIPE_IO_STARTED, &sh->state); + bio_reset(rbi); rbi->bi_bdev = rrdev->bdev; + rbi->bi_rw = rw; + BUG_ON(!(rw & WRITE)); + rbi->bi_end_io = raid5_end_write_request; + rbi->bi_private = sh; + pr_debug("%s: for %llu schedule op %ld on " "replacement disc %d\n", __func__, (unsigned long long)sh->sector, @@ -695,12 +697,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) else rbi->bi_sector = (sh->sector + rrdev->data_offset); - rbi->bi_flags = 1 << BIO_UPTODATE; - rbi->bi_idx = 0; rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_size = STRIPE_SIZE; - rbi->bi_next = NULL; trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), rbi, disk_devt(conf->mddev->gendisk), sh->dev[i].sector); -- GitLab From b783863f68c26c5411c50002f98a047a40b94e8e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 15:17:11 -0700 Subject: [PATCH 0170/3163] raid1: Refactor narrow_write_error() to not use bi_idx More bi_idx removal. This code was just open coding bio_clone(). This could probably be further improved by using bio_advance() instead of skipping over null pages, but that'd be a larger rework. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown --- drivers/md/raid1.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index bb5ec7710c00..b36231866336 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2063,8 +2063,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) struct mddev *mddev = r1_bio->mddev; struct r1conf *conf = mddev->private; struct md_rdev *rdev = conf->mirrors[i].rdev; - int vcnt, idx; - struct bio_vec *vec; /* bio has the data to be written to device 'i' where * we just recently had a write error. 
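/*
 * Sketch of the clone-and-trim pattern the hunk below switches to (the
 * names come from the patch itself; this block is illustrative only):
 *
 *	wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
 *	wbio->bi_rw	= WRITE;
 *	wbio->bi_sector	= r1_bio->sector;
 *	wbio->bi_size	= r1_bio->sectors << 9;
 *	md_trim_bio(wbio, sector - r1_bio->sector, sectors);
 *
 * bio_clone_mddev() copies the master bio's biovec outright and
 * md_trim_bio() then narrows the clone to just the failed region, so
 * nothing here needs to look at bi_idx.
 */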
@@ -2092,30 +2090,32 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) & ~(sector_t)(block_sectors - 1)) - sector; - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - vcnt = r1_bio->behind_page_count; - vec = r1_bio->behind_bvecs; - idx = 0; - while (vec[idx].bv_page == NULL) - idx++; - } else { - vcnt = r1_bio->master_bio->bi_vcnt; - vec = r1_bio->master_bio->bi_io_vec; - idx = r1_bio->master_bio->bi_idx; - } while (sect_to_write) { struct bio *wbio; if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors'*/ - wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); - memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); - wbio->bi_sector = r1_bio->sector; + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + unsigned vcnt = r1_bio->behind_page_count; + struct bio_vec *vec = r1_bio->behind_bvecs; + + while (!vec->bv_page) { + vec++; + vcnt--; + } + + wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); + memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); + + wbio->bi_vcnt = vcnt; + } else { + wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); + } + wbio->bi_rw = WRITE; - wbio->bi_vcnt = vcnt; + wbio->bi_sector = r1_bio->sector; wbio->bi_size = r1_bio->sectors << 9; - wbio->bi_idx = idx; md_trim_bio(wbio, sector - r1_bio->sector, sectors); wbio->bi_sector += rdev->data_offset; -- GitLab From 16ac3d63e74f3d6e34e42d6e523b6a61de0020f0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 13:57:51 -0700 Subject: [PATCH 0171/3163] block: Add bio_copy_data() This gets open coded quite a bit and it's tricky to get right, so make a generic version and convert some existing users over to it instead. Signed-off-by: Kent Overstreet CC: Jens Axboe --- fs/bio.c | 70 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 2 ++ 2 files changed, 72 insertions(+) diff --git a/fs/bio.c b/fs/bio.c index 4ce24ee5dcd0..e437f9aae67d 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -829,6 +829,76 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); +/** + * bio_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats + * @src and @dst as linked lists of bios. + * + * Stops when it reaches the end of either @src or @dst - that is, copies + * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios). 
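+ *
+ * Example (sketch): for two single bios whose data pages are already
+ * allocated, and with bi_next cleared as required above:
+ *
+ *	src->bi_next = NULL;
+ *	dst->bi_next = NULL;
+ *	bio_copy_data(dst, src);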
+ */ +void bio_copy_data(struct bio *dst, struct bio *src) +{ + struct bio_vec *src_bv, *dst_bv; + unsigned src_offset, dst_offset, bytes; + void *src_p, *dst_p; + + src_bv = bio_iovec(src); + dst_bv = bio_iovec(dst); + + src_offset = src_bv->bv_offset; + dst_offset = dst_bv->bv_offset; + + while (1) { + if (src_offset == src_bv->bv_offset + src_bv->bv_len) { + src_bv++; + if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) { + src = src->bi_next; + if (!src) + break; + + src_bv = bio_iovec(src); + } + + src_offset = src_bv->bv_offset; + } + + if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) { + dst_bv++; + if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) { + dst = dst->bi_next; + if (!dst) + break; + + dst_bv = bio_iovec(dst); + } + + dst_offset = dst_bv->bv_offset; + } + + bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset, + src_bv->bv_offset + src_bv->bv_len - src_offset); + + src_p = kmap_atomic(src_bv->bv_page); + dst_p = kmap_atomic(dst_bv->bv_page); + + memcpy(dst_p + dst_bv->bv_offset, + src_p + src_bv->bv_offset, + bytes); + + kunmap_atomic(dst_p); + kunmap_atomic(src_p); + + src_offset += bytes; + dst_offset += bytes; + } +} +EXPORT_SYMBOL(bio_copy_data); + struct bio_map_data { struct bio_vec *iovecs; struct sg_iovec *sgvecs; diff --git a/include/linux/bio.h b/include/linux/bio.h index b20a9cd776dd..90d36c65cb70 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -286,6 +286,8 @@ static inline void bio_flush_dcache_pages(struct bio *bi) } #endif +extern void bio_copy_data(struct bio *dst, struct bio *src); + extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, -- GitLab From ffb25dc60ff14f90a581975307b0c1d07e1f362a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Sep 2012 06:44:57 -0700 Subject: [PATCH 0172/3163] pktcdvd: use bio_copy_data() Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Jiri Kosina --- drivers/block/pktcdvd.c | 79 +++++++---------------------------------- 1 file changed, 12 insertions(+), 67 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 2c27744b9ca6..783c96c89b75 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -947,31 +947,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que } } -/* - * Copy CD_FRAMESIZE bytes from src_bio into a destination page - */ -static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct page *dst_page, int dst_offs) -{ - unsigned int copy_size = CD_FRAMESIZE; - - while (copy_size > 0) { - struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); - void *vfrom = kmap_atomic(src_bvl->bv_page) + - src_bvl->bv_offset + offs; - void *vto = page_address(dst_page) + dst_offs; - int len = min_t(int, copy_size, src_bvl->bv_len - offs); - - BUG_ON(len < 0); - memcpy(vto, vfrom, len); - kunmap_atomic(vfrom); - - seg++; - offs = 0; - dst_offs += len; - copy_size -= len; - } -} - /* * Copy all data for this packet to pkt->pages[], so that * a) The number of required segments for the write bio is minimized, which @@ -1325,55 +1300,35 @@ static int pkt_handle_queue(struct pktcdvd_device *pd) */ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) { - struct bio *bio; int f; - int frames_write; struct bio_vec *bvec = pkt->w_bio->bi_io_vec; + bio_reset(pkt->w_bio); + pkt->w_bio->bi_sector = pkt->sector; + pkt->w_bio->bi_bdev = pd->bdev; + pkt->w_bio->bi_end_io = 
pkt_end_io_packet_write; + pkt->w_bio->bi_private = pkt; + + /* XXX: locking? */ for (f = 0; f < pkt->frames; f++) { bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) + BUG(); } + VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt); /* * Fill-in bvec with data from orig_bios. */ - frames_write = 0; spin_lock(&pkt->lock); - bio_list_for_each(bio, &pkt->orig_bios) { - int segment = bio->bi_idx; - int src_offs = 0; - int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9); - int num_frames = bio->bi_size / CD_FRAMESIZE; - BUG_ON(first_frame < 0); - BUG_ON(first_frame + num_frames > pkt->frames); - for (f = first_frame; f < first_frame + num_frames; f++) { - struct bio_vec *src_bvl = bio_iovec_idx(bio, segment); - - while (src_offs >= src_bvl->bv_len) { - src_offs -= src_bvl->bv_len; - segment++; - BUG_ON(segment >= bio->bi_vcnt); - src_bvl = bio_iovec_idx(bio, segment); - } + bio_copy_data(pkt->w_bio, pkt->orig_bios.head); - if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) { - bvec[f].bv_page = src_bvl->bv_page; - bvec[f].bv_offset = src_bvl->bv_offset + src_offs; - } else { - pkt_copy_bio_data(bio, segment, src_offs, - bvec[f].bv_page, bvec[f].bv_offset); - } - src_offs += CD_FRAMESIZE; - frames_write++; - } - } pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); spin_unlock(&pkt->lock); VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n", - frames_write, (unsigned long long)pkt->sector); - BUG_ON(frames_write != pkt->write_size); + pkt->write_size, (unsigned long long)pkt->sector); if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) { pkt_make_local_copy(pkt, bvec); @@ -1383,16 +1338,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) } /* Start the write request */ - bio_reset(pkt->w_bio); - pkt->w_bio->bi_sector = pkt->sector; - pkt->w_bio->bi_bdev = pd->bdev; - pkt->w_bio->bi_end_io = pkt_end_io_packet_write; - pkt->w_bio->bi_private = pkt; - for (f = 0; f < pkt->frames; f++) - if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) - BUG(); - VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt); - atomic_set(&pkt->io_wait, 1); pkt->w_bio->bi_rw = WRITE; pkt_queue_bio(pd, pkt->w_bio); -- GitLab From ff8e0070d1a4d09cb462d3059956530fa67fd91d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Sep 2012 14:11:38 -0700 Subject: [PATCH 0173/3163] pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage In the short term this'll help with code auditing, and if this code ever gets used now it's converted :) Signed-off-by: Kent Overstreet CC: Jiri Kosina --- drivers/block/pktcdvd.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 783c96c89b75..11190424536a 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1156,16 +1156,15 @@ static int pkt_start_recovery(struct packet_data *pkt) new_sector = new_block * (CD_FRAMESIZE >> 9); pkt->sector = new_sector; + bio_reset(pkt->bio); + pkt->bio->bi_bdev = pd->bdev; + pkt->bio->bi_rw = REQ_WRITE; pkt->bio->bi_sector = new_sector; - pkt->bio->bi_next = NULL; - pkt->bio->bi_flags = 1 << BIO_UPTODATE; - pkt->bio->bi_idx = 0; - - BUG_ON(pkt->bio->bi_rw != REQ_WRITE); - BUG_ON(pkt->bio->bi_vcnt != pkt->frames); - BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE); - 
BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write); - BUG_ON(pkt->bio->bi_private != pkt); + pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE; + pkt->bio->bi_vcnt = pkt->frames; + + pkt->bio->bi_end_io = pkt_end_io_packet_write; + pkt->bio->bi_private = pkt; drop_super(sb); return 1; -- GitLab From d3b45c2a056c5df443d1ddf27fbfada65f234af1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 13:49:33 -0700 Subject: [PATCH 0174/3163] raid1: use bio_copy_data() This doesn't really delete any code _yet_, but once immutable bvecs are done we can just delete the rest of the code in that loop. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown --- drivers/md/raid1.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b36231866336..0a3988a25aab 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1906,10 +1906,9 @@ static int process_checks(struct r1bio *r1_bio) else bi->bv_len = size; size -= PAGE_SIZE; - memcpy(page_address(bi->bv_page), - page_address(pbio->bi_io_vec[j].bv_page), - PAGE_SIZE); } + + bio_copy_data(sbio, pbio); } return 0; } -- GitLab From 6bc454d150047fcfbf53346412e64cdf3bf61a79 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:30:37 -0700 Subject: [PATCH 0175/3163] bounce: Refactor __blk_queue_bounce to not use bi_io_vec A bunch of what __blk_queue_bounce() was doing was problematic for the immutable bvec work; this cleans that up and the code is quite a bit smaller, too. The __bio_for_each_segment() in copy_to_high_bio_irq() was changed because that one's looping over the original bio, not the bounce bio - a later patch renames __bio_for_each_segment() -> bio_for_each_segment_all(), and documents that bio_for_each_segment_all() is only for code that owns the bio. Signed-off-by: Kent Overstreet CC: Jens Axboe --- mm/bounce.c | 73 ++++++++++++++--------------------------------------- 1 file changed, 19 insertions(+), 54 deletions(-) diff --git a/mm/bounce.c b/mm/bounce.c index 5f8901768602..55f512af50c7 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from) struct bio_vec *tovec, *fromvec; int i; - __bio_for_each_segment(tovec, to, i, 0) { + bio_for_each_segment(tovec, to, i) { fromvec = from->bi_io_vec + i; /* @@ -218,78 +218,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, mempool_t *pool, int force) { - struct page *page; - struct bio *bio = NULL; - int i, rw = bio_data_dir(*bio_orig); + struct bio *bio; + int rw = bio_data_dir(*bio_orig); struct bio_vec *to, *from; + unsigned i; - bio_for_each_segment(from, *bio_orig, i) { - page = from->bv_page; + bio_for_each_segment(from, *bio_orig, i) + if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q)) + goto bounce; - /* - * is destination page below bounce pfn? 
- */ - if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) - continue; + return; +bounce: + bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set); - /* - * irk, bounce it - */ - if (!bio) { - unsigned int cnt = (*bio_orig)->bi_vcnt; - - bio = bio_alloc(GFP_NOIO, cnt); - memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec)); - } - + bio_for_each_segment(to, bio, i) { + struct page *page = to->bv_page; - to = bio->bi_io_vec + i; + if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) + continue; - to->bv_page = mempool_alloc(pool, q->bounce_gfp); - to->bv_len = from->bv_len; - to->bv_offset = from->bv_offset; inc_zone_page_state(to->bv_page, NR_BOUNCE); + to->bv_page = mempool_alloc(pool, q->bounce_gfp); if (rw == WRITE) { char *vto, *vfrom; - flush_dcache_page(from->bv_page); + flush_dcache_page(page); + vto = page_address(to->bv_page) + to->bv_offset; - vfrom = kmap(from->bv_page) + from->bv_offset; + vfrom = kmap_atomic(page) + to->bv_offset; memcpy(vto, vfrom, to->bv_len); - kunmap(from->bv_page); + kunmap_atomic(vfrom); } } - /* - * no pages bounced - */ - if (!bio) - return; - trace_block_bio_bounce(q, *bio_orig); - /* - * at least one page was bounced, fill in possible non-highmem - * pages - */ - __bio_for_each_segment(from, *bio_orig, i, 0) { - to = bio_iovec_idx(bio, i); - if (!to->bv_page) { - to->bv_page = from->bv_page; - to->bv_len = from->bv_len; - to->bv_offset = from->bv_offset; - } - } - - bio->bi_bdev = (*bio_orig)->bi_bdev; bio->bi_flags |= (1 << BIO_BOUNCED); - bio->bi_sector = (*bio_orig)->bi_sector; - bio->bi_rw = (*bio_orig)->bi_rw; - - bio->bi_vcnt = (*bio_orig)->bi_vcnt; - bio->bi_idx = (*bio_orig)->bi_idx; - bio->bi_size = (*bio_orig)->bi_size; if (pool == page_pool) { bio->bi_end_io = bounce_end_io_write; -- GitLab From d74c6d514fe314b8bdab58b487b25992291577ec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Feb 2013 12:23:11 -0800 Subject: [PATCH 0176/3163] block: Add bio_for_each_segment_all() __bio_for_each_segment() iterates bvecs from the specified index instead of bio->bv_idx. Currently, the only usage is to walk all the bvecs after the bio has been advanced by specifying 0 index. For immutable bvecs, we need to split these apart; bio_for_each_segment() is going to have a different implementation. This will also help document the intent of code that's using it - bio_for_each_segment_all() is only legal to use for code that owns the bio. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Neil Brown CC: Boaz Harrosh --- drivers/block/rbd.c | 2 +- drivers/md/raid1.c | 2 +- fs/bio.c | 12 ++++++------ fs/exofs/ore.c | 2 +- fs/exofs/ore_raid.c | 2 +- include/linux/bio.h | 17 ++++++++++++++--- mm/bounce.c | 2 +- 7 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c81a4c040b9..11e179826b60 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -952,7 +952,7 @@ static struct bio *bio_clone_range(struct bio *bio_src, /* Find first affected segment... 
*/ resid = offset; - __bio_for_each_segment(bv, bio_src, idx, 0) { + bio_for_each_segment(bv, bio_src, idx) { if (resid < bv->bv_len) break; resid -= bv->bv_len; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0a3988a25aab..853482015d3d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1291,7 +1291,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) * know the original bi_idx, so we just free * them all */ - __bio_for_each_segment(bvec, mbio, j, 0) + bio_for_each_segment_all(bvec, mbio, j) bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) atomic_inc(&r1_bio->behind_remaining); diff --git a/fs/bio.c b/fs/bio.c index e437f9aae67d..618f9044c414 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -961,7 +961,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int iov_idx = 0; unsigned int iov_off = 0; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *bv_addr = page_address(bvec->bv_page); unsigned int bv_len = iovecs[i].bv_len; @@ -1143,7 +1143,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, return bio; cleanup: if (!map_data) - bio_for_each_segment(bvec, bio, i) + bio_for_each_segment_all(bvec, bio, i) __free_page(bvec->bv_page); bio_put(bio); @@ -1357,7 +1357,7 @@ static void __bio_unmap_user(struct bio *bio) /* * make sure we dirty pages we wrote to */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { if (bio_data_dir(bio) == READ) set_page_dirty_lock(bvec->bv_page); @@ -1463,7 +1463,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err) int i; char *p = bmd->sgvecs[0].iov_base; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); int len = bmd->iovecs[i].bv_len; @@ -1503,7 +1503,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, if (!reading) { void *p = data; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); memcpy(addr, p, bvec->bv_len); @@ -1789,7 +1789,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index, if (index >= bio->bi_idx) index = bio->bi_vcnt - 1; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { if (i == index) { if (offset > bv->bv_offset) sectors += (offset - bv->bv_offset) / sector_sz; diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index f936cb50dc0d..b74422888604 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -401,7 +401,7 @@ static void _clear_bio(struct bio *bio) struct bio_vec *bv; unsigned i; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { unsigned this_count = bv->bv_len; if (likely(PAGE_SIZE == this_count)) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index b963f38ac298..7682b970d0f1 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -432,7 +432,7 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) if (!bio) continue; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; SetPageUptodate(page); diff --git a/include/linux/bio.h b/include/linux/bio.h index 90d36c65cb70..be2efa09f9bf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -137,16 +137,27 @@ static inline int bio_has_allocated_vec(struct bio *bio) #define bio_io_error(bio) bio_endio((bio), -EIO) /* - * drivers should 
not use the __ version unless they _really_ want to - * run through the entire bio and not just pending pieces + * drivers should not use the __ version unless they _really_ know what + * they're doing */ #define __bio_for_each_segment(bvl, bio, i, start_idx) \ for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ i < (bio)->bi_vcnt; \ bvl++, i++) +/* + * drivers should _never_ use the all version - the bio may have been split + * before it got to the driver and the driver won't own all of it + */ +#define bio_for_each_segment_all(bvl, bio, i) \ + for (i = 0; \ + bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ + i++) + #define bio_for_each_segment(bvl, bio, i) \ - __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) + for (i = (bio)->bi_idx; \ + bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ + i++) /* * get a reference to a bio, so it won't disappear. the intended use is diff --git a/mm/bounce.c b/mm/bounce.c index 55f512af50c7..2ee1b6fef44a 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) /* * free up bounce indirect pages used */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { org_vec = bio_orig->bi_io_vec + i; if (bvec->bv_page == org_vec->bv_page) continue; -- GitLab From cb34e057ad22a1c2c6f2cb6cd1cbd05cc2f28f28 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Sep 2012 15:22:02 -0700 Subject: [PATCH 0177/3163] block: Convert some code to bio_for_each_segment_all() More prep work for immutable bvecs: A few places in the code were either open coding or using the wrong version - fix. After we introduce the bvec iter, it'll no longer be possible to modify the biovec through bio_for_each_segment_all() - it doesn't increment a pointer to the current bvec, you pass in a struct bio_vec (not a pointer) which is updated with what the current biovec would be (taking into account bi_bvec_done and bi_size). So because of that it's more worthwhile to be consistent about bio_for_each_segment()/bio_for_each_segment_all() usage. 
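To make the intended split concrete, a sketch (handle_segment() is a
placeholder, not a real helper):

	struct bio_vec *bv;
	int i;

	/* code that was handed a bio: only the unprocessed part, from bi_idx */
	bio_for_each_segment(bv, bio, i)
		handle_segment(bv);

	/* code that created the bio: every bvec, e.g. freeing pages it allocated */
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);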
Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown CC: Alasdair Kergon CC: dm-devel@redhat.com CC: Alexander Viro --- drivers/md/dm-crypt.c | 3 +-- drivers/md/raid1.c | 10 +++------- fs/bio.c | 20 ++++++++++---------- fs/direct-io.c | 8 ++++---- mm/bounce.c | 2 +- 5 files changed, 19 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 13c15480d940..6d2d41ae9e32 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -858,8 +858,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) unsigned int i; struct bio_vec *bv; - for (i = 0; i < clone->bi_vcnt; i++) { - bv = bio_iovec_idx(clone, i); + bio_for_each_segment_all(bv, clone, i) { BUG_ON(!bv->bv_page); mempool_free(bv->bv_page, cc->page_pool); bv->bv_page = NULL; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 853482015d3d..a7ea954abe1d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -925,7 +925,7 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio) if (unlikely(!bvecs)) return; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment_all(bvec, bio, i) { bvecs[i] = *bvec; bvecs[i].bv_page = alloc_page(GFP_NOIO); if (unlikely(!bvecs[i].bv_page)) @@ -1284,12 +1284,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) struct bio_vec *bvec; int j; - /* Yes, I really want the '__' version so that - * we clear any unused pointer in the io_vec, rather - * than leave them unchanged. This is important - * because when we come to free the pages, we won't - * know the original bi_idx, so we just free - * them all + /* + * We trimmed the bio, so _all is legit */ bio_for_each_segment_all(bvec, mbio, j) bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; diff --git a/fs/bio.c b/fs/bio.c index 618f9044c414..fe3aee90c988 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1548,11 +1548,11 @@ EXPORT_SYMBOL(bio_copy_kern); */ void bio_set_pages_dirty(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (page && !PageCompound(page)) set_page_dirty_lock(page); @@ -1561,11 +1561,11 @@ void bio_set_pages_dirty(struct bio *bio) static void bio_release_pages(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (page) put_page(page); @@ -1614,16 +1614,16 @@ static void bio_dirty_fn(struct work_struct *work) void bio_check_pages_dirty(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int nr_clean_pages = 0; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (PageDirty(page) || PageCompound(page)) { page_cache_release(page); - bvec[i].bv_page = NULL; + bvec->bv_page = NULL; } else { nr_clean_pages++; } diff --git a/fs/direct-io.c b/fs/direct-io.c index f853263cf74f..38484b08a39a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -441,8 +441,8 @@ static struct bio *dio_await_one(struct dio *dio) static int dio_bio_complete(struct dio *dio, struct bio *bio) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec; - int page_no; + struct 
bio_vec *bvec; + unsigned i; if (!uptodate) dio->io_error = -EIO; @@ -450,8 +450,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) if (dio->is_async && dio->rw == READ) { bio_check_pages_dirty(bio); /* transfers ownership */ } else { - for (page_no = 0; page_no < bio->bi_vcnt; page_no++) { - struct page *page = bvec[page_no].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (dio->rw == READ && !PageCompound(page)) set_page_dirty_lock(page); diff --git a/mm/bounce.c b/mm/bounce.c index 2ee1b6fef44a..f5326b24d65d 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -231,7 +231,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, bounce: bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set); - bio_for_each_segment(to, bio, i) { + bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) -- GitLab From a07876064a0b73ab5ef1ebcf14b1cf0231c07858 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Sep 2012 14:03:28 -0700 Subject: [PATCH 0178/3163] block: Add bio_alloc_pages() More utility code to replace stuff that's getting open coded. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown --- drivers/md/raid1.c | 16 +++------------- fs/bio.c | 28 ++++++++++++++++++++++++++++ include/linux/bio.h | 1 + 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a7ea954abe1d..aeb4e3f74791 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data) static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) { struct pool_info *pi = data; - struct page *page; struct r1bio *r1_bio; struct bio *bio; int i, j; @@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) j = 1; while(j--) { bio = r1_bio->bios[j]; - for (i = 0; i < RESYNC_PAGES; i++) { - page = alloc_page(gfp_flags); - if (unlikely(!page)) - goto out_free_pages; + bio->bi_vcnt = RESYNC_PAGES; - bio->bi_io_vec[i].bv_page = page; - bio->bi_vcnt = i+1; - } + if (bio_alloc_pages(bio, gfp_flags)) + goto out_free_bio; } /* If not user-requests, copy the page pointers to all bios */ if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { @@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) return r1_bio; -out_free_pages: - for (j=0 ; j < pi->raid_disks; j++) - for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++) - put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); - j = -1; out_free_bio: while (++j < pi->raid_disks) bio_put(r1_bio->bios[j]); diff --git a/fs/bio.c b/fs/bio.c index fe3aee90c988..e545a440d376 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -829,6 +829,34 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); +/** + * bio_alloc_pages - allocates a single page for each bvec in a bio + * @bio: bio to allocate pages for + * @gfp_mask: flags for allocation + * + * Allocates pages up to @bio->bi_vcnt. + * + * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are + * freed. 
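+ *
+ * Example (sketch, mirroring the raid1 conversion in this patch): size the
+ * bio via bi_vcnt first, then let bio_alloc_pages() fill in the pages:
+ *
+ *	bio->bi_vcnt = RESYNC_PAGES;
+ *	if (bio_alloc_pages(bio, gfp_flags))
+ *		goto out_free_bio;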
+ */ +int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) +{ + int i; + struct bio_vec *bv; + + bio_for_each_segment_all(bv, bio, i) { + bv->bv_page = alloc_page(gfp_mask); + if (!bv->bv_page) { + while (--bv >= bio->bi_io_vec) + __free_page(bv->bv_page); + return -ENOMEM; + } + } + + return 0; +} +EXPORT_SYMBOL(bio_alloc_pages); + /** * bio_copy_data - copy contents of data buffers from one chain of bios to * another diff --git a/include/linux/bio.h b/include/linux/bio.h index be2efa09f9bf..e25378f2f408 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -298,6 +298,7 @@ static inline void bio_flush_dcache_pages(struct bio *bi) #endif extern void bio_copy_data(struct bio *dst, struct bio *src); +extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); -- GitLab From a38352e0ac02dbbd4fa464dc22d1352b5fbd06fd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 25 May 2012 13:03:11 -0700 Subject: [PATCH 0179/3163] block: Add an explicit bio flag for bios that own their bvec This is for the new bio splitting code. When we split a bio, if the split occured on a bvec boundry we reuse the bvec for the new bio. But that means bio_free() can't free it, hence the explicit flag. Signed-off-by: Kent Overstreet CC: Jens Axboe Acked-by: Tejun Heo --- fs/bio.c | 4 +++- include/linux/bio.h | 5 ----- include/linux/blk_types.h | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index e545a440d376..9238a54b562c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -252,7 +252,7 @@ static void bio_free(struct bio *bio) __bio_free(bio); if (bs) { - if (bio_has_allocated_vec(bio)) + if (bio_flagged(bio, BIO_OWNS_VEC)) bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); /* @@ -451,6 +451,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; + + bio->bi_flags |= 1 << BIO_OWNS_VEC; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } diff --git a/include/linux/bio.h b/include/linux/bio.h index e25378f2f408..794bcd0c5039 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -85,11 +85,6 @@ static inline void *bio_data(struct bio *bio) return NULL; } -static inline int bio_has_allocated_vec(struct bio *bio) -{ - return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs; -} - /* * will die */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c178d25e588b..538289ffc704 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -117,6 +117,7 @@ struct bio { * BIO_POOL_IDX() */ #define BIO_RESET_BITS 12 +#define BIO_OWNS_VEC 12 /* bio_free() should free bvec */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) -- GitLab From 29ed7813ce5c4661261aeebddb1b8660e0860223 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Sep 2012 09:54:22 -0700 Subject: [PATCH 0180/3163] bio-integrity: Add explicit field for owner of bip_buf This was the only real user of BIO_CLONED, which didn't have very clear semantics. Convert to its own flag so we can get rid of BIO_CLONED. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. 
Petersen --- fs/bio-integrity.c | 5 ++--- include/linux/bio.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index ca7b02dbf09d..8fb42916d8a2 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -97,9 +97,7 @@ void bio_integrity_free(struct bio *bio) struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_set *bs = bio->bi_pool; - /* A cloned bio doesn't own the integrity metadata */ - if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) - && bip->bip_buf != NULL) + if (bip->bip_owns_buf) kfree(bip->bip_buf); if (bs) { @@ -386,6 +384,7 @@ int bio_integrity_prep(struct bio *bio) return -EIO; } + bip->bip_owns_buf = 1; bip->bip_buf = buf; bip->bip_size = len; bip->bip_sector = bio->bi_sector; diff --git a/include/linux/bio.h b/include/linux/bio.h index 794bcd0c5039..ef24466d8f82 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -187,6 +187,7 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_idx; /* current bip_vec index */ + unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ -- GitLab From 84759c6d18c5144432781ddca037d929ee9db8a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Sep 2011 21:43:05 -0700 Subject: [PATCH 0181/3163] Revert "rw_semaphore: remove up/down_read_non_owner" This reverts commit 11b80f459adaf91a712f95e7734a17655a36bf30. Bcache needs rw semaphores for cache coherency in writeback mode - writes have to take a read lock on a per cache device rw sem, and release it when the bio completes. But since this is for bios it's naturally not in the context of the process that originally took the lock. Signed-off-by: Kent Overstreet CC: Christoph Hellwig CC: David Howells --- include/linux/rwsem.h | 10 ++++++++++ kernel/rwsem.c | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8da67d625e13..0616ffe45702 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -133,10 +133,20 @@ do { \ _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ } while (0); +/* + * Take/release a lock when not the owner will release it. + * + * [ This API should be avoided as much as possible - the + * proper abstraction for this case is completions. 
] + */ +extern void down_read_non_owner(struct rw_semaphore *sem); +extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) +# define down_read_non_owner(sem) down_read(sem) +# define up_read_non_owner(sem) up_read(sem) #endif #endif /* _LINUX_RWSEM_H */ diff --git a/kernel/rwsem.c b/kernel/rwsem.c index b3c6c3fcd847..cfff1435bdfb 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -126,6 +126,15 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) EXPORT_SYMBOL(_down_write_nest_lock); +void down_read_non_owner(struct rw_semaphore *sem) +{ + might_sleep(); + + __down_read(sem); +} + +EXPORT_SYMBOL(down_read_non_owner); + void down_write_nested(struct rw_semaphore *sem, int subclass) { might_sleep(); @@ -136,6 +145,13 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_write_nested); +void up_read_non_owner(struct rw_semaphore *sem) +{ + __up_read(sem); +} + +EXPORT_SYMBOL(up_read_non_owner); + #endif -- GitLab From 1f8e8ed05184eed5f9adf48fb2f6be00a907a181 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Apr 2012 00:39:00 -0400 Subject: [PATCH 0182/3163] Export get_random_int() Needed for bcache - need a cheap source of random numbers for perturbing IO sizes, for rate limiting IO to the SSD. Signed-off-by: Kent Overstreet CC: "Theodore Ts'o" --- drivers/char/random.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index 32a6c5764950..cd9a6211dcad 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1485,6 +1485,7 @@ unsigned int get_random_int(void) return ret; } +EXPORT_SYMBOL(get_random_int); /* * randomize_range() returns a start address such that -- GitLab From 9ca8f8e51003a51382cb4216e9029ee33ccf6c46 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 13 Apr 2012 21:56:43 -0400 Subject: [PATCH 0183/3163] Export blk_fill_rwbs() Exported so it can be used by bcache's tracepoints Signed-off-by: Kent Overstreet CC: Steven Rostedt CC: Frederic Weisbecker CC: Ingo Molnar --- kernel/trace/blktrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e5b8c272eec..20d8a9825f96 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1828,6 +1828,7 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) rwbs[i] = '\0'; } +EXPORT_SYMBOL_GPL(blk_fill_rwbs); #endif /* CONFIG_EVENT_TRACING */ -- GitLab From ea6749c705d9e629ed03c7336cc929fc6014b834 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 27 Dec 2012 22:21:58 -0800 Subject: [PATCH 0184/3163] Export __lockdep_no_validate__ Hack, but bcache needs a way around lockdep for locking during garbage collection - we need to keep multiple btree nodes locked for coalescing and rw_lock_nested() isn't really sufficient or appropriate here. 
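For reference, the usual way a caller opts a lock out of validation with this
key is the existing lockdep_set_novalidate_class() wrapper; a minimal sketch
(the btree_node struct here is illustrative, not bcache's actual layout):

	#include <linux/lockdep.h>
	#include <linux/rwsem.h>

	struct btree_node {
		struct rw_semaphore lock;
	};

	static void btree_node_init(struct btree_node *b)
	{
		init_rwsem(&b->lock);
		/* uses __lockdep_no_validate__ as the lock's class key */
		lockdep_set_novalidate_class(&b->lock);
	}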
Signed-off-by: Kent Overstreet CC: Peter Zijlstra CC: Ingo Molnar --- kernel/lockdep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 259db207b5d9..60248a78bec0 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2997,6 +2997,7 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, EXPORT_SYMBOL_GPL(lockdep_init_map); struct lock_class_key __lockdep_no_validate__; +EXPORT_SYMBOL_GPL(__lockdep_no_validate__); static int print_lock_nested_lock_not_held(struct task_struct *curr, -- GitLab From cafe563591446cf80bfbc2fe3bc72a2e36cf1060 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Mar 2013 16:11:31 -0700 Subject: [PATCH 0185/3163] bcache: A block layer cache Does writethrough and writeback caching, handles unclean shutdown, and has a bunch of other nifty features motivated by real world usage. See the wiki at http://bcache.evilpiepirate.org for more. Signed-off-by: Kent Overstreet --- Documentation/ABI/testing/sysfs-block-bcache | 156 ++ Documentation/bcache.txt | 343 +++ MAINTAINERS | 7 + drivers/md/Kconfig | 2 + drivers/md/Makefile | 1 + drivers/md/bcache/Kconfig | 42 + drivers/md/bcache/Makefile | 7 + drivers/md/bcache/alloc.c | 583 ++++ drivers/md/bcache/bcache.h | 1232 +++++++++ drivers/md/bcache/bset.c | 1190 +++++++++ drivers/md/bcache/bset.h | 379 +++ drivers/md/bcache/btree.c | 2503 ++++++++++++++++++ drivers/md/bcache/btree.h | 405 +++ drivers/md/bcache/closure.c | 348 +++ drivers/md/bcache/closure.h | 670 +++++ drivers/md/bcache/debug.c | 563 ++++ drivers/md/bcache/debug.h | 54 + drivers/md/bcache/io.c | 390 +++ drivers/md/bcache/journal.c | 785 ++++++ drivers/md/bcache/journal.h | 215 ++ drivers/md/bcache/movinggc.c | 254 ++ drivers/md/bcache/request.c | 1409 ++++++++++ drivers/md/bcache/request.h | 62 + drivers/md/bcache/stats.c | 245 ++ drivers/md/bcache/stats.h | 58 + drivers/md/bcache/super.c | 1941 ++++++++++++++ drivers/md/bcache/sysfs.c | 817 ++++++ drivers/md/bcache/sysfs.h | 110 + drivers/md/bcache/trace.c | 26 + drivers/md/bcache/util.c | 389 +++ drivers/md/bcache/util.h | 589 +++++ drivers/md/bcache/writeback.c | 414 +++ include/linux/cgroup_subsys.h | 6 + include/linux/sched.h | 4 + include/trace/events/bcache.h | 271 ++ kernel/fork.c | 4 + 36 files changed, 16474 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-bcache create mode 100644 Documentation/bcache.txt create mode 100644 drivers/md/bcache/Kconfig create mode 100644 drivers/md/bcache/Makefile create mode 100644 drivers/md/bcache/alloc.c create mode 100644 drivers/md/bcache/bcache.h create mode 100644 drivers/md/bcache/bset.c create mode 100644 drivers/md/bcache/bset.h create mode 100644 drivers/md/bcache/btree.c create mode 100644 drivers/md/bcache/btree.h create mode 100644 drivers/md/bcache/closure.c create mode 100644 drivers/md/bcache/closure.h create mode 100644 drivers/md/bcache/debug.c create mode 100644 drivers/md/bcache/debug.h create mode 100644 drivers/md/bcache/io.c create mode 100644 drivers/md/bcache/journal.c create mode 100644 drivers/md/bcache/journal.h create mode 100644 drivers/md/bcache/movinggc.c create mode 100644 drivers/md/bcache/request.c create mode 100644 drivers/md/bcache/request.h create mode 100644 drivers/md/bcache/stats.c create mode 100644 drivers/md/bcache/stats.h create mode 100644 drivers/md/bcache/super.c create mode 100644 drivers/md/bcache/sysfs.c create mode 100644 drivers/md/bcache/sysfs.h create mode 100644 drivers/md/bcache/trace.c create mode 100644 
drivers/md/bcache/util.c create mode 100644 drivers/md/bcache/util.h create mode 100644 drivers/md/bcache/writeback.c create mode 100644 include/trace/events/bcache.h diff --git a/Documentation/ABI/testing/sysfs-block-bcache b/Documentation/ABI/testing/sysfs-block-bcache new file mode 100644 index 000000000000..9e4bbc5d51fd --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-bcache @@ -0,0 +1,156 @@ +What: /sys/block//bcache/unregister +Date: November 2010 +Contact: Kent Overstreet +Description: + A write to this file causes the backing device or cache to be + unregistered. If a backing device had dirty data in the cache, + writeback mode is automatically disabled and all dirty data is + flushed before the device is unregistered. Caches unregister + all associated backing devices before unregistering themselves. + +What: /sys/block//bcache/clear_stats +Date: November 2010 +Contact: Kent Overstreet +Description: + Writing to this file resets all the statistics for the device. + +What: /sys/block//bcache/cache +Date: November 2010 +Contact: Kent Overstreet +Description: + For a backing device that has cache, a symlink to + the bcache/ dir of that cache. + +What: /sys/block//bcache/cache_hits +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: integer number of full cache hits, + counted per bio. A partial cache hit counts as a miss. + +What: /sys/block//bcache/cache_misses +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: integer number of cache misses. + +What: /sys/block//bcache/cache_hit_ratio +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: cache hits as a percentage. + +What: /sys/block//bcache/sequential_cutoff +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: Threshold past which sequential IO will + skip the cache. Read and written as bytes in human readable + units (i.e. echo 10M > sequntial_cutoff). + +What: /sys/block//bcache/bypassed +Date: November 2010 +Contact: Kent Overstreet +Description: + Sum of all reads and writes that have bypassed the cache (due + to the sequential cutoff). Expressed as bytes in human + readable units. + +What: /sys/block//bcache/writeback +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: When on, writeback caching is enabled and + writes will be buffered in the cache. When off, caching is in + writethrough mode; reads and writes will be added to the + cache but no write buffering will take place. + +What: /sys/block//bcache/writeback_running +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: when off, dirty data will not be written + from the cache to the backing device. The cache will still be + used to buffer writes until it is mostly full, at which point + writes transparently revert to writethrough mode. Intended only + for benchmarking/testing. + +What: /sys/block//bcache/writeback_delay +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: In writeback mode, when dirty data is + written to the cache and the cache held no dirty data for that + backing device, writeback from cache to backing device starts + after this delay, expressed as an integer number of seconds. 
+ +What: /sys/block//bcache/writeback_percent +Date: November 2010 +Contact: Kent Overstreet +Description: + For backing devices: If nonzero, writeback from cache to + backing device only takes place when more than this percentage + of the cache is used, allowing more write coalescing to take + place and reducing total number of writes sent to the backing + device. Integer between 0 and 40. + +What: /sys/block//bcache/synchronous +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, a boolean that allows synchronous mode to be + switched on and off. In synchronous mode all writes are ordered + such that the cache can reliably recover from unclean shutdown; + if disabled bcache will not generally wait for writes to + complete but if the cache is not shut down cleanly all data + will be discarded from the cache. Should not be turned off with + writeback caching enabled. + +What: /sys/block//bcache/discard +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, a boolean allowing discard/TRIM to be turned off + or back on if the device supports it. + +What: /sys/block//bcache/bucket_size +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, bucket size in human readable units, as set at + cache creation time; should match the erase block size of the + SSD for optimal performance. + +What: /sys/block//bcache/nbuckets +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, the number of usable buckets. + +What: /sys/block//bcache/tree_depth +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, height of the btree excluding leaf nodes (i.e. a + one node tree will have a depth of 0). + +What: /sys/block//bcache/btree_cache_size +Date: November 2010 +Contact: Kent Overstreet +Description: + Number of btree buckets/nodes that are currently cached in + memory; cache dynamically grows and shrinks in response to + memory pressure from the rest of the system. + +What: /sys/block//bcache/written +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, total amount of data in human readable units + written to the cache, excluding all metadata. + +What: /sys/block//bcache/btree_written +Date: November 2010 +Contact: Kent Overstreet +Description: + For a cache, sum of all btree writes in human readable units. diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt new file mode 100644 index 000000000000..533307d52c87 --- /dev/null +++ b/Documentation/bcache.txt @@ -0,0 +1,343 @@ +Say you've got a big slow raid 6, and an X-25E or three. Wouldn't it be +nice if you could use them as cache... Hence bcache. + +Wiki and git repositories are at: + http://bcache.evilpiepirate.org + http://evilpiepirate.org/git/linux-bcache.git + http://evilpiepirate.org/git/bcache-tools.git + +It's designed around the performance characteristics of SSDs - it only allocates +in erase block sized buckets, and it uses a hybrid btree/log to track cached +extants (which can be anywhere from a single sector to the bucket size). It's +designed to avoid random writes at all costs; it fills up an erase block +sequentially, then issues a discard before reusing it. + +Both writethrough and writeback caching are supported. Writeback defaults to +off, but can be switched on and off arbitrarily at runtime. Bcache goes to +great lengths to protect your data - it reliably handles unclean shutdown. 
(It +doesn't even have a notion of a clean shutdown; bcache simply doesn't return +writes as completed until they're on stable storage). + +Writeback caching can use most of the cache for buffering writes - writing +dirty data to the backing device is always done sequentially, scanning from the +start to the end of the index. + +Since random IO is what SSDs excel at, there generally won't be much benefit +to caching large sequential IO. Bcache detects sequential IO and skips it; +it also keeps a rolling average of the IO sizes per task, and as long as the +average is above the cutoff it will skip all IO from that task - instead of +caching the first 512k after every seek. Backups and large file copies should +thus entirely bypass the cache. + +In the event of a data IO error on the flash it will try to recover by reading +from disk or invalidating cache entries. For unrecoverable errors (meta data +or dirty data), caching is automatically disabled; if dirty data was present +in the cache it first disables writeback caching and waits for all dirty data +to be flushed. + +Getting started: +You'll need make-bcache from the bcache-tools repository. Both the cache device +and backing device must be formatted before use. + make-bcache -B /dev/sdb + make-bcache -C /dev/sdc + +make-bcache has the ability to format multiple devices at the same time - if +you format your backing devices and cache device at the same time, you won't +have to manually attach: + make-bcache -B /dev/sda /dev/sdb -C /dev/sdc + +To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register: + + echo /dev/sdb > /sys/fs/bcache/register + echo /dev/sdc > /sys/fs/bcache/register + +To register your bcache devices automatically, you could add something like +this to an init script: + + echo /dev/sd* > /sys/fs/bcache/register_quiet + +It'll look for bcache superblocks and ignore everything that doesn't have one. + +Registering the backing device makes the bcache show up in /dev; you can now +format it and use it as normal. But the first time using a new bcache device, +it'll be running in passthrough mode until you attach it to a cache. See the +section on attaching. + +The devices show up at /dev/bcacheN, and can be controlled via sysfs from +/sys/block/bcacheN/bcache: + + mkfs.ext4 /dev/bcache0 + mount /dev/bcache0 /mnt + +Cache devices are managed as sets; multiple caches per set isn't supported yet +but will allow for mirroring of metadata and dirty data in the future. Your new +cache set shows up as /sys/fs/bcache/ + +ATTACHING: + +After your cache device and backing device are registered, the backing device +must be attached to your cache set to enable caching. Attaching a backing +device to a cache set is done thusly, with the UUID of the cache set in +/sys/fs/bcache: + + echo > /sys/block/bcache0/bcache/attach + +This only has to be done once. The next time you reboot, just reregister all +your bcache devices. If a backing device has data in a cache somewhere, the +/dev/bcache# device won't be created until the cache shows up - particularly +important if you have writeback caching turned on. + +If you're booting up and your cache device is gone and never coming back, you +can force run the backing device: + + echo 1 > /sys/block/sdb/bcache/running + +(You need to use /sys/block/sdb (or whatever your backing device is called), not +/sys/block/bcache0, because bcache0 doesn't exist yet. 
If you're using a
+partition, the bcache directory would be at /sys/block/sdb/sdb2/bcache)
+
+The backing device will still use that cache set if it shows up in the future,
+but all the cached data will be invalidated. If there was dirty data in the
+cache, don't expect the filesystem to be recoverable - you will have massive
+filesystem corruption, though ext4's fsck does work miracles.
+
+SYSFS - BACKING DEVICE:
+
+attach
+ Echo the UUID of a cache set to this file to enable caching.
+
+cache_mode
+ Can be one of writethrough, writeback, writearound or none.
+
+clear_stats
+ Writing to this file resets the running total stats (not the day/hour/5 minute
+ decaying versions).
+
+detach
+ Write to this file to detach from a cache set. If there is dirty data in the
+ cache, it will be flushed first.
+
+dirty_data
+ Amount of dirty data for this backing device in the cache. Continuously
+ updated unlike the cache set's version, but may be slightly off.
+
+label
+ Name of underlying device.
+
+readahead
+ Size of readahead that should be performed. Defaults to 0. If set to e.g.
+ 1M, it will round cache miss reads up to that size, but without overlapping
+ existing cache entries.
+
+running
+ 1 if bcache is running (i.e. whether the /dev/bcache device exists, whether
+ it's in passthrough mode or caching).
+
+sequential_cutoff
+ A sequential IO will bypass the cache once it passes this threshold; the
+ most recent 128 IOs are tracked so sequential IO can be detected even when
+ it isn't all done at once.
+
+sequential_merge
+ If nonzero, bcache keeps a list of the last 128 requests submitted to compare
+ against all new requests to determine which new requests are sequential
+ continuations of previous requests for the purpose of determining sequential
+ cutoff. This is necessary if the sequential cutoff value is greater than the
+ maximum acceptable sequential size for any single request.
+
+state
+ The backing device can be in one of four different states:
+
+ no cache: Has never been attached to a cache set.
+
+ clean: Part of a cache set, and there is no cached dirty data.
+
+ dirty: Part of a cache set, and there is cached dirty data.
+
+ inconsistent: The backing device was forcibly run by the user when there was
+ dirty data cached but the cache set was unavailable; whatever data was on the
+ backing device has likely been corrupted.
+
+stop
+ Write to this file to shut down the bcache device and close the backing
+ device.
+
+writeback_delay
+ When dirty data is written to the cache and it previously did not contain
+ any, waits some number of seconds before initiating writeback. Defaults to
+ 30.
+
+writeback_percent
+ If nonzero, bcache tries to keep around this percentage of the cache dirty by
+ throttling background writeback and using a PD controller to smoothly adjust
+ the rate.
+
+writeback_rate
+ Rate in sectors per second - if writeback_percent is nonzero, background
+ writeback is throttled to this rate. Continuously adjusted by bcache but may
+ also be set by the user.
+
+writeback_running
+ If off, writeback of dirty data will not take place at all. Dirty data will
+ still be added to the cache until it is mostly full; only meant for
+ benchmarking. Defaults to on.
+
+SYSFS - BACKING DEVICE STATS:
+
+There are directories with these numbers for a running total, as well as
+versions that decay over the past day, hour and 5 minutes; they're also
+aggregated in the cache set directory.
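+
+Both the decaying stats here and the io_error accounting described further
+down rely on the same basic trick: instead of storing a full history, a
+single counter is periodically cut in half, so old events gradually stop
+mattering. A minimal C sketch of that halving idea (the struct and names are
+illustrative, not bcache's actual implementation):
+
+    #include <stdint.h>
+
+    struct decaying_stat {
+        uint64_t count;   /* running, decayed event count; ++ on each event */
+        uint64_t period;  /* how often to halve, e.g. measured in ios */
+        uint64_t until;   /* countdown to the next halving; starts at period */
+    };
+
+    /* Called once per IO: after `period` calls, halve the counter. */
+    static void stat_tick(struct decaying_stat *s)
+    {
+        if (--s->until == 0) {
+            s->count /= 2;        /* one half life has passed */
+            s->until = s->period; /* rearm the countdown */
+        }
+    }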
+
+bypassed
+ Amount of IO (both reads and writes) that has bypassed the cache.
+
+cache_hits
+cache_misses
+cache_hit_ratio
+ Hits and misses are counted per individual IO as bcache sees them; a
+ partial hit is counted as a miss.
+
+cache_bypass_hits
+cache_bypass_misses
+ Hits and misses for IO that is intended to skip the cache are still counted,
+ but broken out here.
+
+cache_miss_collisions
+ Counts instances where data was going to be inserted into the cache from a
+ cache miss, but raced with a write and the data was already present (usually
+ 0 since the synchronization for cache misses was rewritten).
+
+cache_readaheads
+ Count of times readahead occurred.
+
+SYSFS - CACHE SET:
+
+average_key_size
+ Average data per key in the btree.
+
+bdev<0..n>
+ Symlink to each of the attached backing devices.
+
+block_size
+ Block size of the cache devices.
+
+btree_cache_size
+ Amount of memory currently used by the btree cache.
+
+bucket_size
+ Size of buckets.
+
+cache<0..n>
+ Symlink to each of the cache devices comprising this cache set.
+
+cache_available_percent
+ Percentage of the cache device that is free.
+
+clear_stats
+ Clears the statistics associated with this cache.
+
+dirty_data
+ Amount of dirty data in the cache (updated when garbage collection runs).
+
+flash_vol_create
+ Echoing a size to this file (in human readable units, k/M/G) creates a thinly
+ provisioned volume backed by the cache set.
+
+io_error_halflife
+io_error_limit
+ These determine how many errors we accept before disabling the cache.
+ Each error is decayed by the half life (in # ios). If the decaying count
+ reaches io_error_limit, dirty data is written out and the cache is disabled.
+
+journal_delay_ms
+ Journal writes will delay for up to this many milliseconds, unless a cache
+ flush happens sooner. Defaults to 100.
+
+root_usage_percent
+ Percentage of the root btree node in use. If this gets too high the node
+ will split, increasing the tree depth.
+
+stop
+ Write to this file to shut down the cache set - waits until all attached
+ backing devices have been shut down.
+
+tree_depth
+ Depth of the btree (a single node btree has depth 0).
+
+unregister
+ Detaches all backing devices and closes the cache devices; if dirty data is
+ present it will disable writeback caching and wait for it to be flushed.
+
+SYSFS - CACHE SET INTERNAL:
+
+This directory also exposes timings for a number of internal operations, with
+separate files for average duration, average frequency, last occurrence and max
+duration: garbage collection, btree read, btree node sorts and btree splits.
+
+active_journal_entries
+ Number of journal entries that are newer than the index.
+
+btree_nodes
+ Total nodes in the btree.
+
+btree_used_percent
+ Average fraction of the btree in use.
+
+bset_tree_stats
+ Statistics about the auxiliary search trees.
+
+btree_cache_max_chain
+ Longest chain in the btree node cache's hash table.
+
+cache_read_races
+ Counts instances where while data was being read from the cache, the bucket
+ was reused and invalidated - i.e. where the pointer was stale after the read
+ completed. When this occurs the data is reread from the backing device.
+
+trigger_gc
+ Writing to this file forces garbage collection to run.
+
+SYSFS - CACHE DEVICE:
+
+block_size
+ Minimum granularity of writes - should match the hardware sector size.
+
+btree_written
+ Sum of all btree writes, in (kilo/mega/giga) bytes.
+
+bucket_size
+ Size of buckets.
+
+cache_replacement_policy
+ One of lru, fifo or random.
+
+discard
+ Boolean; if on, a discard/TRIM will be issued to each bucket before it is
+ reused. Defaults to off, since SATA TRIM is an unqueued command (and thus
+ slow).
+
+freelist_percent
+ Size of the freelist as a percentage of nbuckets. Can be written to in order
+ to increase the number of buckets kept on the freelist, which lets you
+ artificially reduce the size of the cache at runtime. Mostly for testing
+ purposes (i.e. testing how different size caches affect your hit rate), but
+ since buckets are discarded when they move on to the freelist, it will also
+ make the SSD's garbage collection easier by effectively giving it more
+ reserved space.
+
+io_errors
+ Number of errors that have occurred, decayed by io_error_halflife.
+
+metadata_written
+ Sum of all non-data writes (btree writes and all other metadata).
+
+nbuckets
+ Total buckets in this cache.
+
+priority_stats
+ Statistics about how recently data in the cache has been accessed. This can
+ reveal your working set size.
+
+written
+ Sum of all data that has been written to the cache; comparison with
+ btree_written gives the amount of write inflation in bcache.
diff --git a/MAINTAINERS b/MAINTAINERS
index 50b4d735f961..64b849620b52 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1616,6 +1616,13 @@ W: http://www.baycom.org/~tom/ham/ham.html
 S: Maintained
 F: drivers/net/hamradio/baycom*
 
+BCACHE (BLOCK LAYER CACHE)
+M: Kent Overstreet
+L: linux-bcache@vger.kernel.org
+W: http://bcache.evilpiepirate.org
+S: Maintained
+F: drivers/md/bcache/
+
 BEFS FILE SYSTEM
 S: Orphan
 F: Documentation/filesystems/befs.txt
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 4d8d90b4fe78..3bfc8f1da9fe 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -174,6 +174,8 @@ config MD_FAULTY
 
 In unsure, say N.
 
+source "drivers/md/bcache/Kconfig"
+
 config BLK_DEV_DM
 tristate "Device mapper support"
 ---help---
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 7ceeaefc0e95..1439fd4ad9b1 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_MD_RAID10) += raid10.o
 obj-$(CONFIG_MD_RAID456) += raid456.o
 obj-$(CONFIG_MD_MULTIPATH) += multipath.o
 obj-$(CONFIG_MD_FAULTY) += faulty.o
+obj-$(CONFIG_BCACHE) += bcache/
 obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
 obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
 obj-$(CONFIG_DM_BUFIO) += dm-bufio.o
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
new file mode 100644
index 000000000000..05c220d05e23
--- /dev/null
+++ b/drivers/md/bcache/Kconfig
@@ -0,0 +1,42 @@
+
+config BCACHE
+ tristate "Block device as cache"
+ select CLOSURES
+ ---help---
+ Allows a block device to be used as cache for other devices; uses
+ a btree for indexing and the layout is optimized for SSDs.
+
+ See Documentation/bcache.txt for details.
+
+config BCACHE_DEBUG
+ bool "Bcache debugging"
+ depends on BCACHE
+ ---help---
+ Don't select this option unless you're a developer.
+
+ Enables extra debugging tools (primarily a fuzz tester).
+
+config BCACHE_EDEBUG
+ bool "Extended runtime checks"
+ depends on BCACHE
+ ---help---
+ Don't select this option unless you're a developer.
+
+ Enables extra runtime checks which significantly affect performance.
+
+config BCACHE_CLOSURES_DEBUG
+ bool "Debug closures"
+ depends on BCACHE
+ select DEBUG_FS
+ ---help---
+ Keeps all active closures in a linked list and provides a debugfs
+ interface to list them, which makes it possible to see asynchronous
+ operations that get stuck.
+
+# cgroup code needs to be updated:
+#
+#config CGROUP_BCACHE
+# bool "Cgroup controls for bcache"
+# depends on BCACHE && BLK_CGROUP
+# ---help---
+# TODO
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
new file mode 100644
index 000000000000..0e9c82523be6
--- /dev/null
+++ b/drivers/md/bcache/Makefile
@@ -0,0 +1,7 @@
+
+obj-$(CONFIG_BCACHE) += bcache.o
+
+bcache-y := alloc.o btree.o bset.o io.o journal.o writeback.o\
+ movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o
+
+CFLAGS_request.o += -Iblock
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
new file mode 100644
index 000000000000..ed18115e078e
--- /dev/null
+++ b/drivers/md/bcache/alloc.c
@@ -0,0 +1,583 @@
+/*
+ * Primary bucket allocation code
+ *
+ * Copyright 2012 Google, Inc.
+ *
+ * Allocation in bcache is done in terms of buckets:
+ *
+ * Each bucket has an associated 8 bit gen; this gen corresponds to the gen in
+ * btree pointers - they must match for the pointer to be considered valid.
+ *
+ * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a
+ * bucket simply by incrementing its gen.
+ *
+ * The gens (along with the priorities; it's really the gens that are important
+ * but the code is named as if it's the priorities) are written in an arbitrary
+ * list of buckets on disk, with a pointer to them in the journal header.
+ *
+ * When we invalidate a bucket, we have to write its new gen to disk and wait
+ * for that write to complete before we use it - otherwise after a crash we
+ * could have pointers that appeared to be good but pointed to data that had
+ * been overwritten.
+ *
+ * Since the gens and priorities are all stored contiguously on disk, we can
+ * batch this up: We fill up the free_inc list with freshly invalidated buckets,
+ * call prio_write(), and when prio_write() finishes we pull buckets off the
+ * free_inc list and optionally discard them.
+ *
+ * free_inc isn't the only freelist - if it was, we'd often have to sleep while
+ * priorities and gens were being written before we could allocate. c->free is
+ * a smaller freelist, and buckets on that list are always ready to be used.
+ *
+ * If we've got discards enabled, that happens when a bucket moves from the
+ * free_inc list to the free list.
+ *
+ * There is another freelist, because sometimes we have buckets that we know
+ * have nothing pointing into them - these we can reuse without waiting for
+ * priorities to be rewritten. These come from freed btree nodes and buckets
+ * that garbage collection discovered no longer had valid keys pointing into
+ * them (because they were overwritten). That's the unused list - buckets on the
+ * unused list move to the free list, optionally being discarded in the process.
+ *
+ * It's also important to ensure that gens don't wrap around - with respect to
+ * either the oldest gen in the btree or the gen on disk. This is quite
+ * difficult to do in practice, but we explicitly guard against it anyways - if
+ * a bucket is in danger of wrapping around we simply skip invalidating it that
+ * time around, and we garbage collect or rewrite the priorities sooner than we
+ * would have otherwise.
+ *
+ * bch_bucket_alloc() allocates a single bucket from a specific cache.
+ *
+ * bch_bucket_alloc_set() allocates one or more buckets from different caches
+ * out of a cache set.
+ *
+ * free_some_buckets() drives all the processes described above.
It's called + * from bch_bucket_alloc() and a few other places that need to make sure free + * buckets are ready. + * + * invalidate_buckets_(lru|fifo)() find buckets that are available to be + * invalidated, and then invalidate them and stick them on the free_inc list - + * in either lru or fifo order. + */ + +#include "bcache.h" +#include "btree.h" + +#include + +#define MAX_IN_FLIGHT_DISCARDS 8U + +/* Bucket heap / gen */ + +uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) +{ + uint8_t ret = ++b->gen; + + ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b)); + WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX); + + if (CACHE_SYNC(&ca->set->sb)) { + ca->need_save_prio = max(ca->need_save_prio, + bucket_disk_gen(b)); + WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX); + } + + return ret; +} + +void bch_rescale_priorities(struct cache_set *c, int sectors) +{ + struct cache *ca; + struct bucket *b; + unsigned next = c->nbuckets * c->sb.bucket_size / 1024; + unsigned i; + int r; + + atomic_sub(sectors, &c->rescale); + + do { + r = atomic_read(&c->rescale); + + if (r >= 0) + return; + } while (atomic_cmpxchg(&c->rescale, r, r + next) != r); + + mutex_lock(&c->bucket_lock); + + c->min_prio = USHRT_MAX; + + for_each_cache(ca, c, i) + for_each_bucket(b, ca) + if (b->prio && + b->prio != BTREE_PRIO && + !atomic_read(&b->pin)) { + b->prio--; + c->min_prio = min(c->min_prio, b->prio); + } + + mutex_unlock(&c->bucket_lock); +} + +/* Discard/TRIM */ + +struct discard { + struct list_head list; + struct work_struct work; + struct cache *ca; + long bucket; + + struct bio bio; + struct bio_vec bv; +}; + +static void discard_finish(struct work_struct *w) +{ + struct discard *d = container_of(w, struct discard, work); + struct cache *ca = d->ca; + char buf[BDEVNAME_SIZE]; + + if (!test_bit(BIO_UPTODATE, &d->bio.bi_flags)) { + pr_notice("discard error on %s, disabling", + bdevname(ca->bdev, buf)); + d->ca->discard = 0; + } + + mutex_lock(&ca->set->bucket_lock); + + fifo_push(&ca->free, d->bucket); + list_add(&d->list, &ca->discards); + atomic_dec(&ca->discards_in_flight); + + mutex_unlock(&ca->set->bucket_lock); + + closure_wake_up(&ca->set->bucket_wait); + wake_up(&ca->set->alloc_wait); + + closure_put(&ca->set->cl); +} + +static void discard_endio(struct bio *bio, int error) +{ + struct discard *d = container_of(bio, struct discard, bio); + schedule_work(&d->work); +} + +static void do_discard(struct cache *ca, long bucket) +{ + struct discard *d = list_first_entry(&ca->discards, + struct discard, list); + + list_del(&d->list); + d->bucket = bucket; + + atomic_inc(&ca->discards_in_flight); + closure_get(&ca->set->cl); + + bio_init(&d->bio); + + d->bio.bi_sector = bucket_to_sector(ca->set, d->bucket); + d->bio.bi_bdev = ca->bdev; + d->bio.bi_rw = REQ_WRITE|REQ_DISCARD; + d->bio.bi_max_vecs = 1; + d->bio.bi_io_vec = d->bio.bi_inline_vecs; + d->bio.bi_size = bucket_bytes(ca); + d->bio.bi_end_io = discard_endio; + bio_set_prio(&d->bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + + submit_bio(0, &d->bio); +} + +/* Allocation */ + +static inline bool can_inc_bucket_gen(struct bucket *b) +{ + return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX && + bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX; +} + +bool bch_bucket_add_unused(struct cache *ca, struct bucket *b) +{ + BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b)); + + if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] && + CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) + return false; + + b->prio = 0; + + if (can_inc_bucket_gen(b) && + 
fifo_push(&ca->unused, b - ca->buckets)) { + atomic_inc(&b->pin); + return true; + } + + return false; +} + +static bool can_invalidate_bucket(struct cache *ca, struct bucket *b) +{ + return GC_MARK(b) == GC_MARK_RECLAIMABLE && + !atomic_read(&b->pin) && + can_inc_bucket_gen(b); +} + +static void invalidate_one_bucket(struct cache *ca, struct bucket *b) +{ + bch_inc_gen(ca, b); + b->prio = INITIAL_PRIO; + atomic_inc(&b->pin); + fifo_push(&ca->free_inc, b - ca->buckets); +} + +static void invalidate_buckets_lru(struct cache *ca) +{ + unsigned bucket_prio(struct bucket *b) + { + return ((unsigned) (b->prio - ca->set->min_prio)) * + GC_SECTORS_USED(b); + } + + bool bucket_max_cmp(struct bucket *l, struct bucket *r) + { + return bucket_prio(l) < bucket_prio(r); + } + + bool bucket_min_cmp(struct bucket *l, struct bucket *r) + { + return bucket_prio(l) > bucket_prio(r); + } + + struct bucket *b; + ssize_t i; + + ca->heap.used = 0; + + for_each_bucket(b, ca) { + if (!can_invalidate_bucket(ca, b)) + continue; + + if (!GC_SECTORS_USED(b)) { + if (!bch_bucket_add_unused(ca, b)) + return; + } else { + if (!heap_full(&ca->heap)) + heap_add(&ca->heap, b, bucket_max_cmp); + else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { + ca->heap.data[0] = b; + heap_sift(&ca->heap, 0, bucket_max_cmp); + } + } + } + + if (ca->heap.used * 2 < ca->heap.size) + bch_queue_gc(ca->set); + + for (i = ca->heap.used / 2 - 1; i >= 0; --i) + heap_sift(&ca->heap, i, bucket_min_cmp); + + while (!fifo_full(&ca->free_inc)) { + if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { + /* We don't want to be calling invalidate_buckets() + * multiple times when it can't do anything + */ + ca->invalidate_needs_gc = 1; + bch_queue_gc(ca->set); + return; + } + + invalidate_one_bucket(ca, b); + } +} + +static void invalidate_buckets_fifo(struct cache *ca) +{ + struct bucket *b; + size_t checked = 0; + + while (!fifo_full(&ca->free_inc)) { + if (ca->fifo_last_bucket < ca->sb.first_bucket || + ca->fifo_last_bucket >= ca->sb.nbuckets) + ca->fifo_last_bucket = ca->sb.first_bucket; + + b = ca->buckets + ca->fifo_last_bucket++; + + if (can_invalidate_bucket(ca, b)) + invalidate_one_bucket(ca, b); + + if (++checked >= ca->sb.nbuckets) { + ca->invalidate_needs_gc = 1; + bch_queue_gc(ca->set); + return; + } + } +} + +static void invalidate_buckets_random(struct cache *ca) +{ + struct bucket *b; + size_t checked = 0; + + while (!fifo_full(&ca->free_inc)) { + size_t n; + get_random_bytes(&n, sizeof(n)); + + n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket); + n += ca->sb.first_bucket; + + b = ca->buckets + n; + + if (can_invalidate_bucket(ca, b)) + invalidate_one_bucket(ca, b); + + if (++checked >= ca->sb.nbuckets / 2) { + ca->invalidate_needs_gc = 1; + bch_queue_gc(ca->set); + return; + } + } +} + +static void invalidate_buckets(struct cache *ca) +{ + if (ca->invalidate_needs_gc) + return; + + switch (CACHE_REPLACEMENT(&ca->sb)) { + case CACHE_REPLACEMENT_LRU: + invalidate_buckets_lru(ca); + break; + case CACHE_REPLACEMENT_FIFO: + invalidate_buckets_fifo(ca); + break; + case CACHE_REPLACEMENT_RANDOM: + invalidate_buckets_random(ca); + break; + } +} + +#define allocator_wait(ca, cond) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + while (!(cond)) { \ + prepare_to_wait(&ca->set->alloc_wait, \ + &__wait, TASK_INTERRUPTIBLE); \ + \ + mutex_unlock(&(ca)->set->bucket_lock); \ + if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \ + finish_wait(&ca->set->alloc_wait, &__wait); \ + closure_return(cl); \ + } \ + \ + schedule(); \ + 
__set_current_state(TASK_RUNNING); \ + mutex_lock(&(ca)->set->bucket_lock); \ + } \ + \ + finish_wait(&ca->set->alloc_wait, &__wait); \ +} while (0) + +void bch_allocator_thread(struct closure *cl) +{ + struct cache *ca = container_of(cl, struct cache, alloc); + + mutex_lock(&ca->set->bucket_lock); + + while (1) { + while (1) { + long bucket; + + if ((!atomic_read(&ca->set->prio_blocked) || + !CACHE_SYNC(&ca->set->sb)) && + !fifo_empty(&ca->unused)) + fifo_pop(&ca->unused, bucket); + else if (!fifo_empty(&ca->free_inc)) + fifo_pop(&ca->free_inc, bucket); + else + break; + + allocator_wait(ca, (int) fifo_free(&ca->free) > + atomic_read(&ca->discards_in_flight)); + + if (ca->discard) { + allocator_wait(ca, !list_empty(&ca->discards)); + do_discard(ca, bucket); + } else { + fifo_push(&ca->free, bucket); + closure_wake_up(&ca->set->bucket_wait); + } + } + + allocator_wait(ca, ca->set->gc_mark_valid); + invalidate_buckets(ca); + + allocator_wait(ca, !atomic_read(&ca->set->prio_blocked) || + !CACHE_SYNC(&ca->set->sb)); + + if (CACHE_SYNC(&ca->set->sb) && + (!fifo_empty(&ca->free_inc) || + ca->need_save_prio > 64)) { + bch_prio_write(ca); + } + } +} + +long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) +{ + long r = -1; +again: + wake_up(&ca->set->alloc_wait); + + if (fifo_used(&ca->free) > ca->watermark[watermark] && + fifo_pop(&ca->free, r)) { + struct bucket *b = ca->buckets + r; +#ifdef CONFIG_BCACHE_EDEBUG + size_t iter; + long i; + + for (iter = 0; iter < prio_buckets(ca) * 2; iter++) + BUG_ON(ca->prio_buckets[iter] == (uint64_t) r); + + fifo_for_each(i, &ca->free, iter) + BUG_ON(i == r); + fifo_for_each(i, &ca->free_inc, iter) + BUG_ON(i == r); + fifo_for_each(i, &ca->unused, iter) + BUG_ON(i == r); +#endif + BUG_ON(atomic_read(&b->pin) != 1); + + SET_GC_SECTORS_USED(b, ca->sb.bucket_size); + + if (watermark <= WATERMARK_METADATA) { + SET_GC_MARK(b, GC_MARK_METADATA); + b->prio = BTREE_PRIO; + } else { + SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + b->prio = INITIAL_PRIO; + } + + return r; + } + + pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu", + atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free), + fifo_used(&ca->free_inc), fifo_used(&ca->unused)); + + if (cl) { + closure_wait(&ca->set->bucket_wait, cl); + + if (closure_blocking(cl)) { + mutex_unlock(&ca->set->bucket_lock); + closure_sync(cl); + mutex_lock(&ca->set->bucket_lock); + goto again; + } + } + + return -1; +} + +void bch_bucket_free(struct cache_set *c, struct bkey *k) +{ + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) { + struct bucket *b = PTR_BUCKET(c, k, i); + + SET_GC_MARK(b, 0); + SET_GC_SECTORS_USED(b, 0); + bch_bucket_add_unused(PTR_CACHE(c, k, i), b); + } +} + +int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, + struct bkey *k, int n, struct closure *cl) +{ + int i; + + lockdep_assert_held(&c->bucket_lock); + BUG_ON(!n || n > c->caches_loaded || n > 8); + + bkey_init(k); + + /* sort by free space/prio of oldest data in caches */ + + for (i = 0; i < n; i++) { + struct cache *ca = c->cache_by_alloc[i]; + long b = bch_bucket_alloc(ca, watermark, cl); + + if (b == -1) + goto err; + + k->ptr[i] = PTR(ca->buckets[b].gen, + bucket_to_sector(c, b), + ca->sb.nr_this_dev); + + SET_KEY_PTRS(k, i + 1); + } + + return 0; +err: + bch_bucket_free(c, k); + __bkey_put(c, k); + return -1; +} + +int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, + struct bkey *k, int n, struct closure *cl) +{ + int ret; + mutex_lock(&c->bucket_lock); + ret = 
__bch_bucket_alloc_set(c, watermark, k, n, cl);
+ mutex_unlock(&c->bucket_lock);
+ return ret;
+}
+
+/* Init */
+
+void bch_cache_allocator_exit(struct cache *ca)
+{
+ struct discard *d;
+
+ while (!list_empty(&ca->discards)) {
+ d = list_first_entry(&ca->discards, struct discard, list);
+ cancel_work_sync(&d->work);
+ list_del(&d->list);
+ kfree(d);
+ }
+}
+
+int bch_cache_allocator_init(struct cache *ca)
+{
+ unsigned i;
+
+ /*
+ * Reserve:
+ * Prio/gen writes first
+ * Then 8 for btree allocations
+ * Then half for the moving garbage collector
+ */
+
+ ca->watermark[WATERMARK_PRIO] = 0;
+
+ ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
+
+ ca->watermark[WATERMARK_MOVINGGC] = 8 +
+ ca->watermark[WATERMARK_METADATA];
+
+ ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
+ ca->watermark[WATERMARK_MOVINGGC];
+
+ for (i = 0; i < MAX_IN_FLIGHT_DISCARDS; i++) {
+ struct discard *d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->ca = ca;
+ INIT_WORK(&d->work, discard_finish);
+ list_add(&d->list, &ca->discards);
+ }
+
+ return 0;
+}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
new file mode 100644
index 000000000000..d01a553f63f3
--- /dev/null
+++ b/drivers/md/bcache/bcache.h
@@ -0,0 +1,1232 @@
+#ifndef _BCACHE_H
+#define _BCACHE_H
+
+/*
+ * SOME HIGH LEVEL CODE DOCUMENTATION:
+ *
+ * Bcache mostly works with cache sets, cache devices, and backing devices.
+ *
+ * Support for multiple cache devices hasn't quite been finished off yet, but
+ * it's about 95% plumbed through. A cache set and its cache devices are sort
+ * of like an md raid array and its component devices. Most of the code doesn't
+ * care about individual cache devices; the main abstraction is the cache set.
+ *
+ * Multiple cache devices are intended to give us the ability to mirror dirty
+ * cached data and metadata, without mirroring clean cached data.
+ *
+ * Backing devices are different, in that they have a lifetime independent of a
+ * cache set. When you register a newly formatted backing device it'll come up
+ * in passthrough mode, and then you can attach and detach a backing device from
+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly
+ * invalidates any cached data for that backing device.
+ *
+ * A cache set can have multiple (many) backing devices attached to it.
+ *
+ * There are also flash only volumes - this is the reason for the distinction
+ * between struct cached_dev and struct bcache_device. A flash only volume
+ * works much like a bcache device that has a backing device, except the
+ * "cached" data is always dirty. The end result is that we get thin
+ * provisioning with very little additional code.
+ *
+ * Flash only volumes work but they're not production ready because the moving
+ * garbage collector needs more work. More on that later.
+ *
+ * BUCKETS/ALLOCATION:
+ *
+ * Bcache is primarily designed for caching, which means that in normal
+ * operation all of our available space will be allocated. Thus, we need an
+ * efficient way of deleting things from the cache so we can write new things to
+ * it.
+ *
+ * To do this, we first divide the cache device up into buckets. A bucket is the
+ * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+
+ * works efficiently.
+ *
+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with
+ * it.
The gens and priorities for all the buckets are stored contiguously and
+ * packed on disk (in a linked list of buckets - aside from the superblock, all
+ * of bcache's metadata is stored in buckets).
+ *
+ * The priority is used to implement an LRU. We reset a bucket's priority when
+ * we allocate it or on a cache hit, and every so often we decrement the
+ * priority of each bucket. It could be used to implement something more
+ * sophisticated, if anyone ever gets around to it.
+ *
+ * The generation is used for invalidating buckets. Each pointer also has an 8
+ * bit generation embedded in it; for a pointer to be considered valid, its gen
+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all
+ * we have to do is increment its gen (and write its new gen to disk; we batch
+ * this up).
+ *
+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that
+ * contain metadata (including btree nodes).
+ *
+ * THE BTREE:
+ *
+ * Bcache is in large part designed around the btree.
+ *
+ * At a high level, the btree is just an index of key -> ptr tuples.
+ *
+ * Keys represent extents, and thus have a size field. Keys also have a variable
+ * number of pointers attached to them (potentially zero, which is handy for
+ * invalidating the cache).
+ *
+ * The key itself is an inode:offset pair. The inode number corresponds to a
+ * backing device or a flash only volume. The offset is the ending offset of the
+ * extent within the inode - not the starting offset; this makes lookups
+ * slightly more convenient.
+ *
+ * Pointers contain the cache device id, the offset on that device, and an 8 bit
+ * generation number. More on the gen later.
+ *
+ * Index lookups are not fully abstracted - cache lookups in particular are
+ * still somewhat mixed in with the btree code, but things are headed in that
+ * direction.
+ *
+ * Updates are fairly well abstracted, though. There are two different ways of
+ * updating the btree; insert and replace.
+ *
+ * BTREE_INSERT will just take a list of keys and insert them into the btree -
+ * overwriting (possibly only partially) any extents they overlap with. This is
+ * used to update the index after a write.
+ *
+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is
+ * overwriting a key that matches another given key. This is used for inserting
+ * data into the cache after a cache miss, and for background writeback, and for
+ * the moving garbage collector.
+ *
+ * There is no "delete" operation; deleting things from the index is
+ * accomplished either by invalidating pointers (by incrementing a bucket's
+ * gen) or by inserting a key with 0 pointers - which will overwrite anything
+ * previously present at that location in the index.
+ *
+ * This means that there are always stale/invalid keys in the btree. They're
+ * filtered out by the code that iterates through a btree node, and removed when
+ * a btree node is rewritten.
+ *
+ * BTREE NODES:
+ *
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
+ * free smaller than a bucket - so, that's how big our btree nodes are.
+ *
+ * (If buckets are really big we'll only use part of the bucket for a btree node
+ * - no less than 1/4th - but a bucket still contains no more than a single
+ * btree node. I'd actually like to change this, but for now we rely on the
+ * bucket's gen for deleting btree nodes when we rewrite/split a node.)
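+ *
+ * Very roughly, the gen check described above is cheap pointer arithmetic.
+ * A minimal sketch with illustrative names, not bcache's actual types (the
+ * real check also has to pick the right cache device first):
+ *
+ *	struct ex_bucket { uint8_t gen; };
+ *	struct ex_ptr    { size_t bucket; uint8_t gen; };
+ *
+ *	static inline int ex_ptr_valid(const struct ex_bucket *buckets,
+ *				       const struct ex_ptr *p)
+ *	{
+ *		return p->gen == buckets[p->bucket].gen;
+ *	}
+ *
+ * and "emptying" a bucket is then just buckets[b].gen++, after which every
+ * pointer still holding the old gen fails the check and is treated as stale.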
+ *
+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook
+ * btree implementation.
+ *
+ * The way this is solved is that btree nodes are internally log structured; we
+ * can append new keys to an existing btree node without rewriting it. This
+ * means each set of keys we write is sorted, but the node is not.
+ *
+ * We maintain this log structure in memory - keeping 1Mb of keys sorted would
+ * be expensive, and we have to distinguish between the keys we have written and
+ * the keys we haven't. So to do a lookup in a btree node, we have to search
+ * each sorted set. But we do merge written sets together lazily, so the cost of
+ * these extra searches is quite low (normally most of the keys in a btree node
+ * will be in one big set, and then there'll be one or two sets that are much
+ * smaller).
+ *
+ * This log structure makes bcache's btree more of a hybrid between a
+ * conventional btree and a compacting data structure, with some of the
+ * advantages of both.
+ *
+ * GARBAGE COLLECTION:
+ *
+ * We can't just invalidate any bucket - it might contain dirty data or
+ * metadata. If it once contained dirty data, other writes might overwrite it
+ * later, leaving no valid pointers into that bucket in the index.
+ *
+ * Thus, the primary purpose of garbage collection is to find buckets to reuse.
+ * It also counts how much valid data each bucket currently contains, so that
+ * allocation can reuse buckets sooner when they've been mostly overwritten.
+ *
+ * It also does some things that are really internal to the btree
+ * implementation. If a btree node contains pointers that are stale by more than
+ * some threshold, it rewrites the btree node to avoid the bucket's generation
+ * wrapping around. It also merges adjacent btree nodes if they're empty enough.
+ *
+ * THE JOURNAL:
+ *
+ * Bcache's journal is not necessary for consistency; we always strictly
+ * order metadata writes so that the btree and everything else is consistent on
+ * disk in the event of an unclean shutdown, and in fact bcache had writeback
+ * caching (with recovery from unclean shutdown) before journalling was
+ * implemented.
+ *
+ * Rather, the journal is purely a performance optimization; we can't complete a
+ * write until we've updated the index on disk, otherwise the cache would be
+ * inconsistent in the event of an unclean shutdown. This means that without the
+ * journal, on random write workloads we constantly have to update all the leaf
+ * nodes in the btree, and those writes will be mostly empty (appending at most
+ * a few keys each) - highly inefficient in terms of the amount of metadata
+ * written, and it puts more strain on the various btree resorting/compacting
+ * code.
+ *
+ * The journal is just a log of keys we've inserted; on startup we just reinsert
+ * all the keys in the open journal entries. That means that when we're updating
+ * a node in the btree, we can wait until a 4k block of keys fills up before
+ * writing them out.
+ *
+ * For simplicity, we only journal updates to leaf nodes; updates to parent
+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth
+ * the complexity to deal with journalling them (in particular, journal replay)
+ * - updates to non leaf nodes just happen synchronously (see btree_split()).
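+ *
+ * Schematically (with illustrative names only - the real jset layout lives
+ * in journal.h), replay is little more than:
+ *
+ *	static void ex_journal_replay(struct ex_jset *j, size_t nr)
+ *	{
+ *		for (size_t i = 0; i < nr; i++)		/* oldest entry first */
+ *			for (size_t k = 0; k < j[i].nkeys; k++)
+ *				ex_btree_insert(&j[i].keys[k]);
+ *	}
+ *
+ * i.e. reinsert everything, which is why the journal can stay a flat log of
+ * keys with no index structure of its own.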
+ */ + +#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "closure.h" + +struct bucket { + atomic_t pin; + uint16_t prio; + uint8_t gen; + uint8_t disk_gen; + uint8_t last_gc; /* Most out of date gen in the btree */ + uint8_t gc_gen; + uint16_t gc_mark; +}; + +/* + * I'd use bitfields for these, but I don't trust the compiler not to screw me + * as multiple threads touch struct bucket without locking + */ + +BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); +#define GC_MARK_RECLAIMABLE 0 +#define GC_MARK_DIRTY 1 +#define GC_MARK_METADATA 2 +BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14); + +struct bkey { + uint64_t high; + uint64_t low; + uint64_t ptr[]; +}; + +/* Enough for a key with 6 pointers */ +#define BKEY_PAD 8 + +#define BKEY_PADDED(key) \ + union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; } + +/* Version 1: Backing device + * Version 2: Seed pointer into btree node checksum + * Version 3: New UUID format + */ +#define BCACHE_SB_VERSION 3 + +#define SB_SECTOR 8 +#define SB_SIZE 4096 +#define SB_LABEL_SIZE 32 +#define SB_JOURNAL_BUCKETS 256U +/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ +#define MAX_CACHES_PER_SET 8 + +#define BDEV_DATA_START 16 /* sectors */ + +struct cache_sb { + uint64_t csum; + uint64_t offset; /* sector where this sb was written */ + uint64_t version; +#define CACHE_BACKING_DEV 1 + + uint8_t magic[16]; + + uint8_t uuid[16]; + union { + uint8_t set_uuid[16]; + uint64_t set_magic; + }; + uint8_t label[SB_LABEL_SIZE]; + + uint64_t flags; + uint64_t seq; + uint64_t pad[8]; + + uint64_t nbuckets; /* device size */ + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ + + uint16_t nr_in_set; + uint16_t nr_this_dev; + + uint32_t last_mount; /* time_t */ + + uint16_t first_bucket; + union { + uint16_t njournal_buckets; + uint16_t keys; + }; + uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */ +}; + +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +#define CACHE_REPLACEMENT_LRU 0U +#define CACHE_REPLACEMENT_FIFO 1U +#define CACHE_REPLACEMENT_RANDOM 2U + +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +#define CACHE_MODE_WRITETHROUGH 0U +#define CACHE_MODE_WRITEBACK 1U +#define CACHE_MODE_WRITEAROUND 2U +#define CACHE_MODE_NONE 3U +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U + +/* Version 1: Seed pointer into btree node checksum + */ +#define BCACHE_BSET_VERSION 1 + +/* + * This is the on disk format for btree nodes - a btree node on disk is a list + * of these; within each set the keys are sorted + */ +struct bset { + uint64_t csum; + uint64_t magic; + uint64_t seq; + uint32_t version; + uint32_t keys; + + union { + struct bkey start[0]; + uint64_t d[0]; + }; +}; + +/* + * On disk format for priorities and gens - see super.c near prio_write() for + * more. 
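+ *
+ * As a rough sanity check (not from the original comment, and assuming the
+ * compiler adds no padding - it shouldn't, since the 40 byte header is
+ * already 8 byte aligned): each packed bucket_disk entry is 3 bytes, so with
+ * 512k buckets one prio_set bucket describes about (524288 - 40) / 3, i.e.
+ * roughly 174749 other buckets - which is what the prios_per_bucket() macro
+ * further down computes.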
+ */ +struct prio_set { + uint64_t csum; + uint64_t magic; + uint64_t seq; + uint32_t version; + uint32_t pad; + + uint64_t next_bucket; + + struct bucket_disk { + uint16_t prio; + uint8_t gen; + } __attribute((packed)) data[]; +}; + +struct uuid_entry { + union { + struct { + uint8_t uuid[16]; + uint8_t label[32]; + uint32_t first_reg; + uint32_t last_reg; + uint32_t invalidated; + + uint32_t flags; + /* Size of flash only volumes */ + uint64_t sectors; + }; + + uint8_t pad[128]; + }; +}; + +BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); + +#include "journal.h" +#include "stats.h" +struct search; +struct btree; +struct keybuf; + +struct keybuf_key { + struct rb_node node; + BKEY_PADDED(key); + void *private; +}; + +typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); + +struct keybuf { + keybuf_pred_fn *key_predicate; + + struct bkey last_scanned; + spinlock_t lock; + + /* + * Beginning and end of range in rb tree - so that we can skip taking + * lock and checking the rb tree when we need to check for overlapping + * keys. + */ + struct bkey start; + struct bkey end; + + struct rb_root keys; + +#define KEYBUF_NR 100 + DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); +}; + +struct bio_split_pool { + struct bio_set *bio_split; + mempool_t *bio_split_hook; +}; + +struct bio_split_hook { + struct closure cl; + struct bio_split_pool *p; + struct bio *bio; + bio_end_io_t *bi_end_io; + void *bi_private; +}; + +struct bcache_device { + struct closure cl; + + struct kobject kobj; + + struct cache_set *c; + unsigned id; +#define BCACHEDEVNAME_SIZE 12 + char name[BCACHEDEVNAME_SIZE]; + + struct gendisk *disk; + + /* If nonzero, we're closing */ + atomic_t closing; + + /* If nonzero, we're detaching/unregistering from cache set */ + atomic_t detaching; + + atomic_long_t sectors_dirty; + unsigned long sectors_dirty_gc; + unsigned long sectors_dirty_last; + long sectors_dirty_derivative; + + mempool_t *unaligned_bvec; + struct bio_set *bio_split; + + unsigned data_csum:1; + + int (*cache_miss)(struct btree *, struct search *, + struct bio *, unsigned); + int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long); + + struct bio_split_pool bio_split_hook; +}; + +struct io { + /* Used to track sequential IO so it can be skipped */ + struct hlist_node hash; + struct list_head lru; + + unsigned long jiffies; + unsigned sequential; + sector_t last; +}; + +struct cached_dev { + struct list_head list; + struct bcache_device disk; + struct block_device *bdev; + + struct cache_sb sb; + struct bio sb_bio; + struct bio_vec sb_bv[1]; + struct closure_with_waitlist sb_write; + + /* Refcount on the cache set. Always nonzero when we're caching. */ + atomic_t count; + struct work_struct detach; + + /* + * Device might not be running if it's dirty and the cache set hasn't + * showed up yet. + */ + atomic_t running; + + /* + * Writes take a shared lock from start to finish; scanning for dirty + * data to refill the rb tree requires an exclusive lock. + */ + struct rw_semaphore writeback_lock; + + /* + * Nonzero, and writeback has a refcount (d->count), iff there is dirty + * data in the cache. Protected by writeback_lock; must have an + * shared lock to set and exclusive lock to clear. + */ + atomic_t has_dirty; + + struct ratelimit writeback_rate; + struct delayed_work writeback_rate_update; + + /* + * Internal to the writeback code, so read_dirty() can keep track of + * where it's at. 
+ */ + sector_t last_read; + + /* Number of writeback bios in flight */ + atomic_t in_flight; + struct closure_with_timer writeback; + struct closure_waitlist writeback_wait; + + struct keybuf writeback_keys; + + /* For tracking sequential IO */ +#define RECENT_IO_BITS 7 +#define RECENT_IO (1 << RECENT_IO_BITS) + struct io io[RECENT_IO]; + struct hlist_head io_hash[RECENT_IO + 1]; + struct list_head io_lru; + spinlock_t io_lock; + + struct cache_accounting accounting; + + /* The rest of this all shows up in sysfs */ + unsigned sequential_cutoff; + unsigned readahead; + + unsigned sequential_merge:1; + unsigned verify:1; + + unsigned writeback_metadata:1; + unsigned writeback_running:1; + unsigned char writeback_percent; + unsigned writeback_delay; + + int writeback_rate_change; + int64_t writeback_rate_derivative; + uint64_t writeback_rate_target; + + unsigned writeback_rate_update_seconds; + unsigned writeback_rate_d_term; + unsigned writeback_rate_p_term_inverse; + unsigned writeback_rate_d_smooth; +}; + +enum alloc_watermarks { + WATERMARK_PRIO, + WATERMARK_METADATA, + WATERMARK_MOVINGGC, + WATERMARK_NONE, + WATERMARK_MAX +}; + +struct cache { + struct cache_set *set; + struct cache_sb sb; + struct bio sb_bio; + struct bio_vec sb_bv[1]; + + struct kobject kobj; + struct block_device *bdev; + + unsigned watermark[WATERMARK_MAX]; + + struct closure alloc; + struct workqueue_struct *alloc_workqueue; + + struct closure prio; + struct prio_set *disk_buckets; + + /* + * When allocating new buckets, prio_write() gets first dibs - since we + * may not be allocate at all without writing priorities and gens. + * prio_buckets[] contains the last buckets we wrote priorities to (so + * gc can mark them as metadata), prio_next[] contains the buckets + * allocated for the next prio write. + */ + uint64_t *prio_buckets; + uint64_t *prio_last_buckets; + + /* + * free: Buckets that are ready to be used + * + * free_inc: Incoming buckets - these are buckets that currently have + * cached data in them, and we can't reuse them until after we write + * their new gen to disk. After prio_write() finishes writing the new + * gens/prios, they'll be moved to the free list (and possibly discarded + * in the process) + * + * unused: GC found nothing pointing into these buckets (possibly + * because all the data they contained was overwritten), so we only + * need to discard them before they can be moved to the free list. + */ + DECLARE_FIFO(long, free); + DECLARE_FIFO(long, free_inc); + DECLARE_FIFO(long, unused); + + size_t fifo_last_bucket; + + /* Allocation stuff: */ + struct bucket *buckets; + + DECLARE_HEAP(struct bucket *, heap); + + /* + * max(gen - disk_gen) for all buckets. When it gets too big we have to + * call prio_write() to keep gens from wrapping. + */ + uint8_t need_save_prio; + unsigned gc_move_threshold; + + /* + * If nonzero, we know we aren't going to find any buckets to invalidate + * until a gc finishes - otherwise we could pointlessly burn a ton of + * cpu + */ + unsigned invalidate_needs_gc:1; + + bool discard; /* Get rid of? */ + + /* + * We preallocate structs for issuing discards to buckets, and keep them + * on this list when they're not in use; do_discard() issues discards + * whenever there's work to do and is called by free_some_buckets() and + * when a discard finishes. 
+ */ + atomic_t discards_in_flight; + struct list_head discards; + + struct journal_device journal; + + /* The rest of this all shows up in sysfs */ +#define IO_ERROR_SHIFT 20 + atomic_t io_errors; + atomic_t io_count; + + atomic_long_t meta_sectors_written; + atomic_long_t btree_sectors_written; + atomic_long_t sectors_written; + + struct bio_split_pool bio_split_hook; +}; + +struct gc_stat { + size_t nodes; + size_t key_bytes; + + size_t nkeys; + uint64_t data; /* sectors */ + uint64_t dirty; /* sectors */ + unsigned in_use; /* percent */ +}; + +/* + * Flag bits, for how the cache set is shutting down, and what phase it's at: + * + * CACHE_SET_UNREGISTERING means we're not just shutting down, we're detaching + * all the backing devices first (their cached data gets invalidated, and they + * won't automatically reattach). + * + * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; + * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. + * flushing dirty data). + * + * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down the + * allocation thread. + */ +#define CACHE_SET_UNREGISTERING 0 +#define CACHE_SET_STOPPING 1 +#define CACHE_SET_STOPPING_2 2 + +struct cache_set { + struct closure cl; + + struct list_head list; + struct kobject kobj; + struct kobject internal; + struct dentry *debug; + struct cache_accounting accounting; + + unsigned long flags; + + struct cache_sb sb; + + struct cache *cache[MAX_CACHES_PER_SET]; + struct cache *cache_by_alloc[MAX_CACHES_PER_SET]; + int caches_loaded; + + struct bcache_device **devices; + struct list_head cached_devs; + uint64_t cached_dev_sectors; + struct closure caching; + + struct closure_with_waitlist sb_write; + + mempool_t *search; + mempool_t *bio_meta; + struct bio_set *bio_split; + + /* For the btree cache */ + struct shrinker shrink; + + /* For the allocator itself */ + wait_queue_head_t alloc_wait; + + /* For the btree cache and anything allocation related */ + struct mutex bucket_lock; + + /* log2(bucket_size), in sectors */ + unsigned short bucket_bits; + + /* log2(block_size), in sectors */ + unsigned short block_bits; + + /* + * Default number of pages for a new btree node - may be less than a + * full bucket + */ + unsigned btree_pages; + + /* + * Lists of struct btrees; lru is the list for structs that have memory + * allocated for actual btree node, freed is for structs that do not. + * + * We never free a struct btree, except on shutdown - we just put it on + * the btree_cache_freed list and reuse it later. This simplifies the + * code, and it doesn't cost us much memory as the memory usage is + * dominated by buffers that hold the actual btree node data and those + * can be freed - and the number of struct btrees allocated is + * effectively bounded. + * + * btree_cache_freeable effectively is a small cache - we use it because + * high order page allocations can be rather expensive, and it's quite + * common to delete and allocate btree nodes in quick succession. It + * should never grow past ~2-3 nodes in practice. + */ + struct list_head btree_cache; + struct list_head btree_cache_freeable; + struct list_head btree_cache_freed; + + /* Number of elements in btree_cache + btree_cache_freeable lists */ + unsigned bucket_cache_used; + + /* + * If we need to allocate memory for a new btree node and that + * allocation fails, we can cannibalize another node in the btree cache + * to satisfy the allocation. 
However, only one thread can be doing this + * at a time, for obvious reasons - try_harder and try_wait are + * basically a lock for this that we can wait on asynchronously. The + * btree_root() macro releases the lock when it returns. + */ + struct closure *try_harder; + struct closure_waitlist try_wait; + uint64_t try_harder_start; + + /* + * When we free a btree node, we increment the gen of the bucket the + * node is in - but we can't rewrite the prios and gens until we + * finished whatever it is we were doing, otherwise after a crash the + * btree node would be freed but for say a split, we might not have the + * pointers to the new nodes inserted into the btree yet. + * + * This is a refcount that blocks prio_write() until the new keys are + * written. + */ + atomic_t prio_blocked; + struct closure_waitlist bucket_wait; + + /* + * For any bio we don't skip we subtract the number of sectors from + * rescale; when it hits 0 we rescale all the bucket priorities. + */ + atomic_t rescale; + /* + * When we invalidate buckets, we use both the priority and the amount + * of good data to determine which buckets to reuse first - to weight + * those together consistently we keep track of the smallest nonzero + * priority of any bucket. + */ + uint16_t min_prio; + + /* + * max(gen - gc_gen) for all buckets. When it gets too big we have to gc + * to keep gens from wrapping around. + */ + uint8_t need_gc; + struct gc_stat gc_stats; + size_t nbuckets; + + struct closure_with_waitlist gc; + /* Where in the btree gc currently is */ + struct bkey gc_done; + + /* + * The allocation code needs gc_mark in struct bucket to be correct, but + * it's not while a gc is in progress. Protected by bucket_lock. + */ + int gc_mark_valid; + + /* Counts how many sectors bio_insert has added to the cache */ + atomic_t sectors_to_gc; + + struct closure moving_gc; + struct closure_waitlist moving_gc_wait; + struct keybuf moving_gc_keys; + /* Number of moving GC bios in flight */ + atomic_t in_flight; + + struct btree *root; + +#ifdef CONFIG_BCACHE_DEBUG + struct btree *verify_data; + struct mutex verify_lock; +#endif + + unsigned nr_uuids; + struct uuid_entry *uuids; + BKEY_PADDED(uuid_bucket); + struct closure_with_waitlist uuid_write; + + /* + * A btree node on disk could have too many bsets for an iterator to fit + * on the stack - this is a single element mempool for btree_read_work() + */ + struct mutex fill_lock; + struct btree_iter *fill_iter; + + /* + * btree_sort() is a merge sort and requires temporary space - single + * element mempool + */ + struct mutex sort_lock; + struct bset *sort; + + /* List of buckets we're currently writing data to */ + struct list_head data_buckets; + spinlock_t data_bucket_lock; + + struct journal journal; + +#define CONGESTED_MAX 1024 + unsigned congested_last_us; + atomic_t congested; + + /* The rest of this all shows up in sysfs */ + unsigned congested_read_threshold_us; + unsigned congested_write_threshold_us; + + spinlock_t sort_time_lock; + struct time_stats sort_time; + struct time_stats btree_gc_time; + struct time_stats btree_split_time; + spinlock_t btree_read_time_lock; + struct time_stats btree_read_time; + struct time_stats try_harder_time; + + atomic_long_t cache_read_races; + atomic_long_t writeback_keys_done; + atomic_long_t writeback_keys_failed; + unsigned error_limit; + unsigned error_decay; + unsigned short journal_delay_ms; + unsigned verify:1; + unsigned key_merging_disabled:1; + unsigned gc_always_rewrite:1; + unsigned shrinker_disabled:1; + unsigned 
copy_gc_enabled:1; + +#define BUCKET_HASH_BITS 12 + struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; +}; + +static inline bool key_merging_disabled(struct cache_set *c) +{ +#ifdef CONFIG_BCACHE_DEBUG + return c->key_merging_disabled; +#else + return 0; +#endif +} + +struct bbio { + unsigned submit_time_us; + union { + struct bkey key; + uint64_t _pad[3]; + /* + * We only need pad = 3 here because we only ever carry around a + * single pointer - i.e. the pointer we're doing io to/from. + */ + }; + struct bio bio; +}; + +static inline unsigned local_clock_us(void) +{ + return local_clock() >> 10; +} + +#define MAX_BSETS 4U + +#define BTREE_PRIO USHRT_MAX +#define INITIAL_PRIO 32768 + +#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE) +#define btree_blocks(b) \ + ((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits)) + +#define btree_default_blocks(c) \ + ((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) + +#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS) +#define bucket_bytes(c) ((c)->sb.bucket_size << 9) +#define block_bytes(c) ((c)->sb.block_size << 9) + +#define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t)) +#define set_bytes(i) __set_bytes(i, i->keys) + +#define __set_blocks(i, k, c) DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c)) +#define set_blocks(i, c) __set_blocks(i, (i)->keys, c) + +#define node(i, j) ((struct bkey *) ((i)->d + (j))) +#define end(i) node(i, (i)->keys) + +#define index(i, b) \ + ((size_t) (((void *) i - (void *) (b)->sets[0].data) / \ + block_bytes(b->c))) + +#define btree_data_space(b) (PAGE_SIZE << (b)->page_order) + +#define prios_per_bucket(c) \ + ((bucket_bytes(c) - sizeof(struct prio_set)) / \ + sizeof(struct bucket_disk)) +#define prio_buckets(c) \ + DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) + +#define JSET_MAGIC 0x245235c1a3625032ULL +#define PSET_MAGIC 0x6750e15f87337f91ULL +#define BSET_MAGIC 0x90135c78b99e07f5ULL + +#define jset_magic(c) ((c)->sb.set_magic ^ JSET_MAGIC) +#define pset_magic(c) ((c)->sb.set_magic ^ PSET_MAGIC) +#define bset_magic(c) ((c)->sb.set_magic ^ BSET_MAGIC) + +/* Bkey fields: all units are in sectors */ + +#define KEY_FIELD(name, field, offset, size) \ + BITMASK(name, struct bkey, field, offset, size) + +#define PTR_FIELD(name, offset, size) \ + static inline uint64_t name(const struct bkey *k, unsigned i) \ + { return (k->ptr[i] >> offset) & ~(((uint64_t) ~0) << size); } \ + \ + static inline void SET_##name(struct bkey *k, unsigned i, uint64_t v)\ + { \ + k->ptr[i] &= ~(~((uint64_t) ~0 << size) << offset); \ + k->ptr[i] |= v << offset; \ + } + +KEY_FIELD(KEY_PTRS, high, 60, 3) +KEY_FIELD(HEADER_SIZE, high, 58, 2) +KEY_FIELD(KEY_CSUM, high, 56, 2) +KEY_FIELD(KEY_PINNED, high, 55, 1) +KEY_FIELD(KEY_DIRTY, high, 36, 1) + +KEY_FIELD(KEY_SIZE, high, 20, 16) +KEY_FIELD(KEY_INODE, high, 0, 20) + +/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ + +static inline uint64_t KEY_OFFSET(const struct bkey *k) +{ + return k->low; +} + +static inline void SET_KEY_OFFSET(struct bkey *k, uint64_t v) +{ + k->low = v; +} + +PTR_FIELD(PTR_DEV, 51, 12) +PTR_FIELD(PTR_OFFSET, 8, 43) +PTR_FIELD(PTR_GEN, 0, 8) + +#define PTR_CHECK_DEV ((1 << 12) - 1) + +#define PTR(gen, offset, dev) \ + ((((uint64_t) dev) << 51) | ((uint64_t) offset) << 8 | gen) + +static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) +{ + return s >> c->bucket_bits; +} + +static inline sector_t bucket_to_sector(struct cache_set *c, size_t b) +{ + return ((sector_t) b) << 
c->bucket_bits; +} + +static inline sector_t bucket_remainder(struct cache_set *c, sector_t s) +{ + return s & (c->sb.bucket_size - 1); +} + +static inline struct cache *PTR_CACHE(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + return c->cache[PTR_DEV(k, ptr)]; +} + +static inline size_t PTR_BUCKET_NR(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + return sector_to_bucket(c, PTR_OFFSET(k, ptr)); +} + +static inline struct bucket *PTR_BUCKET(struct cache_set *c, + const struct bkey *k, + unsigned ptr) +{ + return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr); +} + +/* Btree key macros */ + +/* + * The high bit being set is a relic from when we used it to do binary + * searches - it told you where a key started. It's not used anymore, + * and can probably be safely dropped. + */ +#define KEY(dev, sector, len) (struct bkey) \ +{ \ + .high = (1ULL << 63) | ((uint64_t) (len) << 20) | (dev), \ + .low = (sector) \ +} + +static inline void bkey_init(struct bkey *k) +{ + *k = KEY(0, 0, 0); +} + +#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) +#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) +#define MAX_KEY KEY(~(~0 << 20), ((uint64_t) ~0) >> 1, 0) +#define ZERO_KEY KEY(0, 0, 0) + +/* + * This is used for various on disk data structures - cache_sb, prio_set, bset, + * jset: The checksum is _always_ the first 8 bytes of these structs + */ +#define csum_set(i) \ + crc64(((void *) (i)) + sizeof(uint64_t), \ + ((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t))) + +/* Error handling macros */ + +#define btree_bug(b, ...) \ +do { \ + if (bch_cache_set_error((b)->c, __VA_ARGS__)) \ + dump_stack(); \ +} while (0) + +#define cache_bug(c, ...) \ +do { \ + if (bch_cache_set_error(c, __VA_ARGS__)) \ + dump_stack(); \ +} while (0) + +#define btree_bug_on(cond, b, ...) \ +do { \ + if (cond) \ + btree_bug(b, __VA_ARGS__); \ +} while (0) + +#define cache_bug_on(cond, c, ...) \ +do { \ + if (cond) \ + cache_bug(c, __VA_ARGS__); \ +} while (0) + +#define cache_set_err_on(cond, c, ...) \ +do { \ + if (cond) \ + bch_cache_set_error(c, __VA_ARGS__); \ +} while (0) + +/* Looping macros */ + +#define for_each_cache(ca, cs, iter) \ + for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++) + +#define for_each_bucket(b, ca) \ + for (b = (ca)->buckets + (ca)->sb.first_bucket; \ + b < (ca)->buckets + (ca)->sb.nbuckets; b++) + +static inline void __bkey_put(struct cache_set *c, struct bkey *k) +{ + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) + atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin); +} + +/* Blktrace macros */ + +#define blktrace_msg(c, fmt, ...) \ +do { \ + struct request_queue *q = bdev_get_queue(c->bdev); \ + if (q) \ + blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \ +} while (0) + +#define blktrace_msg_all(s, fmt, ...) \ +do { \ + struct cache *_c; \ + unsigned i; \ + for_each_cache(_c, (s), i) \ + blktrace_msg(_c, fmt, ##__VA_ARGS__); \ +} while (0) + +static inline void cached_dev_put(struct cached_dev *dc) +{ + if (atomic_dec_and_test(&dc->count)) + schedule_work(&dc->detach); +} + +static inline bool cached_dev_get(struct cached_dev *dc) +{ + if (!atomic_inc_not_zero(&dc->count)) + return false; + + /* Paired with the mb in cached_dev_attach */ + smp_mb__after_atomic_inc(); + return true; +} + +/* + * bucket_gc_gen() returns the difference between the bucket's current gen and + * the oldest gen of any pointer into that bucket in the btree (last_gc). 
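+ *
+ * Note that this difference is computed in uint8_t, so it stays correct
+ * across wraparound: e.g. b->gen == 5 and b->last_gc == 250 gives
+ * (uint8_t) (5 - 250) == 11, the true distance mod 256 (the same holds for
+ * bucket_disk_gen() below). The allocator skips invalidating buckets whose
+ * distance approaches the maximums defined below, which is what keeps these
+ * byte-sized distances unambiguous.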
+ * + * bucket_disk_gen() returns the difference between the current gen and the gen + * on disk; they're both used to make sure gens don't wrap around. + */ + +static inline uint8_t bucket_gc_gen(struct bucket *b) +{ + return b->gen - b->last_gc; +} + +static inline uint8_t bucket_disk_gen(struct bucket *b) +{ + return b->gen - b->disk_gen; +} + +#define BUCKET_GC_GEN_MAX 96U +#define BUCKET_DISK_GEN_MAX 64U + +#define kobj_attribute_write(n, fn) \ + static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn) + +#define kobj_attribute_rw(n, show, store) \ + static struct kobj_attribute ksysfs_##n = \ + __ATTR(n, S_IWUSR|S_IRUSR, show, store) + +/* Forward declarations */ + +void bch_writeback_queue(struct cached_dev *); +void bch_writeback_add(struct cached_dev *, unsigned); + +void bch_count_io_errors(struct cache *, int, const char *); +void bch_bbio_count_io_errors(struct cache_set *, struct bio *, + int, const char *); +void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); +void bch_bbio_free(struct bio *, struct cache_set *); +struct bio *bch_bbio_alloc(struct cache_set *); + +struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *); +void bch_generic_make_request(struct bio *, struct bio_split_pool *); +void __bch_submit_bbio(struct bio *, struct cache_set *); +void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); + +uint8_t bch_inc_gen(struct cache *, struct bucket *); +void bch_rescale_priorities(struct cache_set *, int); +bool bch_bucket_add_unused(struct cache *, struct bucket *); +void bch_allocator_thread(struct closure *); + +long bch_bucket_alloc(struct cache *, unsigned, struct closure *); +void bch_bucket_free(struct cache_set *, struct bkey *); + +int __bch_bucket_alloc_set(struct cache_set *, unsigned, + struct bkey *, int, struct closure *); +int bch_bucket_alloc_set(struct cache_set *, unsigned, + struct bkey *, int, struct closure *); + +__printf(2, 3) +bool bch_cache_set_error(struct cache_set *, const char *, ...); + +void bch_prio_write(struct cache *); +void bch_write_bdev_super(struct cached_dev *, struct closure *); + +extern struct workqueue_struct *bcache_wq, *bch_gc_wq; +extern const char * const bch_cache_modes[]; +extern struct mutex bch_register_lock; +extern struct list_head bch_cache_sets; + +extern struct kobj_type bch_cached_dev_ktype; +extern struct kobj_type bch_flash_dev_ktype; +extern struct kobj_type bch_cache_set_ktype; +extern struct kobj_type bch_cache_set_internal_ktype; +extern struct kobj_type bch_cache_ktype; + +void bch_cached_dev_release(struct kobject *); +void bch_flash_dev_release(struct kobject *); +void bch_cache_set_release(struct kobject *); +void bch_cache_release(struct kobject *); + +int bch_uuid_write(struct cache_set *); +void bcache_write_super(struct cache_set *); + +int bch_flash_dev_create(struct cache_set *c, uint64_t size); + +int bch_cached_dev_attach(struct cached_dev *, struct cache_set *); +void bch_cached_dev_detach(struct cached_dev *); +void bch_cached_dev_run(struct cached_dev *); +void bcache_device_stop(struct bcache_device *); + +void bch_cache_set_unregister(struct cache_set *); +void bch_cache_set_stop(struct cache_set *); + +struct cache_set *bch_cache_set_alloc(struct cache_sb *); +void bch_btree_cache_free(struct cache_set *); +int bch_btree_cache_alloc(struct cache_set *); +void bch_writeback_init_cached_dev(struct cached_dev *); +void bch_moving_init_cache_set(struct cache_set *); + +void bch_cache_allocator_exit(struct cache *ca); 
+int bch_cache_allocator_init(struct cache *ca);
+
+void bch_debug_exit(void);
+int bch_debug_init(struct kobject *);
+void bch_writeback_exit(void);
+int bch_writeback_init(void);
+void bch_request_exit(void);
+int bch_request_init(void);
+void bch_btree_exit(void);
+int bch_btree_init(void);
+
+#endif /* _BCACHE_H */
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
new file mode 100644
index 000000000000..bb0f7ae14b3c
--- /dev/null
+++ b/drivers/md/bcache/bset.c
@@ -0,0 +1,1190 @@
+/*
+ * Code for working with individual keys, and sorted sets of keys within a
+ * btree node
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcache.h"
+#include "btree.h"
+#include "debug.h"
+
+#include 
+
+/* Keylists */
+
+void bch_keylist_copy(struct keylist *dest, struct keylist *src)
+{
+	*dest = *src;
+
+	if (src->list == src->d) {
+		size_t n = (uint64_t *) src->top - src->d;
+		dest->top = (struct bkey *) &dest->d[n];
+		dest->list = dest->d;
+	}
+}
+
+int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c)
+{
+	unsigned oldsize = (uint64_t *) l->top - l->list;
+	unsigned newsize = oldsize + 2 + nptrs;
+	uint64_t *new;
+
+	/* The journalling code doesn't handle the case where the set of keys
+	 * to insert is bigger than an empty write: if we just return -ENOMEM
+	 * here, bio_insert() and bio_invalidate() will insert the keys created
+	 * so far and finish the rest when the keylist is empty.
+	 */
+	if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
+		return -ENOMEM;
+
+	newsize = roundup_pow_of_two(newsize);
+
+	if (newsize <= KEYLIST_INLINE ||
+	    roundup_pow_of_two(oldsize) == newsize)
+		return 0;
+
+	new = krealloc(l->list == l->d ? NULL : l->list,
+		       sizeof(uint64_t) * newsize, GFP_NOIO);
+
+	if (!new)
+		return -ENOMEM;
+
+	if (l->list == l->d)
+		memcpy(new, l->list, sizeof(uint64_t) * KEYLIST_INLINE);
+
+	l->list = new;
+	l->top = (struct bkey *) (&l->list[oldsize]);
+
+	return 0;
+}
+
+struct bkey *bch_keylist_pop(struct keylist *l)
+{
+	struct bkey *k = l->bottom;
+
+	if (k == l->top)
+		return NULL;
+
+	while (bkey_next(k) != l->top)
+		k = bkey_next(k);
+
+	return l->top = k;
+}
+
+/* Pointer validation */
+
+bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
+{
+	unsigned i;
+
+	if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)))
+		goto bad;
+
+	if (!level && KEY_SIZE(k) > KEY_OFFSET(k))
+		goto bad;
+
+	if (!KEY_SIZE(k))
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (ptr_available(c, k, i)) {
+			struct cache *ca = PTR_CACHE(c, k, i);
+			size_t bucket = PTR_BUCKET_NR(c, k, i);
+			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+			if (KEY_SIZE(k) + r > c->sb.bucket_size ||
+			    bucket < ca->sb.first_bucket ||
+			    bucket >= ca->sb.nbuckets)
+				goto bad;
+		}
+
+	return false;
+bad:
+	cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k));
+	return true;
+}
+
+bool bch_ptr_bad(struct btree *b, const struct bkey *k)
+{
+	struct bucket *g;
+	unsigned i, stale;
+
+	if (!bkey_cmp(k, &ZERO_KEY) ||
+	    !KEY_PTRS(k) ||
+	    bch_ptr_invalid(b, k))
+		return true;
+
+	if (KEY_PTRS(k) && PTR_DEV(k, 0) == PTR_CHECK_DEV)
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (ptr_available(b->c, k, i)) {
+			g = PTR_BUCKET(b->c, k, i);
+			stale = ptr_stale(b->c, k, i);
+
+			btree_bug_on(stale > 96, b,
+				     "key too stale: %i, need_gc %u",
+				     stale, b->c->need_gc);
+
+			btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
+				     b, "stale dirty pointer");
+
+			if (stale)
+				return true;
+
+#ifdef CONFIG_BCACHE_EDEBUG
+			if
(!mutex_trylock(&b->c->bucket_lock)) + continue; + + if (b->level) { + if (KEY_DIRTY(k) || + g->prio != BTREE_PRIO || + (b->c->gc_mark_valid && + GC_MARK(g) != GC_MARK_METADATA)) + goto bug; + + } else { + if (g->prio == BTREE_PRIO) + goto bug; + + if (KEY_DIRTY(k) && + b->c->gc_mark_valid && + GC_MARK(g) != GC_MARK_DIRTY) + goto bug; + } + mutex_unlock(&b->c->bucket_lock); +#endif + } + + return false; +#ifdef CONFIG_BCACHE_EDEBUG +bug: + mutex_unlock(&b->c->bucket_lock); + btree_bug(b, "inconsistent pointer %s: bucket %li pin %i " + "prio %i gen %i last_gc %i mark %llu gc_gen %i", pkey(k), + PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + return true; +#endif +} + +/* Key/pointer manipulation */ + +void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src, + unsigned i) +{ + BUG_ON(i > KEY_PTRS(src)); + + /* Only copy the header, key, and one pointer. */ + memcpy(dest, src, 2 * sizeof(uint64_t)); + dest->ptr[0] = src->ptr[i]; + SET_KEY_PTRS(dest, 1); + /* We didn't copy the checksum so clear that bit. */ + SET_KEY_CSUM(dest, 0); +} + +bool __bch_cut_front(const struct bkey *where, struct bkey *k) +{ + unsigned i, len = 0; + + if (bkey_cmp(where, &START_KEY(k)) <= 0) + return false; + + if (bkey_cmp(where, k) < 0) + len = KEY_OFFSET(k) - KEY_OFFSET(where); + else + bkey_copy_key(k, where); + + for (i = 0; i < KEY_PTRS(k); i++) + SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + KEY_SIZE(k) - len); + + BUG_ON(len > KEY_SIZE(k)); + SET_KEY_SIZE(k, len); + return true; +} + +bool __bch_cut_back(const struct bkey *where, struct bkey *k) +{ + unsigned len = 0; + + if (bkey_cmp(where, k) >= 0) + return false; + + BUG_ON(KEY_INODE(where) != KEY_INODE(k)); + + if (bkey_cmp(where, &START_KEY(k)) > 0) + len = KEY_OFFSET(where) - KEY_START(k); + + bkey_copy_key(k, where); + + BUG_ON(len > KEY_SIZE(k)); + SET_KEY_SIZE(k, len); + return true; +} + +static uint64_t merge_chksums(struct bkey *l, struct bkey *r) +{ + return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) & + ~((uint64_t)1 << 63); +} + +/* Tries to merge l and r: l should be lower than r + * Returns true if we were able to merge. If we did merge, l will be the merged + * key, r will be untouched. 
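+ *
+ * For example: if l is an extent ending at sector 8 with KEY_SIZE 8, and r
+ * ends at sector 16 with KEY_SIZE 8 with its pointers starting where l's
+ * end, the merged l ends at sector 16 with KEY_SIZE 16.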
+ */ +bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r) +{ + unsigned i; + + if (key_merging_disabled(b->c)) + return false; + + if (KEY_PTRS(l) != KEY_PTRS(r) || + KEY_DIRTY(l) != KEY_DIRTY(r) || + bkey_cmp(l, &START_KEY(r))) + return false; + + for (i = 0; i < KEY_PTRS(l); i++) + if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) + return false; + + /* Keys with no pointers aren't restricted to one bucket and could + * overflow KEY_SIZE + */ + if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) { + SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l)); + SET_KEY_SIZE(l, USHRT_MAX); + + bch_cut_front(l, r); + return false; + } + + if (KEY_CSUM(l)) { + if (KEY_CSUM(r)) + l->ptr[KEY_PTRS(l)] = merge_chksums(l, r); + else + SET_KEY_CSUM(l, 0); + } + + SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r)); + SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r)); + + return true; +} + +/* Binary tree stuff for auxiliary search trees */ + +static unsigned inorder_next(unsigned j, unsigned size) +{ + if (j * 2 + 1 < size) { + j = j * 2 + 1; + + while (j * 2 < size) + j *= 2; + } else + j >>= ffz(j) + 1; + + return j; +} + +static unsigned inorder_prev(unsigned j, unsigned size) +{ + if (j * 2 < size) { + j = j * 2; + + while (j * 2 + 1 < size) + j = j * 2 + 1; + } else + j >>= ffs(j); + + return j; +} + +/* I have no idea why this code works... and I'm the one who wrote it + * + * However, I do know what it does: + * Given a binary tree constructed in an array (i.e. how you normally implement + * a heap), it converts a node in the tree - referenced by array index - to the + * index it would have if you did an inorder traversal. + * + * Also tested for every j, size up to size somewhere around 6 million. 
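+ * As a worked example of the conversion below: for size 7 (extra = 6, per
+ * the formula below), tree nodes j = 1..6 map to inorder positions
+ * 4 2 6 1 3 5 - i.e. the leaves 4, 5 and 6 land at positions 1, 3 and 5,
+ * with the root in the middle.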
+ *
+ * The binary tree starts at array index 1, not 0
+ * extra is a function of size:
+ *   extra = (size - rounddown_pow_of_two(size - 1)) << 1;
+ */
+static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
+{
+	unsigned b = fls(j);
+	unsigned shift = fls(size - 1) - b;
+
+	j ^= 1U << (b - 1);
+	j <<= 1;
+	j |= 1;
+	j <<= shift;
+
+	if (j > extra)
+		j -= (j - extra) >> 1;
+
+	return j;
+}
+
+static unsigned to_inorder(unsigned j, struct bset_tree *t)
+{
+	return __to_inorder(j, t->size, t->extra);
+}
+
+static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
+{
+	unsigned shift;
+
+	if (j > extra)
+		j += j - extra;
+
+	shift = ffs(j);
+
+	j >>= shift;
+	j |= roundup_pow_of_two(size) >> shift;
+
+	return j;
+}
+
+static unsigned inorder_to_tree(unsigned j, struct bset_tree *t)
+{
+	return __inorder_to_tree(j, t->size, t->extra);
+}
+
+#if 0
+void inorder_test(void)
+{
+	unsigned long done = 0;
+	ktime_t start = ktime_get();
+
+	for (unsigned size = 2;
+	     size < 65536000;
+	     size++) {
+		unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
+		unsigned i = 1, j = rounddown_pow_of_two(size - 1);
+
+		if (!(size % 4096))
+			printk(KERN_NOTICE "loop %u, %llu per us\n", size,
+			       done / ktime_us_delta(ktime_get(), start));
+
+		while (1) {
+			if (__inorder_to_tree(i, size, extra) != j)
+				panic("size %10u j %10u i %10u", size, j, i);
+
+			if (__to_inorder(j, size, extra) != i)
+				panic("size %10u j %10u i %10u", size, j, i);
+
+			if (j == rounddown_pow_of_two(size) - 1)
+				break;
+
+			BUG_ON(inorder_prev(inorder_next(j, size), size) != j);
+
+			j = inorder_next(j, size);
+			i++;
+		}
+
+		done += size - 1;
+	}
+}
+#endif
+
+/*
+ * Cacheline/offset <-> bkey pointer arithmetic:
+ *
+ * t->tree is a binary search tree in an array; each node corresponds to a key
+ * in one cacheline in t->set (BSET_CACHELINE bytes).
+ *
+ * This means we don't have to store the full index of the key that a node in
+ * the binary tree points to; to_inorder() gives us the cacheline, and then
+ * bkey_float->m gives us the offset within that cacheline, in units of 8 bytes.
+ *
+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
+ * make this work.
+ *
+ * To construct the bfloat for an arbitrary key we need to know what the key
+ * immediately preceding it is: we have to check if the two keys differ in the
+ * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size
+ * of the previous key so we can walk backwards to it from t->tree[j]'s key.
+ */
+
+static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline,
+				      unsigned offset)
+{
+	return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
+}
+
+static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
+{
+	return ((void *) k - (void *) t->data) / BSET_CACHELINE;
+}
+
+static unsigned bkey_to_cacheline_offset(struct bkey *k)
+{
+	return ((size_t) k & (BSET_CACHELINE - 1)) / sizeof(uint64_t);
+}
+
+static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
+{
+	return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
+}
+
+static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
+{
+	return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
+}
+
+/*
+ * For the write set - the one we're currently inserting keys into - we don't
+ * maintain a full search tree, we just keep a simple lookup table in t->prev.
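+ * Each t->prev[cacheline] entry is just the offset of the first key in that
+ * cacheline, in units of uint64_t (i.e. what bkey_to_cacheline_offset()
+ * returns), so table_to_bkey() below is simple pointer arithmetic.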
+ */ +static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline) +{ + return cacheline_to_bkey(t, cacheline, t->prev[cacheline]); +} + +static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift) +{ +#ifdef CONFIG_X86_64 + asm("shrd %[shift],%[high],%[low]" + : [low] "+Rm" (low) + : [high] "R" (high), + [shift] "ci" (shift) + : "cc"); +#else + low >>= shift; + low |= (high << 1) << (63U - shift); +#endif + return low; +} + +static inline unsigned bfloat_mantissa(const struct bkey *k, + struct bkey_float *f) +{ + const uint64_t *p = &k->low - (f->exponent >> 6); + return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK; +} + +static void make_bfloat(struct bset_tree *t, unsigned j) +{ + struct bkey_float *f = &t->tree[j]; + struct bkey *m = tree_to_bkey(t, j); + struct bkey *p = tree_to_prev_bkey(t, j); + + struct bkey *l = is_power_of_2(j) + ? t->data->start + : tree_to_prev_bkey(t, j >> ffs(j)); + + struct bkey *r = is_power_of_2(j + 1) + ? node(t->data, t->data->keys - bkey_u64s(&t->end)) + : tree_to_bkey(t, j >> (ffz(j) + 1)); + + BUG_ON(m < l || m > r); + BUG_ON(bkey_next(p) != m); + + if (KEY_INODE(l) != KEY_INODE(r)) + f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64; + else + f->exponent = fls64(r->low ^ l->low); + + f->exponent = max_t(int, f->exponent - BKEY_MANTISSA_BITS, 0); + + /* + * Setting f->exponent = 127 flags this node as failed, and causes the + * lookup code to fall back to comparing against the original key. + */ + + if (bfloat_mantissa(m, f) != bfloat_mantissa(p, f)) + f->mantissa = bfloat_mantissa(m, f) - 1; + else + f->exponent = 127; +} + +static void bset_alloc_tree(struct btree *b, struct bset_tree *t) +{ + if (t != b->sets) { + unsigned j = roundup(t[-1].size, + 64 / sizeof(struct bkey_float)); + + t->tree = t[-1].tree + j; + t->prev = t[-1].prev + j; + } + + while (t < b->sets + MAX_BSETS) + t++->size = 0; +} + +static void bset_build_unwritten_tree(struct btree *b) +{ + struct bset_tree *t = b->sets + b->nsets; + + bset_alloc_tree(b, t); + + if (t->tree != b->sets->tree + bset_tree_space(b)) { + t->prev[0] = bkey_to_cacheline_offset(t->data->start); + t->size = 1; + } +} + +static void bset_build_written_tree(struct btree *b) +{ + struct bset_tree *t = b->sets + b->nsets; + struct bkey *k = t->data->start; + unsigned j, cacheline = 1; + + bset_alloc_tree(b, t); + + t->size = min_t(unsigned, + bkey_to_cacheline(t, end(t->data)), + b->sets->tree + bset_tree_space(b) - t->tree); + + if (t->size < 2) { + t->size = 0; + return; + } + + t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; + + /* First we figure out where the first key in each cacheline is */ + for (j = inorder_next(0, t->size); + j; + j = inorder_next(j, t->size)) { + while (bkey_to_cacheline(t, k) != cacheline) + k = bkey_next(k); + + t->prev[j] = bkey_u64s(k); + k = bkey_next(k); + cacheline++; + t->tree[j].m = bkey_to_cacheline_offset(k); + } + + while (bkey_next(k) != end(t->data)) + k = bkey_next(k); + + t->end = *k; + + /* Then we build the tree */ + for (j = inorder_next(0, t->size); + j; + j = inorder_next(j, t->size)) + make_bfloat(t, j); +} + +void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k) +{ + struct bset_tree *t; + unsigned inorder, j = 1; + + for (t = b->sets; t <= &b->sets[b->nsets]; t++) + if (k < end(t->data)) + goto found_set; + + BUG(); +found_set: + if (!t->size || !bset_written(b, t)) + return; + + inorder = bkey_to_cacheline(t, k); + + if (k == t->data->start) + goto fix_left; + + if (bkey_next(k) == 
end(t->data)) { + t->end = *k; + goto fix_right; + } + + j = inorder_to_tree(inorder, t); + + if (j && + j < t->size && + k == tree_to_bkey(t, j)) +fix_left: do { + make_bfloat(t, j); + j = j * 2; + } while (j < t->size); + + j = inorder_to_tree(inorder + 1, t); + + if (j && + j < t->size && + k == tree_to_prev_bkey(t, j)) +fix_right: do { + make_bfloat(t, j); + j = j * 2 + 1; + } while (j < t->size); +} + +void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k) +{ + struct bset_tree *t = &b->sets[b->nsets]; + unsigned shift = bkey_u64s(k); + unsigned j = bkey_to_cacheline(t, k); + + /* We're getting called from btree_split() or btree_gc, just bail out */ + if (!t->size) + return; + + /* k is the key we just inserted; we need to find the entry in the + * lookup table for the first key that is strictly greater than k: + * it's either k's cacheline or the next one + */ + if (j < t->size && + table_to_bkey(t, j) <= k) + j++; + + /* Adjust all the lookup table entries, and find a new key for any that + * have gotten too big + */ + for (; j < t->size; j++) { + t->prev[j] += shift; + + if (t->prev[j] > 7) { + k = table_to_bkey(t, j - 1); + + while (k < cacheline_to_bkey(t, j, 0)) + k = bkey_next(k); + + t->prev[j] = bkey_to_cacheline_offset(k); + } + } + + if (t->size == b->sets->tree + bset_tree_space(b) - t->tree) + return; + + /* Possibly add a new entry to the end of the lookup table */ + + for (k = table_to_bkey(t, t->size - 1); + k != end(t->data); + k = bkey_next(k)) + if (t->size == bkey_to_cacheline(t, k)) { + t->prev[t->size] = bkey_to_cacheline_offset(k); + t->size++; + } +} + +void bch_bset_init_next(struct btree *b) +{ + struct bset *i = write_block(b); + + if (i != b->sets[0].data) { + b->sets[++b->nsets].data = i; + i->seq = b->sets[0].data->seq; + } else + get_random_bytes(&i->seq, sizeof(uint64_t)); + + i->magic = bset_magic(b->c); + i->version = 0; + i->keys = 0; + + bset_build_unwritten_tree(b); +} + +struct bset_search_iter { + struct bkey *l, *r; +}; + +static struct bset_search_iter bset_search_write_set(struct btree *b, + struct bset_tree *t, + const struct bkey *search) +{ + unsigned li = 0, ri = t->size; + + BUG_ON(!b->nsets && + t->size < bkey_to_cacheline(t, end(t->data))); + + while (li + 1 != ri) { + unsigned m = (li + ri) >> 1; + + if (bkey_cmp(table_to_bkey(t, m), search) > 0) + ri = m; + else + li = m; + } + + return (struct bset_search_iter) { + table_to_bkey(t, li), + ri < t->size ? table_to_bkey(t, ri) : end(t->data) + }; +} + +static struct bset_search_iter bset_search_tree(struct btree *b, + struct bset_tree *t, + const struct bkey *search) +{ + struct bkey *l, *r; + struct bkey_float *f; + unsigned inorder, j, n = 1; + + do { + unsigned p = n << 4; + p &= ((int) (p - t->size)) >> 31; + + prefetch(&t->tree[p]); + + j = n; + f = &t->tree[j]; + + /* + * n = (f->mantissa > bfloat_mantissa()) + * ? j * 2 + * : j * 2 + 1; + * + * We need to subtract 1 from f->mantissa for the sign bit trick + * to work - that's done in make_bfloat() + */ + if (likely(f->exponent != 127)) + n = j * 2 + (((unsigned) + (f->mantissa - + bfloat_mantissa(search, f))) >> 31); + else + n = (bkey_cmp(tree_to_bkey(t, j), search) > 0) + ? j * 2 + : j * 2 + 1; + } while (n < t->size); + + inorder = to_inorder(j, t); + + /* + * n would have been the node we recursed to - the low bit tells us if + * we recursed left or recursed right. 
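+ * (j is the last node we actually compared against; if the low bit is set
+ * the search key is greater than j's key, so the left edge of the range is
+ * j's key and the right edge is the next key in inorder, and vice versa.)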
+ */ + if (n & 1) { + l = cacheline_to_bkey(t, inorder, f->m); + + if (++inorder != t->size) { + f = &t->tree[inorder_next(j, t->size)]; + r = cacheline_to_bkey(t, inorder, f->m); + } else + r = end(t->data); + } else { + r = cacheline_to_bkey(t, inorder, f->m); + + if (--inorder) { + f = &t->tree[inorder_prev(j, t->size)]; + l = cacheline_to_bkey(t, inorder, f->m); + } else + l = t->data->start; + } + + return (struct bset_search_iter) {l, r}; +} + +struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, + const struct bkey *search) +{ + struct bset_search_iter i; + + /* + * First, we search for a cacheline, then lastly we do a linear search + * within that cacheline. + * + * To search for the cacheline, there's three different possibilities: + * * The set is too small to have a search tree, so we just do a linear + * search over the whole set. + * * The set is the one we're currently inserting into; keeping a full + * auxiliary search tree up to date would be too expensive, so we + * use a much simpler lookup table to do a binary search - + * bset_search_write_set(). + * * Or we use the auxiliary search tree we constructed earlier - + * bset_search_tree() + */ + + if (unlikely(!t->size)) { + i.l = t->data->start; + i.r = end(t->data); + } else if (bset_written(b, t)) { + /* + * Each node in the auxiliary search tree covers a certain range + * of bits, and keys above and below the set it covers might + * differ outside those bits - so we have to special case the + * start and end - handle that here: + */ + + if (unlikely(bkey_cmp(search, &t->end) >= 0)) + return end(t->data); + + if (unlikely(bkey_cmp(search, t->data->start) < 0)) + return t->data->start; + + i = bset_search_tree(b, t, search); + } else + i = bset_search_write_set(b, t, search); + +#ifdef CONFIG_BCACHE_EDEBUG + BUG_ON(bset_written(b, t) && + i.l != t->data->start && + bkey_cmp(tree_to_prev_bkey(t, + inorder_to_tree(bkey_to_cacheline(t, i.l), t)), + search) > 0); + + BUG_ON(i.r != end(t->data) && + bkey_cmp(i.r, search) <= 0); +#endif + + while (likely(i.l != i.r) && + bkey_cmp(i.l, search) <= 0) + i.l = bkey_next(i.l); + + return i.l; +} + +/* Btree iterator */ + +static inline bool btree_iter_cmp(struct btree_iter_set l, + struct btree_iter_set r) +{ + int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); + + return c ? 
c > 0 : l.k < r.k; +} + +static inline bool btree_iter_end(struct btree_iter *iter) +{ + return !iter->used; +} + +void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, + struct bkey *end) +{ + if (k != end) + BUG_ON(!heap_add(iter, + ((struct btree_iter_set) { k, end }), + btree_iter_cmp)); +} + +struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter, + struct bkey *search, struct bset_tree *start) +{ + struct bkey *ret = NULL; + iter->size = ARRAY_SIZE(iter->data); + iter->used = 0; + + for (; start <= &b->sets[b->nsets]; start++) { + ret = bch_bset_search(b, start, search); + bch_btree_iter_push(iter, ret, end(start->data)); + } + + return ret; +} + +struct bkey *bch_btree_iter_next(struct btree_iter *iter) +{ + struct btree_iter_set unused; + struct bkey *ret = NULL; + + if (!btree_iter_end(iter)) { + ret = iter->data->k; + iter->data->k = bkey_next(iter->data->k); + + if (iter->data->k > iter->data->end) { + __WARN(); + iter->data->k = iter->data->end; + } + + if (iter->data->k == iter->data->end) + heap_pop(iter, unused, btree_iter_cmp); + else + heap_sift(iter, 0, btree_iter_cmp); + } + + return ret; +} + +struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, + struct btree *b, ptr_filter_fn fn) +{ + struct bkey *ret; + + do { + ret = bch_btree_iter_next(iter); + } while (ret && fn(b, ret)); + + return ret; +} + +struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search) +{ + struct btree_iter iter; + + bch_btree_iter_init(b, &iter, search); + return bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); +} + +/* Mergesort */ + +static void btree_sort_fixup(struct btree_iter *iter) +{ + while (iter->used > 1) { + struct btree_iter_set *top = iter->data, *i = top + 1; + struct bkey *k; + + if (iter->used > 2 && + btree_iter_cmp(i[0], i[1])) + i++; + + for (k = i->k; + k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0; + k = bkey_next(k)) + if (top->k > i->k) + __bch_cut_front(top->k, k); + else if (KEY_SIZE(k)) + bch_cut_back(&START_KEY(k), top->k); + + if (top->k < i->k || k == i->k) + break; + + heap_sift(iter, i - top, btree_iter_cmp); + } +} + +static void btree_mergesort(struct btree *b, struct bset *out, + struct btree_iter *iter, + bool fixup, bool remove_stale) +{ + struct bkey *k, *last = NULL; + bool (*bad)(struct btree *, const struct bkey *) = remove_stale + ? bch_ptr_bad + : bch_ptr_invalid; + + while (!btree_iter_end(iter)) { + if (fixup && !b->level) + btree_sort_fixup(iter); + + k = bch_btree_iter_next(iter); + if (bad(b, k)) + continue; + + if (!last) { + last = out->start; + bkey_copy(last, k); + } else if (b->level || + !bch_bkey_try_merge(b, last, k)) { + last = bkey_next(last); + bkey_copy(last, k); + } + } + + out->keys = last ? 
(uint64_t *) bkey_next(last) - out->d : 0; + + pr_debug("sorted %i keys", out->keys); + bch_check_key_order(b, out); +} + +static void __btree_sort(struct btree *b, struct btree_iter *iter, + unsigned start, unsigned order, bool fixup) +{ + uint64_t start_time; + bool remove_stale = !b->written; + struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, + order); + if (!out) { + mutex_lock(&b->c->sort_lock); + out = b->c->sort; + order = ilog2(bucket_pages(b->c)); + } + + start_time = local_clock(); + + btree_mergesort(b, out, iter, fixup, remove_stale); + b->nsets = start; + + if (!fixup && !start && b->written) + bch_btree_verify(b, out); + + if (!start && order == b->page_order) { + /* + * Our temporary buffer is the same size as the btree node's + * buffer, we can just swap buffers instead of doing a big + * memcpy() + */ + + out->magic = bset_magic(b->c); + out->seq = b->sets[0].data->seq; + out->version = b->sets[0].data->version; + swap(out, b->sets[0].data); + + if (b->c->sort == b->sets[0].data) + b->c->sort = out; + } else { + b->sets[start].data->keys = out->keys; + memcpy(b->sets[start].data->start, out->start, + (void *) end(out) - (void *) out->start); + } + + if (out == b->c->sort) + mutex_unlock(&b->c->sort_lock); + else + free_pages((unsigned long) out, order); + + if (b->written) + bset_build_written_tree(b); + + if (!start) { + spin_lock(&b->c->sort_time_lock); + time_stats_update(&b->c->sort_time, start_time); + spin_unlock(&b->c->sort_time_lock); + } +} + +void bch_btree_sort_partial(struct btree *b, unsigned start) +{ + size_t oldsize = 0, order = b->page_order, keys = 0; + struct btree_iter iter; + __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); + + BUG_ON(b->sets[b->nsets].data == write_block(b) && + (b->sets[b->nsets].size || b->nsets)); + + if (b->written) + oldsize = bch_count_data(b); + + if (start) { + unsigned i; + + for (i = start; i <= b->nsets; i++) + keys += b->sets[i].data->keys; + + order = roundup_pow_of_two(__set_bytes(b->sets->data, keys)) / PAGE_SIZE; + if (order) + order = ilog2(order); + } + + __btree_sort(b, &iter, start, order, false); + + EBUG_ON(b->written && bch_count_data(b) != oldsize); +} + +void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) +{ + BUG_ON(!b->written); + __btree_sort(b, iter, 0, b->page_order, true); +} + +void bch_btree_sort_into(struct btree *b, struct btree *new) +{ + uint64_t start_time = local_clock(); + + struct btree_iter iter; + bch_btree_iter_init(b, &iter, NULL); + + btree_mergesort(b, new->sets->data, &iter, false, true); + + spin_lock(&b->c->sort_time_lock); + time_stats_update(&b->c->sort_time, start_time); + spin_unlock(&b->c->sort_time_lock); + + bkey_copy_key(&new->key, &b->key); + new->sets->size = 0; +} + +void bch_btree_sort_lazy(struct btree *b) +{ + if (b->nsets) { + unsigned i, j, keys = 0, total; + + for (i = 0; i <= b->nsets; i++) + keys += b->sets[i].data->keys; + + total = keys; + + for (j = 0; j < b->nsets; j++) { + if (keys * 2 < total || + keys < 1000) { + bch_btree_sort_partial(b, j); + return; + } + + keys -= b->sets[j].data->keys; + } + + /* Must sort if b->nsets == 3 or we'll overflow */ + if (b->nsets >= (MAX_BSETS - 1) - b->level) { + bch_btree_sort(b); + return; + } + } + + bset_build_written_tree(b); +} + +/* Sysfs stuff */ + +struct bset_stats { + size_t nodes; + size_t sets_written, sets_unwritten; + size_t bytes_written, bytes_unwritten; + size_t floats, failed; +}; + +static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, + 
struct bset_stats *stats)
+{
+	struct bkey *k;
+	unsigned i;
+
+	stats->nodes++;
+
+	for (i = 0; i <= b->nsets; i++) {
+		struct bset_tree *t = &b->sets[i];
+		size_t bytes = t->data->keys * sizeof(uint64_t);
+		size_t j;
+
+		if (bset_written(b, t)) {
+			stats->sets_written++;
+			stats->bytes_written += bytes;
+
+			stats->floats += t->size - 1;
+
+			for (j = 1; j < t->size; j++)
+				if (t->tree[j].exponent == 127)
+					stats->failed++;
+		} else {
+			stats->sets_unwritten++;
+			stats->bytes_unwritten += bytes;
+		}
+	}
+
+	if (b->level) {
+		struct btree_iter iter;
+
+		for_each_key_filter(b, k, &iter, bch_ptr_bad) {
+			int ret = btree(bset_stats, k, b, op, stats);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int bch_bset_print_stats(struct cache_set *c, char *buf)
+{
+	struct btree_op op;
+	struct bset_stats t;
+	int ret;
+
+	bch_btree_op_init_stack(&op);
+	memset(&t, 0, sizeof(struct bset_stats));
+
+	ret = btree_root(bset_stats, c, &op, &t);
+	if (ret)
+		return ret;
+
+	return snprintf(buf, PAGE_SIZE,
+			"btree nodes:		%zu\n"
+			"written sets:		%zu\n"
+			"unwritten sets:	%zu\n"
+			"written key bytes:	%zu\n"
+			"unwritten key bytes:	%zu\n"
+			"floats:		%zu\n"
+			"failed:		%zu\n",
+			t.nodes,
+			t.sets_written, t.sets_unwritten,
+			t.bytes_written, t.bytes_unwritten,
+			t.floats, t.failed);
+}
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
new file mode 100644
index 000000000000..57a9cff41546
--- /dev/null
+++ b/drivers/md/bcache/bset.h
@@ -0,0 +1,379 @@
+#ifndef _BCACHE_BSET_H
+#define _BCACHE_BSET_H
+
+/*
+ * BKEYS:
+ *
+ * A bkey contains a key, a size field, a variable number of pointers, and some
+ * ancillary flag bits.
+ *
+ * We use two different functions for validating bkeys, bch_ptr_invalid and
+ * bch_ptr_bad().
+ *
+ * bch_ptr_invalid() primarily filters out keys and pointers that would be
+ * invalid due to some sort of bug, whereas bch_ptr_bad() filters out keys and
+ * pointers that occur in normal practice but don't point to real data.
+ *
+ * The one exception to the rule that ptr_invalid() filters out invalid keys is
+ * that it also filters out keys of size 0 - these are keys that have been
+ * completely overwritten. It'd be safe to delete these in memory while leaving
+ * them on disk, just unnecessary work - so we filter them out when resorting
+ * instead.
+ *
+ * We can't filter out stale keys when we're resorting, because garbage
+ * collection needs to find them to ensure bucket gens don't wrap around -
+ * unless we're rewriting the btree node those stale keys still exist on disk.
+ *
+ * We also implement functions here for removing some number of sectors from the
+ * front or the back of a bkey - this is mainly used for fixing overlapping
+ * extents, by removing the overlapping sectors from the older key.
+ *
+ * BSETS:
+ *
+ * A bset is an array of bkeys laid out contiguously in memory in sorted order,
+ * along with a header. A btree node is made up of a number of these, written at
+ * different times.
+ *
+ * There could be many of them on disk, but we never allow there to be more than
+ * 4 in memory - we lazily resort as needed.
+ *
+ * We implement code here for creating and maintaining auxiliary search trees
+ * (described below) for searching an individual bset, and on top of that we
+ * implement a btree iterator.
+ *
+ * BTREE ITERATOR:
+ *
+ * Most of the code in bcache doesn't care about an individual bset - it needs
+ * to search entire btree nodes and iterate over them in sorted order.
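+ * (A btree node holds at most MAX_BSETS sorted sets, so the iterator just
+ * keeps one { key, end } pair per set in a small heap and merges the sets
+ * on the fly.)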
+ * + * The btree iterator code serves both functions; it iterates through the keys + * in a btree node in sorted order, starting from either keys after a specific + * point (if you pass it a search key) or the start of the btree node. + * + * AUXILIARY SEARCH TREES: + * + * Since keys are variable length, we can't use a binary search on a bset - we + * wouldn't be able to find the start of the next key. But binary searches are + * slow anyways, due to terrible cache behaviour; bcache originally used binary + * searches and that code topped out at under 50k lookups/second. + * + * So we need to construct some sort of lookup table. Since we only insert keys + * into the last (unwritten) set, most of the keys within a given btree node are + * usually in sets that are mostly constant. We use two different types of + * lookup tables to take advantage of this. + * + * Both lookup tables share in common that they don't index every key in the + * set; they index one key every BSET_CACHELINE bytes, and then a linear search + * is used for the rest. + * + * For sets that have been written to disk and are no longer being inserted + * into, we construct a binary search tree in an array - traversing a binary + * search tree in an array gives excellent locality of reference and is very + * fast, since both children of any node are adjacent to each other in memory + * (and their grandchildren, and great grandchildren...) - this means + * prefetching can be used to great effect. + * + * It's quite useful performance wise to keep these nodes small - not just + * because they're more likely to be in L2, but also because we can prefetch + * more nodes on a single cacheline and thus prefetch more iterations in advance + * when traversing this tree. + * + * Nodes in the auxiliary search tree must contain both a key to compare against + * (we don't want to fetch the key from the set, that would defeat the purpose), + * and a pointer to the key. We use a few tricks to compress both of these. + * + * To compress the pointer, we take advantage of the fact that one node in the + * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have + * a function (to_inorder()) that takes the index of a node in a binary tree and + * returns what its index would be in an inorder traversal, so we only have to + * store the low bits of the offset. + * + * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To + * compress that, we take advantage of the fact that when we're traversing the + * search tree at every iteration we know that both our search key and the key + * we're looking for lie within some range - bounded by our previous + * comparisons. (We special case the start of a search so that this is true even + * at the root of the tree). + * + * So we know the key we're looking for is between a and b, and a and b don't + * differ higher than bit 50, we don't need to check anything higher than bit + * 50. + * + * We don't usually need the rest of the bits, either; we only need enough bits + * to partition the key range we're currently checking. Consider key n - the + * key our auxiliary search tree node corresponds to, and key p, the key + * immediately preceding n. The lowest bit we need to store in the auxiliary + * search tree is the highest bit that differs between n and p. + * + * Note that this could be bit 0 - we might sometimes need all 80 bits to do the + * comparison. But we'd really like our nodes in the auxiliary search tree to be + * of fixed size. 
+ *
+ * The solution is to make them fixed size, and when we're constructing a node
+ * check if p and n differed in the bits we needed them to. If they don't we
+ * flag that node, and when doing lookups we fall back to comparing against the
+ * real key. As long as this doesn't happen too often (and it seems to reliably
+ * happen a bit less than 1% of the time), we win - even on failures, that key
+ * is then more likely to be in cache than if we were doing binary searches all
+ * the way, since we're touching so much less memory.
+ *
+ * The keys in the auxiliary search tree are stored in (software) floating
+ * point, with an exponent and a mantissa. The exponent needs to be big enough
+ * to address all the bits in the original key, but the number of bits in the
+ * mantissa is somewhat arbitrary; more bits just get us fewer failures.
+ *
+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys
+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes.
+ * We need one node per 128 bytes in the btree node, which means the auxiliary
+ * search trees take up 3% as much memory as the btree itself.
+ *
+ * Constructing these auxiliary search trees is moderately expensive, and we
+ * don't want to be constantly rebuilding the search tree for the last set
+ * whenever we insert another key into it. For the unwritten set, we use a much
+ * simpler lookup table - it's just a flat array, so index i in the lookup table
+ * corresponds to the i-th range of BSET_CACHELINE bytes in the set. Indexing
+ * within each byte range works the same as with the auxiliary search trees.
+ *
+ * These are much easier to keep up to date when we insert a key - we do it
+ * somewhat lazily; when we shift a key up we usually just increment the pointer
+ * to it, only when it would overflow do we go to the trouble of finding the
+ * first key in that range of bytes again.
+ */
+
+/* Btree key comparison/iteration */
+
+struct btree_iter {
+	size_t size, used;
+	struct btree_iter_set {
+		struct bkey *k, *end;
+	} data[MAX_BSETS];
+};
+
+struct bset_tree {
+	/*
+	 * We construct a binary tree in an array as if the array
+	 * started at 1, so that things line up on the same cachelines
+	 * better: see comments in bset.c at cacheline_to_bkey() for
+	 * details
+	 */
+
+	/* size of the binary tree and prev array */
+	unsigned size;
+
+	/* function of size - precalculated for to_inorder() */
+	unsigned extra;
+
+	/* copy of the last key in the set */
+	struct bkey end;
+	struct bkey_float *tree;
+
+	/*
+	 * The nodes in the bset tree point to specific keys - this
+	 * array holds the sizes of the previous key.
+	 *
+	 * Conceptually it's a member of struct bkey_float, but we want
+	 * to keep bkey_float to 4 bytes and prev isn't used in the fast
+	 * path.
+	 */
+	uint8_t *prev;
+
+	/* The actual btree node, with pointers to each sorted set */
+	struct bset *data;
+};
+
+static __always_inline int64_t bkey_cmp(const struct bkey *l,
+					const struct bkey *r)
+{
+	return unlikely(KEY_INODE(l) != KEY_INODE(r))
+		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
+		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
+}
+
+static inline size_t bkey_u64s(const struct bkey *k)
+{
+	BUG_ON(KEY_CSUM(k) > 1);
+	return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ?
1 : 0); +} + +static inline size_t bkey_bytes(const struct bkey *k) +{ + return bkey_u64s(k) * sizeof(uint64_t); +} + +static inline void bkey_copy(struct bkey *dest, const struct bkey *src) +{ + memcpy(dest, src, bkey_bytes(src)); +} + +static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) +{ + if (!src) + src = &KEY(0, 0, 0); + + SET_KEY_INODE(dest, KEY_INODE(src)); + SET_KEY_OFFSET(dest, KEY_OFFSET(src)); +} + +static inline struct bkey *bkey_next(const struct bkey *k) +{ + uint64_t *d = (void *) k; + return (struct bkey *) (d + bkey_u64s(k)); +} + +/* Keylists */ + +struct keylist { + struct bkey *top; + union { + uint64_t *list; + struct bkey *bottom; + }; + + /* Enough room for btree_split's keys without realloc */ +#define KEYLIST_INLINE 16 + uint64_t d[KEYLIST_INLINE]; +}; + +static inline void bch_keylist_init(struct keylist *l) +{ + l->top = (void *) (l->list = l->d); +} + +static inline void bch_keylist_push(struct keylist *l) +{ + l->top = bkey_next(l->top); +} + +static inline void bch_keylist_add(struct keylist *l, struct bkey *k) +{ + bkey_copy(l->top, k); + bch_keylist_push(l); +} + +static inline bool bch_keylist_empty(struct keylist *l) +{ + return l->top == (void *) l->list; +} + +static inline void bch_keylist_free(struct keylist *l) +{ + if (l->list != l->d) + kfree(l->list); +} + +void bch_keylist_copy(struct keylist *, struct keylist *); +struct bkey *bch_keylist_pop(struct keylist *); +int bch_keylist_realloc(struct keylist *, int, struct cache_set *); + +void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, + unsigned); +bool __bch_cut_front(const struct bkey *, struct bkey *); +bool __bch_cut_back(const struct bkey *, struct bkey *); + +static inline bool bch_cut_front(const struct bkey *where, struct bkey *k) +{ + BUG_ON(bkey_cmp(where, k) > 0); + return __bch_cut_front(where, k); +} + +static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) +{ + BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0); + return __bch_cut_back(where, k); +} + +const char *bch_ptr_status(struct cache_set *, const struct bkey *); +bool __bch_ptr_invalid(struct cache_set *, int level, const struct bkey *); +bool bch_ptr_bad(struct btree *, const struct bkey *); + +static inline uint8_t gen_after(uint8_t a, uint8_t b) +{ + uint8_t r = a - b; + return r > 128U ? 
0 : r;
+}
+
+static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
+				unsigned i)
+{
+	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
+}
+
+static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
+				 unsigned i)
+{
+	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+}
+
+typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
+
+struct bkey *bch_next_recurse_key(struct btree *, struct bkey *);
+struct bkey *bch_btree_iter_next(struct btree_iter *);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
+					struct btree *, ptr_filter_fn);
+
+void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
+struct bkey *__bch_btree_iter_init(struct btree *, struct btree_iter *,
+				   struct bkey *, struct bset_tree *);
+
+/* 32 bits total: */
+#define BKEY_MID_BITS		3
+#define BKEY_EXPONENT_BITS	7
+#define BKEY_MANTISSA_BITS	22
+#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
+
+struct bkey_float {
+	unsigned	exponent:BKEY_EXPONENT_BITS;
+	unsigned	m:BKEY_MID_BITS;
+	unsigned	mantissa:BKEY_MANTISSA_BITS;
+} __packed;
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bset_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+
+#define BSET_CACHELINE		128
+#define bset_tree_space(b)	(btree_data_space(b) / BSET_CACHELINE)
+
+#define bset_tree_bytes(b)	(bset_tree_space(b) * sizeof(struct bkey_float))
+#define bset_prev_bytes(b)	(bset_tree_space(b) * sizeof(uint8_t))
+
+void bch_bset_init_next(struct btree *);
+
+void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
+void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
+
+struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
+			       const struct bkey *);
+
+static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
+					   const struct bkey *search)
+{
+	return search ? __bch_bset_search(b, t, search) : t->data->start;
+}
+
+bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
+void bch_btree_sort_lazy(struct btree *);
+void bch_btree_sort_into(struct btree *, struct btree *);
+void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
+void bch_btree_sort_partial(struct btree *, unsigned);
+
+static inline void bch_btree_sort(struct btree *b)
+{
+	bch_btree_sort_partial(b, 0);
+}
+
+int bch_bset_print_stats(struct cache_set *, char *);
+
+#endif
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
new file mode 100644
index 000000000000..e7bc917ef0d7
--- /dev/null
+++ b/drivers/md/bcache/btree.c
@@ -0,0 +1,2503 @@
+/*
+ * Copyright (C) 2010 Kent Overstreet
+ *
+ * Uses a block device as cache for other block devices; optimized for SSDs.
+ * All allocation is done in buckets, which should match the erase block size
+ * of the device.
+ * + * Buckets containing cached data are kept on a heap sorted by priority; + * bucket priority is increased on cache hit, and periodically all the buckets + * on the heap have their priority scaled down. This currently is just used as + * an LRU but in the future should allow for more intelligent heuristics. + * + * Buckets have an 8 bit counter; freeing is accomplished by incrementing the + * counter. Garbage collection is used to remove stale pointers. + * + * Indexing is done via a btree; nodes are not necessarily fully sorted, rather + * as keys are inserted we only sort the pages that have not yet been written. + * When garbage collection is run, we resort the entire node. + * + * All configuration is done via sysfs; see Documentation/bcache.txt. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +#include +#include +#include +#include +#include +#include + +/* + * Todo: + * register_bcache: Return errors out to userspace correctly + * + * Writeback: don't undirty key until after a cache flush + * + * Create an iterator for key pointers + * + * On btree write error, mark bucket such that it won't be freed from the cache + * + * Journalling: + * Check for bad keys in replay + * Propagate barriers + * Refcount journal entries in journal_replay + * + * Garbage collection: + * Finish incremental gc + * Gc should free old UUIDs, data for invalid UUIDs + * + * Provide a way to list backing device UUIDs we have data cached for, and + * probably how long it's been since we've seen them, and a way to invalidate + * dirty data for devices that will never be attached again + * + * Keep 1 min/5 min/15 min statistics of how busy a block device has been, so + * that based on that and how much dirty data we have we can keep writeback + * from being starved + * + * Add a tracepoint or somesuch to watch for writeback starvation + * + * When btree depth > 1 and splitting an interior node, we have to make sure + * alloc_bucket() cannot fail. This should be true but is not completely + * obvious. + * + * Make sure all allocations get charged to the root cgroup + * + * Plugging? + * + * If data write is less than hard sector size of ssd, round up offset in open + * bucket to the next whole sector + * + * Also lookup by cgroup in get_open_bucket() + * + * Superblock needs to be fleshed out for multiple cache devices + * + * Add a sysfs tunable for the number of writeback IOs in flight + * + * Add a sysfs tunable for the number of open data buckets + * + * IO tracking: Can we track when one process is doing io on behalf of another? 
+ * IO tracking: Don't use just an average, weigh more recent stuff higher + * + * Test module load/unload + */ + +static const char * const op_types[] = { + "insert", "replace" +}; + +static const char *op_type(struct btree_op *op) +{ + return op_types[op->type]; +} + +#define MAX_NEED_GC 64 +#define MAX_SAVE_PRIO 72 + +#define PTR_DIRTY_BIT (((uint64_t) 1 << 36)) + +#define PTR_HASH(c, k) \ + (((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0)) + +struct workqueue_struct *bch_gc_wq; +static struct workqueue_struct *btree_io_wq; + +void bch_btree_op_init_stack(struct btree_op *op) +{ + memset(op, 0, sizeof(struct btree_op)); + closure_init_stack(&op->cl); + op->lock = -1; + bch_keylist_init(&op->keys); +} + +/* Btree key manipulation */ + +static void bkey_put(struct cache_set *c, struct bkey *k, int level) +{ + if ((level && KEY_OFFSET(k)) || !level) + __bkey_put(c, k); +} + +/* Btree IO */ + +static uint64_t btree_csum_set(struct btree *b, struct bset *i) +{ + uint64_t crc = b->key.ptr[0]; + void *data = (void *) i + 8, *end = end(i); + + crc = crc64_update(crc, data, end - data); + return crc ^ 0xffffffffffffffff; +} + +static void btree_bio_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct btree *b = container_of(cl, struct btree, io.cl); + + if (error) + set_btree_node_io_error(b); + + bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE) + ? "writing btree" : "reading btree"); + closure_put(cl); +} + +static void btree_bio_init(struct btree *b) +{ + BUG_ON(b->bio); + b->bio = bch_bbio_alloc(b->c); + + b->bio->bi_end_io = btree_bio_endio; + b->bio->bi_private = &b->io.cl; +} + +void bch_btree_read_done(struct closure *cl) +{ + struct btree *b = container_of(cl, struct btree, io.cl); + struct bset *i = b->sets[0].data; + struct btree_iter *iter = b->c->fill_iter; + const char *err = "bad btree header"; + BUG_ON(b->nsets || b->written); + + bch_bbio_free(b->bio, b->c); + b->bio = NULL; + + mutex_lock(&b->c->fill_lock); + iter->used = 0; + + if (btree_node_io_error(b) || + !i->seq) + goto err; + + for (; + b->written < btree_blocks(b) && i->seq == b->sets[0].data->seq; + i = write_block(b)) { + err = "unsupported bset version"; + if (i->version > BCACHE_BSET_VERSION) + goto err; + + err = "bad btree header"; + if (b->written + set_blocks(i, b->c) > btree_blocks(b)) + goto err; + + err = "bad magic"; + if (i->magic != bset_magic(b->c)) + goto err; + + err = "bad checksum"; + switch (i->version) { + case 0: + if (i->csum != csum_set(i)) + goto err; + break; + case BCACHE_BSET_VERSION: + if (i->csum != btree_csum_set(b, i)) + goto err; + break; + } + + err = "empty set"; + if (i != b->sets[0].data && !i->keys) + goto err; + + bch_btree_iter_push(iter, i->start, end(i)); + + b->written += set_blocks(i, b->c); + } + + err = "corrupted btree"; + for (i = write_block(b); + index(i, b) < btree_blocks(b); + i = ((void *) i) + block_bytes(b->c)) + if (i->seq == b->sets[0].data->seq) + goto err; + + bch_btree_sort_and_fix_extents(b, iter); + + i = b->sets[0].data; + err = "short btree key"; + if (b->sets[0].size && + bkey_cmp(&b->key, &b->sets[0].end) < 0) + goto err; + + if (b->written < btree_blocks(b)) + bch_bset_init_next(b); +out: + + mutex_unlock(&b->c->fill_lock); + + spin_lock(&b->c->btree_read_time_lock); + time_stats_update(&b->c->btree_read_time, b->io_start_time); + spin_unlock(&b->c->btree_read_time_lock); + + smp_wmb(); /* read_done is our write lock */ + set_btree_node_read_done(b); + + closure_return(cl); +err: + 
set_btree_node_io_error(b); + bch_cache_set_error(b->c, "%s at bucket %lu, block %zu, %u keys", + err, PTR_BUCKET_NR(b->c, &b->key, 0), + index(i, b), i->keys); + goto out; +} + +void bch_btree_read(struct btree *b) +{ + BUG_ON(b->nsets || b->written); + + if (!closure_trylock(&b->io.cl, &b->c->cl)) + BUG(); + + b->io_start_time = local_clock(); + + btree_bio_init(b); + b->bio->bi_rw = REQ_META|READ_SYNC; + b->bio->bi_size = KEY_SIZE(&b->key) << 9; + + bio_map(b->bio, b->sets[0].data); + + pr_debug("%s", pbtree(b)); + trace_bcache_btree_read(b->bio); + bch_submit_bbio(b->bio, b->c, &b->key, 0); + + continue_at(&b->io.cl, bch_btree_read_done, system_wq); +} + +static void btree_complete_write(struct btree *b, struct btree_write *w) +{ + if (w->prio_blocked && + !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked)) + wake_up(&b->c->alloc_wait); + + if (w->journal) { + atomic_dec_bug(w->journal); + __closure_wake_up(&b->c->journal.wait); + } + + if (w->owner) + closure_put(w->owner); + + w->prio_blocked = 0; + w->journal = NULL; + w->owner = NULL; +} + +static void __btree_write_done(struct closure *cl) +{ + struct btree *b = container_of(cl, struct btree, io.cl); + struct btree_write *w = btree_prev_write(b); + + bch_bbio_free(b->bio, b->c); + b->bio = NULL; + btree_complete_write(b, w); + + if (btree_node_dirty(b)) + queue_delayed_work(btree_io_wq, &b->work, + msecs_to_jiffies(30000)); + + closure_return(cl); +} + +static void btree_write_done(struct closure *cl) +{ + struct btree *b = container_of(cl, struct btree, io.cl); + struct bio_vec *bv; + int n; + + __bio_for_each_segment(bv, b->bio, n, 0) + __free_page(bv->bv_page); + + __btree_write_done(cl); +} + +static void do_btree_write(struct btree *b) +{ + struct closure *cl = &b->io.cl; + struct bset *i = b->sets[b->nsets].data; + BKEY_PADDED(key) k; + + i->version = BCACHE_BSET_VERSION; + i->csum = btree_csum_set(b, i); + + btree_bio_init(b); + b->bio->bi_rw = REQ_META|WRITE_SYNC; + b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); + bio_map(b->bio, i); + + bkey_copy(&k.key, &b->key); + SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); + + if (!bio_alloc_pages(b->bio, GFP_NOIO)) { + int j; + struct bio_vec *bv; + void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); + + bio_for_each_segment(bv, b->bio, j) + memcpy(page_address(bv->bv_page), + base + j * PAGE_SIZE, PAGE_SIZE); + + trace_bcache_btree_write(b->bio); + bch_submit_bbio(b->bio, b->c, &k.key, 0); + + continue_at(cl, btree_write_done, NULL); + } else { + b->bio->bi_vcnt = 0; + bio_map(b->bio, i); + + trace_bcache_btree_write(b->bio); + bch_submit_bbio(b->bio, b->c, &k.key, 0); + + closure_sync(cl); + __btree_write_done(cl); + } +} + +static void __btree_write(struct btree *b) +{ + struct bset *i = b->sets[b->nsets].data; + + BUG_ON(current->bio_list); + + closure_lock(&b->io, &b->c->cl); + cancel_delayed_work(&b->work); + + clear_bit(BTREE_NODE_dirty, &b->flags); + change_bit(BTREE_NODE_write_idx, &b->flags); + + bch_check_key_order(b, i); + BUG_ON(b->written && !i->keys); + + do_btree_write(b); + + pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys); + + b->written += set_blocks(i, b->c); + atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, + &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); + + bch_btree_sort_lazy(b); + + if (b->written < btree_blocks(b)) + bch_bset_init_next(b); +} + +static void btree_write_work(struct work_struct *w) +{ + struct btree *b = container_of(to_delayed_work(w), struct btree, work); 
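+	/*
+	 * Delayed work, scheduled with a 30 second delay when the node first
+	 * goes dirty (see the queue_delayed_work() calls nearby); write the
+	 * node out if it is still dirty when the timer fires.
+	 */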
+ + down_write(&b->lock); + + if (btree_node_dirty(b)) + __btree_write(b); + up_write(&b->lock); +} + +void bch_btree_write(struct btree *b, bool now, struct btree_op *op) +{ + struct bset *i = b->sets[b->nsets].data; + struct btree_write *w = btree_current_write(b); + + BUG_ON(b->written && + (b->written >= btree_blocks(b) || + i->seq != b->sets[0].data->seq || + !i->keys)); + + if (!btree_node_dirty(b)) { + set_btree_node_dirty(b); + queue_delayed_work(btree_io_wq, &b->work, + msecs_to_jiffies(30000)); + } + + w->prio_blocked += b->prio_blocked; + b->prio_blocked = 0; + + if (op && op->journal && !b->level) { + if (w->journal && + journal_pin_cmp(b->c, w, op)) { + atomic_dec_bug(w->journal); + w->journal = NULL; + } + + if (!w->journal) { + w->journal = op->journal; + atomic_inc(w->journal); + } + } + + if (current->bio_list) + return; + + /* Force write if set is too big */ + if (now || + b->level || + set_bytes(i) > PAGE_SIZE - 48) { + if (op && now) { + /* Must wait on multiple writes */ + BUG_ON(w->owner); + w->owner = &op->cl; + closure_get(&op->cl); + } + + __btree_write(b); + } + BUG_ON(!b->written); +} + +/* + * Btree in memory cache - allocation/freeing + * mca -> memory cache + */ + +static void mca_reinit(struct btree *b) +{ + unsigned i; + + b->flags = 0; + b->written = 0; + b->nsets = 0; + + for (i = 0; i < MAX_BSETS; i++) + b->sets[i].size = 0; + /* + * Second loop starts at 1 because b->sets[0]->data is the memory we + * allocated + */ + for (i = 1; i < MAX_BSETS; i++) + b->sets[i].data = NULL; +} + +#define mca_reserve(c) (((c->root && c->root->level) \ + ? c->root->level : 1) * 8 + 16) +#define mca_can_free(c) \ + max_t(int, 0, c->bucket_cache_used - mca_reserve(c)) + +static void mca_data_free(struct btree *b) +{ + struct bset_tree *t = b->sets; + BUG_ON(!closure_is_unlocked(&b->io.cl)); + + if (bset_prev_bytes(b) < PAGE_SIZE) + kfree(t->prev); + else + free_pages((unsigned long) t->prev, + get_order(bset_prev_bytes(b))); + + if (bset_tree_bytes(b) < PAGE_SIZE) + kfree(t->tree); + else + free_pages((unsigned long) t->tree, + get_order(bset_tree_bytes(b))); + + free_pages((unsigned long) t->data, b->page_order); + + t->prev = NULL; + t->tree = NULL; + t->data = NULL; + list_move(&b->list, &b->c->btree_cache_freed); + b->c->bucket_cache_used--; +} + +static void mca_bucket_free(struct btree *b) +{ + BUG_ON(btree_node_dirty(b)); + + b->key.ptr[0] = 0; + hlist_del_init_rcu(&b->hash); + list_move(&b->list, &b->c->btree_cache_freeable); +} + +static unsigned btree_order(struct bkey *k) +{ + return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1); +} + +static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) +{ + struct bset_tree *t = b->sets; + BUG_ON(t->data); + + b->page_order = max_t(unsigned, + ilog2(b->c->btree_pages), + btree_order(k)); + + t->data = (void *) __get_free_pages(gfp, b->page_order); + if (!t->data) + goto err; + + t->tree = bset_tree_bytes(b) < PAGE_SIZE + ? kmalloc(bset_tree_bytes(b), gfp) + : (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b))); + if (!t->tree) + goto err; + + t->prev = bset_prev_bytes(b) < PAGE_SIZE + ? 
kmalloc(bset_prev_bytes(b), gfp) + : (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b))); + if (!t->prev) + goto err; + + list_move(&b->list, &b->c->btree_cache); + b->c->bucket_cache_used++; + return; +err: + mca_data_free(b); +} + +static struct btree *mca_bucket_alloc(struct cache_set *c, + struct bkey *k, gfp_t gfp) +{ + struct btree *b = kzalloc(sizeof(struct btree), gfp); + if (!b) + return NULL; + + init_rwsem(&b->lock); + lockdep_set_novalidate_class(&b->lock); + INIT_LIST_HEAD(&b->list); + INIT_DELAYED_WORK(&b->work, btree_write_work); + b->c = c; + closure_init_unlocked(&b->io); + + mca_data_alloc(b, k, gfp); + return b; +} + +static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order) +{ + lockdep_assert_held(&b->c->bucket_lock); + + if (!down_write_trylock(&b->lock)) + return -ENOMEM; + + if (b->page_order < min_order) { + rw_unlock(true, b); + return -ENOMEM; + } + + BUG_ON(btree_node_dirty(b) && !b->sets[0].data); + + if (cl && btree_node_dirty(b)) + bch_btree_write(b, true, NULL); + + if (cl) + closure_wait_event_async(&b->io.wait, cl, + atomic_read(&b->io.cl.remaining) == -1); + + if (btree_node_dirty(b) || + !closure_is_unlocked(&b->io.cl) || + work_pending(&b->work.work)) { + rw_unlock(true, b); + return -EAGAIN; + } + + return 0; +} + +static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + struct cache_set *c = container_of(shrink, struct cache_set, shrink); + struct btree *b, *t; + unsigned long i, nr = sc->nr_to_scan; + + if (c->shrinker_disabled) + return 0; + + if (c->try_harder) + return 0; + + /* + * If nr == 0, we're supposed to return the number of items we have + * cached. Not allowed to return -1. + */ + if (!nr) + return mca_can_free(c) * c->btree_pages; + + /* Return -1 if we can't do anything right now */ + if (sc->gfp_mask & __GFP_WAIT) + mutex_lock(&c->bucket_lock); + else if (!mutex_trylock(&c->bucket_lock)) + return -1; + + nr /= c->btree_pages; + nr = min_t(unsigned long, nr, mca_can_free(c)); + + i = 0; + list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { + if (!nr) + break; + + if (++i > 3 && + !mca_reap(b, NULL, 0)) { + mca_data_free(b); + rw_unlock(true, b); + --nr; + } + } + + /* + * Can happen right when we first start up, before we've read in any + * btree nodes + */ + if (list_empty(&c->btree_cache)) + goto out; + + for (i = 0; nr && i < c->bucket_cache_used; i++) { + b = list_first_entry(&c->btree_cache, struct btree, list); + list_rotate_left(&c->btree_cache); + + if (!b->accessed && + !mca_reap(b, NULL, 0)) { + mca_bucket_free(b); + mca_data_free(b); + rw_unlock(true, b); + --nr; + } else + b->accessed = 0; + } +out: + nr = mca_can_free(c) * c->btree_pages; + mutex_unlock(&c->bucket_lock); + return nr; +} + +void bch_btree_cache_free(struct cache_set *c) +{ + struct btree *b; + struct closure cl; + closure_init_stack(&cl); + + if (c->shrink.list.next) + unregister_shrinker(&c->shrink); + + mutex_lock(&c->bucket_lock); + +#ifdef CONFIG_BCACHE_DEBUG + if (c->verify_data) + list_move(&c->verify_data->list, &c->btree_cache); +#endif + + list_splice(&c->btree_cache_freeable, + &c->btree_cache); + + while (!list_empty(&c->btree_cache)) { + b = list_first_entry(&c->btree_cache, struct btree, list); + + if (btree_node_dirty(b)) + btree_complete_write(b, btree_current_write(b)); + clear_bit(BTREE_NODE_dirty, &b->flags); + + mca_data_free(b); + } + + while (!list_empty(&c->btree_cache_freed)) { + b = list_first_entry(&c->btree_cache_freed, + struct btree, list); + list_del(&b->list); 
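+		/*
+		 * The data pages were freed by mca_data_free() when the node
+		 * went on btree_cache_freed; only struct btree itself and its
+		 * delayed work remain to clean up.
+		 */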
+ cancel_delayed_work_sync(&b->work); + kfree(b); + } + + mutex_unlock(&c->bucket_lock); +} + +int bch_btree_cache_alloc(struct cache_set *c) +{ + unsigned i; + + /* XXX: doesn't check for errors */ + + closure_init_unlocked(&c->gc); + + for (i = 0; i < mca_reserve(c); i++) + mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); + + list_splice_init(&c->btree_cache, + &c->btree_cache_freeable); + +#ifdef CONFIG_BCACHE_DEBUG + mutex_init(&c->verify_lock); + + c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); + + if (c->verify_data && + c->verify_data->sets[0].data) + list_del_init(&c->verify_data->list); + else + c->verify_data = NULL; +#endif + + c->shrink.shrink = bch_mca_shrink; + c->shrink.seeks = 4; + c->shrink.batch = c->btree_pages * 2; + register_shrinker(&c->shrink); + + return 0; +} + +/* Btree in memory cache - hash table */ + +static struct hlist_head *mca_hash(struct cache_set *c, struct bkey *k) +{ + return &c->bucket_hash[hash_32(PTR_HASH(c, k), BUCKET_HASH_BITS)]; +} + +static struct btree *mca_find(struct cache_set *c, struct bkey *k) +{ + struct btree *b; + + rcu_read_lock(); + hlist_for_each_entry_rcu(b, mca_hash(c, k), hash) + if (PTR_HASH(c, &b->key) == PTR_HASH(c, k)) + goto out; + b = NULL; +out: + rcu_read_unlock(); + return b; +} + +static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, + int level, struct closure *cl) +{ + int ret = -ENOMEM; + struct btree *i; + + if (!cl) + return ERR_PTR(-ENOMEM); + + /* + * Trying to free up some memory - i.e. reuse some btree nodes - may + * require initiating IO to flush the dirty part of the node. If we're + * running under generic_make_request(), that IO will never finish and + * we would deadlock. Returning -EAGAIN causes the cache lookup code to + * punt to workqueue and retry. + */ + if (current->bio_list) + return ERR_PTR(-EAGAIN); + + if (c->try_harder && c->try_harder != cl) { + closure_wait_event_async(&c->try_wait, cl, !c->try_harder); + return ERR_PTR(-EAGAIN); + } + + /* XXX: tracepoint */ + c->try_harder = cl; + c->try_harder_start = local_clock(); +retry: + list_for_each_entry_reverse(i, &c->btree_cache, list) { + int r = mca_reap(i, cl, btree_order(k)); + if (!r) + return i; + if (r != -ENOMEM) + ret = r; + } + + if (ret == -EAGAIN && + closure_blocking(cl)) { + mutex_unlock(&c->bucket_lock); + closure_sync(cl); + mutex_lock(&c->bucket_lock); + goto retry; + } + + return ERR_PTR(ret); +} + +/* + * We can only have one thread cannibalizing other cached btree nodes at a time, + * or we'll deadlock. We use an open coded mutex to ensure that, which a + * cannibalize_bucket() will take. This means every time we unlock the root of + * the btree, we need to release this lock if we have it held. + */ +void bch_cannibalize_unlock(struct cache_set *c, struct closure *cl) +{ + if (c->try_harder == cl) { + time_stats_update(&c->try_harder_time, c->try_harder_start); + c->try_harder = NULL; + __closure_wake_up(&c->try_wait); + } +} + +static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, + int level, struct closure *cl) +{ + struct btree *b; + + lockdep_assert_held(&c->bucket_lock); + + if (mca_find(c, k)) + return NULL; + + /* btree_free() doesn't free memory; it sticks the node on the end of + * the list. Check if there's any freed nodes there: + */ + list_for_each_entry(b, &c->btree_cache_freeable, list) + if (!mca_reap(b, NULL, btree_order(k))) + goto out; + + /* We never free struct btree itself, just the memory that holds the on + * disk node. 
Check the freed list before allocating a new one: + */ + list_for_each_entry(b, &c->btree_cache_freed, list) + if (!mca_reap(b, NULL, 0)) { + mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO); + if (!b->sets[0].data) + goto err; + else + goto out; + } + + b = mca_bucket_alloc(c, k, __GFP_NOWARN|GFP_NOIO); + if (!b) + goto err; + + BUG_ON(!down_write_trylock(&b->lock)); + if (!b->sets->data) + goto err; +out: + BUG_ON(!closure_is_unlocked(&b->io.cl)); + + bkey_copy(&b->key, k); + list_move(&b->list, &c->btree_cache); + hlist_del_init_rcu(&b->hash); + hlist_add_head_rcu(&b->hash, mca_hash(c, k)); + + lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); + b->level = level; + + mca_reinit(b); + + return b; +err: + if (b) + rw_unlock(true, b); + + b = mca_cannibalize(c, k, level, cl); + if (!IS_ERR(b)) + goto out; + + return b; +} + +/** + * bch_btree_node_get - find a btree node in the cache and lock it, reading it + * in from disk if necessary. + * + * If IO is necessary, it uses the closure embedded in struct btree_op to wait; + * if that closure is in non blocking mode, will return -EAGAIN. + * + * The btree node will have either a read or a write lock held, depending on + * level and op->lock. + */ +struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k, + int level, struct btree_op *op) +{ + int i = 0; + bool write = level <= op->lock; + struct btree *b; + + BUG_ON(level < 0); +retry: + b = mca_find(c, k); + + if (!b) { + mutex_lock(&c->bucket_lock); + b = mca_alloc(c, k, level, &op->cl); + mutex_unlock(&c->bucket_lock); + + if (!b) + goto retry; + if (IS_ERR(b)) + return b; + + bch_btree_read(b); + + if (!write) + downgrade_write(&b->lock); + } else { + rw_lock(write, b, level); + if (PTR_HASH(c, &b->key) != PTR_HASH(c, k)) { + rw_unlock(write, b); + goto retry; + } + BUG_ON(b->level != level); + } + + b->accessed = 1; + + for (; i <= b->nsets && b->sets[i].size; i++) { + prefetch(b->sets[i].tree); + prefetch(b->sets[i].data); + } + + for (; i <= b->nsets; i++) + prefetch(b->sets[i].data); + + if (!closure_wait_event(&b->io.wait, &op->cl, + btree_node_read_done(b))) { + rw_unlock(write, b); + b = ERR_PTR(-EAGAIN); + } else if (btree_node_io_error(b)) { + rw_unlock(write, b); + b = ERR_PTR(-EIO); + } else + BUG_ON(!b->written); + + return b; +} + +static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) +{ + struct btree *b; + + mutex_lock(&c->bucket_lock); + b = mca_alloc(c, k, level, NULL); + mutex_unlock(&c->bucket_lock); + + if (!IS_ERR_OR_NULL(b)) { + bch_btree_read(b); + rw_unlock(true, b); + } +} + +/* Btree alloc */ + +static void btree_node_free(struct btree *b, struct btree_op *op) +{ + unsigned i; + + /* + * The BUG_ON() in btree_node_get() implies that we must have a write + * lock on parent to free or even invalidate a node + */ + BUG_ON(op->lock <= b->level); + BUG_ON(b == b->c->root); + pr_debug("bucket %s", pbtree(b)); + + if (btree_node_dirty(b)) + btree_complete_write(b, btree_current_write(b)); + clear_bit(BTREE_NODE_dirty, &b->flags); + + if (b->prio_blocked && + !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked)) + closure_wake_up(&b->c->bucket_wait); + + b->prio_blocked = 0; + + cancel_delayed_work(&b->work); + + mutex_lock(&b->c->bucket_lock); + + for (i = 0; i < KEY_PTRS(&b->key); i++) { + BUG_ON(atomic_read(&PTR_BUCKET(b->c, &b->key, i)->pin)); + + bch_inc_gen(PTR_CACHE(b->c, &b->key, i), + PTR_BUCKET(b->c, &b->key, i)); + } + + bch_bucket_free(b->c, &b->key); + mca_bucket_free(b); + mutex_unlock(&b->c->bucket_lock); +} + 
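+/*
+ * Illustrative sketch of the pattern bch_btree_node_get() above is built
+ * around (walk_one_level() is a hypothetical caller, not a function in this
+ * file):
+ *
+ *	static int walk_one_level(struct cache_set *c, struct bkey *k,
+ *				  int level, struct btree_op *op)
+ *	{
+ *		struct btree *b = bch_btree_node_get(c, k, level, op);
+ *
+ *		if (IS_ERR(b))
+ *			return PTR_ERR(b);
+ *
+ *		... use b; node_get returned it read or write locked,
+ *		    depending on level and op->lock ...
+ *
+ *		rw_unlock(level <= op->lock, b);
+ *		return 0;
+ *	}
+ *
+ * The btree() and btree_root() macros in btree.h expand to essentially this
+ * sequence, which is why bch_btree_node_get() is almost never called
+ * directly.
+ */
+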
+struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
+				   struct closure *cl)
+{
+	BKEY_PADDED(key) k;
+	struct btree *b = ERR_PTR(-EAGAIN);
+
+	mutex_lock(&c->bucket_lock);
+retry:
+	if (__bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, cl))
+		goto err;
+
+	SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
+
+	b = mca_alloc(c, &k.key, level, cl);
+	if (IS_ERR(b))
+		goto err_free;
+
+	if (!b) {
+		cache_bug(c, "Tried to allocate bucket"
+			  " that was in btree cache");
+		__bkey_put(c, &k.key);
+		goto retry;
+	}
+
+	set_btree_node_read_done(b);
+	b->accessed = 1;
+	bch_bset_init_next(b);
+
+	mutex_unlock(&c->bucket_lock);
+	return b;
+err_free:
+	bch_bucket_free(c, &k.key);
+	__bkey_put(c, &k.key);
+err:
+	mutex_unlock(&c->bucket_lock);
+	return b;
+}
+
+static struct btree *btree_node_alloc_replacement(struct btree *b,
+						  struct closure *cl)
+{
+	struct btree *n = bch_btree_node_alloc(b->c, b->level, cl);
+	if (!IS_ERR_OR_NULL(n))
+		bch_btree_sort_into(b, n);
+
+	return n;
+}
+
+/* Garbage collection */
+
+uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
+{
+	uint8_t stale = 0;
+	unsigned i;
+	struct bucket *g;
+
+	/*
+	 * ptr_invalid() can't return true for the keys that mark btree nodes as
+	 * freed, but since ptr_bad() returns true we'll never actually use them
+	 * for anything and thus we don't want to mark their pointers here
+	 */
+	if (!bkey_cmp(k, &ZERO_KEY))
+		return stale;
+
+	for (i = 0; i < KEY_PTRS(k); i++) {
+		if (!ptr_available(c, k, i))
+			continue;
+
+		g = PTR_BUCKET(c, k, i);
+
+		if (gen_after(g->gc_gen, PTR_GEN(k, i)))
+			g->gc_gen = PTR_GEN(k, i);
+
+		if (ptr_stale(c, k, i)) {
+			stale = max(stale, ptr_stale(c, k, i));
+			continue;
+		}
+
+		cache_bug_on(GC_MARK(g) &&
+			     (GC_MARK(g) == GC_MARK_METADATA) != (level != 0),
+			     c, "inconsistent ptrs: mark = %llu, level = %i",
+			     GC_MARK(g), level);
+
+		if (level)
+			SET_GC_MARK(g, GC_MARK_METADATA);
+		else if (KEY_DIRTY(k))
+			SET_GC_MARK(g, GC_MARK_DIRTY);
+
+		/* guard against overflow */
+		SET_GC_SECTORS_USED(g, min_t(unsigned,
+					     GC_SECTORS_USED(g) + KEY_SIZE(k),
+					     (1 << 14) - 1));
+
+		BUG_ON(!GC_SECTORS_USED(g));
+	}
+
+	return stale;
+}
+
+#define btree_mark_key(b, k)	__bch_btree_mark_key(b->c, b->level, k)
+
+static int btree_gc_mark_node(struct btree *b, unsigned *keys,
+			      struct gc_stat *gc)
+{
+	uint8_t stale = 0;
+	unsigned last_dev = -1;
+	struct bcache_device *d = NULL;
+	struct bkey *k;
+	struct btree_iter iter;
+	struct bset_tree *t;
+
+	gc->nodes++;
+
+	for_each_key_filter(b, k, &iter, bch_ptr_invalid) {
+		if (last_dev != KEY_INODE(k)) {
+			last_dev = KEY_INODE(k);
+
+			d = KEY_INODE(k) < b->c->nr_uuids
+				? b->c->devices[last_dev]
+				: NULL;
+		}
+
+		stale = max(stale, btree_mark_key(b, k));
+
+		if (bch_ptr_bad(b, k))
+			continue;
+
+		*keys += bkey_u64s(k);
+
+		gc->key_bytes += bkey_u64s(k);
+		gc->nkeys++;
+
+		gc->data += KEY_SIZE(k);
+		if (KEY_DIRTY(k)) {
+			gc->dirty += KEY_SIZE(k);
+			if (d)
+				d->sectors_dirty_gc += KEY_SIZE(k);
+		}
+	}
+
+	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
+		btree_bug_on(t->size &&
+			     bset_written(b, t) &&
+			     bkey_cmp(&b->key, &t->end) < 0,
+			     b, "found short btree key in gc");
+
+	return stale;
+}
+
+static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
+				    struct btree_op *op)
+{
+	/*
+	 * We block priorities from being written for the duration of garbage
+	 * collection, so we can't sleep in btree_alloc() ->
+	 * bch_bucket_alloc_set(), or we'd risk deadlock - so we don't pass it
+	 * our closure.
+ */ + struct btree *n = btree_node_alloc_replacement(b, NULL); + + if (!IS_ERR_OR_NULL(n)) { + swap(b, n); + + memcpy(k->ptr, b->key.ptr, + sizeof(uint64_t) * KEY_PTRS(&b->key)); + + __bkey_put(b->c, &b->key); + atomic_inc(&b->c->prio_blocked); + b->prio_blocked++; + + btree_node_free(n, op); + up_write(&n->lock); + } + + return b; +} + +/* + * Leaving this at 2 until we've got incremental garbage collection done; it + * could be higher (and has been tested with 4) except that garbage collection + * could take much longer, adversely affecting latency. + */ +#define GC_MERGE_NODES 2U + +struct gc_merge_info { + struct btree *b; + struct bkey *k; + unsigned keys; +}; + +static void btree_gc_coalesce(struct btree *b, struct btree_op *op, + struct gc_stat *gc, struct gc_merge_info *r) +{ + unsigned nodes = 0, keys = 0, blocks; + int i; + + while (nodes < GC_MERGE_NODES && r[nodes].b) + keys += r[nodes++].keys; + + blocks = btree_default_blocks(b->c) * 2 / 3; + + if (nodes < 2 || + __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1)) + return; + + for (i = nodes - 1; i >= 0; --i) { + if (r[i].b->written) + r[i].b = btree_gc_alloc(r[i].b, r[i].k, op); + + if (r[i].b->written) + return; + } + + for (i = nodes - 1; i > 0; --i) { + struct bset *n1 = r[i].b->sets->data; + struct bset *n2 = r[i - 1].b->sets->data; + struct bkey *k, *last = NULL; + + keys = 0; + + if (i == 1) { + /* + * Last node we're not getting rid of - we're getting + * rid of the node at r[0]. Have to try and fit all of + * the remaining keys into this node; we can't ensure + * they will always fit due to rounding and variable + * length keys (shouldn't be possible in practice, + * though) + */ + if (__set_blocks(n1, n1->keys + r->keys, + b->c) > btree_blocks(r[i].b)) + return; + + keys = n2->keys; + last = &r->b->key; + } else + for (k = n2->start; + k < end(n2); + k = bkey_next(k)) { + if (__set_blocks(n1, n1->keys + keys + + bkey_u64s(k), b->c) > blocks) + break; + + last = k; + keys += bkey_u64s(k); + } + + BUG_ON(__set_blocks(n1, n1->keys + keys, + b->c) > btree_blocks(r[i].b)); + + if (last) { + bkey_copy_key(&r[i].b->key, last); + bkey_copy_key(r[i].k, last); + } + + memcpy(end(n1), + n2->start, + (void *) node(n2, keys) - (void *) n2->start); + + n1->keys += keys; + + memmove(n2->start, + node(n2, keys), + (void *) end(n2) - (void *) node(n2, keys)); + + n2->keys -= keys; + + r[i].keys = n1->keys; + r[i - 1].keys = n2->keys; + } + + btree_node_free(r->b, op); + up_write(&r->b->lock); + + pr_debug("coalesced %u nodes", nodes); + + gc->nodes--; + nodes--; + + memmove(&r[0], &r[1], sizeof(struct gc_merge_info) * nodes); + memset(&r[nodes], 0, sizeof(struct gc_merge_info)); +} + +static int btree_gc_recurse(struct btree *b, struct btree_op *op, + struct closure *writes, struct gc_stat *gc) +{ + void write(struct btree *r) + { + if (!r->written) + bch_btree_write(r, true, op); + else if (btree_node_dirty(r)) { + BUG_ON(btree_current_write(r)->owner); + btree_current_write(r)->owner = writes; + closure_get(writes); + + bch_btree_write(r, true, NULL); + } + + up_write(&r->lock); + } + + int ret = 0, stale; + unsigned i; + struct gc_merge_info r[GC_MERGE_NODES]; + + memset(r, 0, sizeof(r)); + + while ((r->k = bch_next_recurse_key(b, &b->c->gc_done))) { + r->b = bch_btree_node_get(b->c, r->k, b->level - 1, op); + + if (IS_ERR(r->b)) { + ret = PTR_ERR(r->b); + break; + } + + r->keys = 0; + stale = btree_gc_mark_node(r->b, &r->keys, gc); + + if (!b->written && + (r->b->level || stale > 10 || + b->c->gc_always_rewrite)) 
+ r->b = btree_gc_alloc(r->b, r->k, op); + + if (r->b->level) + ret = btree_gc_recurse(r->b, op, writes, gc); + + if (ret) { + write(r->b); + break; + } + + bkey_copy_key(&b->c->gc_done, r->k); + + if (!b->written) + btree_gc_coalesce(b, op, gc, r); + + if (r[GC_MERGE_NODES - 1].b) + write(r[GC_MERGE_NODES - 1].b); + + memmove(&r[1], &r[0], + sizeof(struct gc_merge_info) * (GC_MERGE_NODES - 1)); + + /* When we've got incremental GC working, we'll want to do + * if (should_resched()) + * return -EAGAIN; + */ + cond_resched(); +#if 0 + if (need_resched()) { + ret = -EAGAIN; + break; + } +#endif + } + + for (i = 1; i < GC_MERGE_NODES && r[i].b; i++) + write(r[i].b); + + /* Might have freed some children, must remove their keys */ + if (!b->written) + bch_btree_sort(b); + + return ret; +} + +static int bch_btree_gc_root(struct btree *b, struct btree_op *op, + struct closure *writes, struct gc_stat *gc) +{ + struct btree *n = NULL; + unsigned keys = 0; + int ret = 0, stale = btree_gc_mark_node(b, &keys, gc); + + if (b->level || stale > 10) + n = btree_node_alloc_replacement(b, NULL); + + if (!IS_ERR_OR_NULL(n)) + swap(b, n); + + if (b->level) + ret = btree_gc_recurse(b, op, writes, gc); + + if (!b->written || btree_node_dirty(b)) { + atomic_inc(&b->c->prio_blocked); + b->prio_blocked++; + bch_btree_write(b, true, n ? op : NULL); + } + + if (!IS_ERR_OR_NULL(n)) { + closure_sync(&op->cl); + bch_btree_set_root(b); + btree_node_free(n, op); + rw_unlock(true, b); + } + + return ret; +} + +static void btree_gc_start(struct cache_set *c) +{ + struct cache *ca; + struct bucket *b; + struct bcache_device **d; + unsigned i; + + if (!c->gc_mark_valid) + return; + + mutex_lock(&c->bucket_lock); + + c->gc_mark_valid = 0; + c->gc_done = ZERO_KEY; + + for_each_cache(ca, c, i) + for_each_bucket(b, ca) { + b->gc_gen = b->gen; + if (!atomic_read(&b->pin)) + SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + } + + for (d = c->devices; + d < c->devices + c->nr_uuids; + d++) + if (*d) + (*d)->sectors_dirty_gc = 0; + + mutex_unlock(&c->bucket_lock); +} + +size_t bch_btree_gc_finish(struct cache_set *c) +{ + size_t available = 0; + struct bucket *b; + struct cache *ca; + struct bcache_device **d; + unsigned i; + + mutex_lock(&c->bucket_lock); + + set_gc_sectors(c); + c->gc_mark_valid = 1; + c->need_gc = 0; + + if (c->root) + for (i = 0; i < KEY_PTRS(&c->root->key); i++) + SET_GC_MARK(PTR_BUCKET(c, &c->root->key, i), + GC_MARK_METADATA); + + for (i = 0; i < KEY_PTRS(&c->uuid_bucket); i++) + SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i), + GC_MARK_METADATA); + + for_each_cache(ca, c, i) { + uint64_t *i; + + ca->invalidate_needs_gc = 0; + + for (i = ca->sb.d; i < ca->sb.d + ca->sb.keys; i++) + SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); + + for (i = ca->prio_buckets; + i < ca->prio_buckets + prio_buckets(ca) * 2; i++) + SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); + + for_each_bucket(b, ca) { + b->last_gc = b->gc_gen; + c->need_gc = max(c->need_gc, bucket_gc_gen(b)); + + if (!atomic_read(&b->pin) && + GC_MARK(b) == GC_MARK_RECLAIMABLE) { + available++; + if (!GC_SECTORS_USED(b)) + bch_bucket_add_unused(ca, b); + } + } + } + + for (d = c->devices; + d < c->devices + c->nr_uuids; + d++) + if (*d) { + unsigned long last = + atomic_long_read(&((*d)->sectors_dirty)); + long difference = (*d)->sectors_dirty_gc - last; + + pr_debug("sectors dirty off by %li", difference); + + (*d)->sectors_dirty_last += difference; + + atomic_long_set(&((*d)->sectors_dirty), + (*d)->sectors_dirty_gc); + } + + mutex_unlock(&c->bucket_lock); + 
return available; +} + +static void bch_btree_gc(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, gc.cl); + int ret; + unsigned long available; + struct gc_stat stats; + struct closure writes; + struct btree_op op; + + uint64_t start_time = local_clock(); + trace_bcache_gc_start(c->sb.set_uuid); + blktrace_msg_all(c, "Starting gc"); + + memset(&stats, 0, sizeof(struct gc_stat)); + closure_init_stack(&writes); + bch_btree_op_init_stack(&op); + op.lock = SHRT_MAX; + + btree_gc_start(c); + + ret = btree_root(gc_root, c, &op, &writes, &stats); + closure_sync(&op.cl); + closure_sync(&writes); + + if (ret) { + blktrace_msg_all(c, "Stopped gc"); + pr_warn("gc failed!"); + + continue_at(cl, bch_btree_gc, bch_gc_wq); + } + + /* Possibly wait for new UUIDs or whatever to hit disk */ + bch_journal_meta(c, &op.cl); + closure_sync(&op.cl); + + available = bch_btree_gc_finish(c); + + time_stats_update(&c->btree_gc_time, start_time); + + stats.key_bytes *= sizeof(uint64_t); + stats.dirty <<= 9; + stats.data <<= 9; + stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; + memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); + blktrace_msg_all(c, "Finished gc"); + + trace_bcache_gc_end(c->sb.set_uuid); + wake_up(&c->alloc_wait); + closure_wake_up(&c->bucket_wait); + + continue_at(cl, bch_moving_gc, bch_gc_wq); +} + +void bch_queue_gc(struct cache_set *c) +{ + closure_trylock_call(&c->gc.cl, bch_btree_gc, bch_gc_wq, &c->cl); +} + +/* Initial partial gc */ + +static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, + unsigned long **seen) +{ + int ret; + unsigned i; + struct bkey *k; + struct bucket *g; + struct btree_iter iter; + + for_each_key_filter(b, k, &iter, bch_ptr_invalid) { + for (i = 0; i < KEY_PTRS(k); i++) { + if (!ptr_available(b->c, k, i)) + continue; + + g = PTR_BUCKET(b->c, k, i); + + if (!__test_and_set_bit(PTR_BUCKET_NR(b->c, k, i), + seen[PTR_DEV(k, i)]) || + !ptr_stale(b->c, k, i)) { + g->gen = PTR_GEN(k, i); + + if (b->level) + g->prio = BTREE_PRIO; + else if (g->prio == BTREE_PRIO) + g->prio = INITIAL_PRIO; + } + } + + btree_mark_key(b, k); + } + + if (b->level) { + k = bch_next_recurse_key(b, &ZERO_KEY); + + while (k) { + struct bkey *p = bch_next_recurse_key(b, k); + if (p) + btree_node_prefetch(b->c, p, b->level - 1); + + ret = btree(check_recurse, k, b, op, seen); + if (ret) + return ret; + + k = p; + } + } + + return 0; +} + +int bch_btree_check(struct cache_set *c, struct btree_op *op) +{ + int ret = -ENOMEM; + unsigned i; + unsigned long *seen[MAX_CACHES_PER_SET]; + + memset(seen, 0, sizeof(seen)); + + for (i = 0; c->cache[i]; i++) { + size_t n = DIV_ROUND_UP(c->cache[i]->sb.nbuckets, 8); + seen[i] = kmalloc(n, GFP_KERNEL); + if (!seen[i]) + goto err; + + /* Disables the seen array until prio_read() uses it too */ + memset(seen[i], 0xFF, n); + } + + ret = btree_root(check_recurse, c, op, seen); +err: + for (i = 0; i < MAX_CACHES_PER_SET; i++) + kfree(seen[i]); + return ret; +} + +/* Btree insertion */ + +static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert) +{ + struct bset *i = b->sets[b->nsets].data; + + memmove((uint64_t *) where + bkey_u64s(insert), + where, + (void *) end(i) - (void *) where); + + i->keys += bkey_u64s(insert); + bkey_copy(where, insert); + bch_bset_fix_lookup_table(b, where); +} + +static bool fix_overlapping_extents(struct btree *b, + struct bkey *insert, + struct btree_iter *iter, + struct btree_op *op) +{ + void subtract_dirty(struct bkey *k, int sectors) + { + struct 
bcache_device *d = b->c->devices[KEY_INODE(k)]; + + if (KEY_DIRTY(k) && d) + atomic_long_sub(sectors, &d->sectors_dirty); + } + + unsigned old_size, sectors_found = 0; + + while (1) { + struct bkey *k = bch_btree_iter_next(iter); + if (!k || + bkey_cmp(&START_KEY(k), insert) >= 0) + break; + + if (bkey_cmp(k, &START_KEY(insert)) <= 0) + continue; + + old_size = KEY_SIZE(k); + + /* + * We might overlap with 0 size extents; we can't skip these + * because if they're in the set we're inserting to we have to + * adjust them so they don't overlap with the key we're + * inserting. But we don't want to check them for BTREE_REPLACE + * operations. + */ + + if (op->type == BTREE_REPLACE && + KEY_SIZE(k)) { + /* + * k might have been split since we inserted/found the + * key we're replacing + */ + unsigned i; + uint64_t offset = KEY_START(k) - + KEY_START(&op->replace); + + /* But it must be a subset of the replace key */ + if (KEY_START(k) < KEY_START(&op->replace) || + KEY_OFFSET(k) > KEY_OFFSET(&op->replace)) + goto check_failed; + + /* We didn't find a key that we were supposed to */ + if (KEY_START(k) > KEY_START(insert) + sectors_found) + goto check_failed; + + if (KEY_PTRS(&op->replace) != KEY_PTRS(k)) + goto check_failed; + + /* skip past gen */ + offset <<= 8; + + BUG_ON(!KEY_PTRS(&op->replace)); + + for (i = 0; i < KEY_PTRS(&op->replace); i++) + if (k->ptr[i] != op->replace.ptr[i] + offset) + goto check_failed; + + sectors_found = KEY_OFFSET(k) - KEY_START(insert); + } + + if (bkey_cmp(insert, k) < 0 && + bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) { + /* + * We overlapped in the middle of an existing key: that + * means we have to split the old key. But we have to do + * slightly different things depending on whether the + * old key has been written out yet. + */ + + struct bkey *top; + + subtract_dirty(k, KEY_SIZE(insert)); + + if (bkey_written(b, k)) { + /* + * We insert a new key to cover the top of the + * old key, and the old key is modified in place + * to represent the bottom split. + * + * It's completely arbitrary whether the new key + * is the top or the bottom, but it has to match + * up with what btree_sort_fixup() does - it + * doesn't check for this kind of overlap, it + * depends on us inserting a new key for the top + * here. 
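+				 *
+				 * Sketch of this written-key case - inserting
+				 * into the middle of an existing key k splits
+				 * the old range around the new one:
+				 *
+				 *	before:  |--------- k ----------|
+				 *	insert:       |-- insert --|
+				 *	after:   |-k-|             |-top-|
+				 *
+				 * (insert itself is added afterwards, by
+				 * btree_insert_key().)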
+ */ + top = bch_bset_search(b, &b->sets[b->nsets], + insert); + shift_keys(b, top, k); + } else { + BKEY_PADDED(key) temp; + bkey_copy(&temp.key, k); + shift_keys(b, k, &temp.key); + top = bkey_next(k); + } + + bch_cut_front(insert, top); + bch_cut_back(&START_KEY(insert), k); + bch_bset_fix_invalidated_key(b, k); + return false; + } + + if (bkey_cmp(insert, k) < 0) { + bch_cut_front(insert, k); + } else { + if (bkey_written(b, k) && + bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) { + /* + * Completely overwrote, so we don't have to + * invalidate the binary search tree + */ + bch_cut_front(k, k); + } else { + __bch_cut_back(&START_KEY(insert), k); + bch_bset_fix_invalidated_key(b, k); + } + } + + subtract_dirty(k, old_size - KEY_SIZE(k)); + } + +check_failed: + if (op->type == BTREE_REPLACE) { + if (!sectors_found) { + op->insert_collision = true; + return true; + } else if (sectors_found < KEY_SIZE(insert)) { + SET_KEY_OFFSET(insert, KEY_OFFSET(insert) - + (KEY_SIZE(insert) - sectors_found)); + SET_KEY_SIZE(insert, sectors_found); + } + } + + return false; +} + +static bool btree_insert_key(struct btree *b, struct btree_op *op, + struct bkey *k) +{ + struct bset *i = b->sets[b->nsets].data; + struct bkey *m, *prev; + const char *status = "insert"; + + BUG_ON(bkey_cmp(k, &b->key) > 0); + BUG_ON(b->level && !KEY_PTRS(k)); + BUG_ON(!b->level && !KEY_OFFSET(k)); + + if (!b->level) { + struct btree_iter iter; + struct bkey search = KEY(KEY_INODE(k), KEY_START(k), 0); + + /* + * bset_search() returns the first key that is strictly greater + * than the search key - but for back merging, we want to find + * the first key that is greater than or equal to KEY_START(k) - + * unless KEY_START(k) is 0. + */ + if (KEY_OFFSET(&search)) + SET_KEY_OFFSET(&search, KEY_OFFSET(&search) - 1); + + prev = NULL; + m = bch_btree_iter_init(b, &iter, &search); + + if (fix_overlapping_extents(b, k, &iter, op)) + return false; + + while (m != end(i) && + bkey_cmp(k, &START_KEY(m)) > 0) + prev = m, m = bkey_next(m); + + if (key_merging_disabled(b->c)) + goto insert; + + /* prev is in the tree, if we merge we're done */ + status = "back merging"; + if (prev && + bch_bkey_try_merge(b, prev, k)) + goto merged; + + status = "overwrote front"; + if (m != end(i) && + KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m)) + goto copy; + + status = "front merge"; + if (m != end(i) && + bch_bkey_try_merge(b, k, m)) + goto copy; + } else + m = bch_bset_search(b, &b->sets[b->nsets], k); + +insert: shift_keys(b, m, k); +copy: bkey_copy(m, k); +merged: + bch_check_keys(b, "%s for %s at %s: %s", status, + op_type(op), pbtree(b), pkey(k)); + bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status, + op_type(op), pbtree(b), pkey(k)); + + if (b->level && !KEY_OFFSET(k)) + b->prio_blocked++; + + pr_debug("%s for %s at %s: %s", status, + op_type(op), pbtree(b), pkey(k)); + + return true; +} + +bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) +{ + bool ret = false; + struct bkey *k; + unsigned oldsize = bch_count_data(b); + + while ((k = bch_keylist_pop(&op->keys))) { + bkey_put(b->c, k, b->level); + ret |= btree_insert_key(b, op, k); + } + + BUG_ON(bch_count_data(b) < oldsize); + return ret; +} + +bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op, + struct bio *bio) +{ + bool ret = false; + uint64_t btree_ptr = b->key.ptr[0]; + unsigned long seq = b->seq; + BKEY_PADDED(k) tmp; + + rw_unlock(false, b); + rw_lock(true, b, b->level); + + if (b->key.ptr[0] != btree_ptr || + b->seq != seq + 1 || + 
should_split(b)) + goto out; + + op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio)); + + SET_KEY_PTRS(&op->replace, 1); + get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t)); + + SET_PTR_DEV(&op->replace, 0, PTR_CHECK_DEV); + + bkey_copy(&tmp.k, &op->replace); + + BUG_ON(op->type != BTREE_INSERT); + BUG_ON(!btree_insert_key(b, op, &tmp.k)); + bch_btree_write(b, false, NULL); + ret = true; +out: + downgrade_write(&b->lock); + return ret; +} + +static int btree_split(struct btree *b, struct btree_op *op) +{ + bool split, root = b == b->c->root; + struct btree *n1, *n2 = NULL, *n3 = NULL; + uint64_t start_time = local_clock(); + + if (b->level) + set_closure_blocking(&op->cl); + + n1 = btree_node_alloc_replacement(b, &op->cl); + if (IS_ERR(n1)) + goto err; + + split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; + + pr_debug("%ssplitting at %s keys %i", split ? "" : "not ", + pbtree(b), n1->sets[0].data->keys); + + if (split) { + unsigned keys = 0; + + n2 = bch_btree_node_alloc(b->c, b->level, &op->cl); + if (IS_ERR(n2)) + goto err_free1; + + if (root) { + n3 = bch_btree_node_alloc(b->c, b->level + 1, &op->cl); + if (IS_ERR(n3)) + goto err_free2; + } + + bch_btree_insert_keys(n1, op); + + /* Has to be a linear search because we don't have an auxiliary + * search tree yet + */ + + while (keys < (n1->sets[0].data->keys * 3) / 5) + keys += bkey_u64s(node(n1->sets[0].data, keys)); + + bkey_copy_key(&n1->key, node(n1->sets[0].data, keys)); + keys += bkey_u64s(node(n1->sets[0].data, keys)); + + n2->sets[0].data->keys = n1->sets[0].data->keys - keys; + n1->sets[0].data->keys = keys; + + memcpy(n2->sets[0].data->start, + end(n1->sets[0].data), + n2->sets[0].data->keys * sizeof(uint64_t)); + + bkey_copy_key(&n2->key, &b->key); + + bch_keylist_add(&op->keys, &n2->key); + bch_btree_write(n2, true, op); + rw_unlock(true, n2); + } else + bch_btree_insert_keys(n1, op); + + bch_keylist_add(&op->keys, &n1->key); + bch_btree_write(n1, true, op); + + if (n3) { + bkey_copy_key(&n3->key, &MAX_KEY); + bch_btree_insert_keys(n3, op); + bch_btree_write(n3, true, op); + + closure_sync(&op->cl); + bch_btree_set_root(n3); + rw_unlock(true, n3); + } else if (root) { + op->keys.top = op->keys.bottom; + closure_sync(&op->cl); + bch_btree_set_root(n1); + } else { + unsigned i; + + bkey_copy(op->keys.top, &b->key); + bkey_copy_key(op->keys.top, &ZERO_KEY); + + for (i = 0; i < KEY_PTRS(&b->key); i++) { + uint8_t g = PTR_BUCKET(b->c, &b->key, i)->gen + 1; + + SET_PTR_GEN(op->keys.top, i, g); + } + + bch_keylist_push(&op->keys); + closure_sync(&op->cl); + atomic_inc(&b->c->prio_blocked); + } + + rw_unlock(true, n1); + btree_node_free(b, op); + + time_stats_update(&b->c->btree_split_time, start_time); + + return 0; +err_free2: + __bkey_put(n2->c, &n2->key); + btree_node_free(n2, op); + rw_unlock(true, n2); +err_free1: + __bkey_put(n1->c, &n1->key); + btree_node_free(n1, op); + rw_unlock(true, n1); +err: + if (n3 == ERR_PTR(-EAGAIN) || + n2 == ERR_PTR(-EAGAIN) || + n1 == ERR_PTR(-EAGAIN)) + return -EAGAIN; + + pr_warn("couldn't split"); + return -ENOMEM; +} + +static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op, + struct keylist *stack_keys) +{ + if (b->level) { + int ret; + struct bkey *insert = op->keys.bottom; + struct bkey *k = bch_next_recurse_key(b, &START_KEY(insert)); + + if (!k) { + btree_bug(b, "no key to recurse on at level %i/%i", + b->level, b->c->root->level); + + op->keys.top = op->keys.bottom; + return -EIO; + } + + if (bkey_cmp(insert, k) > 0) { + unsigned 
i; + + if (op->type == BTREE_REPLACE) { + __bkey_put(b->c, insert); + op->keys.top = op->keys.bottom; + op->insert_collision = true; + return 0; + } + + for (i = 0; i < KEY_PTRS(insert); i++) + atomic_inc(&PTR_BUCKET(b->c, insert, i)->pin); + + bkey_copy(stack_keys->top, insert); + + bch_cut_back(k, insert); + bch_cut_front(k, stack_keys->top); + + bch_keylist_push(stack_keys); + } + + ret = btree(insert_recurse, k, b, op, stack_keys); + if (ret) + return ret; + } + + if (!bch_keylist_empty(&op->keys)) { + if (should_split(b)) { + if (op->lock <= b->c->root->level) { + BUG_ON(b->level); + op->lock = b->c->root->level + 1; + return -EINTR; + } + return btree_split(b, op); + } + + BUG_ON(write_block(b) != b->sets[b->nsets].data); + + if (bch_btree_insert_keys(b, op)) + bch_btree_write(b, false, op); + } + + return 0; +} + +int bch_btree_insert(struct btree_op *op, struct cache_set *c) +{ + int ret = 0; + struct keylist stack_keys; + + /* + * Don't want to block with the btree locked unless we have to, + * otherwise we get deadlocks with try_harder and between split/gc + */ + clear_closure_blocking(&op->cl); + + BUG_ON(bch_keylist_empty(&op->keys)); + bch_keylist_copy(&stack_keys, &op->keys); + bch_keylist_init(&op->keys); + + while (!bch_keylist_empty(&stack_keys) || + !bch_keylist_empty(&op->keys)) { + if (bch_keylist_empty(&op->keys)) { + bch_keylist_add(&op->keys, + bch_keylist_pop(&stack_keys)); + op->lock = 0; + } + + ret = btree_root(insert_recurse, c, op, &stack_keys); + + if (ret == -EAGAIN) { + ret = 0; + closure_sync(&op->cl); + } else if (ret) { + struct bkey *k; + + pr_err("error %i trying to insert key for %s", + ret, op_type(op)); + + while ((k = bch_keylist_pop(&stack_keys) ?: + bch_keylist_pop(&op->keys))) + bkey_put(c, k, 0); + } + } + + bch_keylist_free(&stack_keys); + + if (op->journal) + atomic_dec_bug(op->journal); + op->journal = NULL; + return ret; +} + +void bch_btree_set_root(struct btree *b) +{ + unsigned i; + + BUG_ON(!b->written); + + for (i = 0; i < KEY_PTRS(&b->key); i++) + BUG_ON(PTR_BUCKET(b->c, &b->key, i)->prio != BTREE_PRIO); + + mutex_lock(&b->c->bucket_lock); + list_del_init(&b->list); + mutex_unlock(&b->c->bucket_lock); + + b->c->root = b; + __bkey_put(b->c, &b->key); + + bch_journal_meta(b->c, NULL); + pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); +} + +/* Cache lookup */ + +static int submit_partial_cache_miss(struct btree *b, struct btree_op *op, + struct bkey *k) +{ + struct search *s = container_of(op, struct search, op); + struct bio *bio = &s->bio.bio; + int ret = 0; + + while (!ret && + !op->lookup_done) { + unsigned sectors = INT_MAX; + + if (KEY_INODE(k) == op->inode) { + if (KEY_START(k) <= bio->bi_sector) + break; + + sectors = min_t(uint64_t, sectors, + KEY_START(k) - bio->bi_sector); + } + + ret = s->d->cache_miss(b, s, bio, sectors); + } + + return ret; +} + +/* + * Read from a single key, handling the initial cache miss if the key starts in + * the middle of the bio + */ +static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, + struct bkey *k) +{ + struct search *s = container_of(op, struct search, op); + struct bio *bio = &s->bio.bio; + unsigned ptr; + struct bio *n; + + int ret = submit_partial_cache_miss(b, op, k); + if (ret || op->lookup_done) + return ret; + + /* XXX: figure out best pointer - for multiple cache devices */ + ptr = 0; + + PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO; + + while (!op->lookup_done && + KEY_INODE(k) == op->inode && + bio->bi_sector < KEY_OFFSET(k)) { + struct bkey 
*bio_key; + sector_t sector = PTR_OFFSET(k, ptr) + + (bio->bi_sector - KEY_START(k)); + unsigned sectors = min_t(uint64_t, INT_MAX, + KEY_OFFSET(k) - bio->bi_sector); + + n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); + if (!n) + return -EAGAIN; + + if (n == bio) + op->lookup_done = true; + + bio_key = &container_of(n, struct bbio, bio)->key; + + /* + * The bucket we're reading from might be reused while our bio + * is in flight, and we could then end up reading the wrong + * data. + * + * We guard against this by checking (in cache_read_endio()) if + * the pointer is stale again; if so, we treat it as an error + * and reread from the backing device (but we don't pass that + * error up anywhere). + */ + + bch_bkey_copy_single_ptr(bio_key, k, ptr); + SET_PTR_OFFSET(bio_key, 0, sector); + + n->bi_end_io = bch_cache_read_endio; + n->bi_private = &s->cl; + + trace_bcache_cache_hit(n); + __bch_submit_bbio(n, b->c); + } + + return 0; +} + +int bch_btree_search_recurse(struct btree *b, struct btree_op *op) +{ + struct search *s = container_of(op, struct search, op); + struct bio *bio = &s->bio.bio; + + int ret = 0; + struct bkey *k; + struct btree_iter iter; + bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0)); + + pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode, + (uint64_t) bio->bi_sector); + + do { + k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); + if (!k) { + /* + * b->key would be exactly what we want, except that + * pointers to btree nodes have nonzero size - we + * wouldn't go far enough + */ + + ret = submit_partial_cache_miss(b, op, + &KEY(KEY_INODE(&b->key), + KEY_OFFSET(&b->key), 0)); + break; + } + + ret = b->level + ? btree(search_recurse, k, b, op) + : submit_partial_cache_hit(b, op, k); + } while (!ret && + !op->lookup_done); + + return ret; +} + +/* Keybuf code */ + +static inline int keybuf_cmp(struct keybuf_key *l, struct keybuf_key *r) +{ + /* Overlapping keys compare equal */ + if (bkey_cmp(&l->key, &START_KEY(&r->key)) <= 0) + return -1; + if (bkey_cmp(&START_KEY(&l->key), &r->key) >= 0) + return 1; + return 0; +} + +static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, + struct keybuf_key *r) +{ + return clamp_t(int64_t, bkey_cmp(&l->key, &r->key), -1, 1); +} + +static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, + struct keybuf *buf, struct bkey *end) +{ + struct btree_iter iter; + bch_btree_iter_init(b, &iter, &buf->last_scanned); + + while (!array_freelist_empty(&buf->freelist)) { + struct bkey *k = bch_btree_iter_next_filter(&iter, b, + bch_ptr_bad); + + if (!b->level) { + if (!k) { + buf->last_scanned = b->key; + break; + } + + buf->last_scanned = *k; + if (bkey_cmp(&buf->last_scanned, end) >= 0) + break; + + if (buf->key_predicate(buf, k)) { + struct keybuf_key *w; + + pr_debug("%s", pkey(k)); + + spin_lock(&buf->lock); + + w = array_alloc(&buf->freelist); + + w->private = NULL; + bkey_copy(&w->key, k); + + if (RB_INSERT(&buf->keys, w, node, keybuf_cmp)) + array_free(&buf->freelist, w); + + spin_unlock(&buf->lock); + } + } else { + if (!k) + break; + + btree(refill_keybuf, k, b, op, buf, end); + /* + * Might get an error here, but can't really do anything + * and it'll get logged elsewhere. Just read what we + * can. 
+			 */
+
+			if (bkey_cmp(&buf->last_scanned, end) >= 0)
+				break;
+
+			cond_resched();
+		}
+	}
+
+	return 0;
+}
+
+void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
+		       struct bkey *end)
+{
+	struct bkey start = buf->last_scanned;
+	struct btree_op op;
+	bch_btree_op_init_stack(&op);
+
+	cond_resched();
+
+	btree_root(refill_keybuf, c, &op, buf, end);
+	closure_sync(&op.cl);
+
+	pr_debug("found %s keys from %llu:%llu to %llu:%llu",
+		 RB_EMPTY_ROOT(&buf->keys) ? "no" :
+		 array_freelist_empty(&buf->freelist) ? "some" : "a few",
+		 KEY_INODE(&start), KEY_OFFSET(&start),
+		 KEY_INODE(&buf->last_scanned), KEY_OFFSET(&buf->last_scanned));
+
+	spin_lock(&buf->lock);
+
+	if (!RB_EMPTY_ROOT(&buf->keys)) {
+		struct keybuf_key *w;
+		w = RB_FIRST(&buf->keys, struct keybuf_key, node);
+		buf->start = START_KEY(&w->key);
+
+		w = RB_LAST(&buf->keys, struct keybuf_key, node);
+		buf->end = w->key;
+	} else {
+		buf->start = MAX_KEY;
+		buf->end = MAX_KEY;
+	}
+
+	spin_unlock(&buf->lock);
+}
+
+static void __bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w)
+{
+	rb_erase(&w->node, &buf->keys);
+	array_free(&buf->freelist, w);
+}
+
+void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w)
+{
+	spin_lock(&buf->lock);
+	__bch_keybuf_del(buf, w);
+	spin_unlock(&buf->lock);
+}
+
+bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
+				  struct bkey *end)
+{
+	bool ret = false;
+	struct keybuf_key *p, *w, s;
+	s.key = *start;
+
+	if (bkey_cmp(end, &buf->start) <= 0 ||
+	    bkey_cmp(start, &buf->end) >= 0)
+		return false;
+
+	spin_lock(&buf->lock);
+	w = RB_GREATER(&buf->keys, s, node, keybuf_nonoverlapping_cmp);
+
+	while (w && bkey_cmp(&START_KEY(&w->key), end) < 0) {
+		p = w;
+		w = RB_NEXT(w, node);
+
+		if (p->private)
+			ret = true;
+		else
+			__bch_keybuf_del(buf, p);
+	}
+
+	spin_unlock(&buf->lock);
+	return ret;
+}
+
+struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
+{
+	struct keybuf_key *w;
+	spin_lock(&buf->lock);
+
+	w = RB_FIRST(&buf->keys, struct keybuf_key, node);
+
+	while (w && w->private)
+		w = RB_NEXT(w, node);
+
+	if (w)
+		w->private = ERR_PTR(-EINTR);
+
+	spin_unlock(&buf->lock);
+	return w;
+}
+
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
+					  struct keybuf *buf,
+					  struct bkey *end)
+{
+	struct keybuf_key *ret;
+
+	while (1) {
+		ret = bch_keybuf_next(buf);
+		if (ret)
+			break;
+
+		if (bkey_cmp(&buf->last_scanned, end) >= 0) {
+			pr_debug("scan finished");
+			break;
+		}
+
+		bch_refill_keybuf(c, buf, end);
+	}
+
+	return ret;
+}
+
+void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
+{
+	buf->key_predicate = fn;
+	buf->last_scanned = MAX_KEY;
+	buf->keys = RB_ROOT;
+
+	spin_lock_init(&buf->lock);
+	array_allocator_init(&buf->freelist);
+}
+
+void bch_btree_exit(void)
+{
+	if (btree_io_wq)
+		destroy_workqueue(btree_io_wq);
+	if (bch_gc_wq)
+		destroy_workqueue(bch_gc_wq);
+}
+
+int __init bch_btree_init(void)
+{
+	if (!(bch_gc_wq = create_singlethread_workqueue("bch_btree_gc")) ||
+	    !(btree_io_wq = create_singlethread_workqueue("bch_btree_io")))
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
new file mode 100644
index 000000000000..af4a7092a28c
--- /dev/null
+++ b/drivers/md/bcache/btree.h
@@ -0,0 +1,405 @@
+#ifndef _BCACHE_BTREE_H
+#define _BCACHE_BTREE_H
+
+/*
+ * THE BTREE:
+ *
+ * At a high level, bcache's btree is a relatively standard b+ tree. All keys
+ * and pointers are in the leaves; interior nodes only have pointers to the
+ * child nodes.
+ *
+ * In the interior nodes, a struct bkey always points to a child btree node, and
+ * the key is the highest key in the child node - except that the highest key in
+ * an interior node is always MAX_KEY. The size field refers to the size on disk
+ * of the child node - this would allow us to have variable sized btree nodes
+ * (handy for keeping the depth of the btree 1 by expanding just the root).
+ *
+ * Btree nodes are themselves log structured, but this is hidden fairly
+ * thoroughly. Btree nodes on disk will in practice have extents that overlap
+ * (because they were written at different times), but in memory we never have
+ * overlapping extents - when we read in a btree node from disk, the first thing
+ * we do is resort all the sets of keys with a mergesort, and in the same pass
+ * we check for overlapping extents and adjust them appropriately.
+ *
+ * struct btree_op is a central interface to the btree code. It's used for
+ * specifying read vs. write locking, and the embedded closure is used for
+ * waiting on IO or reserve memory.
+ *
+ * BTREE CACHE:
+ *
+ * Btree nodes are cached in memory; traversing the btree might require reading
+ * in btree nodes which is handled mostly transparently.
+ *
+ * bch_btree_node_get() looks up a btree node in the cache and reads it in from
+ * disk if necessary. This function is almost never called directly though - the
+ * btree() macro is used to get a btree node, call some function on it, and
+ * unlock the node after the function returns.
+ *
+ * The root is special cased - it's taken out of the cache's lru (thus pinning
+ * it in memory), so we can find the root of the btree by just dereferencing a
+ * pointer instead of looking it up in the cache. This makes locking a bit
+ * tricky, since the root pointer is protected by the lock in the btree node it
+ * points to - the btree_root() macro handles this.
+ *
+ * In various places we must be able to allocate memory for multiple btree nodes
+ * in order to make forward progress. To do this we use the btree cache itself
+ * as a reserve; if __get_free_pages() fails, we'll find a node in the btree
+ * cache we can reuse. We can't allow more than one thread to be doing this at a
+ * time, so there's a lock, implemented by a pointer to the btree_op closure -
+ * this allows the btree_root() macro to implicitly release this lock.
+ *
+ * BTREE IO:
+ *
+ * Btree nodes never have to be explicitly read in; bch_btree_node_get() handles
+ * this.
+ *
+ * For writing, we have two btree_write structs embedded in struct btree - one
+ * write in flight, and one being set up, and we toggle between them.
+ *
+ * Writing is done with a single function - bch_btree_write() really serves two
+ * different purposes and should be broken up into two different functions. When
+ * passing now = false, it merely indicates that the node is now dirty - calling
+ * it ensures that the dirty keys will be written at some point in the future.
+ *
+ * When passing now = true, bch_btree_write() causes a write to happen
+ * "immediately" (if there was already a write in flight, it'll cause the write
+ * to happen as soon as the previous write completes). It returns immediately
+ * though - but it takes a refcount on the closure in struct btree_op you passed
+ * to it, so a closure_sync() later can be used to wait for the write to
+ * complete.
+ *
+ * This is handy because btree_split() and garbage collection can issue writes
+ * in parallel, reducing the amount of time they have to hold write locks.
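+ *
+ * As a sketch, the synchronous-write idiom this enables (op being a struct
+ * btree_op whose embedded closure was set up with bch_btree_op_init_stack()):
+ *
+ *	bch_btree_write(b, true, op);
+ *	... issue more writes, drop write locks ...
+ *	closure_sync(&op->cl);
+ *
+ * btree_split() does exactly this: it issues writes for the new nodes first
+ * and only waits on op->cl before making the new root visible.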
+ *
+ * LOCKING:
+ *
+ * When traversing the btree, we may need write locks starting at some level -
+ * inserting a key into the btree will typically only require a write lock on
+ * the leaf node.
+ *
+ * This is specified with the lock field in struct btree_op; lock = 0 means we
+ * take write locks at level <= 0, i.e. only leaf nodes. bch_btree_node_get()
+ * checks this field and returns the node with the appropriate lock held.
+ *
+ * If, after traversing the btree, the insertion code discovers it has to split
+ * then it must restart from the root and take new locks - to do this it changes
+ * the lock field and returns -EINTR, which causes the btree_root() macro to
+ * loop.
+ *
+ * Handling cache misses requires a different mechanism for upgrading to a write
+ * lock. We do cache lookups with only a read lock held, but if we get a cache
+ * miss and we wish to insert this data into the cache, we have to insert a
+ * placeholder key to detect races - otherwise, we could race with a write and
+ * overwrite the data that was just written to the cache with stale data from
+ * the backing device.
+ *
+ * For this we use a sequence number that write locks and unlocks increment - to
+ * insert the check key it unlocks the btree node and then takes a write lock,
+ * and fails if the sequence number doesn't match.
+ */
+
+#include "bset.h"
+#include "debug.h"
+
+struct btree_write {
+	struct closure *owner;
+	atomic_t *journal;
+
+	/* If btree_split() frees a btree node, it writes a new pointer to that
+	 * btree node indicating it was freed; it takes a refcount on
+	 * c->prio_blocked because we can't write the gens until the new
+	 * pointer is on disk. This allows btree_write_endio() to release the
+	 * refcount that btree_split() took.
+	 */
+	int prio_blocked;
+};
+
+struct btree {
+	/* Hottest entries first */
+	struct hlist_node hash;
+
+	/* Key/pointer for this btree node */
+	BKEY_PADDED(key);
+
+	/* Single bit - set when accessed, cleared by shrinker */
+	unsigned long accessed;
+	unsigned long seq;
+	struct rw_semaphore lock;
+	struct cache_set *c;
+
+	unsigned long flags;
+	uint16_t written;	/* would be nice to kill */
+	uint8_t level;
+	uint8_t nsets;
+	uint8_t page_order;
+
+	/*
+	 * Set of sorted keys - the real btree node - plus a binary search tree
+	 *
+	 * sets[0] is special; sets[0]->tree, sets[0]->prev and sets[0]->data
+	 * point to the memory we have allocated for this btree node.
+	 * Additionally, sets[0]->data points to the entire btree node as it
+	 * exists on disk.
+ */ + struct bset_tree sets[MAX_BSETS]; + + /* Used to refcount bio splits, also protects b->bio */ + struct closure_with_waitlist io; + + /* Gets transferred to w->prio_blocked - see the comment there */ + int prio_blocked; + + struct list_head list; + struct delayed_work work; + + uint64_t io_start_time; + struct btree_write writes[2]; + struct bio *bio; +}; + +#define BTREE_FLAG(flag) \ +static inline bool btree_node_ ## flag(struct btree *b) \ +{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ + \ +static inline void set_btree_node_ ## flag(struct btree *b) \ +{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ + +enum btree_flags { + BTREE_NODE_read_done, + BTREE_NODE_io_error, + BTREE_NODE_dirty, + BTREE_NODE_write_idx, +}; + +BTREE_FLAG(read_done); +BTREE_FLAG(io_error); +BTREE_FLAG(dirty); +BTREE_FLAG(write_idx); + +static inline struct btree_write *btree_current_write(struct btree *b) +{ + return b->writes + btree_node_write_idx(b); +} + +static inline struct btree_write *btree_prev_write(struct btree *b) +{ + return b->writes + (btree_node_write_idx(b) ^ 1); +} + +static inline unsigned bset_offset(struct btree *b, struct bset *i) +{ + return (((size_t) i) - ((size_t) b->sets->data)) >> 9; +} + +static inline struct bset *write_block(struct btree *b) +{ + return ((void *) b->sets[0].data) + b->written * block_bytes(b->c); +} + +static inline bool bset_written(struct btree *b, struct bset_tree *t) +{ + return t->data < write_block(b); +} + +static inline bool bkey_written(struct btree *b, struct bkey *k) +{ + return k < write_block(b)->start; +} + +static inline void set_gc_sectors(struct cache_set *c) +{ + atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 8); +} + +static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k) +{ + return __bch_ptr_invalid(b->c, b->level, k); +} + +static inline struct bkey *bch_btree_iter_init(struct btree *b, + struct btree_iter *iter, + struct bkey *search) +{ + return __bch_btree_iter_init(b, iter, search, b->sets); +} + +/* Looping macros */ + +#define for_each_cached_btree(b, c, iter) \ + for (iter = 0; \ + iter < ARRAY_SIZE((c)->bucket_hash); \ + iter++) \ + hlist_for_each_entry_rcu((b), (c)->bucket_hash + iter, hash) + +#define for_each_key_filter(b, k, iter, filter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next_filter((iter), b, filter));) + +#define for_each_key(b, k, iter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next(iter));) + +/* Recursing down the btree */ + +struct btree_op { + struct closure cl; + struct cache_set *c; + + /* Journal entry we have a refcount on */ + atomic_t *journal; + + /* Bio to be inserted into the cache */ + struct bio *cache_bio; + + unsigned inode; + + uint16_t write_prio; + + /* Btree level at which we start taking write locks */ + short lock; + + /* Btree insertion type */ + enum { + BTREE_INSERT, + BTREE_REPLACE + } type:8; + + unsigned csum:1; + unsigned skip:1; + unsigned flush_journal:1; + + unsigned insert_data_done:1; + unsigned lookup_done:1; + unsigned insert_collision:1; + + /* Anything after this point won't get zeroed in do_bio_hook() */ + + /* Keys to be inserted */ + struct keylist keys; + BKEY_PADDED(replace); +}; + +void bch_btree_op_init_stack(struct btree_op *); + +static inline void rw_lock(bool w, struct btree *b, int level) +{ + w ? 
down_write_nested(&b->lock, level + 1) + : down_read_nested(&b->lock, level + 1); + if (w) + b->seq++; +} + +static inline void rw_unlock(bool w, struct btree *b) +{ +#ifdef CONFIG_BCACHE_EDEBUG + unsigned i; + + if (w && + b->key.ptr[0] && + btree_node_read_done(b)) + for (i = 0; i <= b->nsets; i++) + bch_check_key_order(b, b->sets[i].data); +#endif + + if (w) + b->seq++; + (w ? up_write : up_read)(&b->lock); +} + +#define insert_lock(s, b) ((b)->level <= (s)->lock) + +/* + * These macros are for recursing down the btree - they handle the details of + * locking and looking up nodes in the cache for you. They're best treated as + * mere syntax when reading code that uses them. + * + * op->lock determines whether we take a read or a write lock at a given depth. + * If you've got a read lock and find that you need a write lock (i.e. you're + * going to have to split), set op->lock and return -EINTR; btree_root() will + * call you again and you'll have the correct lock. + */ + +/** + * btree - recurse down the btree on a specified key + * @fn: function to call, which will be passed the child node + * @key: key to recurse on + * @b: parent btree node + * @op: pointer to struct btree_op + */ +#define btree(fn, key, b, op, ...) \ +({ \ + int _r, l = (b)->level - 1; \ + bool _w = l <= (op)->lock; \ + struct btree *_b = bch_btree_node_get((b)->c, key, l, op); \ + if (!IS_ERR(_b)) { \ + _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ + rw_unlock(_w, _b); \ + } else \ + _r = PTR_ERR(_b); \ + _r; \ +}) + +/** + * btree_root - call a function on the root of the btree + * @fn: function to call, which will be passed the child node + * @c: cache set + * @op: pointer to struct btree_op + */ +#define btree_root(fn, c, op, ...) \ +({ \ + int _r = -EINTR; \ + do { \ + struct btree *_b = (c)->root; \ + bool _w = insert_lock(op, _b); \ + rw_lock(_w, _b, _b->level); \ + if (_b == (c)->root && \ + _w == insert_lock(op, _b)) \ + _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ + rw_unlock(_w, _b); \ + bch_cannibalize_unlock(c, &(op)->cl); \ + } while (_r == -EINTR); \ + \ + _r; \ +}) + +static inline bool should_split(struct btree *b) +{ + struct bset *i = write_block(b); + return b->written >= btree_blocks(b) || + (i->seq == b->sets[0].data->seq && + b->written + __set_blocks(i, i->keys + 15, b->c) + > btree_blocks(b)); +} + +void bch_btree_read_done(struct closure *); +void bch_btree_read(struct btree *); +void bch_btree_write(struct btree *b, bool now, struct btree_op *op); + +void bch_cannibalize_unlock(struct cache_set *, struct closure *); +void bch_btree_set_root(struct btree *); +struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *); +struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, + int, struct btree_op *); + +bool bch_btree_insert_keys(struct btree *, struct btree_op *); +bool bch_btree_insert_check_key(struct btree *, struct btree_op *, + struct bio *); +int bch_btree_insert(struct btree_op *, struct cache_set *); + +int bch_btree_search_recurse(struct btree *, struct btree_op *); + +void bch_queue_gc(struct cache_set *); +size_t bch_btree_gc_finish(struct cache_set *); +void bch_moving_gc(struct closure *); +int bch_btree_check(struct cache_set *, struct btree_op *); +uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); + +void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *); +void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *); +bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, + struct bkey *); 
+void bch_keybuf_del(struct keybuf *, struct keybuf_key *); +struct keybuf_key *bch_keybuf_next(struct keybuf *); +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, + struct keybuf *, struct bkey *); + +#endif diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c new file mode 100644 index 000000000000..d6fbec0f8484 --- /dev/null +++ b/drivers/md/bcache/closure.c @@ -0,0 +1,348 @@ +/* + * Asynchronous refcounty things + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include +#include +#include + +#include "closure.h" + +void closure_queue(struct closure *cl) +{ + struct workqueue_struct *wq = cl->wq; + if (wq) { + INIT_WORK(&cl->work, cl->work.func); + BUG_ON(!queue_work(wq, &cl->work)); + } else + cl->fn(cl); +} +EXPORT_SYMBOL_GPL(closure_queue); + +#define CL_FIELD(type, field) \ + case TYPE_ ## type: \ + return &container_of(cl, struct type, cl)->field + +static struct closure_waitlist *closure_waitlist(struct closure *cl) +{ + switch (cl->type) { + CL_FIELD(closure_with_waitlist, wait); + CL_FIELD(closure_with_waitlist_and_timer, wait); + default: + return NULL; + } +} + +static struct timer_list *closure_timer(struct closure *cl) +{ + switch (cl->type) { + CL_FIELD(closure_with_timer, timer); + CL_FIELD(closure_with_waitlist_and_timer, timer); + default: + return NULL; + } +} + +static inline void closure_put_after_sub(struct closure *cl, int flags) +{ + int r = flags & CLOSURE_REMAINING_MASK; + + BUG_ON(flags & CLOSURE_GUARD_MASK); + BUG_ON(!r && (flags & ~(CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING))); + + /* Must deliver precisely one wakeup */ + if (r == 1 && (flags & CLOSURE_SLEEPING)) + wake_up_process(cl->task); + + if (!r) { + if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { + /* CLOSURE_BLOCKING might be set - clear it */ + atomic_set(&cl->remaining, + CLOSURE_REMAINING_INITIALIZER); + closure_queue(cl); + } else { + struct closure *parent = cl->parent; + struct closure_waitlist *wait = closure_waitlist(cl); + + closure_debug_destroy(cl); + + atomic_set(&cl->remaining, -1); + + if (wait) + closure_wake_up(wait); + + if (cl->fn) + cl->fn(cl); + + if (parent) + closure_put(parent); + } + } +} + +/* For clearing flags with the same atomic op as a put */ +void closure_sub(struct closure *cl, int v) +{ + closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); +} +EXPORT_SYMBOL_GPL(closure_sub); + +void closure_put(struct closure *cl) +{ + closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); +} +EXPORT_SYMBOL_GPL(closure_put); + +static void set_waiting(struct closure *cl, unsigned long f) +{ +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + cl->waiting_on = f; +#endif +} + +void __closure_wake_up(struct closure_waitlist *wait_list) +{ + struct llist_node *list; + struct closure *cl; + struct llist_node *reverse = NULL; + + list = llist_del_all(&wait_list->list); + + /* We first reverse the list to preserve FIFO ordering and fairness */ + + while (list) { + struct llist_node *t = list; + list = llist_next(list); + + t->next = reverse; + reverse = t; + } + + /* Then do the wakeups */ + + while (reverse) { + cl = container_of(reverse, struct closure, list); + reverse = llist_next(reverse); + + set_waiting(cl, 0); + closure_sub(cl, CLOSURE_WAITING + 1); + } +} +EXPORT_SYMBOL_GPL(__closure_wake_up); + +bool closure_wait(struct closure_waitlist *list, struct closure *cl) +{ + if (atomic_read(&cl->remaining) & CLOSURE_WAITING) + return false; + + set_waiting(cl, _RET_IP_); + atomic_add(CLOSURE_WAITING + 1, &cl->remaining); + 
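+	/*
+	 * The WAITING flag and the refcount must be set before the closure
+	 * is visible on the list, so a racing __closure_wake_up() always
+	 * finds the flag set and a refcount to drop:
+	 */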
llist_add(&cl->list, &list->list);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(closure_wait);
+
+/**
+ * closure_sync() - sleep until a closure has nothing left to wait on
+ * @cl: closure to wait on
+ *
+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns
+ * the last refcount.
+ */
+void closure_sync(struct closure *cl)
+{
+	while (1) {
+		__closure_start_sleep(cl);
+		closure_set_ret_ip(cl);
+
+		if ((atomic_read(&cl->remaining) &
+		     CLOSURE_REMAINING_MASK) == 1)
+			break;
+
+		schedule();
+	}
+
+	__closure_end_sleep(cl);
+}
+EXPORT_SYMBOL_GPL(closure_sync);
+
+/**
+ * closure_trylock() - try to acquire the closure, without waiting
+ * @cl: closure to lock
+ * @parent: parent to give @cl if the lock is acquired; may be NULL
+ *
+ * Returns true if the closure was successfully locked.
+ */
+bool closure_trylock(struct closure *cl, struct closure *parent)
+{
+	if (atomic_cmpxchg(&cl->remaining, -1,
+			   CLOSURE_REMAINING_INITIALIZER) != -1)
+		return false;
+
+	closure_set_ret_ip(cl);
+
+	smp_mb();
+	cl->parent = parent;
+	if (parent)
+		closure_get(parent);
+
+	closure_debug_create(cl);
+	return true;
+}
+EXPORT_SYMBOL_GPL(closure_trylock);
+
+void __closure_lock(struct closure *cl, struct closure *parent,
+		    struct closure_waitlist *wait_list)
+{
+	struct closure wait;
+	closure_init_stack(&wait);
+
+	while (1) {
+		if (closure_trylock(cl, parent))
+			return;
+
+		closure_wait_event_sync(wait_list, &wait,
+					atomic_read(&cl->remaining) == -1);
+	}
+}
+EXPORT_SYMBOL_GPL(__closure_lock);
+
+static void closure_delay_timer_fn(unsigned long data)
+{
+	struct closure *cl = (struct closure *) data;
+	closure_sub(cl, CLOSURE_TIMER + 1);
+}
+
+void do_closure_timer_init(struct closure *cl)
+{
+	struct timer_list *timer = closure_timer(cl);
+
+	init_timer(timer);
+	timer->data = (unsigned long) cl;
+	timer->function = closure_delay_timer_fn;
+}
+EXPORT_SYMBOL_GPL(do_closure_timer_init);
+
+bool __closure_delay(struct closure *cl, unsigned long delay,
+		     struct timer_list *timer)
+{
+	if (atomic_read(&cl->remaining) & CLOSURE_TIMER)
+		return false;
+
+	BUG_ON(timer_pending(timer));
+
+	timer->expires = jiffies + delay;
+
+	atomic_add(CLOSURE_TIMER + 1, &cl->remaining);
+	add_timer(timer);
+	return true;
+}
+EXPORT_SYMBOL_GPL(__closure_delay);
+
+void __closure_flush(struct closure *cl, struct timer_list *timer)
+{
+	if (del_timer(timer))
+		closure_sub(cl, CLOSURE_TIMER + 1);
+}
+EXPORT_SYMBOL_GPL(__closure_flush);
+
+void __closure_flush_sync(struct closure *cl, struct timer_list *timer)
+{
+	if (del_timer_sync(timer))
+		closure_sub(cl, CLOSURE_TIMER + 1);
+}
+EXPORT_SYMBOL_GPL(__closure_flush_sync);
+
+#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
+
+static LIST_HEAD(closure_list);
+static DEFINE_SPINLOCK(closure_list_lock);
+
+void closure_debug_create(struct closure *cl)
+{
+	unsigned long flags;
+
+	BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE);
+	cl->magic = CLOSURE_MAGIC_ALIVE;
+
+	spin_lock_irqsave(&closure_list_lock, flags);
+	list_add(&cl->all, &closure_list);
+	spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(closure_debug_create);
+
+void closure_debug_destroy(struct closure *cl)
+{
+	unsigned long flags;
+
+	BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
+	cl->magic = CLOSURE_MAGIC_DEAD;
+
+	spin_lock_irqsave(&closure_list_lock, flags);
+	list_del(&cl->all);
+	spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(closure_debug_destroy);
+
+static struct dentry *debug;
+
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+
+static int debug_seq_show(struct seq_file *f, void *data)
+{
+	struct closure *cl;
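+
+	/* One line per live closure: pointer, fn, parent, refcount and flags */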
+	spin_lock_irq(&closure_list_lock);
+
+	list_for_each_entry(cl, &closure_list, all) {
+		int r = atomic_read(&cl->remaining);
+
+		seq_printf(f, "%p: %pF -> %pf p %p r %i ",
+			   cl, (void *) cl->ip, cl->fn, cl->parent,
+			   r & CLOSURE_REMAINING_MASK);
+
+		seq_printf(f, "%s%s%s%s%s%s\n",
+			   test_bit(WORK_STRUCT_PENDING,
+				    work_data_bits(&cl->work)) ? "Q" : "",
+			   r & CLOSURE_RUNNING	? "R" : "",
+			   r & CLOSURE_BLOCKING	? "B" : "",
+			   r & CLOSURE_STACK	? "S" : "",
+			   r & CLOSURE_SLEEPING	? "Sl" : "",
+			   r & CLOSURE_TIMER	? "T" : "");
+
+		if (r & CLOSURE_WAITING)
+			seq_printf(f, " W %pF\n",
+				   (void *) cl->waiting_on);
+
+		seq_printf(f, "\n");
+	}
+
+	spin_unlock_irq(&closure_list_lock);
+	return 0;
+}
+
+static int debug_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, debug_seq_show, NULL);
+}
+
+static const struct file_operations debug_ops = {
+	.owner		= THIS_MODULE,
+	.open		= debug_seq_open,
+	.read		= seq_read,
+	.release	= single_release
+};
+
+int __init closure_debug_init(void)
+{
+	debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+	return 0;
+}
+
+module_init(closure_debug_init);
+
+#endif
+
+MODULE_AUTHOR("Kent Overstreet ");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
new file mode 100644
index 000000000000..3f31d599ea56
--- /dev/null
+++ b/drivers/md/bcache/closure.h
@@ -0,0 +1,670 @@
+#ifndef _LINUX_CLOSURE_H
+#define _LINUX_CLOSURE_H
+
+#include <linux/llist.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+
+/*
+ * Closure is perhaps the most overused and abused term in computer science, but
+ * since I've been unable to come up with anything better you're stuck with it
+ * again.
+ *
+ * What are closures?
+ *
+ * They embed a refcount. The basic idea is they count "things that are in
+ * progress" - in flight bios, some other thread that's doing something else -
+ * anything you might want to wait on.
+ *
+ * The refcount may be manipulated with closure_get() and closure_put().
+ * closure_put() is where many of the interesting things happen, when it causes
+ * the refcount to go to 0.
+ *
+ * Closures can be used to wait on things both synchronously and asynchronously,
+ * and synchronous and asynchronous use can be mixed without restriction. To
+ * wait synchronously, use closure_sync() - you will sleep until your closure's
+ * refcount hits 1.
+ *
+ * To wait asynchronously, use
+ *   continue_at(cl, next_function, workqueue);
+ *
+ * passing it, as you might expect, the function to run when nothing is pending
+ * and the workqueue to run that function out of.
+ *
+ * continue_at() also, critically, is a macro that returns the calling function.
+ * There's good reason for this.
+ *
+ * To safely use closures asynchronously, they must always have a refcount while
+ * they are running, owned by the thread that is running them. Otherwise, suppose
+ * you submit some bios and wish to have a function run when they all complete:
+ *
+ * foo_endio(struct bio *bio, int error)
+ * {
+ *	closure_put(cl);
+ * }
+ *
+ * closure_init(cl);
+ *
+ * do_stuff();
+ * closure_get(cl);
+ * bio1->bi_end_io = foo_endio;
+ * bio_submit(bio1);
+ *
+ * do_more_stuff();
+ * closure_get(cl);
+ * bio2->bi_end_io = foo_endio;
+ * bio_submit(bio2);
+ *
+ * continue_at(cl, complete_some_read, system_wq);
+ *
+ * If the closure's refcount started at 0, complete_some_read() could run before
+ * the second bio was submitted - which is almost always not what you want!
+ * More importantly, it wouldn't be possible to say whether the original thread
+ * or complete_some_read()'s thread owned the closure - and whatever state it
+ * was associated with!
+ *
+ * So, closure_init() initializes a closure's refcount to 1 - and when a
+ * closure_fn is run, the refcount will be reset to 1 first.
+ *
+ * Then, the rule is - if you got the refcount with closure_get(), release it
+ * with closure_put() (i.e., in a bio->bi_end_io function). If you have a
+ * refcount on a closure because you called closure_init() or you were run out
+ * of a closure - _always_ use continue_at(). Doing so consistently will help
+ * eliminate an entire class of particularly pernicious races.
+ *
+ * For a closure to wait on an arbitrary event, we need to introduce waitlists:
+ *
+ * struct closure_waitlist list;
+ * closure_wait_event(list, cl, condition);
+ * closure_wake_up(wait_list);
+ *
+ * These work analogously to wait_event() and wake_up() - except that instead of
+ * operating on the current thread (for wait_event()) and lists of threads, they
+ * operate on an explicit closure and lists of closures.
+ *
+ * Because it's a closure we can now wait either synchronously or
+ * asynchronously. closure_wait_event() returns the current value of the
+ * condition, and if it returned false continue_at() or closure_sync() can be
+ * used to wait for it to become true.
+ *
+ * It's useful for waiting on things when you can't sleep in the context in
+ * which you must check the condition (perhaps a spinlock held, or you might be
+ * beneath generic_make_request() - in which case you can't sleep on IO).
+ *
+ * closure_wait_event() will wait either synchronously or asynchronously,
+ * depending on whether the closure is in blocking mode or not. You can pick a
+ * mode explicitly with closure_wait_event_sync() and
+ * closure_wait_event_async(), which do just what you might expect.
+ *
+ * Lastly, you might have a wait list dedicated to a specific event, and have no
+ * need for specifying the condition - you just want to wait until someone runs
+ * closure_wake_up() on the appropriate wait list. In that case, just use
+ * closure_wait(). It will return either true or false, depending on whether the
+ * closure was already on a wait list or not - a closure can only be on one wait
+ * list at a time.
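+ *
+ * For example (editor's sketch - wl and nr_in_flight are hypothetical), a
+ * thread that must wait for outstanding IO to drain might do
+ *
+ *	closure_wait_event_sync(&wl, cl, !atomic_read(&nr_in_flight));
+ *
+ * while whoever completes an IO decrements nr_in_flight and then calls
+ * closure_wake_up(&wl).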
+ *
+ * Parents:
+ *
+ * closure_init() takes two arguments - it takes the closure to initialize, and
+ * a (possibly null) parent.
+ *
+ * If parent is non null, the new closure will have a refcount for its lifetime;
+ * a closure is considered to be "finished" when its refcount hits 0 and the
+ * function to run is null. Hence
+ *
+ * continue_at(cl, NULL, NULL);
+ *
+ * returns up the (spaghetti) stack of closures, precisely like normal return
+ * returns up the C stack. continue_at() with non null fn is better thought of
+ * as doing a tail call.
+ *
+ * All this implies that a closure should typically be embedded in a particular
+ * struct (which its refcount will normally control the lifetime of), and that
+ * struct can very much be thought of as a stack frame.
+ *
+ * Locking:
+ *
+ * Closures are based on work items but they can be thought of as more like
+ * threads - in that like threads and unlike work items they have a well
+ * defined lifetime; they are created (with closure_init()) and eventually
+ * complete after a continue_at(cl, NULL, NULL).
+ *
+ * Suppose you've got some larger structure with a closure embedded in it that's
+ * used for periodically doing garbage collection. You only want one garbage
+ * collection happening at a time, so the natural thing to do is protect it with
+ * a lock. However, it's difficult to use a lock protecting a closure correctly
+ * because the unlock should come after the last continue_at() (additionally, if
+ * you're using the closure asynchronously a mutex won't work since a mutex has
+ * to be unlocked by the same process that locked it).
+ *
+ * So to make it less error prone and more efficient, we also have the ability
+ * to use closures as locks:
+ *
+ * closure_init_unlocked();
+ * closure_trylock();
+ *
+ * That's all we need for trylock() - the last closure_put() implicitly unlocks
+ * it for you. But for closure_lock(), we also need a wait list:
+ *
+ * struct closure_with_waitlist frobnicator_cl;
+ *
+ * closure_init_unlocked(&frobnicator_cl);
+ * closure_lock(&frobnicator_cl);
+ *
+ * A closure_with_waitlist embeds a closure and a wait list - much like struct
+ * delayed_work embeds a work item and a timer_list. The important thing is, use
+ * it exactly like you would a regular closure and closure_put() will magically
+ * handle everything for you.
+ *
+ * We've got closures that embed timers, too. They're called, appropriately
+ * enough:
+ * struct closure_with_timer;
+ *
+ * This gives you access to closure_delay(). It takes a refcount for a specified
+ * number of jiffies - you could then call closure_sync() (for a slightly
+ * convoluted version of msleep()) or continue_at() - which gives you the same
+ * effect as using a delayed work item, except you can reuse the work_struct
+ * already embedded in struct closure.
+ *
+ * Lastly, there's struct closure_with_waitlist_and_timer. It does what you
+ * probably expect, if you happen to need the features of both. (You don't
+ * really want to know how all this is implemented, but if I've done my job
+ * right you shouldn't have to care).
+ */
+
+struct closure;
+typedef void (closure_fn) (struct closure *);
+
+struct closure_waitlist {
+	struct llist_head list;
+};
+
+enum closure_type {
+	TYPE_closure				= 0,
+	TYPE_closure_with_waitlist		= 1,
+	TYPE_closure_with_timer			= 2,
+	TYPE_closure_with_waitlist_and_timer	= 3,
+	MAX_CLOSURE_TYPE			= 3,
+};
+
+enum closure_state {
+	/*
+	 * CLOSURE_BLOCKING: Causes closure_wait_event() to block, instead of
+	 * waiting asynchronously
+	 *
+	 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
+	 * the thread that owns the closure, and cleared by the thread that's
+	 * waking up the closure.
+	 *
+	 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep
+	 * - indicates that cl->task is valid and closure_put() may wake it up.
+	 * Only set or cleared by the thread that owns the closure.
+	 *
+	 * CLOSURE_TIMER: Analogous to CLOSURE_WAITING, indicates that a closure
+	 * has an outstanding timer. Must be set by the thread that owns the
+	 * closure, and cleared by the timer function when the timer goes off.
+	 *
+	 * The rest are for debugging and don't affect behaviour:
+	 *
+	 * CLOSURE_RUNNING: Set when a closure is running (i.e. by
+	 * closure_init() and when closure_put() runs the next function), and
+	 * must be cleared before remaining hits 0. Primarily to help guard
+	 * against incorrect usage and accidentally transferring references.
+	 * continue_at() and closure_return() clear it for you; if you're doing
+	 * something unusual you can use closure_set_dead() which also helps
+	 * annotate where references are being transferred.
+ * + * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a + * closure with this flag set + */ + + CLOSURE_BITS_START = (1 << 19), + CLOSURE_DESTRUCTOR = (1 << 19), + CLOSURE_BLOCKING = (1 << 21), + CLOSURE_WAITING = (1 << 23), + CLOSURE_SLEEPING = (1 << 25), + CLOSURE_TIMER = (1 << 27), + CLOSURE_RUNNING = (1 << 29), + CLOSURE_STACK = (1 << 31), +}; + +#define CLOSURE_GUARD_MASK \ + ((CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING|CLOSURE_WAITING| \ + CLOSURE_SLEEPING|CLOSURE_TIMER|CLOSURE_RUNNING|CLOSURE_STACK) << 1) + +#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) +#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) + +struct closure { + union { + struct { + struct workqueue_struct *wq; + struct task_struct *task; + struct llist_node list; + closure_fn *fn; + }; + struct work_struct work; + }; + + struct closure *parent; + + atomic_t remaining; + + enum closure_type type; + +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG +#define CLOSURE_MAGIC_DEAD 0xc054dead +#define CLOSURE_MAGIC_ALIVE 0xc054a11e + + unsigned magic; + struct list_head all; + unsigned long ip; + unsigned long waiting_on; +#endif +}; + +struct closure_with_waitlist { + struct closure cl; + struct closure_waitlist wait; +}; + +struct closure_with_timer { + struct closure cl; + struct timer_list timer; +}; + +struct closure_with_waitlist_and_timer { + struct closure cl; + struct closure_waitlist wait; + struct timer_list timer; +}; + +extern unsigned invalid_closure_type(void); + +#define __CLOSURE_TYPE(cl, _t) \ + __builtin_types_compatible_p(typeof(cl), struct _t) \ + ? TYPE_ ## _t : \ + +#define __closure_type(cl) \ +( \ + __CLOSURE_TYPE(cl, closure) \ + __CLOSURE_TYPE(cl, closure_with_waitlist) \ + __CLOSURE_TYPE(cl, closure_with_timer) \ + __CLOSURE_TYPE(cl, closure_with_waitlist_and_timer) \ + invalid_closure_type() \ +) + +void closure_sub(struct closure *cl, int v); +void closure_put(struct closure *cl); +void closure_queue(struct closure *cl); +void __closure_wake_up(struct closure_waitlist *list); +bool closure_wait(struct closure_waitlist *list, struct closure *cl); +void closure_sync(struct closure *cl); + +bool closure_trylock(struct closure *cl, struct closure *parent); +void __closure_lock(struct closure *cl, struct closure *parent, + struct closure_waitlist *wait_list); + +void do_closure_timer_init(struct closure *cl); +bool __closure_delay(struct closure *cl, unsigned long delay, + struct timer_list *timer); +void __closure_flush(struct closure *cl, struct timer_list *timer); +void __closure_flush_sync(struct closure *cl, struct timer_list *timer); + +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + +void closure_debug_create(struct closure *cl); +void closure_debug_destroy(struct closure *cl); + +#else + +static inline void closure_debug_create(struct closure *cl) {} +static inline void closure_debug_destroy(struct closure *cl) {} + +#endif + +static inline void closure_set_ip(struct closure *cl) +{ +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + cl->ip = _THIS_IP_; +#endif +} + +static inline void closure_set_ret_ip(struct closure *cl) +{ +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + cl->ip = _RET_IP_; +#endif +} + +static inline void closure_get(struct closure *cl) +{ +#ifdef CONFIG_BCACHE_CLOSURES_DEBUG + BUG_ON((atomic_inc_return(&cl->remaining) & + CLOSURE_REMAINING_MASK) <= 1); +#else + atomic_inc(&cl->remaining); +#endif +} + +static inline void closure_set_stopped(struct closure *cl) +{ + atomic_sub(CLOSURE_RUNNING, &cl->remaining); +} + +static inline bool closure_is_stopped(struct closure *cl) +{ + return 
!(atomic_read(&cl->remaining) & CLOSURE_RUNNING); +} + +static inline bool closure_is_unlocked(struct closure *cl) +{ + return atomic_read(&cl->remaining) == -1; +} + +static inline void do_closure_init(struct closure *cl, struct closure *parent, + bool running) +{ + switch (cl->type) { + case TYPE_closure_with_timer: + case TYPE_closure_with_waitlist_and_timer: + do_closure_timer_init(cl); + default: + break; + } + + cl->parent = parent; + if (parent) + closure_get(parent); + + if (running) { + closure_debug_create(cl); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + } else + atomic_set(&cl->remaining, -1); + + closure_set_ip(cl); +} + +/* + * Hack to get at the embedded closure if there is one, by doing an unsafe cast: + * the result of __closure_type() is thrown away, it's used merely for type + * checking. + */ +#define __to_internal_closure(cl) \ +({ \ + BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE); \ + (struct closure *) cl; \ +}) + +#define closure_init_type(cl, parent, running) \ +do { \ + struct closure *_cl = __to_internal_closure(cl); \ + _cl->type = __closure_type(*(cl)); \ + do_closure_init(_cl, parent, running); \ +} while (0) + +/** + * __closure_init() - Initialize a closure, skipping the memset() + * + * May be used instead of closure_init() when memory has already been zeroed. + */ +#define __closure_init(cl, parent) \ + closure_init_type(cl, parent, true) + +/** + * closure_init() - Initialize a closure, setting the refcount to 1 + * @cl: closure to initialize + * @parent: parent of the new closure. cl will take a refcount on it for its + * lifetime; may be NULL. + */ +#define closure_init(cl, parent) \ +do { \ + memset((cl), 0, sizeof(*(cl))); \ + __closure_init(cl, parent); \ +} while (0) + +static inline void closure_init_stack(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER| + CLOSURE_BLOCKING|CLOSURE_STACK); +} + +/** + * closure_init_unlocked() - Initialize a closure but leave it unlocked. + * @cl: closure to initialize + * + * For when the closure will be used as a lock. The closure may not be used + * until after a closure_lock() or closure_trylock(). + */ +#define closure_init_unlocked(cl) \ +do { \ + memset((cl), 0, sizeof(*(cl))); \ + closure_init_type(cl, NULL, false); \ +} while (0) + +/** + * closure_lock() - lock and initialize a closure. + * @cl: the closure to lock + * @parent: the new parent for this closure + * + * The closure must be of one of the types that has a waitlist (otherwise we + * wouldn't be able to sleep on contention). + * + * @parent has exactly the same meaning as in closure_init(); if non null, the + * closure will take a reference on @parent which will be released when it is + * unlocked. + */ +#define closure_lock(cl, parent) \ + __closure_lock(__to_internal_closure(cl), parent, &(cl)->wait) + +/** + * closure_delay() - delay some number of jiffies + * @cl: the closure that will sleep + * @delay: the delay in jiffies + * + * Takes a refcount on @cl which will be released after @delay jiffies; this may + * be used to have a function run after a delay with continue_at(), or + * closure_sync() may be used for a convoluted version of msleep(). 
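+ *
+ * For example (editor's sketch - s and process_later are hypothetical, with
+ * s->t a struct closure_with_timer):
+ *
+ *	if (closure_delay(&s->t, HZ))
+ *		continue_at(&s->t.cl, process_later, system_wq);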
+ */ +#define closure_delay(cl, delay) \ + __closure_delay(__to_internal_closure(cl), delay, &(cl)->timer) + +#define closure_flush(cl) \ + __closure_flush(__to_internal_closure(cl), &(cl)->timer) + +#define closure_flush_sync(cl) \ + __closure_flush_sync(__to_internal_closure(cl), &(cl)->timer) + +static inline void __closure_end_sleep(struct closure *cl) +{ + __set_current_state(TASK_RUNNING); + + if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING) + atomic_sub(CLOSURE_SLEEPING, &cl->remaining); +} + +static inline void __closure_start_sleep(struct closure *cl) +{ + closure_set_ip(cl); + cl->task = current; + set_current_state(TASK_UNINTERRUPTIBLE); + + if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING)) + atomic_add(CLOSURE_SLEEPING, &cl->remaining); +} + +/** + * closure_blocking() - returns true if the closure is in blocking mode. + * + * If a closure is in blocking mode, closure_wait_event() will sleep until the + * condition is true instead of waiting asynchronously. + */ +static inline bool closure_blocking(struct closure *cl) +{ + return atomic_read(&cl->remaining) & CLOSURE_BLOCKING; +} + +/** + * set_closure_blocking() - put a closure in blocking mode. + * + * If a closure is in blocking mode, closure_wait_event() will sleep until the + * condition is true instead of waiting asynchronously. + * + * Not thread safe - can only be called by the thread running the closure. + */ +static inline void set_closure_blocking(struct closure *cl) +{ + if (!closure_blocking(cl)) + atomic_add(CLOSURE_BLOCKING, &cl->remaining); +} + +/* + * Not thread safe - can only be called by the thread running the closure. + */ +static inline void clear_closure_blocking(struct closure *cl) +{ + if (closure_blocking(cl)) + atomic_sub(CLOSURE_BLOCKING, &cl->remaining); +} + +/** + * closure_wake_up() - wake up all closures on a wait list. + */ +static inline void closure_wake_up(struct closure_waitlist *list) +{ + smp_mb(); + __closure_wake_up(list); +} + +/* + * Wait on an event, synchronously or asynchronously - analogous to wait_event() + * but for closures. + * + * The loop is oddly structured so as to avoid a race; we must check the + * condition again after we've added ourself to the waitlist. We know if we were + * already on the waitlist because closure_wait() returns false; thus, we only + * schedule or break if closure_wait() returns false. If it returns true, we + * just loop again - rechecking the condition. + * + * The __closure_wake_up() is necessary because we may race with the event + * becoming true; i.e. we see event false -> wait -> recheck condition, but the + * thread that made the event true may have called closure_wake_up() before we + * added ourself to the wait list. + * + * We have to call closure_sync() at the end instead of just + * __closure_end_sleep() because a different thread might've called + * closure_wake_up() before us and gotten preempted before they dropped the + * refcount on our closure. If this was a stack allocated closure, that would be + * bad. + */ +#define __closure_wait_event(list, cl, condition, _block) \ +({ \ + bool block = _block; \ + typeof(condition) ret; \ + \ + while (1) { \ + ret = (condition); \ + if (ret) { \ + __closure_wake_up(list); \ + if (block) \ + closure_sync(cl); \ + \ + break; \ + } \ + \ + if (block) \ + __closure_start_sleep(cl); \ + \ + if (!closure_wait(list, cl)) { \ + if (!block) \ + break; \ + \ + schedule(); \ + } \ + } \ + \ + ret; \ +}) + +/** + * closure_wait_event() - wait on a condition, synchronously or asynchronously. 
+ * @list: the wait list to wait on
+ * @cl: the closure that is doing the waiting
+ * @condition: a C expression for the event to wait for
+ *
+ * If the closure is in blocking mode, sleeps until the @condition evaluates to
+ * true - exactly like wait_event().
+ *
+ * If the closure is not in blocking mode, waits asynchronously; if the
+ * condition is currently false the @cl is put onto @list and returns. @list
+ * owns a refcount on @cl; closure_sync() or continue_at() may be used later to
+ * wait for another thread to wake up @list, which drops the refcount on @cl.
+ *
+ * Returns the value of @condition; @cl will be on @list iff @condition was
+ * false.
+ *
+ * closure_wake_up(@list) must be called after changing any variable that could
+ * cause @condition to become true.
+ */
+#define closure_wait_event(list, cl, condition)				\
+	__closure_wait_event(list, cl, condition, closure_blocking(cl))
+
+#define closure_wait_event_async(list, cl, condition)			\
+	__closure_wait_event(list, cl, condition, false)
+
+#define closure_wait_event_sync(list, cl, condition)			\
+	__closure_wait_event(list, cl, condition, true)
+
+static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
+				  struct workqueue_struct *wq)
+{
+	BUG_ON(object_is_on_stack(cl));
+	closure_set_ip(cl);
+	cl->fn = fn;
+	cl->wq = wq;
+	/* pairs with the atomic_dec() in closure_put() */
+	smp_mb__before_atomic_dec();
+}
+
+#define continue_at(_cl, _fn, _wq)					\
+do {									\
+	set_closure_fn(_cl, _fn, _wq);					\
+	closure_sub(_cl, CLOSURE_RUNNING + 1);				\
+	return;								\
+} while (0)
+
+#define closure_return(_cl)	continue_at((_cl), NULL, NULL)
+
+#define continue_at_nobarrier(_cl, _fn, _wq)				\
+do {									\
+	set_closure_fn(_cl, _fn, _wq);					\
+	closure_queue(_cl);						\
+	return;								\
+} while (0)
+
+#define closure_return_with_destructor(_cl, _destructor)		\
+do {									\
+	set_closure_fn(_cl, _destructor, NULL);				\
+	closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1);	\
+	return;								\
+} while (0)
+
+static inline void closure_call(struct closure *cl, closure_fn fn,
+				struct workqueue_struct *wq,
+				struct closure *parent)
+{
+	closure_init(cl, parent);
+	continue_at_nobarrier(cl, fn, wq);
+}
+
+static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
+					struct workqueue_struct *wq,
+					struct closure *parent)
+{
+	if (closure_trylock(cl, parent))
+		continue_at_nobarrier(cl, fn, wq);
+}
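+
+/*
+ * Editor's sketch of the "closures as locks" pattern described at the top of
+ * this file (gc, gc_done and maybe_start_gc are hypothetical):
+ *
+ *	static struct closure_with_waitlist gc;
+ *
+ *	static void gc_done(struct closure *cl)
+ *	{
+ *		// ... do the actual work ...
+ *		closure_return(cl);	// the last put unlocks gc
+ *	}
+ *
+ *	static void maybe_start_gc(void)
+ *	{
+ *		// No-op if a gc is already running:
+ *		closure_trylock_call(&gc.cl, gc_done, system_wq, NULL);
+ *	}
+ *
+ * with gc initialized once at startup via closure_init_unlocked(&gc).
+ */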
+
+#endif /* _LINUX_CLOSURE_H */
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
new file mode 100644
index 000000000000..4b37ef2b80e5
--- /dev/null
+++ b/drivers/md/bcache/debug.c
@@ -0,0 +1,563 @@
+/*
+ * Assorted bcache debug code
+ *
+ * Copyright 2010, 2011 Kent Overstreet
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcache.h"
+#include "btree.h"
+#include "debug.h"
+#include "request.h"
+
+#include <linux/console.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+
+static struct dentry *debug;
+
+const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
+{
+	unsigned i;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (ptr_available(c, k, i)) {
+			struct cache *ca = PTR_CACHE(c, k, i);
+			size_t bucket = PTR_BUCKET_NR(c, k, i);
+			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+			if (KEY_SIZE(k) + r > c->sb.bucket_size)
+				return "bad, length too big";
+			if (bucket < ca->sb.first_bucket)
+				return "bad, short offset";
+			if (bucket >= ca->sb.nbuckets)
+				return "bad, offset past end of device";
+			if (ptr_stale(c, k, i))
+				return "stale";
+		}
+
+	if (!bkey_cmp(k, &ZERO_KEY))
+		return "bad, null key";
+	if (!KEY_PTRS(k))
+		return "bad, no pointers";
+	if (!KEY_SIZE(k))
+		return "zeroed key";
+	return "";
+}
+
+struct keyprint_hack bch_pkey(const struct bkey *k)
+{
+	unsigned i = 0;
+	struct keyprint_hack r;
+	char *out = r.s, *end = r.s + KEYHACK_SIZE;
+
+#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
+
+	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k));
+
+	if (KEY_PTRS(k))
+		while (1) {
+			p("%llu:%llu gen %llu",
+			  PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i));
+
+			if (++i == KEY_PTRS(k))
+				break;
+
+			p(", ");
+		}
+
+	p("]");
+
+	if (KEY_DIRTY(k))
+		p(" dirty");
+	if (KEY_CSUM(k))
+		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
+#undef p
+	return r;
+}
+
+struct keyprint_hack bch_pbtree(const struct btree *b)
+{
+	struct keyprint_hack r;
+
+	snprintf(r.s, 40, "%li level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0),
+		 b->level, b->c->root ? b->c->root->level : -1);
+	return r;
+}
+
+#if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
+
+static bool skipped_backwards(struct btree *b, struct bkey *k)
+{
+	return bkey_cmp(k, (!b->level)
+			? 
&START_KEY(bkey_next(k)) + : bkey_next(k)) > 0; +} + +static void dump_bset(struct btree *b, struct bset *i) +{ + struct bkey *k; + unsigned j; + + for (k = i->start; k < end(i); k = bkey_next(k)) { + printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b), + (uint64_t *) k - i->d, i->keys, pkey(k)); + + for (j = 0; j < KEY_PTRS(k); j++) { + size_t n = PTR_BUCKET_NR(b->c, k, j); + printk(" bucket %zu", n); + + if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) + printk(" prio %i", + PTR_BUCKET(b->c, k, j)->prio); + } + + printk(" %s\n", bch_ptr_status(b->c, k)); + + if (bkey_next(k) < end(i) && + skipped_backwards(b, k)) + printk(KERN_ERR "Key skipped backwards\n"); + } +} + +#endif + +#ifdef CONFIG_BCACHE_DEBUG + +void bch_btree_verify(struct btree *b, struct bset *new) +{ + struct btree *v = b->c->verify_data; + struct closure cl; + closure_init_stack(&cl); + + if (!b->c->verify) + return; + + closure_wait_event(&b->io.wait, &cl, + atomic_read(&b->io.cl.remaining) == -1); + + mutex_lock(&b->c->verify_lock); + + bkey_copy(&v->key, &b->key); + v->written = 0; + v->level = b->level; + + bch_btree_read(v); + closure_wait_event(&v->io.wait, &cl, + atomic_read(&b->io.cl.remaining) == -1); + + if (new->keys != v->sets[0].data->keys || + memcmp(new->start, + v->sets[0].data->start, + (void *) end(new) - (void *) new->start)) { + unsigned i, j; + + console_lock(); + + printk(KERN_ERR "*** original memory node:\n"); + for (i = 0; i <= b->nsets; i++) + dump_bset(b, b->sets[i].data); + + printk(KERN_ERR "*** sorted memory node:\n"); + dump_bset(b, new); + + printk(KERN_ERR "*** on disk node:\n"); + dump_bset(v, v->sets[0].data); + + for (j = 0; j < new->keys; j++) + if (new->d[j] != v->sets[0].data->d[j]) + break; + + console_unlock(); + panic("verify failed at %u\n", j); + } + + mutex_unlock(&b->c->verify_lock); +} + +static void data_verify_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + closure_put(cl); +} + +void bch_data_verify(struct search *s) +{ + char name[BDEVNAME_SIZE]; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + struct closure *cl = &s->cl; + struct bio *check; + struct bio_vec *bv; + int i; + + if (!s->unaligned_bvec) + bio_for_each_segment(bv, s->orig_bio, i) + bv->bv_offset = 0, bv->bv_len = PAGE_SIZE; + + check = bio_clone(s->orig_bio, GFP_NOIO); + if (!check) + return; + + if (bio_alloc_pages(check, GFP_NOIO)) + goto out_put; + + check->bi_rw = READ_SYNC; + check->bi_private = cl; + check->bi_end_io = data_verify_endio; + + closure_bio_submit(check, cl, &dc->disk); + closure_sync(cl); + + bio_for_each_segment(bv, s->orig_bio, i) { + void *p1 = kmap(bv->bv_page); + void *p2 = kmap(check->bi_io_vec[i].bv_page); + + if (memcmp(p1 + bv->bv_offset, + p2 + bv->bv_offset, + bv->bv_len)) + printk(KERN_ERR "bcache (%s): verify failed" + " at sector %llu\n", + bdevname(dc->bdev, name), + (uint64_t) s->orig_bio->bi_sector); + + kunmap(bv->bv_page); + kunmap(check->bi_io_vec[i].bv_page); + } + + __bio_for_each_segment(bv, check, i, 0) + __free_page(bv->bv_page); +out_put: + bio_put(check); +} + +#endif + +#ifdef CONFIG_BCACHE_EDEBUG + +unsigned bch_count_data(struct btree *b) +{ + unsigned ret = 0; + struct btree_iter iter; + struct bkey *k; + + if (!b->level) + for_each_key(b, k, &iter) + ret += KEY_SIZE(k); + return ret; +} + +static void vdump_bucket_and_panic(struct btree *b, const char *fmt, + va_list args) +{ + unsigned i; + + console_lock(); + + for (i = 0; i <= b->nsets; i++) + dump_bset(b, b->sets[i].data); + + 
vprintk(fmt, args); + + console_unlock(); + + panic("at %s\n", pbtree(b)); +} + +void bch_check_key_order_msg(struct btree *b, struct bset *i, + const char *fmt, ...) +{ + struct bkey *k; + + if (!i->keys) + return; + + for (k = i->start; bkey_next(k) < end(i); k = bkey_next(k)) + if (skipped_backwards(b, k)) { + va_list args; + va_start(args, fmt); + + vdump_bucket_and_panic(b, fmt, args); + va_end(args); + } +} + +void bch_check_keys(struct btree *b, const char *fmt, ...) +{ + va_list args; + struct bkey *k, *p = NULL; + struct btree_iter iter; + + if (b->level) + return; + + for_each_key(b, k, &iter) { + if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0) { + printk(KERN_ERR "Keys out of order:\n"); + goto bug; + } + + if (bch_ptr_invalid(b, k)) + continue; + + if (p && bkey_cmp(p, &START_KEY(k)) > 0) { + printk(KERN_ERR "Overlapping keys:\n"); + goto bug; + } + p = k; + } + return; +bug: + va_start(args, fmt); + vdump_bucket_and_panic(b, fmt, args); + va_end(args); +} + +#endif + +#ifdef CONFIG_DEBUG_FS + +/* XXX: cache set refcounting */ + +struct dump_iterator { + char buf[PAGE_SIZE]; + size_t bytes; + struct cache_set *c; + struct keybuf keys; +}; + +static bool dump_pred(struct keybuf *buf, struct bkey *k) +{ + return true; +} + +static ssize_t bch_dump_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iterator *i = file->private_data; + ssize_t ret = 0; + + while (size) { + struct keybuf_key *w; + unsigned bytes = min(i->bytes, size); + + int err = copy_to_user(buf, i->buf, bytes); + if (err) + return err; + + ret += bytes; + buf += bytes; + size -= bytes; + i->bytes -= bytes; + memmove(i->buf, i->buf + bytes, i->bytes); + + if (i->bytes) + break; + + w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY); + if (!w) + break; + + i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key)); + bch_keybuf_del(&i->keys, w); + } + + return ret; +} + +static int bch_dump_open(struct inode *inode, struct file *file) +{ + struct cache_set *c = inode->i_private; + struct dump_iterator *i; + + i = kzalloc(sizeof(struct dump_iterator), GFP_KERNEL); + if (!i) + return -ENOMEM; + + file->private_data = i; + i->c = c; + bch_keybuf_init(&i->keys, dump_pred); + i->keys.last_scanned = KEY(0, 0, 0); + + return 0; +} + +static int bch_dump_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static const struct file_operations cache_set_debug_ops = { + .owner = THIS_MODULE, + .open = bch_dump_open, + .read = bch_dump_read, + .release = bch_dump_release +}; + +void bch_debug_init_cache_set(struct cache_set *c) +{ + if (!IS_ERR_OR_NULL(debug)) { + char name[50]; + snprintf(name, 50, "bcache-%pU", c->sb.set_uuid); + + c->debug = debugfs_create_file(name, 0400, debug, c, + &cache_set_debug_ops); + } +} + +#endif + +#ifdef CONFIG_BCACHE_DEBUG +static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, + const char *buffer, size_t size) +{ + void dump(struct btree *b) + { + struct bset *i; + + for (i = b->sets[0].data; + index(i, b) < btree_blocks(b) && + i->seq == b->sets[0].data->seq; + i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c)) + dump_bset(b, i); + } + + struct cache_sb *sb; + struct cache_set *c; + struct btree *all[3], *b, *fill, *orig; + int j; + + struct btree_op op; + bch_btree_op_init_stack(&op); + + sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL); + if (!sb) + return -ENOMEM; + + sb->bucket_size = 128; + sb->block_size = 4; + + c = bch_cache_set_alloc(sb); + if (!c) + return -ENOMEM; 
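+
+	/*
+	 * Borrow three nodes from the btree node cache: one to insert into,
+	 * one to accumulate the blocks as they would have been written out,
+	 * and one to keep a copy for dumping if verification fails.
+	 */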
+ + for (j = 0; j < 3; j++) { + BUG_ON(list_empty(&c->btree_cache)); + all[j] = list_first_entry(&c->btree_cache, struct btree, list); + list_del_init(&all[j]->list); + + all[j]->key = KEY(0, 0, c->sb.bucket_size); + bkey_copy_key(&all[j]->key, &MAX_KEY); + } + + b = all[0]; + fill = all[1]; + orig = all[2]; + + while (1) { + for (j = 0; j < 3; j++) + all[j]->written = all[j]->nsets = 0; + + bch_bset_init_next(b); + + while (1) { + struct bset *i = write_block(b); + struct bkey *k = op.keys.top; + unsigned rand; + + bkey_init(k); + rand = get_random_int(); + + op.type = rand & 1 + ? BTREE_INSERT + : BTREE_REPLACE; + rand >>= 1; + + SET_KEY_SIZE(k, bucket_remainder(c, rand)); + rand >>= c->bucket_bits; + rand &= 1024 * 512 - 1; + rand += c->sb.bucket_size; + SET_KEY_OFFSET(k, rand); +#if 0 + SET_KEY_PTRS(k, 1); +#endif + bch_keylist_push(&op.keys); + bch_btree_insert_keys(b, &op); + + if (should_split(b) || + set_blocks(i, b->c) != + __set_blocks(i, i->keys + 15, b->c)) { + i->csum = csum_set(i); + + memcpy(write_block(fill), + i, set_bytes(i)); + + b->written += set_blocks(i, b->c); + fill->written = b->written; + if (b->written == btree_blocks(b)) + break; + + bch_btree_sort_lazy(b); + bch_bset_init_next(b); + } + } + + memcpy(orig->sets[0].data, + fill->sets[0].data, + btree_bytes(c)); + + bch_btree_sort(b); + fill->written = 0; + bch_btree_read_done(&fill->io.cl); + + if (b->sets[0].data->keys != fill->sets[0].data->keys || + memcmp(b->sets[0].data->start, + fill->sets[0].data->start, + b->sets[0].data->keys * sizeof(uint64_t))) { + struct bset *i = b->sets[0].data; + struct bkey *k, *l; + + for (k = i->start, + l = fill->sets[0].data->start; + k < end(i); + k = bkey_next(k), l = bkey_next(l)) + if (bkey_cmp(k, l) || + KEY_SIZE(k) != KEY_SIZE(l)) + pr_err("key %zi differs: %s " + "!= %s", (uint64_t *) k - i->d, + pkey(k), pkey(l)); + + for (j = 0; j < 3; j++) { + pr_err("**** Set %i ****", j); + dump(all[j]); + } + panic("\n"); + } + + pr_info("fuzz complete: %i keys", b->sets[0].data->keys); + } +} + +kobj_attribute_write(fuzz, btree_fuzz); +#endif + +void bch_debug_exit(void) +{ + if (!IS_ERR_OR_NULL(debug)) + debugfs_remove_recursive(debug); +} + +int __init bch_debug_init(struct kobject *kobj) +{ + int ret = 0; +#ifdef CONFIG_BCACHE_DEBUG + ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr); + if (ret) + return ret; +#endif + + debug = debugfs_create_dir("bcache", NULL); + return ret; +} diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h new file mode 100644 index 000000000000..f9378a218148 --- /dev/null +++ b/drivers/md/bcache/debug.h @@ -0,0 +1,54 @@ +#ifndef _BCACHE_DEBUG_H +#define _BCACHE_DEBUG_H + +/* Btree/bkey debug printing */ + +#define KEYHACK_SIZE 80 +struct keyprint_hack { + char s[KEYHACK_SIZE]; +}; + +struct keyprint_hack bch_pkey(const struct bkey *k); +struct keyprint_hack bch_pbtree(const struct btree *b); +#define pkey(k) (&bch_pkey(k).s[0]) +#define pbtree(b) (&bch_pbtree(b).s[0]) + +#ifdef CONFIG_BCACHE_EDEBUG + +unsigned bch_count_data(struct btree *); +void bch_check_key_order_msg(struct btree *, struct bset *, const char *, ...); +void bch_check_keys(struct btree *, const char *, ...); + +#define bch_check_key_order(b, i) \ + bch_check_key_order_msg(b, i, "keys out of order") +#define EBUG_ON(cond) BUG_ON(cond) + +#else /* EDEBUG */ + +#define bch_count_data(b) 0 +#define bch_check_key_order(b, i) do {} while (0) +#define bch_check_key_order_msg(b, i, ...) do {} while (0) +#define bch_check_keys(b, ...) 
do {} while (0)
+#define EBUG_ON(cond)		do {} while (0)
+
+#endif
+
+#ifdef CONFIG_BCACHE_DEBUG
+
+void bch_btree_verify(struct btree *, struct bset *);
+void bch_data_verify(struct search *);
+
+#else /* DEBUG */
+
+static inline void bch_btree_verify(struct btree *b, struct bset *i) {}
+static inline void bch_data_verify(struct search *s) {}
+
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+void bch_debug_init_cache_set(struct cache_set *);
+#else
+static inline void bch_debug_init_cache_set(struct cache_set *c) {}
+#endif
+
+#endif
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
new file mode 100644
index 000000000000..f565512f6fac
--- /dev/null
+++ b/drivers/md/bcache/io.c
@@ -0,0 +1,390 @@
+/*
+ * Some low level IO code, and hacks for various block layer limitations
+ *
+ * Copyright 2010, 2011 Kent Overstreet
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcache.h"
+#include "bset.h"
+#include "debug.h"
+
+static void bch_bi_idx_hack_endio(struct bio *bio, int error)
+{
+	struct bio *p = bio->bi_private;
+
+	bio_endio(p, error);
+	bio_put(bio);
+}
+
+static void bch_generic_make_request_hack(struct bio *bio)
+{
+	if (bio->bi_idx) {
+		struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio));
+
+		memcpy(clone->bi_io_vec,
+		       bio_iovec(bio),
+		       bio_segments(bio) * sizeof(struct bio_vec));
+
+		clone->bi_sector	= bio->bi_sector;
+		clone->bi_bdev		= bio->bi_bdev;
+		clone->bi_rw		= bio->bi_rw;
+		clone->bi_vcnt		= bio_segments(bio);
+		clone->bi_size		= bio->bi_size;
+
+		clone->bi_private	= bio;
+		clone->bi_end_io	= bch_bi_idx_hack_endio;
+
+		bio = clone;
+	}
+
+	generic_make_request(bio);
+}
+
+/**
+ * bch_bio_split - split a bio
+ * @bio: bio to split
+ * @sectors: number of sectors to split from the front of @bio
+ * @gfp: gfp mask
+ * @bs: bio set to allocate from
+ *
+ * Allocates and returns a new bio which represents @sectors from the start of
+ * @bio, and updates @bio to represent the remaining sectors.
+ *
+ * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
+ * unchanged.
+ *
+ * The newly allocated bio will point to @bio's bi_io_vec if the split was on a
+ * bvec boundary; it is the caller's responsibility to ensure that @bio is not
+ * freed before the split.
+ *
+ * If bch_bio_split() is running under generic_make_request(), it's not safe to
+ * allocate more than one bio from the same bio set. Therefore, if it is running
+ * under generic_make_request() it masks out __GFP_WAIT when doing the
+ * allocation. The caller must check for failure if there's any possibility of
+ * it being called from under generic_make_request(); it is then the caller's
+ * responsibility to retry from a safe context (by e.g. punting to workqueue).
+ */
+struct bio *bch_bio_split(struct bio *bio, int sectors,
+			  gfp_t gfp, struct bio_set *bs)
+{
+	unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9;
+	struct bio_vec *bv;
+	struct bio *ret = NULL;
+
+	BUG_ON(sectors <= 0);
+
+	/*
+	 * If we're being called from underneath generic_make_request() and we
+	 * already allocated any bios from this bio set, we risk deadlock if we
+	 * use the mempool. So instead, we possibly fail and let the caller punt
+	 * to workqueue or somesuch and retry in a safe context.
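+	 *
+	 * (__bch_bio_submit_split() below is one such caller - when the split
+	 * allocation fails it punts itself to system_wq and retries.)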
+ */ + if (current->bio_list) + gfp &= ~__GFP_WAIT; + + if (sectors >= bio_sectors(bio)) + return bio; + + if (bio->bi_rw & REQ_DISCARD) { + ret = bio_alloc_bioset(gfp, 1, bs); + idx = 0; + goto out; + } + + bio_for_each_segment(bv, bio, idx) { + vcnt = idx - bio->bi_idx; + + if (!nbytes) { + ret = bio_alloc_bioset(gfp, vcnt, bs); + if (!ret) + return NULL; + + memcpy(ret->bi_io_vec, bio_iovec(bio), + sizeof(struct bio_vec) * vcnt); + + break; + } else if (nbytes < bv->bv_len) { + ret = bio_alloc_bioset(gfp, ++vcnt, bs); + if (!ret) + return NULL; + + memcpy(ret->bi_io_vec, bio_iovec(bio), + sizeof(struct bio_vec) * vcnt); + + ret->bi_io_vec[vcnt - 1].bv_len = nbytes; + bv->bv_offset += nbytes; + bv->bv_len -= nbytes; + break; + } + + nbytes -= bv->bv_len; + } +out: + ret->bi_bdev = bio->bi_bdev; + ret->bi_sector = bio->bi_sector; + ret->bi_size = sectors << 9; + ret->bi_rw = bio->bi_rw; + ret->bi_vcnt = vcnt; + ret->bi_max_vecs = vcnt; + + bio->bi_sector += sectors; + bio->bi_size -= sectors << 9; + bio->bi_idx = idx; + + if (bio_integrity(bio)) { + if (bio_integrity_clone(ret, bio, gfp)) { + bio_put(ret); + return NULL; + } + + bio_integrity_trim(ret, 0, bio_sectors(ret)); + bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio)); + } + + return ret; +} + +static unsigned bch_bio_max_sectors(struct bio *bio) +{ + unsigned ret = bio_sectors(bio); + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct bio_vec *bv, *end = bio_iovec(bio) + + min_t(int, bio_segments(bio), queue_max_segments(q)); + + struct bvec_merge_data bvm = { + .bi_bdev = bio->bi_bdev, + .bi_sector = bio->bi_sector, + .bi_size = 0, + .bi_rw = bio->bi_rw, + }; + + if (bio->bi_rw & REQ_DISCARD) + return min(ret, q->limits.max_discard_sectors); + + if (bio_segments(bio) > queue_max_segments(q) || + q->merge_bvec_fn) { + ret = 0; + + for (bv = bio_iovec(bio); bv < end; bv++) { + if (q->merge_bvec_fn && + q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) + break; + + ret += bv->bv_len >> 9; + bvm.bi_size += bv->bv_len; + } + + if (ret >= (BIO_MAX_PAGES * PAGE_SIZE) >> 9) + return (BIO_MAX_PAGES * PAGE_SIZE) >> 9; + } + + ret = min(ret, queue_max_sectors(q)); + + WARN_ON(!ret); + ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9); + + return ret; +} + +static void bch_bio_submit_split_done(struct closure *cl) +{ + struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); + + s->bio->bi_end_io = s->bi_end_io; + s->bio->bi_private = s->bi_private; + bio_endio(s->bio, 0); + + closure_debug_destroy(&s->cl); + mempool_free(s, s->p->bio_split_hook); +} + +static void bch_bio_submit_split_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); + + if (error) + clear_bit(BIO_UPTODATE, &s->bio->bi_flags); + + bio_put(bio); + closure_put(cl); +} + +static void __bch_bio_submit_split(struct closure *cl) +{ + struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); + struct bio *bio = s->bio, *n; + + do { + n = bch_bio_split(bio, bch_bio_max_sectors(bio), + GFP_NOIO, s->p->bio_split); + if (!n) + continue_at(cl, __bch_bio_submit_split, system_wq); + + n->bi_end_io = bch_bio_submit_split_endio; + n->bi_private = cl; + + closure_get(cl); + bch_generic_make_request_hack(n); + } while (n != bio); + + continue_at(cl, bch_bio_submit_split_done, NULL); +} + +void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) +{ + struct bio_split_hook *s; + + if (!bio_has_data(bio) && !(bio->bi_rw & 
REQ_DISCARD))
+		goto submit;
+
+	if (bio_sectors(bio) <= bch_bio_max_sectors(bio))
+		goto submit;
+
+	s = mempool_alloc(p->bio_split_hook, GFP_NOIO);
+
+	s->bio		= bio;
+	s->p		= p;
+	s->bi_end_io	= bio->bi_end_io;
+	s->bi_private	= bio->bi_private;
+	bio_get(bio);
+
+	closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL);
+	return;
+submit:
+	bch_generic_make_request_hack(bio);
+}
+
+/* Bios with headers */
+
+void bch_bbio_free(struct bio *bio, struct cache_set *c)
+{
+	struct bbio *b = container_of(bio, struct bbio, bio);
+	mempool_free(b, c->bio_meta);
+}
+
+struct bio *bch_bbio_alloc(struct cache_set *c)
+{
+	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
+	struct bio *bio = &b->bio;
+
+	bio_init(bio);
+	bio->bi_flags		|= BIO_POOL_NONE << BIO_POOL_OFFSET;
+	bio->bi_max_vecs	 = bucket_pages(c);
+	bio->bi_io_vec		 = bio->bi_inline_vecs;
+
+	return bio;
+}
+
+void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
+{
+	struct bbio *b = container_of(bio, struct bbio, bio);
+
+	bio->bi_sector	= PTR_OFFSET(&b->key, 0);
+	bio->bi_bdev	= PTR_CACHE(c, &b->key, 0)->bdev;
+
+	b->submit_time_us = local_clock_us();
+	closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0));
+}
+
+void bch_submit_bbio(struct bio *bio, struct cache_set *c,
+		     struct bkey *k, unsigned ptr)
+{
+	struct bbio *b = container_of(bio, struct bbio, bio);
+	bch_bkey_copy_single_ptr(&b->key, k, ptr);
+	__bch_submit_bbio(bio, c);
+}
+
+/* IO errors */
+
+void bch_count_io_errors(struct cache *ca, int error, const char *m)
+{
+	/*
+	 * The half-life of an error is:
+	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
+	 */
+
+	if (ca->set->error_decay) {
+		unsigned count = atomic_inc_return(&ca->io_count);
+
+		while (count > ca->set->error_decay) {
+			unsigned errors;
+			unsigned old = count;
+			unsigned new = count - ca->set->error_decay;
+
+			/*
+			 * First we subtract refresh from count; each time we
+			 * successfully do so, we rescale the errors once:
+			 */
+
+			count = atomic_cmpxchg(&ca->io_count, old, new);
+
+			if (count == old) {
+				count = new;
+
+				errors = atomic_read(&ca->io_errors);
+				do {
+					old = errors;
+					new = ((uint64_t) errors * 127) / 128;
+					errors = atomic_cmpxchg(&ca->io_errors,
+								old, new);
+				} while (old != errors);
+			}
+		}
+	}
+
+	if (error) {
+		char buf[BDEVNAME_SIZE];
+		unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
+						    &ca->io_errors);
+		errors >>= IO_ERROR_SHIFT;
+
+		if (errors < ca->set->error_limit)
+			pr_err("%s: IO error on %s, recovering",
+			       bdevname(ca->bdev, buf), m);
+		else
+			bch_cache_set_error(ca->set,
+					    "%s: too many IO errors %s",
+					    bdevname(ca->bdev, buf), m);
+	}
+}
+
+void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
+			      int error, const char *m)
+{
+	struct bbio *b = container_of(bio, struct bbio, bio);
+	struct cache *ca = PTR_CACHE(c, &b->key, 0);
+
+	unsigned threshold = bio->bi_rw & REQ_WRITE
+		? 
c->congested_write_threshold_us + : c->congested_read_threshold_us; + + if (threshold) { + unsigned t = local_clock_us(); + + int us = t - b->submit_time_us; + int congested = atomic_read(&c->congested); + + if (us > (int) threshold) { + int ms = us / 1024; + c->congested_last_us = t; + + ms = min(ms, CONGESTED_MAX + congested); + atomic_sub(ms, &c->congested); + } else if (congested < 0) + atomic_inc(&c->congested); + } + + bch_count_io_errors(ca, error, m); +} + +void bch_bbio_endio(struct cache_set *c, struct bio *bio, + int error, const char *m) +{ + struct closure *cl = bio->bi_private; + + bch_bbio_count_io_errors(c, bio, error, m); + bio_put(bio); + closure_put(cl); +} diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c new file mode 100644 index 000000000000..c871ffaabbb0 --- /dev/null +++ b/drivers/md/bcache/journal.c @@ -0,0 +1,785 @@ +/* + * bcache journalling code, for btree insertions + * + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +/* + * Journal replay/recovery: + * + * This code is all driven from run_cache_set(); we first read the journal + * entries, do some other stuff, then we mark all the keys in the journal + * entries (same as garbage collection would), then we replay them - reinserting + * them into the cache in precisely the same order as they appear in the + * journal. + * + * We only journal keys that go in leaf nodes, which simplifies things quite a + * bit. + */ + +static void journal_read_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + closure_put(cl); +} + +static int journal_read_bucket(struct cache *ca, struct list_head *list, + struct btree_op *op, unsigned bucket_index) +{ + struct journal_device *ja = &ca->journal; + struct bio *bio = &ja->bio; + + struct journal_replay *i; + struct jset *j, *data = ca->set->journal.w[0].data; + unsigned len, left, offset = 0; + int ret = 0; + sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); + + pr_debug("reading %llu", (uint64_t) bucket); + + while (offset < ca->sb.bucket_size) { +reread: left = ca->sb.bucket_size - offset; + len = min_t(unsigned, left, PAGE_SECTORS * 8); + + bio_reset(bio); + bio->bi_sector = bucket + offset; + bio->bi_bdev = ca->bdev; + bio->bi_rw = READ; + bio->bi_size = len << 9; + + bio->bi_end_io = journal_read_endio; + bio->bi_private = &op->cl; + bio_map(bio, data); + + closure_bio_submit(bio, &op->cl, ca); + closure_sync(&op->cl); + + /* This function could be simpler now since we no longer write + * journal entries that overlap bucket boundaries; this means + * the start of a bucket will always have a valid journal entry + * if it has any journal entries at all. 
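+		 *
+		 * (The bucket is read in chunks of up to PAGE_SECTORS * 8
+		 * sectors; "goto reread" restarts the read when an entry
+		 * extends past what has been read so far.)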
+ */ + + j = data; + while (len) { + struct list_head *where; + size_t blocks, bytes = set_bytes(j); + + if (j->magic != jset_magic(ca->set)) + return ret; + + if (bytes > left << 9) + return ret; + + if (bytes > len << 9) + goto reread; + + if (j->csum != csum_set(j)) + return ret; + + blocks = set_blocks(j, ca->set); + + while (!list_empty(list)) { + i = list_first_entry(list, + struct journal_replay, list); + if (i->j.seq >= j->last_seq) + break; + list_del(&i->list); + kfree(i); + } + + list_for_each_entry_reverse(i, list, list) { + if (j->seq == i->j.seq) + goto next_set; + + if (j->seq < i->j.last_seq) + goto next_set; + + if (j->seq > i->j.seq) { + where = &i->list; + goto add; + } + } + + where = list; +add: + i = kmalloc(offsetof(struct journal_replay, j) + + bytes, GFP_KERNEL); + if (!i) + return -ENOMEM; + memcpy(&i->j, j, bytes); + list_add(&i->list, where); + ret = 1; + + ja->seq[bucket_index] = j->seq; +next_set: + offset += blocks * ca->sb.block_size; + len -= blocks * ca->sb.block_size; + j = ((void *) j) + blocks * block_bytes(ca); + } + } + + return ret; +} + +int bch_journal_read(struct cache_set *c, struct list_head *list, + struct btree_op *op) +{ +#define read_bucket(b) \ + ({ \ + int ret = journal_read_bucket(ca, list, op, b); \ + __set_bit(b, bitmap); \ + if (ret < 0) \ + return ret; \ + ret; \ + }) + + struct cache *ca; + unsigned iter; + + for_each_cache(ca, c, iter) { + struct journal_device *ja = &ca->journal; + unsigned long bitmap[SB_JOURNAL_BUCKETS / BITS_PER_LONG]; + unsigned i, l, r, m; + uint64_t seq; + + bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); + pr_debug("%u journal buckets", ca->sb.njournal_buckets); + + /* Read journal buckets ordered by golden ratio hash to quickly + * find a sequence of buckets with valid journal entries + */ + for (i = 0; i < ca->sb.njournal_buckets; i++) { + l = (i * 2654435769U) % ca->sb.njournal_buckets; + + if (test_bit(l, bitmap)) + break; + + if (read_bucket(l)) + goto bsearch; + } + + /* If that fails, check all the buckets we haven't checked + * already + */ + pr_debug("falling back to linear search"); + + for (l = 0; l < ca->sb.njournal_buckets; l++) { + if (test_bit(l, bitmap)) + continue; + + if (read_bucket(l)) + goto bsearch; + } +bsearch: + /* Binary search */ + m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); + pr_debug("starting binary search, l %u r %u", l, r); + + while (l + 1 < r) { + m = (l + r) >> 1; + + if (read_bucket(m)) + l = m; + else + r = m; + } + + /* Read buckets in reverse order until we stop finding more + * journal entries + */ + pr_debug("finishing up"); + l = m; + + while (1) { + if (!l--) + l = ca->sb.njournal_buckets - 1; + + if (l == m) + break; + + if (test_bit(l, bitmap)) + continue; + + if (!read_bucket(l)) + break; + } + + seq = 0; + + for (i = 0; i < ca->sb.njournal_buckets; i++) + if (ja->seq[i] > seq) { + seq = ja->seq[i]; + ja->cur_idx = ja->discard_idx = + ja->last_idx = i; + + } + } + + c->journal.seq = list_entry(list->prev, + struct journal_replay, + list)->j.seq; + + return 0; +#undef read_bucket +} + +void bch_journal_mark(struct cache_set *c, struct list_head *list) +{ + atomic_t p = { 0 }; + struct bkey *k; + struct journal_replay *i; + struct journal *j = &c->journal; + uint64_t last = j->seq; + + /* + * journal.pin should never fill up - we never write a journal + * entry when it would fill up. But if for some reason it does, we + * iterate over the list in reverse order so that we can just skip that + * refcount instead of bugging. 
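+	 *
+	 * (The fifo_free(&j->pin) > 1 checks below implement that: when the
+	 * fifo is full we simply don't take a refcount on that entry.)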
+ */
+
+ list_for_each_entry_reverse(i, list, list) {
+ BUG_ON(last < i->j.seq);
+ i->pin = NULL;
+
+ while (last-- != i->j.seq)
+ if (fifo_free(&j->pin) > 1) {
+ fifo_push_front(&j->pin, p);
+ atomic_set(&fifo_front(&j->pin), 0);
+ }
+
+ if (fifo_free(&j->pin) > 1) {
+ fifo_push_front(&j->pin, p);
+ i->pin = &fifo_front(&j->pin);
+ atomic_set(i->pin, 1);
+ }
+
+ for (k = i->j.start;
+ k < end(&i->j);
+ k = bkey_next(k)) {
+ unsigned j;
+
+ for (j = 0; j < KEY_PTRS(k); j++) {
+ struct bucket *g = PTR_BUCKET(c, k, j);
+ atomic_inc(&g->pin);
+
+ if (g->prio == BTREE_PRIO &&
+ !ptr_stale(c, k, j))
+ g->prio = INITIAL_PRIO;
+ }
+
+ __bch_btree_mark_key(c, 0, k);
+ }
+ }
+}
+
+int bch_journal_replay(struct cache_set *s, struct list_head *list,
+ struct btree_op *op)
+{
+ int ret = 0, keys = 0, entries = 0;
+ struct bkey *k;
+ struct journal_replay *i =
+ list_entry(list->prev, struct journal_replay, list);
+
+ uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
+
+ list_for_each_entry(i, list, list) {
+ BUG_ON(i->pin && atomic_read(i->pin) != 1);
+
+ if (n != i->j.seq)
+ pr_err("journal entries %llu-%llu "
+ "missing! (replaying %llu-%llu)\n",
+ n, i->j.seq - 1, start, end);
+
+ for (k = i->j.start;
+ k < end(&i->j);
+ k = bkey_next(k)) {
+ pr_debug("%s", pkey(k));
+ bkey_copy(op->keys.top, k);
+ bch_keylist_push(&op->keys);
+
+ op->journal = i->pin;
+ atomic_inc(op->journal);
+
+ ret = bch_btree_insert(op, s);
+ if (ret)
+ goto err;
+
+ BUG_ON(!bch_keylist_empty(&op->keys));
+ keys++;
+
+ cond_resched();
+ }
+
+ if (i->pin)
+ atomic_dec(i->pin);
+ n = i->j.seq + 1;
+ entries++;
+ }
+
+ pr_info("journal replay done, %i keys in %i entries, seq %llu",
+ keys, entries, end);
+
+ while (!list_empty(list)) {
+ i = list_first_entry(list, struct journal_replay, list);
+ list_del(&i->list);
+ kfree(i);
+ }
+err:
+ closure_sync(&op->cl);
+ return ret;
+}
+
+/* Journalling */
+
+static void btree_flush_write(struct cache_set *c)
+{
+ /*
+ * Try to find the btree node that references the oldest journal
+ * entry; best is our current candidate, and is locked if non NULL:
+ */
+ struct btree *b, *best = NULL;
+ unsigned iter;
+
+ for_each_cached_btree(b, c, iter) {
+ if (!down_write_trylock(&b->lock))
+ continue;
+
+ if (!btree_node_dirty(b) ||
+ !btree_current_write(b)->journal) {
+ rw_unlock(true, b);
+ continue;
+ }
+
+ if (!best)
+ best = b;
+ else if (journal_pin_cmp(c,
+ btree_current_write(best),
+ btree_current_write(b))) {
+ rw_unlock(true, best);
+ best = b;
+ } else
+ rw_unlock(true, b);
+ }
+
+ if (best)
+ goto out;
+
+ /* We can't find the best btree node, just pick the first */
+ list_for_each_entry(b, &c->btree_cache, list)
+ if (!b->level && btree_node_dirty(b)) {
+ best = b;
+ rw_lock(true, best, best->level);
+ goto found;
+ }
+
+out:
+ if (!best)
+ return;
+found:
+ if (btree_node_dirty(best))
+ bch_btree_write(best, true, NULL);
+ rw_unlock(true, best);
+}
+
+#define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1)
+
+static void journal_discard_endio(struct bio *bio, int error)
+{
+ struct journal_device *ja =
+ container_of(bio, struct journal_device, discard_bio);
+ struct cache *ca = container_of(ja, struct cache, journal);
+
+ atomic_set(&ja->discard_in_flight, DISCARD_DONE);
+
+ closure_wake_up(&ca->set->journal.wait);
+ closure_put(&ca->set->cl);
+}
+
+static void journal_discard_work(struct work_struct *work)
+{
+ struct journal_device *ja =
+ container_of(work, struct journal_device, discard_work);
+
+ submit_bio(0, &ja->discard_bio);
+}
+
+static void 
do_journal_discard(struct cache *ca)
+{
+ struct journal_device *ja = &ca->journal;
+ struct bio *bio = &ja->discard_bio;
+
+ if (!ca->discard) {
+ ja->discard_idx = ja->last_idx;
+ return;
+ }
+
+ switch (atomic_read(&ja->discard_in_flight)) {
+ case DISCARD_IN_FLIGHT:
+ return;
+
+ case DISCARD_DONE:
+ ja->discard_idx = (ja->discard_idx + 1) %
+ ca->sb.njournal_buckets;
+
+ atomic_set(&ja->discard_in_flight, DISCARD_READY);
+ /* fallthrough */
+
+ case DISCARD_READY:
+ if (ja->discard_idx == ja->last_idx)
+ return;
+
+ atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
+
+ bio_init(bio);
+ bio->bi_sector = bucket_to_sector(ca->set,
+ ca->sb.d[ja->discard_idx]);
+ bio->bi_bdev = ca->bdev;
+ bio->bi_rw = REQ_WRITE|REQ_DISCARD;
+ bio->bi_max_vecs = 1;
+ bio->bi_io_vec = bio->bi_inline_vecs;
+ bio->bi_size = bucket_bytes(ca);
+ bio->bi_end_io = journal_discard_endio;
+
+ closure_get(&ca->set->cl);
+ INIT_WORK(&ja->discard_work, journal_discard_work);
+ schedule_work(&ja->discard_work);
+ }
+}
+
+static void journal_reclaim(struct cache_set *c)
+{
+ struct bkey *k = &c->journal.key;
+ struct cache *ca;
+ uint64_t last_seq;
+ unsigned iter, n = 0;
+ atomic_t p;
+
+ while (!atomic_read(&fifo_front(&c->journal.pin)))
+ fifo_pop(&c->journal.pin, p);
+
+ last_seq = last_seq(&c->journal);
+
+ /* Update last_idx */
+
+ for_each_cache(ca, c, iter) {
+ struct journal_device *ja = &ca->journal;
+
+ while (ja->last_idx != ja->cur_idx &&
+ ja->seq[ja->last_idx] < last_seq)
+ ja->last_idx = (ja->last_idx + 1) %
+ ca->sb.njournal_buckets;
+ }
+
+ for_each_cache(ca, c, iter)
+ do_journal_discard(ca);
+
+ if (c->journal.blocks_free)
+ return;
+
+ /*
+ * Allocate:
+ * XXX: Sort by free journal space
+ */
+
+ for_each_cache(ca, c, iter) {
+ struct journal_device *ja = &ca->journal;
+ unsigned next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+
+ /* No space available on this device */
+ if (next == ja->discard_idx)
+ continue;
+
+ ja->cur_idx = next;
+ k->ptr[n++] = PTR(0,
+ bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
+ ca->sb.nr_this_dev);
+ }
+
+ bkey_init(k);
+ SET_KEY_PTRS(k, n);
+
+ if (n)
+ c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
+
+ if (!journal_full(&c->journal))
+ __closure_wake_up(&c->journal.wait);
+}
+
+void bch_journal_next(struct journal *j)
+{
+ atomic_t p = { 1 };
+
+ j->cur = (j->cur == j->w)
+ ? &j->w[1]
+ : &j->w[0];
+
+ /*
+ * The fifo_push() needs to happen at the same time as j->seq is
+ * incremented for last_seq() to be calculated correctly.
+ */
+ BUG_ON(!fifo_push(&j->pin, p));
+ atomic_set(&fifo_back(&j->pin), 1);
+
+ j->cur->data->seq = ++j->seq;
+ j->cur->need_write = false;
+ j->cur->data->keys = 0;
+
+ if (fifo_full(&j->pin))
+ pr_debug("journal_pin full (%zu)", fifo_used(&j->pin));
+}
+
+static void journal_write_endio(struct bio *bio, int error)
+{
+ struct journal_write *w = bio->bi_private;
+
+ cache_set_err_on(error, w->c, "journal io error");
+ closure_put(&w->c->journal.io.cl);
+}
+
+static void journal_write(struct closure *);
+
+static void journal_write_done(struct closure *cl)
+{
+ struct journal *j = container_of(cl, struct journal, io.cl);
+ struct cache_set *c = container_of(j, struct cache_set, journal);
+
+ struct journal_write *w = (j->cur == j->w)
+ ? 
&j->w[1] + : &j->w[0]; + + __closure_wake_up(&w->wait); + + if (c->journal_delay_ms) + closure_delay(&j->io, msecs_to_jiffies(c->journal_delay_ms)); + + continue_at(cl, journal_write, system_wq); +} + +static void journal_write_unlocked(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, journal.io.cl); + struct cache *ca; + struct journal_write *w = c->journal.cur; + struct bkey *k = &c->journal.key; + unsigned i, sectors = set_blocks(w->data, c) * c->sb.block_size; + + struct bio *bio; + struct bio_list list; + bio_list_init(&list); + + if (!w->need_write) { + /* + * XXX: have to unlock closure before we unlock journal lock, + * else we race with bch_journal(). But this way we race + * against cache set unregister. Doh. + */ + set_closure_fn(cl, NULL, NULL); + closure_sub(cl, CLOSURE_RUNNING + 1); + spin_unlock(&c->journal.lock); + return; + } else if (journal_full(&c->journal)) { + journal_reclaim(c); + spin_unlock(&c->journal.lock); + + btree_flush_write(c); + continue_at(cl, journal_write, system_wq); + } + + c->journal.blocks_free -= set_blocks(w->data, c); + + w->data->btree_level = c->root->level; + + bkey_copy(&w->data->btree_root, &c->root->key); + bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket); + + for_each_cache(ca, c, i) + w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; + + w->data->magic = jset_magic(c); + w->data->version = BCACHE_JSET_VERSION; + w->data->last_seq = last_seq(&c->journal); + w->data->csum = csum_set(w->data); + + for (i = 0; i < KEY_PTRS(k); i++) { + ca = PTR_CACHE(c, k, i); + bio = &ca->journal.bio; + + atomic_long_add(sectors, &ca->meta_sectors_written); + + bio_reset(bio); + bio->bi_sector = PTR_OFFSET(k, i); + bio->bi_bdev = ca->bdev; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_size = sectors << 9; + + bio->bi_end_io = journal_write_endio; + bio->bi_private = w; + bio_map(bio, w->data); + + trace_bcache_journal_write(bio); + bio_list_add(&list, bio); + + SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + sectors); + + ca->journal.seq[ca->journal.cur_idx] = w->data->seq; + } + + atomic_dec_bug(&fifo_back(&c->journal.pin)); + bch_journal_next(&c->journal); + journal_reclaim(c); + + spin_unlock(&c->journal.lock); + + while ((bio = bio_list_pop(&list))) + closure_bio_submit(bio, cl, c->cache[0]); + + continue_at(cl, journal_write_done, NULL); +} + +static void journal_write(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, journal.io.cl); + + spin_lock(&c->journal.lock); + journal_write_unlocked(cl); +} + +static void __journal_try_write(struct cache_set *c, bool noflush) +{ + struct closure *cl = &c->journal.io.cl; + + if (!closure_trylock(cl, &c->cl)) + spin_unlock(&c->journal.lock); + else if (noflush && journal_full(&c->journal)) { + spin_unlock(&c->journal.lock); + continue_at(cl, journal_write, system_wq); + } else + journal_write_unlocked(cl); +} + +#define journal_try_write(c) __journal_try_write(c, false) + +void bch_journal_meta(struct cache_set *c, struct closure *cl) +{ + struct journal_write *w; + + if (CACHE_SYNC(&c->sb)) { + spin_lock(&c->journal.lock); + + w = c->journal.cur; + w->need_write = true; + + if (cl) + BUG_ON(!closure_wait(&w->wait, cl)); + + __journal_try_write(c, true); + } +} + +/* + * Entry point to the journalling code - bio_insert() and btree_invalidate() + * pass bch_journal() a list of keys to be journalled, and then + * bch_journal() hands those same keys off to btree_insert_async() + */ + +void bch_journal(struct closure *cl) +{ + 
struct btree_op *op = container_of(cl, struct btree_op, cl); + struct cache_set *c = op->c; + struct journal_write *w; + size_t b, n = ((uint64_t *) op->keys.top) - op->keys.list; + + if (op->type != BTREE_INSERT || + !CACHE_SYNC(&c->sb)) + goto out; + + /* + * If we're looping because we errored, might already be waiting on + * another journal write: + */ + while (atomic_read(&cl->parent->remaining) & CLOSURE_WAITING) + closure_sync(cl->parent); + + spin_lock(&c->journal.lock); + + if (journal_full(&c->journal)) { + /* XXX: tracepoint */ + closure_wait(&c->journal.wait, cl); + + journal_reclaim(c); + spin_unlock(&c->journal.lock); + + btree_flush_write(c); + continue_at(cl, bch_journal, bcache_wq); + } + + w = c->journal.cur; + w->need_write = true; + b = __set_blocks(w->data, w->data->keys + n, c); + + if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS || + b > c->journal.blocks_free) { + /* XXX: If we were inserting so many keys that they won't fit in + * an _empty_ journal write, we'll deadlock. For now, handle + * this in bch_keylist_realloc() - but something to think about. + */ + BUG_ON(!w->data->keys); + + /* XXX: tracepoint */ + BUG_ON(!closure_wait(&w->wait, cl)); + + closure_flush(&c->journal.io); + + journal_try_write(c); + continue_at(cl, bch_journal, bcache_wq); + } + + memcpy(end(w->data), op->keys.list, n * sizeof(uint64_t)); + w->data->keys += n; + + op->journal = &fifo_back(&c->journal.pin); + atomic_inc(op->journal); + + if (op->flush_journal) { + closure_flush(&c->journal.io); + closure_wait(&w->wait, cl->parent); + } + + journal_try_write(c); +out: + bch_btree_insert_async(cl); +} + +void bch_journal_free(struct cache_set *c) +{ + free_pages((unsigned long) c->journal.w[1].data, JSET_BITS); + free_pages((unsigned long) c->journal.w[0].data, JSET_BITS); + free_fifo(&c->journal.pin); +} + +int bch_journal_alloc(struct cache_set *c) +{ + struct journal *j = &c->journal; + + closure_init_unlocked(&j->io); + spin_lock_init(&j->lock); + + c->journal_delay_ms = 100; + + j->w[0].c = c; + j->w[1].c = c; + + if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || + !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) || + !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS))) + return -ENOMEM; + + return 0; +} diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h new file mode 100644 index 000000000000..3d7851274b04 --- /dev/null +++ b/drivers/md/bcache/journal.h @@ -0,0 +1,215 @@ +#ifndef _BCACHE_JOURNAL_H +#define _BCACHE_JOURNAL_H + +/* + * THE JOURNAL: + * + * The journal is treated as a circular buffer of buckets - a journal entry + * never spans two buckets. This means (not implemented yet) we can resize the + * journal at runtime, and will be needed for bcache on raw flash support. + * + * Journal entries contain a list of keys, ordered by the time they were + * inserted; thus journal replay just has to reinsert the keys. + * + * We also keep some things in the journal header that are logically part of the + * superblock - all the things that are frequently updated. This is for future + * bcache on raw flash support; the superblock (which will become another + * journal) can't be moved or wear leveled, so it contains just enough + * information to find the main journal, and the superblock only has to be + * rewritten when we want to move/wear level the main journal. + * + * Currently, we don't journal BTREE_REPLACE operations - this will hopefully be + * fixed eventually. 
This isn't a bug - BTREE_REPLACE is used for insertions
+ * from cache misses, which don't have to be journaled, and for writeback and
+ * moving gc we work around it by flushing the btree to disk before updating the
+ * gc information. But it is a potential issue with incremental garbage
+ * collection, and it's fragile.
+ *
+ * OPEN JOURNAL ENTRIES:
+ *
+ * Each journal entry contains, in the header, the sequence number of the last
+ * journal entry still open - i.e. that has keys that haven't been flushed to
+ * disk in the btree.
+ *
+ * We track this by maintaining a refcount for every open journal entry, in a
+ * fifo; each entry in the fifo corresponds to a particular journal
+ * entry/sequence number. When the refcount at the tail of the fifo goes to
+ * zero, we pop it off - thus, the size of the fifo tells us the number of open
+ * journal entries.
+ *
+ * We take a refcount on a journal entry when we add some keys to a journal
+ * entry that we're going to insert (held by struct btree_op), and then when we
+ * insert those keys into the btree the btree write we're setting up takes a
+ * copy of that refcount (held by struct btree_write). That refcount is dropped
+ * when the btree write completes.
+ *
+ * A struct btree_write can only hold a refcount on a single journal entry, but
+ * might contain keys for many journal entries - we handle this by making sure
+ * it always has a refcount on the _oldest_ journal entry of all the journal
+ * entries it has keys for.
+ *
+ * JOURNAL RECLAIM:
+ *
+ * As mentioned previously, our fifo of refcounts tells us the number of open
+ * journal entries; from that and the current journal sequence number we compute
+ * last_seq - the oldest journal entry we still need. We write last_seq in each
+ * journal entry, and we also have to keep track of where it exists on disk so
+ * we don't overwrite it when we loop around the journal.
+ *
+ * To do that we track, for each journal bucket, the sequence number of the
+ * newest journal entry it contains - if we don't need that journal entry we
+ * don't need anything in that bucket anymore. From that we track the last
+ * journal bucket we still need; all this is tracked in struct journal_device
+ * and updated by journal_reclaim().
+ *
+ * JOURNAL FILLING UP:
+ *
+ * There are two ways the journal could fill up; either we could run out of
+ * space to write to, or we could have too many open journal entries and run out
+ * of room in the fifo of refcounts. Since those refcounts are decremented
+ * without any locking we can't safely resize that fifo, so we handle the two
+ * cases the same way.
+ *
+ * If the journal fills up, we start flushing dirty btree nodes until we can
+ * allocate space for a journal write again - preferentially flushing btree
+ * nodes that are pinning the oldest journal entries first.
+ */
+
+#define BCACHE_JSET_VERSION_UUIDv1 1
+/* Always latest UUID format */
+#define BCACHE_JSET_VERSION_UUID 1
+#define BCACHE_JSET_VERSION 1
+
+/*
+ * On disk format for a journal entry:
+ * seq is monotonically increasing; every journal entry has its own unique
+ * sequence number.
+ *
+ * last_seq is the oldest journal entry that still has keys the btree hasn't
+ * flushed to disk yet.
+ *
+ * version is for on disk format changes. 
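+ *
+ * keys counts the uint64_ts in d[], not the number of bkeys; note how
+ * bch_journal() bumps it by the number of u64s it copies in.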
+ */
+struct jset {
+ uint64_t csum;
+ uint64_t magic;
+ uint64_t seq;
+ uint32_t version;
+ uint32_t keys;
+
+ uint64_t last_seq;
+
+ BKEY_PADDED(uuid_bucket);
+ BKEY_PADDED(btree_root);
+ uint16_t btree_level;
+ uint16_t pad[3];
+
+ uint64_t prio_bucket[MAX_CACHES_PER_SET];
+
+ union {
+ struct bkey start[0];
+ uint64_t d[0];
+ };
+};
+
+/*
+ * Only used for holding the journal entries we read in bch_journal_read()
+ * during cache registration
+ */
+struct journal_replay {
+ struct list_head list;
+ atomic_t *pin;
+ struct jset j;
+};
+
+/*
+ * We put two of these in struct journal; we use them for writes to the
+ * journal that are being staged or in flight.
+ */
+struct journal_write {
+ struct jset *data;
+#define JSET_BITS 3
+
+ struct cache_set *c;
+ struct closure_waitlist wait;
+ bool need_write;
+};
+
+/* Embedded in struct cache_set */
+struct journal {
+ spinlock_t lock;
+ /* used when waiting because the journal was full */
+ struct closure_waitlist wait;
+ struct closure_with_timer io;
+
+ /* Number of blocks free in the bucket(s) we're currently writing to */
+ unsigned blocks_free;
+ uint64_t seq;
+ DECLARE_FIFO(atomic_t, pin);
+
+ BKEY_PADDED(key);
+
+ struct journal_write w[2], *cur;
+};
+
+/*
+ * Embedded in struct cache. First three fields refer to the array of journal
+ * buckets, in cache_sb.
+ */
+struct journal_device {
+ /*
+ * For each journal bucket, contains the max sequence number of the
+ * journal writes it contains - so we know when a bucket can be reused.
+ */
+ uint64_t seq[SB_JOURNAL_BUCKETS];
+
+ /* Journal bucket we're currently writing to */
+ unsigned cur_idx;
+
+ /* Last journal bucket that still contains an open journal entry */
+ unsigned last_idx;
+
+ /* Next journal bucket to be discarded */
+ unsigned discard_idx;
+
+#define DISCARD_READY 0
+#define DISCARD_IN_FLIGHT 1
+#define DISCARD_DONE 2
+ /* Discard state machine, one of the DISCARD_* values above */
+ atomic_t discard_in_flight;
+
+ struct work_struct discard_work;
+ struct bio discard_bio;
+ struct bio_vec discard_bv;
+
+ /* Bio for journal reads/writes to this device */
+ struct bio bio;
+ struct bio_vec bv[8];
+};
+
+#define journal_pin_cmp(c, l, r) \
+ (fifo_idx(&(c)->journal.pin, (l)->journal) > \
+ fifo_idx(&(c)->journal.pin, (r)->journal))
+
+#define JOURNAL_PIN 20000
+
+#define journal_full(j) \
+ (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1)
+
+struct closure;
+struct cache_set;
+struct btree_op;
+
+void bch_journal(struct closure *);
+void bch_journal_next(struct journal *);
+void bch_journal_mark(struct cache_set *, struct list_head *);
+void bch_journal_meta(struct cache_set *, struct closure *);
+int bch_journal_read(struct cache_set *, struct list_head *,
+ struct btree_op *);
+int bch_journal_replay(struct cache_set *, struct list_head *,
+ struct btree_op *);
+
+void bch_journal_free(struct cache_set *);
+int bch_journal_alloc(struct cache_set *);
+
+#endif /* _BCACHE_JOURNAL_H */
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
new file mode 100644
index 000000000000..c69fc92b02cf
--- /dev/null
+++ b/drivers/md/bcache/movinggc.c
@@ -0,0 +1,254 @@
+/*
+ * Moving/copying garbage collector
+ *
+ * Copyright 2012 Google, Inc. 
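+ *
+ * Rough shape of this file: moving_pred() picks out keys whose buckets
+ * fall below ca->gc_move_threshold, read_moving() reads that data back
+ * in, and write_moving() reinserts it with BTREE_REPLACE so a key is
+ * only rewritten if it hasn't changed underneath us.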
+ */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +struct moving_io { + struct keybuf_key *w; + struct search s; + struct bbio bio; +}; + +static bool moving_pred(struct keybuf *buf, struct bkey *k) +{ + struct cache_set *c = container_of(buf, struct cache_set, + moving_gc_keys); + unsigned i; + + for (i = 0; i < KEY_PTRS(k); i++) { + struct cache *ca = PTR_CACHE(c, k, i); + struct bucket *g = PTR_BUCKET(c, k, i); + + if (GC_SECTORS_USED(g) < ca->gc_move_threshold) + return true; + } + + return false; +} + +/* Moving GC - IO loop */ + +static void moving_io_destructor(struct closure *cl) +{ + struct moving_io *io = container_of(cl, struct moving_io, s.cl); + kfree(io); +} + +static void write_moving_finish(struct closure *cl) +{ + struct moving_io *io = container_of(cl, struct moving_io, s.cl); + struct bio *bio = &io->bio.bio; + struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt); + + while (bv-- != bio->bi_io_vec) + __free_page(bv->bv_page); + + pr_debug("%s %s", io->s.op.insert_collision + ? "collision moving" : "moved", + pkey(&io->w->key)); + + bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w); + + atomic_dec_bug(&io->s.op.c->in_flight); + closure_wake_up(&io->s.op.c->moving_gc_wait); + + closure_return_with_destructor(cl, moving_io_destructor); +} + +static void read_moving_endio(struct bio *bio, int error) +{ + struct moving_io *io = container_of(bio->bi_private, + struct moving_io, s.cl); + + if (error) + io->s.error = error; + + bch_bbio_endio(io->s.op.c, bio, error, "reading data to move"); +} + +static void moving_init(struct moving_io *io) +{ + struct bio *bio = &io->bio.bio; + + bio_init(bio); + bio_get(bio); + bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + + bio->bi_size = KEY_SIZE(&io->w->key) << 9; + bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key), + PAGE_SECTORS); + bio->bi_private = &io->s.cl; + bio->bi_io_vec = bio->bi_inline_vecs; + bio_map(bio, NULL); +} + +static void write_moving(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct moving_io *io = container_of(s, struct moving_io, s); + + if (!s->error) { + trace_bcache_write_moving(&io->bio.bio); + + moving_init(io); + + io->bio.bio.bi_sector = KEY_START(&io->w->key); + s->op.lock = -1; + s->op.write_prio = 1; + s->op.cache_bio = &io->bio.bio; + + s->writeback = KEY_DIRTY(&io->w->key); + s->op.csum = KEY_CSUM(&io->w->key); + + s->op.type = BTREE_REPLACE; + bkey_copy(&s->op.replace, &io->w->key); + + closure_init(&s->op.cl, cl); + bch_insert_data(&s->op.cl); + } + + continue_at(cl, write_moving_finish, NULL); +} + +static void read_moving_submit(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct moving_io *io = container_of(s, struct moving_io, s); + struct bio *bio = &io->bio.bio; + + trace_bcache_read_moving(bio); + bch_submit_bbio(bio, s->op.c, &io->w->key, 0); + + continue_at(cl, write_moving, bch_gc_wq); +} + +static void read_moving(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, moving_gc); + struct keybuf_key *w; + struct moving_io *io; + struct bio *bio; + + /* XXX: if we error, background writeback could stall indefinitely */ + + while (!test_bit(CACHE_SET_STOPPING, &c->flags)) { + w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY); + if (!w) + break; + + io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec) + * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), + GFP_KERNEL); + if (!io) + goto err; + + w->private = io; 
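+ /*
+ * Stash io in w->private too, so the error path below can tell
+ * whether there's anything to kfree() for this key.
+ */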
+ io->w = w; + io->s.op.inode = KEY_INODE(&w->key); + io->s.op.c = c; + + moving_init(io); + bio = &io->bio.bio; + + bio->bi_rw = READ; + bio->bi_end_io = read_moving_endio; + + if (bio_alloc_pages(bio, GFP_KERNEL)) + goto err; + + pr_debug("%s", pkey(&w->key)); + + closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl); + + if (atomic_inc_return(&c->in_flight) >= 64) { + closure_wait_event(&c->moving_gc_wait, cl, + atomic_read(&c->in_flight) < 64); + continue_at(cl, read_moving, bch_gc_wq); + } + } + + if (0) { +err: if (!IS_ERR_OR_NULL(w->private)) + kfree(w->private); + + bch_keybuf_del(&c->moving_gc_keys, w); + } + + closure_return(cl); +} + +void bch_moving_gc(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, gc.cl); + struct cache *ca; + struct bucket *b; + unsigned i; + + bool bucket_cmp(struct bucket *l, struct bucket *r) + { + return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); + } + + unsigned top(struct cache *ca) + { + return GC_SECTORS_USED(heap_peek(&ca->heap)); + } + + if (!c->copy_gc_enabled) + closure_return(cl); + + mutex_lock(&c->bucket_lock); + + for_each_cache(ca, c, i) { + unsigned sectors_to_move = 0; + unsigned reserve_sectors = ca->sb.bucket_size * + min(fifo_used(&ca->free), ca->free.size / 2); + + ca->heap.used = 0; + + for_each_bucket(b, ca) { + if (!GC_SECTORS_USED(b)) + continue; + + if (!heap_full(&ca->heap)) { + sectors_to_move += GC_SECTORS_USED(b); + heap_add(&ca->heap, b, bucket_cmp); + } else if (bucket_cmp(b, heap_peek(&ca->heap))) { + sectors_to_move -= top(ca); + sectors_to_move += GC_SECTORS_USED(b); + + ca->heap.data[0] = b; + heap_sift(&ca->heap, 0, bucket_cmp); + } + } + + while (sectors_to_move > reserve_sectors) { + heap_pop(&ca->heap, b, bucket_cmp); + sectors_to_move -= GC_SECTORS_USED(b); + } + + ca->gc_move_threshold = top(ca); + + pr_debug("threshold %u", ca->gc_move_threshold); + } + + mutex_unlock(&c->bucket_lock); + + c->moving_gc_keys.last_scanned = ZERO_KEY; + + closure_init(&c->moving_gc, cl); + read_moving(&c->moving_gc); + + closure_return(cl); +} + +void bch_moving_init_cache_set(struct cache_set *c) +{ + bch_keybuf_init(&c->moving_gc_keys, moving_pred); +} diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c new file mode 100644 index 000000000000..4f552de49aaa --- /dev/null +++ b/drivers/md/bcache/request.c @@ -0,0 +1,1409 @@ +/* + * Main bcache entry point - handle a read or a write request and decide what to + * do with it; the make_request functions are called by the block layer. + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +#include +#include +#include +#include +#include "blk-cgroup.h" + +#include + +#define CUTOFF_CACHE_ADD 95 +#define CUTOFF_CACHE_READA 90 +#define CUTOFF_WRITEBACK 50 +#define CUTOFF_WRITEBACK_SYNC 75 + +struct kmem_cache *bch_search_cache; + +static void check_should_skip(struct cached_dev *, struct search *); + +/* Cgroup interface */ + +#ifdef CONFIG_CGROUP_BCACHE +static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 }; + +static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup) +{ + struct cgroup_subsys_state *css; + return cgroup && + (css = cgroup_subsys_state(cgroup, bcache_subsys_id)) + ? container_of(css, struct bch_cgroup, css) + : &bcache_default_cgroup; +} + +struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio) +{ + struct cgroup_subsys_state *css = bio->bi_css + ? 
cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id) + : task_subsys_state(current, bcache_subsys_id); + + return css + ? container_of(css, struct bch_cgroup, css) + : &bcache_default_cgroup; +} + +static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, loff_t *ppos) +{ + char tmp[1024]; + int len = snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, + cgroup_to_bcache(cgrp)->cache_mode + 1); + + if (len < 0) + return len; + + return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); +} + +static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + int v = read_string_list(buf, bch_cache_modes); + if (v < 0) + return v; + + cgroup_to_bcache(cgrp)->cache_mode = v - 1; + return 0; +} + +static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft) +{ + return cgroup_to_bcache(cgrp)->verify; +} + +static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + cgroup_to_bcache(cgrp)->verify = val; + return 0; +} + +static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_hits); +} + +static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_misses); +} + +static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp, + struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_bypass_hits); +} + +static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp, + struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_bypass_misses); +} + +static struct cftype bch_files[] = { + { + .name = "cache_mode", + .read = cache_mode_read, + .write_string = cache_mode_write, + }, + { + .name = "verify", + .read_u64 = bch_verify_read, + .write_u64 = bch_verify_write, + }, + { + .name = "cache_hits", + .read_u64 = bch_cache_hits_read, + }, + { + .name = "cache_misses", + .read_u64 = bch_cache_misses_read, + }, + { + .name = "cache_bypass_hits", + .read_u64 = bch_cache_bypass_hits_read, + }, + { + .name = "cache_bypass_misses", + .read_u64 = bch_cache_bypass_misses_read, + }, + { } /* terminate */ +}; + +static void init_bch_cgroup(struct bch_cgroup *cg) +{ + cg->cache_mode = -1; +} + +static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) +{ + struct bch_cgroup *cg; + + cg = kzalloc(sizeof(*cg), GFP_KERNEL); + if (!cg) + return ERR_PTR(-ENOMEM); + init_bch_cgroup(cg); + return &cg->css; +} + +static void bcachecg_destroy(struct cgroup *cgroup) +{ + struct bch_cgroup *cg = cgroup_to_bcache(cgroup); + free_css_id(&bcache_subsys, &cg->css); + kfree(cg); +} + +struct cgroup_subsys bcache_subsys = { + .create = bcachecg_create, + .destroy = bcachecg_destroy, + .subsys_id = bcache_subsys_id, + .name = "bcache", + .module = THIS_MODULE, +}; +EXPORT_SYMBOL_GPL(bcache_subsys); +#endif + +static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) +{ +#ifdef CONFIG_CGROUP_BCACHE + int r = bch_bio_to_cgroup(bio)->cache_mode; + if (r >= 0) + return r; +#endif + return BDEV_CACHE_MODE(&dc->sb); +} + +static bool verify(struct cached_dev *dc, struct bio *bio) +{ +#ifdef CONFIG_CGROUP_BCACHE + if (bch_bio_to_cgroup(bio)->verify) + return true; +#endif + return dc->verify; +} + +static void 
bio_csum(struct bio *bio, struct bkey *k)
+{
+ struct bio_vec *bv;
+ uint64_t csum = 0;
+ int i;
+
+ bio_for_each_segment(bv, bio, i) {
+ void *d = kmap(bv->bv_page) + bv->bv_offset;
+ csum = crc64_update(csum, d, bv->bv_len);
+ kunmap(bv->bv_page);
+ }
+
+ k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
+}
+
+/* Insert data into cache */
+
+static void bio_invalidate(struct closure *cl)
+{
+ struct btree_op *op = container_of(cl, struct btree_op, cl);
+ struct bio *bio = op->cache_bio;
+
+ pr_debug("invalidating %i sectors from %llu",
+ bio_sectors(bio), (uint64_t) bio->bi_sector);
+
+ while (bio_sectors(bio)) {
+ unsigned len = min(bio_sectors(bio), 1U << 14);
+
+ if (bch_keylist_realloc(&op->keys, 0, op->c))
+ goto out;
+
+ bio->bi_sector += len;
+ bio->bi_size -= len << 9;
+
+ bch_keylist_add(&op->keys,
+ &KEY(op->inode, bio->bi_sector, len));
+ }
+
+ op->insert_data_done = true;
+ bio_put(bio);
+out:
+ continue_at(cl, bch_journal, bcache_wq);
+}
+
+struct open_bucket {
+ struct list_head list;
+ struct task_struct *last;
+ unsigned sectors_free;
+ BKEY_PADDED(key);
+};
+
+void bch_open_buckets_free(struct cache_set *c)
+{
+ struct open_bucket *b;
+
+ while (!list_empty(&c->data_buckets)) {
+ b = list_first_entry(&c->data_buckets,
+ struct open_bucket, list);
+ list_del(&b->list);
+ kfree(b);
+ }
+}
+
+int bch_open_buckets_alloc(struct cache_set *c)
+{
+ int i;
+
+ spin_lock_init(&c->data_bucket_lock);
+
+ for (i = 0; i < 6; i++) {
+ struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+ if (!b)
+ return -ENOMEM;
+
+ list_add(&b->list, &c->data_buckets);
+ }
+
+ return 0;
+}
+
+/*
+ * We keep multiple buckets open for writes, and try to segregate different
+ * write streams for better cache utilization: first we look for a bucket where
+ * the last write to it was sequential with the current write, and failing that
+ * we look for a bucket that was last used by the same task.
+ *
+ * The idea is that if you've got multiple tasks pulling data into the cache at
+ * the same time, you'll get better cache utilization if you try to segregate
+ * their data and preserve locality.
+ *
+ * For example, say you're starting Firefox at the same time you're copying a
+ * bunch of files. Firefox will likely end up being fairly hot and stay in the
+ * cache awhile, but the data you copied might not be; if you wrote all that
+ * data to the same buckets it'd get invalidated at the same time.
+ *
+ * Both of those tasks will be doing fairly random IO so we can't rely on
+ * detecting sequential IO to segregate their data, but going off of the task
+ * should be a sane heuristic.
+ */
+static struct open_bucket *pick_data_bucket(struct cache_set *c,
+ const struct bkey *search,
+ struct task_struct *task,
+ struct bkey *alloc)
+{
+ struct open_bucket *ret, *ret_task = NULL;
+
+ list_for_each_entry_reverse(ret, &c->data_buckets, list)
+ if (!bkey_cmp(&ret->key, search))
+ goto found;
+ else if (ret->last == task)
+ ret_task = ret;
+
+ ret = ret_task ?: list_first_entry(&c->data_buckets,
+ struct open_bucket, list);
+found:
+ if (!ret->sectors_free && KEY_PTRS(alloc)) {
+ ret->sectors_free = c->sb.bucket_size;
+ bkey_copy(&ret->key, alloc);
+ bkey_init(alloc);
+ }
+
+ if (!ret->sectors_free)
+ ret = NULL;
+
+ return ret;
+}
+
+/*
+ * Allocates some space in the cache to write to, sets k to point to the newly
+ * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
+ * end of the newly allocated space). 
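+ *
+ * (To make the key convention concrete: if k comes in as
+ * KEY(inode, sector, 0) and 32 sectors get allocated, it leaves as
+ * KEY(inode, sector + 32, 32) - the offset marks the end of the
+ * extent and the size says how far back from there it starts.)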
+ *
+ * May allocate fewer sectors than @sectors, KEY_SIZE(k) indicates how many
+ * sectors were actually allocated.
+ *
+ * If s->writeback is true, will not fail.
+ */
+static bool bch_alloc_sectors(struct bkey *k, unsigned sectors,
+ struct search *s)
+{
+ struct cache_set *c = s->op.c;
+ struct open_bucket *b;
+ BKEY_PADDED(key) alloc;
+ struct closure cl, *w = NULL;
+ unsigned i;
+
+ if (s->writeback) {
+ closure_init_stack(&cl);
+ w = &cl;
+ }
+
+ /*
+ * We might have to allocate a new bucket, which we can't do with a
+ * spinlock held. So if we have to allocate, we drop the lock, allocate
+ * and then retry. KEY_PTRS() indicates whether alloc points to
+ * allocated bucket(s).
+ */
+
+ bkey_init(&alloc.key);
+ spin_lock(&c->data_bucket_lock);
+
+ while (!(b = pick_data_bucket(c, k, s->task, &alloc.key))) {
+ unsigned watermark = s->op.write_prio
+ ? WATERMARK_MOVINGGC
+ : WATERMARK_NONE;
+
+ spin_unlock(&c->data_bucket_lock);
+
+ if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, w))
+ return false;
+
+ spin_lock(&c->data_bucket_lock);
+ }
+
+ /*
+ * If we had to allocate, we might race and not need to allocate the
+ * second time we call pick_data_bucket(). If we allocated a bucket but
+ * didn't use it, drop the refcount bch_bucket_alloc_set() took:
+ */
+ if (KEY_PTRS(&alloc.key))
+ __bkey_put(c, &alloc.key);
+
+ for (i = 0; i < KEY_PTRS(&b->key); i++)
+ EBUG_ON(ptr_stale(c, &b->key, i));
+
+ /* Set up the pointer to the space we're allocating: */
+
+ for (i = 0; i < KEY_PTRS(&b->key); i++)
+ k->ptr[i] = b->key.ptr[i];
+
+ sectors = min(sectors, b->sectors_free);
+
+ SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
+ SET_KEY_SIZE(k, sectors);
+ SET_KEY_PTRS(k, KEY_PTRS(&b->key));
+
+ /*
+ * Move b to the end of the lru, and keep track of what this bucket was
+ * last used for:
+ */
+ list_move_tail(&b->list, &c->data_buckets);
+ bkey_copy_key(&b->key, k);
+ b->last = s->task;
+
+ b->sectors_free -= sectors;
+
+ for (i = 0; i < KEY_PTRS(&b->key); i++) {
+ SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
+
+ atomic_long_add(sectors,
+ &PTR_CACHE(c, &b->key, i)->sectors_written);
+ }
+
+ if (b->sectors_free < c->sb.block_size)
+ b->sectors_free = 0;
+
+ /*
+ * k takes refcounts on the buckets it points to until it's inserted
+ * into the btree, but if we're done with this bucket we just transfer
+ * the open bucket's refcount.
+ */
+ if (b->sectors_free)
+ for (i = 0; i < KEY_PTRS(&b->key); i++)
+ atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
+
+ spin_unlock(&c->data_bucket_lock);
+ return true;
+}
+
+static void bch_insert_data_error(struct closure *cl)
+{
+ struct btree_op *op = container_of(cl, struct btree_op, cl);
+
+ /*
+ * Our data write just errored, which means we've got a bunch of keys to
+ * insert that point to data that wasn't successfully written.
+ *
+ * We don't have to insert those keys but we still have to invalidate
+ * that region of the cache - so, if we just strip off all the pointers
+ * from the keys we'll accomplish just that. 
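+ *
+ * (The stripped keys are strictly smaller - header only, no
+ * pointers - which is why the loop below can compact the keylist in
+ * place: dst never overtakes src.)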
+ */ + + struct bkey *src = op->keys.bottom, *dst = op->keys.bottom; + + while (src != op->keys.top) { + struct bkey *n = bkey_next(src); + + SET_KEY_PTRS(src, 0); + bkey_copy(dst, src); + + dst = bkey_next(dst); + src = n; + } + + op->keys.top = dst; + + bch_journal(cl); +} + +static void bch_insert_data_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + + if (error) { + /* TODO: We could try to recover from this. */ + if (s->writeback) + s->error = error; + else if (s->write) + set_closure_fn(cl, bch_insert_data_error, bcache_wq); + else + set_closure_fn(cl, NULL, NULL); + } + + bch_bbio_endio(op->c, bio, error, "writing data to cache"); +} + +static void bch_insert_data_loop(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + struct bio *bio = op->cache_bio, *n; + + if (op->skip) + return bio_invalidate(cl); + + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { + set_gc_sectors(op->c); + bch_queue_gc(op->c); + } + + do { + unsigned i; + struct bkey *k; + struct bio_set *split = s->d + ? s->d->bio_split : op->c->bio_split; + + /* 1 for the device pointer and 1 for the chksum */ + if (bch_keylist_realloc(&op->keys, + 1 + (op->csum ? 1 : 0), + op->c)) + continue_at(cl, bch_journal, bcache_wq); + + k = op->keys.top; + bkey_init(k); + SET_KEY_INODE(k, op->inode); + SET_KEY_OFFSET(k, bio->bi_sector); + + if (!bch_alloc_sectors(k, bio_sectors(bio), s)) + goto err; + + n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); + if (!n) { + __bkey_put(op->c, k); + continue_at(cl, bch_insert_data_loop, bcache_wq); + } + + n->bi_end_io = bch_insert_data_endio; + n->bi_private = cl; + + if (s->writeback) { + SET_KEY_DIRTY(k, true); + + for (i = 0; i < KEY_PTRS(k); i++) + SET_GC_MARK(PTR_BUCKET(op->c, k, i), + GC_MARK_DIRTY); + } + + SET_KEY_CSUM(k, op->csum); + if (KEY_CSUM(k)) + bio_csum(n, k); + + pr_debug("%s", pkey(k)); + bch_keylist_push(&op->keys); + + trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev); + n->bi_rw |= REQ_WRITE; + bch_submit_bbio(n, op->c, k, 0); + } while (n != bio); + + op->insert_data_done = true; + continue_at(cl, bch_journal, bcache_wq); +err: + /* bch_alloc_sectors() blocks if s->writeback = true */ + BUG_ON(s->writeback); + + /* + * But if it's not a writeback write we'd rather just bail out if + * there aren't any buckets ready to write to - it might take awhile and + * we might be starving btree writes for gc or something. + */ + + if (s->write) { + /* + * Writethrough write: We can't complete the write until we've + * updated the index. But we don't want to delay the write while + * we wait for buckets to be freed up, so just invalidate the + * rest of the write. + */ + op->skip = true; + return bio_invalidate(cl); + } else { + /* + * From a cache miss, we can just insert the keys for the data + * we have written or bail out if we didn't do anything. 
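+ *
+ * (op->keys can legitimately be non-empty here: earlier trips
+ * through the loop may have allocated and written some data before
+ * we ran out of buckets, and those keys still have to be journalled
+ * and inserted.)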
+ */ + op->insert_data_done = true; + bio_put(bio); + + if (!bch_keylist_empty(&op->keys)) + continue_at(cl, bch_journal, bcache_wq); + else + closure_return(cl); + } +} + +/** + * bch_insert_data - stick some data in the cache + * + * This is the starting point for any data to end up in a cache device; it could + * be from a normal write, or a writeback write, or a write to a flash only + * volume - it's also used by the moving garbage collector to compact data in + * mostly empty buckets. + * + * It first writes the data to the cache, creating a list of keys to be inserted + * (if the data had to be fragmented there will be multiple keys); after the + * data is written it calls bch_journal, and after the keys have been added to + * the next journal write they're inserted into the btree. + * + * It inserts the data in op->cache_bio; bi_sector is used for the key offset, + * and op->inode is used for the key inode. + * + * If op->skip is true, instead of inserting the data it invalidates the region + * of the cache represented by op->cache_bio and op->inode. + */ +void bch_insert_data(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + + bch_keylist_init(&op->keys); + bio_get(op->cache_bio); + bch_insert_data_loop(cl); +} + +void bch_btree_insert_async(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + + if (bch_btree_insert(op, op->c)) { + s->error = -ENOMEM; + op->insert_data_done = true; + } + + if (op->insert_data_done) { + bch_keylist_free(&op->keys); + closure_return(cl); + } else + continue_at(cl, bch_insert_data_loop, bcache_wq); +} + +/* Common code for the make_request functions */ + +static void request_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + + if (error) { + struct search *s = container_of(cl, struct search, cl); + s->error = error; + /* Only cache read errors are recoverable */ + s->recoverable = false; + } + + bio_put(bio); + closure_put(cl); +} + +void bch_cache_read_endio(struct bio *bio, int error) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + struct closure *cl = bio->bi_private; + struct search *s = container_of(cl, struct search, cl); + + /* + * If the bucket was reused while our bio was in flight, we might have + * read the wrong data. Set s->error but not error so it doesn't get + * counted against the cache device, but we'll still reread the data + * from the backing device. 
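+ *
+ * (Setting s->error is what later routes us through the
+ * request_read_error() retry path; see request_read_done_bh().)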
+ */ + + if (error) + s->error = error; + else if (ptr_stale(s->op.c, &b->key, 0)) { + atomic_long_inc(&s->op.c->cache_read_races); + s->error = -EINTR; + } + + bch_bbio_endio(s->op.c, bio, error, "reading from cache"); +} + +static void bio_complete(struct search *s) +{ + if (s->orig_bio) { + int cpu, rw = bio_data_dir(s->orig_bio); + unsigned long duration = jiffies - s->start_time; + + cpu = part_stat_lock(); + part_round_stats(cpu, &s->d->disk->part0); + part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration); + part_stat_unlock(); + + trace_bcache_request_end(s, s->orig_bio); + bio_endio(s->orig_bio, s->error); + s->orig_bio = NULL; + } +} + +static void do_bio_hook(struct search *s) +{ + struct bio *bio = &s->bio.bio; + memcpy(bio, s->orig_bio, sizeof(struct bio)); + + bio->bi_end_io = request_endio; + bio->bi_private = &s->cl; + atomic_set(&bio->bi_cnt, 3); +} + +static void search_free(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + bio_complete(s); + + if (s->op.cache_bio) + bio_put(s->op.cache_bio); + + if (s->unaligned_bvec) + mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec); + + closure_debug_destroy(cl); + mempool_free(s, s->d->c->search); +} + +static struct search *search_alloc(struct bio *bio, struct bcache_device *d) +{ + struct bio_vec *bv; + struct search *s = mempool_alloc(d->c->search, GFP_NOIO); + memset(s, 0, offsetof(struct search, op.keys)); + + __closure_init(&s->cl, NULL); + + s->op.inode = d->id; + s->op.c = d->c; + s->d = d; + s->op.lock = -1; + s->task = current; + s->orig_bio = bio; + s->write = (bio->bi_rw & REQ_WRITE) != 0; + s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; + s->recoverable = 1; + s->start_time = jiffies; + do_bio_hook(s); + + if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) { + bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO); + memcpy(bv, bio_iovec(bio), + sizeof(struct bio_vec) * bio_segments(bio)); + + s->bio.bio.bi_io_vec = bv; + s->unaligned_bvec = 1; + } + + return s; +} + +static void btree_read_async(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + + int ret = btree_root(search_recurse, op->c, op); + + if (ret == -EAGAIN) + continue_at(cl, btree_read_async, bcache_wq); + + closure_return(cl); +} + +/* Cached devices */ + +static void cached_dev_bio_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + search_free(cl); + cached_dev_put(dc); +} + +/* Process reads */ + +static void cached_dev_read_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + + if (s->op.insert_collision) + bch_mark_cache_miss_collision(s); + + if (s->op.cache_bio) { + int i; + struct bio_vec *bv; + + __bio_for_each_segment(bv, s->op.cache_bio, i, 0) + __free_page(bv->bv_page); + } + + cached_dev_bio_complete(cl); +} + +static void request_read_error(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct bio_vec *bv; + int i; + + if (s->recoverable) { + /* The cache read failed, but we can retry from the backing + * device. 
+ */ + pr_debug("recovering at sector %llu", + (uint64_t) s->orig_bio->bi_sector); + + s->error = 0; + bv = s->bio.bio.bi_io_vec; + do_bio_hook(s); + s->bio.bio.bi_io_vec = bv; + + if (!s->unaligned_bvec) + bio_for_each_segment(bv, s->orig_bio, i) + bv->bv_offset = 0, bv->bv_len = PAGE_SIZE; + else + memcpy(s->bio.bio.bi_io_vec, + bio_iovec(s->orig_bio), + sizeof(struct bio_vec) * + bio_segments(s->orig_bio)); + + /* XXX: invalidate cache */ + + trace_bcache_read_retry(&s->bio.bio); + closure_bio_submit(&s->bio.bio, &s->cl, s->d); + } + + continue_at(cl, cached_dev_read_complete, NULL); +} + +static void request_read_done(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + /* + * s->cache_bio != NULL implies that we had a cache miss; cache_bio now + * contains data ready to be inserted into the cache. + * + * First, we copy the data we just read from cache_bio's bounce buffers + * to the buffers the original bio pointed to: + */ + + if (s->op.cache_bio) { + struct bio_vec *src, *dst; + unsigned src_offset, dst_offset, bytes; + void *dst_ptr; + + bio_reset(s->op.cache_bio); + s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; + s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; + s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; + bio_map(s->op.cache_bio, NULL); + + src = bio_iovec(s->op.cache_bio); + dst = bio_iovec(s->cache_miss); + src_offset = src->bv_offset; + dst_offset = dst->bv_offset; + dst_ptr = kmap(dst->bv_page); + + while (1) { + if (dst_offset == dst->bv_offset + dst->bv_len) { + kunmap(dst->bv_page); + dst++; + if (dst == bio_iovec_idx(s->cache_miss, + s->cache_miss->bi_vcnt)) + break; + + dst_offset = dst->bv_offset; + dst_ptr = kmap(dst->bv_page); + } + + if (src_offset == src->bv_offset + src->bv_len) { + src++; + if (src == bio_iovec_idx(s->op.cache_bio, + s->op.cache_bio->bi_vcnt)) + BUG(); + + src_offset = src->bv_offset; + } + + bytes = min(dst->bv_offset + dst->bv_len - dst_offset, + src->bv_offset + src->bv_len - src_offset); + + memcpy(dst_ptr + dst_offset, + page_address(src->bv_page) + src_offset, + bytes); + + src_offset += bytes; + dst_offset += bytes; + } + + bio_put(s->cache_miss); + s->cache_miss = NULL; + } + + if (verify(dc, &s->bio.bio) && s->recoverable) + bch_data_verify(s); + + bio_complete(s); + + if (s->op.cache_bio && + !test_bit(CACHE_SET_STOPPING, &s->op.c->flags)) { + s->op.type = BTREE_REPLACE; + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + } + + continue_at(cl, cached_dev_read_complete, NULL); +} + +static void request_read_done_bh(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip); + + if (s->error) + continue_at_nobarrier(cl, request_read_error, bcache_wq); + else if (s->op.cache_bio || verify(dc, &s->bio.bio)) + continue_at_nobarrier(cl, request_read_done, bcache_wq); + else + continue_at_nobarrier(cl, cached_dev_read_complete, NULL); +} + +static int cached_dev_cache_miss(struct btree *b, struct search *s, + struct bio *bio, unsigned sectors) +{ + int ret = 0; + unsigned reada; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + struct bio *miss; + + miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); + if (!miss) + return -EAGAIN; + + if (miss == bio) + s->op.lookup_done = true; + + miss->bi_end_io = request_endio; + 
miss->bi_private = &s->cl; + + if (s->cache_miss || s->op.skip) + goto out_submit; + + if (miss != bio || + (bio->bi_rw & REQ_RAHEAD) || + (bio->bi_rw & REQ_META) || + s->op.c->gc_stats.in_use >= CUTOFF_CACHE_READA) + reada = 0; + else { + reada = min(dc->readahead >> 9, + sectors - bio_sectors(miss)); + + if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev)) + reada = bdev_sectors(miss->bi_bdev) - bio_end(miss); + } + + s->cache_bio_sectors = bio_sectors(miss) + reada; + s->op.cache_bio = bio_alloc_bioset(GFP_NOWAIT, + DIV_ROUND_UP(s->cache_bio_sectors, PAGE_SECTORS), + dc->disk.bio_split); + + if (!s->op.cache_bio) + goto out_submit; + + s->op.cache_bio->bi_sector = miss->bi_sector; + s->op.cache_bio->bi_bdev = miss->bi_bdev; + s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; + + s->op.cache_bio->bi_end_io = request_endio; + s->op.cache_bio->bi_private = &s->cl; + + /* btree_search_recurse()'s btree iterator is no good anymore */ + ret = -EINTR; + if (!bch_btree_insert_check_key(b, &s->op, s->op.cache_bio)) + goto out_put; + + bio_map(s->op.cache_bio, NULL); + if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + goto out_put; + + s->cache_miss = miss; + bio_get(s->op.cache_bio); + + trace_bcache_cache_miss(s->orig_bio); + closure_bio_submit(s->op.cache_bio, &s->cl, s->d); + + return ret; +out_put: + bio_put(s->op.cache_bio); + s->op.cache_bio = NULL; +out_submit: + closure_bio_submit(miss, &s->cl, s->d); + return ret; +} + +static void request_read(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + + check_should_skip(dc, s); + closure_call(&s->op.cl, btree_read_async, NULL, cl); + + continue_at(cl, request_read_done_bh, NULL); +} + +/* Process writes */ + +static void cached_dev_write_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + up_read_non_owner(&dc->writeback_lock); + cached_dev_bio_complete(cl); +} + +static bool should_writeback(struct cached_dev *dc, struct bio *bio) +{ + unsigned threshold = (bio->bi_rw & REQ_SYNC) + ? 
CUTOFF_WRITEBACK_SYNC + : CUTOFF_WRITEBACK; + + return !atomic_read(&dc->disk.detaching) && + cache_mode(dc, bio) == CACHE_MODE_WRITEBACK && + dc->disk.c->gc_stats.in_use < threshold; +} + +static void request_write(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + struct bio *bio = &s->bio.bio; + struct bkey start, end; + start = KEY(dc->disk.id, bio->bi_sector, 0); + end = KEY(dc->disk.id, bio_end(bio), 0); + + bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end); + + check_should_skip(dc, s); + down_read_non_owner(&dc->writeback_lock); + + if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) { + s->op.skip = false; + s->writeback = true; + } + + if (bio->bi_rw & REQ_DISCARD) + goto skip; + + if (s->op.skip) + goto skip; + + if (should_writeback(dc, s->orig_bio)) + s->writeback = true; + + if (!s->writeback) { + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + trace_bcache_writethrough(s->orig_bio); + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + trace_bcache_writeback(s->orig_bio); + bch_writeback_add(dc, bio_sectors(bio)); + } +out: + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + continue_at(cl, cached_dev_write_complete, NULL); +skip: + s->op.skip = true; + s->op.cache_bio = s->orig_bio; + bio_get(s->op.cache_bio); + trace_bcache_write_skip(s->orig_bio); + + if ((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + goto out; + + closure_bio_submit(bio, cl, s->d); + goto out; +} + +static void request_nodata(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + struct bio *bio = &s->bio.bio; + + if (bio->bi_rw & REQ_DISCARD) { + request_write(dc, s); + return; + } + + if (s->op.flush_journal) + bch_journal_meta(s->op.c, cl); + + closure_bio_submit(bio, cl, s->d); + + continue_at(cl, cached_dev_bio_complete, NULL); +} + +/* Cached devices - read & write stuff */ + +int bch_get_congested(struct cache_set *c) +{ + int i; + + if (!c->congested_read_threshold_us && + !c->congested_write_threshold_us) + return 0; + + i = (local_clock_us() - c->congested_last_us) / 1024; + if (i < 0) + return 0; + + i += atomic_read(&c->congested); + if (i >= 0) + return 0; + + i += CONGESTED_MAX; + + return i <= 0 ? 
1 : fract_exp_two(i, 6); +} + +static void add_sequential(struct task_struct *t) +{ + ewma_add(t->sequential_io_avg, + t->sequential_io, 8, 0); + + t->sequential_io = 0; +} + +static void check_should_skip(struct cached_dev *dc, struct search *s) +{ + struct hlist_head *iohash(uint64_t k) + { return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; } + + struct cache_set *c = s->op.c; + struct bio *bio = &s->bio.bio; + + long rand; + int cutoff = bch_get_congested(c); + unsigned mode = cache_mode(dc, bio); + + if (atomic_read(&dc->disk.detaching) || + c->gc_stats.in_use > CUTOFF_CACHE_ADD || + (bio->bi_rw & REQ_DISCARD)) + goto skip; + + if (mode == CACHE_MODE_NONE || + (mode == CACHE_MODE_WRITEAROUND && + (bio->bi_rw & REQ_WRITE))) + goto skip; + + if (bio->bi_sector & (c->sb.block_size - 1) || + bio_sectors(bio) & (c->sb.block_size - 1)) { + pr_debug("skipping unaligned io"); + goto skip; + } + + if (!cutoff) { + cutoff = dc->sequential_cutoff >> 9; + + if (!cutoff) + goto rescale; + + if (mode == CACHE_MODE_WRITEBACK && + (bio->bi_rw & REQ_WRITE) && + (bio->bi_rw & REQ_SYNC)) + goto rescale; + } + + if (dc->sequential_merge) { + struct io *i; + + spin_lock(&dc->io_lock); + + hlist_for_each_entry(i, iohash(bio->bi_sector), hash) + if (i->last == bio->bi_sector && + time_before(jiffies, i->jiffies)) + goto found; + + i = list_first_entry(&dc->io_lru, struct io, lru); + + add_sequential(s->task); + i->sequential = 0; +found: + if (i->sequential + bio->bi_size > i->sequential) + i->sequential += bio->bi_size; + + i->last = bio_end(bio); + i->jiffies = jiffies + msecs_to_jiffies(5000); + s->task->sequential_io = i->sequential; + + hlist_del(&i->hash); + hlist_add_head(&i->hash, iohash(i->last)); + list_move_tail(&i->lru, &dc->io_lru); + + spin_unlock(&dc->io_lock); + } else { + s->task->sequential_io = bio->bi_size; + + add_sequential(s->task); + } + + rand = get_random_int(); + cutoff -= bitmap_weight(&rand, BITS_PER_LONG); + + if (cutoff <= (int) (max(s->task->sequential_io, + s->task->sequential_io_avg) >> 9)) + goto skip; + +rescale: + bch_rescale_priorities(c, bio_sectors(bio)); + return; +skip: + bch_mark_sectors_bypassed(s, bio_sectors(bio)); + s->op.skip = true; +} + +static void cached_dev_make_request(struct request_queue *q, struct bio *bio) +{ + struct search *s; + struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + int cpu, rw = bio_data_dir(bio); + + cpu = part_stat_lock(); + part_stat_inc(cpu, &d->disk->part0, ios[rw]); + part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + + bio->bi_bdev = dc->bdev; + bio->bi_sector += BDEV_DATA_START; + + if (cached_dev_get(dc)) { + s = search_alloc(bio, d); + trace_bcache_request_start(s, bio); + + if (!bio_has_data(bio)) + request_nodata(dc, s); + else if (rw) + request_write(dc, s); + else + request_read(dc, s); + } else { + if ((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + bio_endio(bio, 0); + else + bch_generic_make_request(bio, &d->bio_split_hook); + } +} + +static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); +} + +static int cached_dev_congested(void *data, int bits) +{ + struct bcache_device *d = data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + struct request_queue *q = 
bdev_get_queue(dc->bdev); + int ret = 0; + + if (bdi_congested(&q->backing_dev_info, bits)) + return 1; + + if (cached_dev_get(dc)) { + unsigned i; + struct cache *ca; + + for_each_cache(ca, d->c, i) { + q = bdev_get_queue(ca->bdev); + ret |= bdi_congested(&q->backing_dev_info, bits); + } + + cached_dev_put(dc); + } + + return ret; +} + +void bch_cached_dev_request_init(struct cached_dev *dc) +{ + struct gendisk *g = dc->disk.disk; + + g->queue->make_request_fn = cached_dev_make_request; + g->queue->backing_dev_info.congested_fn = cached_dev_congested; + dc->disk.cache_miss = cached_dev_cache_miss; + dc->disk.ioctl = cached_dev_ioctl; +} + +/* Flash backed devices */ + +static int flash_dev_cache_miss(struct btree *b, struct search *s, + struct bio *bio, unsigned sectors) +{ + /* Zero fill bio */ + + while (bio->bi_idx != bio->bi_vcnt) { + struct bio_vec *bv = bio_iovec(bio); + unsigned j = min(bv->bv_len >> 9, sectors); + + void *p = kmap(bv->bv_page); + memset(p + bv->bv_offset, 0, j << 9); + kunmap(bv->bv_page); + + bv->bv_len -= j << 9; + bv->bv_offset += j << 9; + + if (bv->bv_len) + return 0; + + bio->bi_sector += j; + bio->bi_size -= j << 9; + + bio->bi_idx++; + sectors -= j; + } + + s->op.lookup_done = true; + + return 0; +} + +static void flash_dev_make_request(struct request_queue *q, struct bio *bio) +{ + struct search *s; + struct closure *cl; + struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + int cpu, rw = bio_data_dir(bio); + + cpu = part_stat_lock(); + part_stat_inc(cpu, &d->disk->part0, ios[rw]); + part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + + s = search_alloc(bio, d); + cl = &s->cl; + bio = &s->bio.bio; + + trace_bcache_request_start(s, bio); + + if (bio_has_data(bio) && !rw) { + closure_call(&s->op.cl, btree_read_async, NULL, cl); + } else if (bio_has_data(bio) || s->op.skip) { + bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, + &KEY(d->id, bio->bi_sector, 0), + &KEY(d->id, bio_end(bio), 0)); + + s->writeback = true; + s->op.cache_bio = bio; + + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + } else { + /* No data - probably a cache flush */ + if (s->op.flush_journal) + bch_journal_meta(s->op.c, cl); + } + + continue_at(cl, search_free, NULL); +} + +static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + +static int flash_dev_congested(void *data, int bits) +{ + struct bcache_device *d = data; + struct request_queue *q; + struct cache *ca; + unsigned i; + int ret = 0; + + for_each_cache(ca, d->c, i) { + q = bdev_get_queue(ca->bdev); + ret |= bdi_congested(&q->backing_dev_info, bits); + } + + return ret; +} + +void bch_flash_dev_request_init(struct bcache_device *d) +{ + struct gendisk *g = d->disk; + + g->queue->make_request_fn = flash_dev_make_request; + g->queue->backing_dev_info.congested_fn = flash_dev_congested; + d->cache_miss = flash_dev_cache_miss; + d->ioctl = flash_dev_ioctl; +} + +void bch_request_exit(void) +{ +#ifdef CONFIG_CGROUP_BCACHE + cgroup_unload_subsys(&bcache_subsys); +#endif + if (bch_search_cache) + kmem_cache_destroy(bch_search_cache); +} + +int __init bch_request_init(void) +{ + bch_search_cache = KMEM_CACHE(search, 0); + if (!bch_search_cache) + return -ENOMEM; + +#ifdef CONFIG_CGROUP_BCACHE + cgroup_load_subsys(&bcache_subsys); + init_bch_cgroup(&bcache_default_cgroup); + + cgroup_add_cftypes(&bcache_subsys, bch_files); +#endif + return 0; +} diff --git a/drivers/md/bcache/request.h 
b/drivers/md/bcache/request.h
new file mode 100644
index 000000000000..254d9ab5707c
--- /dev/null
+++ b/drivers/md/bcache/request.h
@@ -0,0 +1,61 @@
+#ifndef _BCACHE_REQUEST_H_
+#define _BCACHE_REQUEST_H_
+
+#include <linux/cgroup.h>
+
+struct search {
+	/* Stack frame for bio_complete */
+	struct closure cl;
+
+	struct bcache_device *d;
+	struct task_struct *task;
+
+	struct bbio bio;
+	struct bio *orig_bio;
+	struct bio *cache_miss;
+	unsigned cache_bio_sectors;
+
+	unsigned recoverable:1;
+	unsigned unaligned_bvec:1;
+
+	unsigned write:1;
+	unsigned writeback:1;
+
+	/* IO error returned to s->bio */
+	short error;
+	unsigned long start_time;
+
+	/* Anything past op->keys won't get zeroed in do_bio_hook */
+	struct btree_op op;
+};
+
+void bch_cache_read_endio(struct bio *, int);
+int bch_get_congested(struct cache_set *);
+void bch_insert_data(struct closure *cl);
+void bch_btree_insert_async(struct closure *);
+
+void bch_open_buckets_free(struct cache_set *);
+int bch_open_buckets_alloc(struct cache_set *);
+
+void bch_cached_dev_request_init(struct cached_dev *dc);
+void bch_flash_dev_request_init(struct bcache_device *d);
+
+extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache;
+
+struct bch_cgroup {
+#ifdef CONFIG_CGROUP_BCACHE
+	struct cgroup_subsys_state css;
+#endif
+	/*
+	 * We subtract one from the index into bch_cache_modes[], so that
+	 * default == -1; this makes it so the rest match up with d->cache_mode,
+	 * and we use d->cache_mode if cgrp->cache_mode < 0
+	 */
+	short cache_mode;
+	bool verify;
+	struct cache_stat_collector stats;
+};
+
+struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio);
+
+#endif /* _BCACHE_REQUEST_H_ */
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
new file mode 100644
index 000000000000..bf6cf9518c89
--- /dev/null
+++ b/drivers/md/bcache/stats.c
@@ -0,0 +1,245 @@
+/*
+ * bcache stats code
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcache.h"
+#include "stats.h"
+#include "btree.h"
+#include "request.h"
+#include "sysfs.h"
+
+/*
+ * We keep absolute totals of various statistics, and additionally a set of
+ * three rolling averages.
+ *
+ * Every so often, a timer goes off and rescales the rolling averages.
+ * The *_RESCALE constants below say how many times the timer has to go off
+ * before we rescale each set of numbers; that gets us half lives of 5 minutes,
+ * one hour, and one day.
+ *
+ * accounting_delay is how often the timer goes off - 22 times in 5 minutes,
+ * and accounting_weight is what we use to rescale:
+ *
+ * pow(31 / 32, 22) ~= 1/2
+ *
+ * So that we don't have to increment each set of numbers every time we (say)
+ * get a cache hit, we increment a single atomic_t in acc->collector, and when
+ * the rescale function runs it resets the atomic counter to 0 and adds its
+ * old value to each of the exported numbers.
+ *
+ * To reduce rounding error, the numbers in struct cache_stats are all
+ * stored left shifted by 16, and scaled back in the sysfs show() function.
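+ *
+ * Worked example of the half lives, derived from the constants below: the
+ * five minute counters rescale on every timer tick (FIVE_MINUTE_RESCALE = 1),
+ * so over 5 minutes they get multiplied by 31/32 twenty-two times - a factor
+ * of roughly 0.5. The hour counters rescale on every 12th tick and the day
+ * counters on every 288th, giving the same decay per hour and per day.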
+ */ + +static const unsigned DAY_RESCALE = 288; +static const unsigned HOUR_RESCALE = 12; +static const unsigned FIVE_MINUTE_RESCALE = 1; +static const unsigned accounting_delay = (HZ * 300) / 22; +static const unsigned accounting_weight = 32; + +/* sysfs reading/writing */ + +read_attribute(cache_hits); +read_attribute(cache_misses); +read_attribute(cache_bypass_hits); +read_attribute(cache_bypass_misses); +read_attribute(cache_hit_ratio); +read_attribute(cache_readaheads); +read_attribute(cache_miss_collisions); +read_attribute(bypassed); + +SHOW(bch_stats) +{ + struct cache_stats *s = + container_of(kobj, struct cache_stats, kobj); +#define var(stat) (s->stat >> 16) + var_print(cache_hits); + var_print(cache_misses); + var_print(cache_bypass_hits); + var_print(cache_bypass_misses); + + sysfs_print(cache_hit_ratio, + DIV_SAFE(var(cache_hits) * 100, + var(cache_hits) + var(cache_misses))); + + var_print(cache_readaheads); + var_print(cache_miss_collisions); + sysfs_hprint(bypassed, var(sectors_bypassed) << 9); +#undef var + return 0; +} + +STORE(bch_stats) +{ + return size; +} + +static void bch_stats_release(struct kobject *k) +{ +} + +static struct attribute *bch_stats_files[] = { + &sysfs_cache_hits, + &sysfs_cache_misses, + &sysfs_cache_bypass_hits, + &sysfs_cache_bypass_misses, + &sysfs_cache_hit_ratio, + &sysfs_cache_readaheads, + &sysfs_cache_miss_collisions, + &sysfs_bypassed, + NULL +}; +static KTYPE(bch_stats); + +static void scale_accounting(unsigned long data); + +void bch_cache_accounting_init(struct cache_accounting *acc, struct closure *parent) +{ + kobject_init(&acc->total.kobj, &bch_stats_ktype); + kobject_init(&acc->five_minute.kobj, &bch_stats_ktype); + kobject_init(&acc->hour.kobj, &bch_stats_ktype); + kobject_init(&acc->day.kobj, &bch_stats_ktype); + + closure_init(&acc->cl, parent); + init_timer(&acc->timer); + acc->timer.expires = jiffies + accounting_delay; + acc->timer.data = (unsigned long) acc; + acc->timer.function = scale_accounting; + add_timer(&acc->timer); +} + +int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, + struct kobject *parent) +{ + int ret = kobject_add(&acc->total.kobj, parent, + "stats_total"); + ret = ret ?: kobject_add(&acc->five_minute.kobj, parent, + "stats_five_minute"); + ret = ret ?: kobject_add(&acc->hour.kobj, parent, + "stats_hour"); + ret = ret ?: kobject_add(&acc->day.kobj, parent, + "stats_day"); + return ret; +} + +void bch_cache_accounting_clear(struct cache_accounting *acc) +{ + memset(&acc->total.cache_hits, + 0, + sizeof(unsigned long) * 7); +} + +void bch_cache_accounting_destroy(struct cache_accounting *acc) +{ + kobject_put(&acc->total.kobj); + kobject_put(&acc->five_minute.kobj); + kobject_put(&acc->hour.kobj); + kobject_put(&acc->day.kobj); + + atomic_set(&acc->closing, 1); + if (del_timer_sync(&acc->timer)) + closure_return(&acc->cl); +} + +/* EWMA scaling */ + +static void scale_stat(unsigned long *stat) +{ + *stat = ewma_add(*stat, 0, accounting_weight, 0); +} + +static void scale_stats(struct cache_stats *stats, unsigned long rescale_at) +{ + if (++stats->rescale == rescale_at) { + stats->rescale = 0; + scale_stat(&stats->cache_hits); + scale_stat(&stats->cache_misses); + scale_stat(&stats->cache_bypass_hits); + scale_stat(&stats->cache_bypass_misses); + scale_stat(&stats->cache_readaheads); + scale_stat(&stats->cache_miss_collisions); + scale_stat(&stats->sectors_bypassed); + } +} + +static void scale_accounting(unsigned long data) +{ + struct cache_accounting *acc = (struct cache_accounting *) data; + 
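+	/*
+	 * Note the << 16 in move_stat() below: collector counts are plain
+	 * event counts, but the struct cache_stats fields hold 16.16 fixed
+	 * point values so that repeated scaling by 31/32 loses as little
+	 * precision as possible. E.g. a single cache hit arrives here as
+	 * 1 << 16 = 65536 and is shown by sysfs as 65536 >> 16 = 1 again.
+	 */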
+#define move_stat(name) do { \ + unsigned t = atomic_xchg(&acc->collector.name, 0); \ + t <<= 16; \ + acc->five_minute.name += t; \ + acc->hour.name += t; \ + acc->day.name += t; \ + acc->total.name += t; \ +} while (0) + + move_stat(cache_hits); + move_stat(cache_misses); + move_stat(cache_bypass_hits); + move_stat(cache_bypass_misses); + move_stat(cache_readaheads); + move_stat(cache_miss_collisions); + move_stat(sectors_bypassed); + + scale_stats(&acc->total, 0); + scale_stats(&acc->day, DAY_RESCALE); + scale_stats(&acc->hour, HOUR_RESCALE); + scale_stats(&acc->five_minute, FIVE_MINUTE_RESCALE); + + acc->timer.expires += accounting_delay; + + if (!atomic_read(&acc->closing)) + add_timer(&acc->timer); + else + closure_return(&acc->cl); +} + +static void mark_cache_stats(struct cache_stat_collector *stats, + bool hit, bool bypass) +{ + if (!bypass) + if (hit) + atomic_inc(&stats->cache_hits); + else + atomic_inc(&stats->cache_misses); + else + if (hit) + atomic_inc(&stats->cache_bypass_hits); + else + atomic_inc(&stats->cache_bypass_misses); +} + +void bch_mark_cache_accounting(struct search *s, bool hit, bool bypass) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + mark_cache_stats(&dc->accounting.collector, hit, bypass); + mark_cache_stats(&s->op.c->accounting.collector, hit, bypass); +#ifdef CONFIG_CGROUP_BCACHE + mark_cache_stats(&(bch_bio_to_cgroup(s->orig_bio)->stats), hit, bypass); +#endif +} + +void bch_mark_cache_readahead(struct search *s) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + atomic_inc(&dc->accounting.collector.cache_readaheads); + atomic_inc(&s->op.c->accounting.collector.cache_readaheads); +} + +void bch_mark_cache_miss_collision(struct search *s) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + atomic_inc(&dc->accounting.collector.cache_miss_collisions); + atomic_inc(&s->op.c->accounting.collector.cache_miss_collisions); +} + +void bch_mark_sectors_bypassed(struct search *s, int sectors) +{ + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + atomic_add(sectors, &dc->accounting.collector.sectors_bypassed); + atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed); +} diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h new file mode 100644 index 000000000000..c7c7a8fd29fe --- /dev/null +++ b/drivers/md/bcache/stats.h @@ -0,0 +1,58 @@ +#ifndef _BCACHE_STATS_H_ +#define _BCACHE_STATS_H_ + +struct cache_stat_collector { + atomic_t cache_hits; + atomic_t cache_misses; + atomic_t cache_bypass_hits; + atomic_t cache_bypass_misses; + atomic_t cache_readaheads; + atomic_t cache_miss_collisions; + atomic_t sectors_bypassed; +}; + +struct cache_stats { + struct kobject kobj; + + unsigned long cache_hits; + unsigned long cache_misses; + unsigned long cache_bypass_hits; + unsigned long cache_bypass_misses; + unsigned long cache_readaheads; + unsigned long cache_miss_collisions; + unsigned long sectors_bypassed; + + unsigned rescale; +}; + +struct cache_accounting { + struct closure cl; + struct timer_list timer; + atomic_t closing; + + struct cache_stat_collector collector; + + struct cache_stats total; + struct cache_stats five_minute; + struct cache_stats hour; + struct cache_stats day; +}; + +struct search; + +void bch_cache_accounting_init(struct cache_accounting *acc, + struct closure *parent); + +int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, + struct kobject *parent); + +void bch_cache_accounting_clear(struct 
cache_accounting *acc); + +void bch_cache_accounting_destroy(struct cache_accounting *acc); + +void bch_mark_cache_accounting(struct search *s, bool hit, bool bypass); +void bch_mark_cache_readahead(struct search *s); +void bch_mark_cache_miss_collision(struct search *s); +void bch_mark_sectors_bypassed(struct search *s, int sectors); + +#endif /* _BCACHE_STATS_H_ */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c new file mode 100644 index 000000000000..31ef47f1f3b6 --- /dev/null +++ b/drivers/md/bcache/super.c @@ -0,0 +1,1941 @@ +/* + * bcache setup/teardown code, and some metadata io - read a superblock and + * figure out what to do with it. + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kent Overstreet "); + +static const char bcache_magic[] = { + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 +}; + +static const char invalid_uuid[] = { + 0xa0, 0x3e, 0xf8, 0xed, 0x3e, 0xe1, 0xb8, 0x78, + 0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99 +}; + +/* Default is -1; we skip past it for struct cached_dev's cache mode */ +const char * const bch_cache_modes[] = { + "default", + "writethrough", + "writeback", + "writearound", + "none", + NULL +}; + +struct uuid_entry_v0 { + uint8_t uuid[16]; + uint8_t label[32]; + uint32_t first_reg; + uint32_t last_reg; + uint32_t invalidated; + uint32_t pad; +}; + +static struct kobject *bcache_kobj; +struct mutex bch_register_lock; +LIST_HEAD(bch_cache_sets); +static LIST_HEAD(uncached_devices); + +static int bcache_major, bcache_minor; +static wait_queue_head_t unregister_wait; +struct workqueue_struct *bcache_wq; + +#define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) + +static void bio_split_pool_free(struct bio_split_pool *p) +{ + if (p->bio_split) + bioset_free(p->bio_split); + +} + +static int bio_split_pool_init(struct bio_split_pool *p) +{ + p->bio_split = bioset_create(4, 0); + if (!p->bio_split) + return -ENOMEM; + + p->bio_split_hook = mempool_create_kmalloc_pool(4, + sizeof(struct bio_split_hook)); + if (!p->bio_split_hook) + return -ENOMEM; + + return 0; +} + +/* Superblock */ + +static const char *read_super(struct cache_sb *sb, struct block_device *bdev, + struct page **res) +{ + const char *err; + struct cache_sb *s; + struct buffer_head *bh = __bread(bdev, 1, SB_SIZE); + unsigned i; + + if (!bh) + return "IO error"; + + s = (struct cache_sb *) bh->b_data; + + sb->offset = le64_to_cpu(s->offset); + sb->version = le64_to_cpu(s->version); + + memcpy(sb->magic, s->magic, 16); + memcpy(sb->uuid, s->uuid, 16); + memcpy(sb->set_uuid, s->set_uuid, 16); + memcpy(sb->label, s->label, SB_LABEL_SIZE); + + sb->flags = le64_to_cpu(s->flags); + sb->seq = le64_to_cpu(s->seq); + + sb->nbuckets = le64_to_cpu(s->nbuckets); + sb->block_size = le16_to_cpu(s->block_size); + sb->bucket_size = le16_to_cpu(s->bucket_size); + + sb->nr_in_set = le16_to_cpu(s->nr_in_set); + sb->nr_this_dev = le16_to_cpu(s->nr_this_dev); + sb->last_mount = le32_to_cpu(s->last_mount); + + sb->first_bucket = le16_to_cpu(s->first_bucket); + sb->keys = le16_to_cpu(s->keys); + + for (i = 0; i < SB_JOURNAL_BUCKETS; i++) + sb->d[i] = le64_to_cpu(s->d[i]); + + pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u", + sb->version, sb->flags, sb->seq, sb->keys); + + err = "Not a bcache 
superblock"; + if (sb->offset != SB_SECTOR) + goto err; + + if (memcmp(sb->magic, bcache_magic, 16)) + goto err; + + err = "Too many journal buckets"; + if (sb->keys > SB_JOURNAL_BUCKETS) + goto err; + + err = "Bad checksum"; + if (s->csum != csum_set(s)) + goto err; + + err = "Bad UUID"; + if (is_zero(sb->uuid, 16)) + goto err; + + err = "Unsupported superblock version"; + if (sb->version > BCACHE_SB_VERSION) + goto err; + + err = "Bad block/bucket size"; + if (!is_power_of_2(sb->block_size) || sb->block_size > PAGE_SECTORS || + !is_power_of_2(sb->bucket_size) || sb->bucket_size < PAGE_SECTORS) + goto err; + + err = "Too many buckets"; + if (sb->nbuckets > LONG_MAX) + goto err; + + err = "Not enough buckets"; + if (sb->nbuckets < 1 << 7) + goto err; + + err = "Invalid superblock: device too small"; + if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets) + goto err; + + if (sb->version == CACHE_BACKING_DEV) + goto out; + + err = "Bad UUID"; + if (is_zero(sb->set_uuid, 16)) + goto err; + + err = "Bad cache device number in set"; + if (!sb->nr_in_set || + sb->nr_in_set <= sb->nr_this_dev || + sb->nr_in_set > MAX_CACHES_PER_SET) + goto err; + + err = "Journal buckets not sequential"; + for (i = 0; i < sb->keys; i++) + if (sb->d[i] != sb->first_bucket + i) + goto err; + + err = "Too many journal buckets"; + if (sb->first_bucket + sb->keys > sb->nbuckets) + goto err; + + err = "Invalid superblock: first bucket comes before end of super"; + if (sb->first_bucket * sb->bucket_size < 16) + goto err; +out: + sb->last_mount = get_seconds(); + err = NULL; + + get_page(bh->b_page); + *res = bh->b_page; +err: + put_bh(bh); + return err; +} + +static void write_bdev_super_endio(struct bio *bio, int error) +{ + struct cached_dev *dc = bio->bi_private; + /* XXX: error checking */ + + closure_put(&dc->sb_write.cl); +} + +static void __write_super(struct cache_sb *sb, struct bio *bio) +{ + struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page); + unsigned i; + + bio->bi_sector = SB_SECTOR; + bio->bi_rw = REQ_SYNC|REQ_META; + bio->bi_size = SB_SIZE; + bio_map(bio, NULL); + + out->offset = cpu_to_le64(sb->offset); + out->version = cpu_to_le64(sb->version); + + memcpy(out->uuid, sb->uuid, 16); + memcpy(out->set_uuid, sb->set_uuid, 16); + memcpy(out->label, sb->label, SB_LABEL_SIZE); + + out->flags = cpu_to_le64(sb->flags); + out->seq = cpu_to_le64(sb->seq); + + out->last_mount = cpu_to_le32(sb->last_mount); + out->first_bucket = cpu_to_le16(sb->first_bucket); + out->keys = cpu_to_le16(sb->keys); + + for (i = 0; i < sb->keys; i++) + out->d[i] = cpu_to_le64(sb->d[i]); + + out->csum = csum_set(out); + + pr_debug("ver %llu, flags %llu, seq %llu", + sb->version, sb->flags, sb->seq); + + submit_bio(REQ_WRITE, bio); +} + +void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) +{ + struct closure *cl = &dc->sb_write.cl; + struct bio *bio = &dc->sb_bio; + + closure_lock(&dc->sb_write, parent); + + bio_reset(bio); + bio->bi_bdev = dc->bdev; + bio->bi_end_io = write_bdev_super_endio; + bio->bi_private = dc; + + closure_get(cl); + __write_super(&dc->sb, bio); + + closure_return(cl); +} + +static void write_super_endio(struct bio *bio, int error) +{ + struct cache *ca = bio->bi_private; + + bch_count_io_errors(ca, error, "writing superblock"); + closure_put(&ca->set->sb_write.cl); +} + +void bcache_write_super(struct cache_set *c) +{ + struct closure *cl = &c->sb_write.cl; + struct cache *ca; + unsigned i; + + closure_lock(&c->sb_write, &c->cl); + + c->sb.seq++; + + for_each_cache(ca, 
c, i) { + struct bio *bio = &ca->sb_bio; + + ca->sb.version = BCACHE_SB_VERSION; + ca->sb.seq = c->sb.seq; + ca->sb.last_mount = c->sb.last_mount; + + SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); + + bio_reset(bio); + bio->bi_bdev = ca->bdev; + bio->bi_end_io = write_super_endio; + bio->bi_private = ca; + + closure_get(cl); + __write_super(&ca->sb, bio); + } + + closure_return(cl); +} + +/* UUID io */ + +static void uuid_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct cache_set *c = container_of(cl, struct cache_set, uuid_write.cl); + + cache_set_err_on(error, c, "accessing uuids"); + bch_bbio_free(bio, c); + closure_put(cl); +} + +static void uuid_io(struct cache_set *c, unsigned long rw, + struct bkey *k, struct closure *parent) +{ + struct closure *cl = &c->uuid_write.cl; + struct uuid_entry *u; + unsigned i; + + BUG_ON(!parent); + closure_lock(&c->uuid_write, parent); + + for (i = 0; i < KEY_PTRS(k); i++) { + struct bio *bio = bch_bbio_alloc(c); + + bio->bi_rw = REQ_SYNC|REQ_META|rw; + bio->bi_size = KEY_SIZE(k) << 9; + + bio->bi_end_io = uuid_endio; + bio->bi_private = cl; + bio_map(bio, c->uuids); + + bch_submit_bbio(bio, c, k, i); + + if (!(rw & WRITE)) + break; + } + + pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", + pkey(&c->uuid_bucket)); + + for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) + if (!is_zero(u->uuid, 16)) + pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u", + u - c->uuids, u->uuid, u->label, + u->first_reg, u->last_reg, u->invalidated); + + closure_return(cl); +} + +static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) +{ + struct bkey *k = &j->uuid_bucket; + + if (__bch_ptr_invalid(c, 1, k)) + return "bad uuid pointer"; + + bkey_copy(&c->uuid_bucket, k); + uuid_io(c, READ_SYNC, k, cl); + + if (j->version < BCACHE_JSET_VERSION_UUIDv1) { + struct uuid_entry_v0 *u0 = (void *) c->uuids; + struct uuid_entry *u1 = (void *) c->uuids; + int i; + + closure_sync(cl); + + /* + * Since the new uuid entry is bigger than the old, we have to + * convert starting at the highest memory address and work down + * in order to do it in place + */ + + for (i = c->nr_uuids - 1; + i >= 0; + --i) { + memcpy(u1[i].uuid, u0[i].uuid, 16); + memcpy(u1[i].label, u0[i].label, 32); + + u1[i].first_reg = u0[i].first_reg; + u1[i].last_reg = u0[i].last_reg; + u1[i].invalidated = u0[i].invalidated; + + u1[i].flags = 0; + u1[i].sectors = 0; + } + } + + return NULL; +} + +static int __uuid_write(struct cache_set *c) +{ + BKEY_PADDED(key) k; + struct closure cl; + closure_init_stack(&cl); + + lockdep_assert_held(&bch_register_lock); + + if (bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, &cl)) + return 1; + + SET_KEY_SIZE(&k.key, c->sb.bucket_size); + uuid_io(c, REQ_WRITE, &k.key, &cl); + closure_sync(&cl); + + bkey_copy(&c->uuid_bucket, &k.key); + __bkey_put(c, &k.key); + return 0; +} + +int bch_uuid_write(struct cache_set *c) +{ + int ret = __uuid_write(c); + + if (!ret) + bch_journal_meta(c, NULL); + + return ret; +} + +static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) +{ + struct uuid_entry *u; + + for (u = c->uuids; + u < c->uuids + c->nr_uuids; u++) + if (!memcmp(u->uuid, uuid, 16)) + return u; + + return NULL; +} + +static struct uuid_entry *uuid_find_empty(struct cache_set *c) +{ + static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + return uuid_find(c, zero_uuid); +} + +/* + * Bucket priorities/gens: + * + * For each bucket, we store on disk its + * 8 bit gen 
+ * 16 bit priority + * + * See alloc.c for an explanation of the gen. The priority is used to implement + * lru (and in the future other) cache replacement policies; for most purposes + * it's just an opaque integer. + * + * The gens and the priorities don't have a whole lot to do with each other, and + * it's actually the gens that must be written out at specific times - it's no + * big deal if the priorities don't get written, if we lose them we just reuse + * buckets in suboptimal order. + * + * On disk they're stored in a packed array, and in as many buckets are required + * to fit them all. The buckets we use to store them form a list; the journal + * header points to the first bucket, the first bucket points to the second + * bucket, et cetera. + * + * This code is used by the allocation code; periodically (whenever it runs out + * of buckets to allocate from) the allocation code will invalidate some + * buckets, but it can't use those buckets until their new gens are safely on + * disk. + */ + +static void prio_endio(struct bio *bio, int error) +{ + struct cache *ca = bio->bi_private; + + cache_set_err_on(error, ca->set, "accessing priorities"); + bch_bbio_free(bio, ca->set); + closure_put(&ca->prio); +} + +static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) +{ + struct closure *cl = &ca->prio; + struct bio *bio = bch_bbio_alloc(ca->set); + + closure_init_stack(cl); + + bio->bi_sector = bucket * ca->sb.bucket_size; + bio->bi_bdev = ca->bdev; + bio->bi_rw = REQ_SYNC|REQ_META|rw; + bio->bi_size = bucket_bytes(ca); + + bio->bi_end_io = prio_endio; + bio->bi_private = ca; + bio_map(bio, ca->disk_buckets); + + closure_bio_submit(bio, &ca->prio, ca); + closure_sync(cl); +} + +#define buckets_free(c) "free %zu, free_inc %zu, unused %zu", \ + fifo_used(&c->free), fifo_used(&c->free_inc), fifo_used(&c->unused) + +void bch_prio_write(struct cache *ca) +{ + int i; + struct bucket *b; + struct closure cl; + + closure_init_stack(&cl); + + lockdep_assert_held(&ca->set->bucket_lock); + + for (b = ca->buckets; + b < ca->buckets + ca->sb.nbuckets; b++) + b->disk_gen = b->gen; + + ca->disk_buckets->seq++; + + atomic_long_add(ca->sb.bucket_size * prio_buckets(ca), + &ca->meta_sectors_written); + + pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free), + fifo_used(&ca->free_inc), fifo_used(&ca->unused)); + blktrace_msg(ca, "Starting priorities: " buckets_free(ca)); + + for (i = prio_buckets(ca) - 1; i >= 0; --i) { + long bucket; + struct prio_set *p = ca->disk_buckets; + struct bucket_disk *d = p->data, *end = d + prios_per_bucket(ca); + + for (b = ca->buckets + i * prios_per_bucket(ca); + b < ca->buckets + ca->sb.nbuckets && d < end; + b++, d++) { + d->prio = cpu_to_le16(b->prio); + d->gen = b->gen; + } + + p->next_bucket = ca->prio_buckets[i + 1]; + p->magic = pset_magic(ca); + p->csum = crc64(&p->magic, bucket_bytes(ca) - 8); + + bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, &cl); + BUG_ON(bucket == -1); + + mutex_unlock(&ca->set->bucket_lock); + prio_io(ca, bucket, REQ_WRITE); + mutex_lock(&ca->set->bucket_lock); + + ca->prio_buckets[i] = bucket; + atomic_dec_bug(&ca->buckets[bucket].pin); + } + + mutex_unlock(&ca->set->bucket_lock); + + bch_journal_meta(ca->set, &cl); + closure_sync(&cl); + + mutex_lock(&ca->set->bucket_lock); + + ca->need_save_prio = 0; + + /* + * Don't want the old priorities to get garbage collected until after we + * finish writing the new ones, and they're journalled + */ + for (i = 0; i < prio_buckets(ca); i++) + ca->prio_last_buckets[i] = 
ca->prio_buckets[i]; +} + +static void prio_read(struct cache *ca, uint64_t bucket) +{ + struct prio_set *p = ca->disk_buckets; + struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; + struct bucket *b; + unsigned bucket_nr = 0; + + for (b = ca->buckets; + b < ca->buckets + ca->sb.nbuckets; + b++, d++) { + if (d == end) { + ca->prio_buckets[bucket_nr] = bucket; + ca->prio_last_buckets[bucket_nr] = bucket; + bucket_nr++; + + prio_io(ca, bucket, READ_SYNC); + + if (p->csum != crc64(&p->magic, bucket_bytes(ca) - 8)) + pr_warn("bad csum reading priorities"); + + if (p->magic != pset_magic(ca)) + pr_warn("bad magic reading priorities"); + + bucket = p->next_bucket; + d = p->data; + } + + b->prio = le16_to_cpu(d->prio); + b->gen = b->disk_gen = b->last_gc = b->gc_gen = d->gen; + } +} + +/* Bcache device */ + +static int open_dev(struct block_device *b, fmode_t mode) +{ + struct bcache_device *d = b->bd_disk->private_data; + if (atomic_read(&d->closing)) + return -ENXIO; + + closure_get(&d->cl); + return 0; +} + +static int release_dev(struct gendisk *b, fmode_t mode) +{ + struct bcache_device *d = b->private_data; + closure_put(&d->cl); + return 0; +} + +static int ioctl_dev(struct block_device *b, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct bcache_device *d = b->bd_disk->private_data; + return d->ioctl(d, mode, cmd, arg); +} + +static const struct block_device_operations bcache_ops = { + .open = open_dev, + .release = release_dev, + .ioctl = ioctl_dev, + .owner = THIS_MODULE, +}; + +void bcache_device_stop(struct bcache_device *d) +{ + if (!atomic_xchg(&d->closing, 1)) + closure_queue(&d->cl); +} + +static void bcache_device_detach(struct bcache_device *d) +{ + lockdep_assert_held(&bch_register_lock); + + if (atomic_read(&d->detaching)) { + struct uuid_entry *u = d->c->uuids + d->id; + + SET_UUID_FLASH_ONLY(u, 0); + memcpy(u->uuid, invalid_uuid, 16); + u->invalidated = cpu_to_le32(get_seconds()); + bch_uuid_write(d->c); + + atomic_set(&d->detaching, 0); + } + + d->c->devices[d->id] = NULL; + closure_put(&d->c->caching); + d->c = NULL; +} + +static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, + unsigned id) +{ + BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags)); + + d->id = id; + d->c = c; + c->devices[id] = d; + + closure_get(&c->caching); +} + +static void bcache_device_link(struct bcache_device *d, struct cache_set *c, + const char *name) +{ + snprintf(d->name, BCACHEDEVNAME_SIZE, + "%s%u", name, d->id); + + WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || + sysfs_create_link(&c->kobj, &d->kobj, d->name), + "Couldn't create device <-> cache set symlinks"); +} + +static void bcache_device_free(struct bcache_device *d) +{ + lockdep_assert_held(&bch_register_lock); + + pr_info("%s stopped", d->disk->disk_name); + + if (d->c) + bcache_device_detach(d); + + if (d->disk) + del_gendisk(d->disk); + if (d->disk && d->disk->queue) + blk_cleanup_queue(d->disk->queue); + if (d->disk) + put_disk(d->disk); + + bio_split_pool_free(&d->bio_split_hook); + if (d->unaligned_bvec) + mempool_destroy(d->unaligned_bvec); + if (d->bio_split) + bioset_free(d->bio_split); + + closure_debug_destroy(&d->cl); +} + +static int bcache_device_init(struct bcache_device *d, unsigned block_size) +{ + struct request_queue *q; + + if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || + !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, + sizeof(struct bio_vec) * BIO_MAX_PAGES)) || + bio_split_pool_init(&d->bio_split_hook)) + + return -ENOMEM; + + 
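+	/*
+	 * Minor numbers are handed out from the bare bcache_minor counter
+	 * used below, so devices are named bcache0, bcache1, ... in
+	 * registration order, and a minor is not reused while the module
+	 * stays loaded - the counter only ever increments.
+	 */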
d->disk = alloc_disk(1); + if (!d->disk) + return -ENOMEM; + + snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); + + d->disk->major = bcache_major; + d->disk->first_minor = bcache_minor++; + d->disk->fops = &bcache_ops; + d->disk->private_data = d; + + q = blk_alloc_queue(GFP_KERNEL); + if (!q) + return -ENOMEM; + + blk_queue_make_request(q, NULL); + d->disk->queue = q; + q->queuedata = d; + q->backing_dev_info.congested_data = d; + q->limits.max_hw_sectors = UINT_MAX; + q->limits.max_sectors = UINT_MAX; + q->limits.max_segment_size = UINT_MAX; + q->limits.max_segments = BIO_MAX_PAGES; + q->limits.max_discard_sectors = UINT_MAX; + q->limits.io_min = block_size; + q->limits.logical_block_size = block_size; + q->limits.physical_block_size = block_size; + set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); + set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + + return 0; +} + +/* Cached device */ + +static void calc_cached_dev_sectors(struct cache_set *c) +{ + uint64_t sectors = 0; + struct cached_dev *dc; + + list_for_each_entry(dc, &c->cached_devs, list) + sectors += bdev_sectors(dc->bdev); + + c->cached_dev_sectors = sectors; +} + +void bch_cached_dev_run(struct cached_dev *dc) +{ + struct bcache_device *d = &dc->disk; + + if (atomic_xchg(&dc->running, 1)) + return; + + if (!d->c && + BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { + struct closure cl; + closure_init_stack(&cl); + + SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE); + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + } + + add_disk(d->disk); +#if 0 + char *env[] = { "SYMLINK=label" , NULL }; + kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); +#endif + if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || + sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) + pr_debug("error creating sysfs link"); +} + +static void cached_dev_detach_finish(struct work_struct *w) +{ + struct cached_dev *dc = container_of(w, struct cached_dev, detach); + char buf[BDEVNAME_SIZE]; + struct closure cl; + closure_init_stack(&cl); + + BUG_ON(!atomic_read(&dc->disk.detaching)); + BUG_ON(atomic_read(&dc->count)); + + sysfs_remove_link(&dc->disk.c->kobj, dc->disk.name); + sysfs_remove_link(&dc->disk.kobj, "cache"); + + mutex_lock(&bch_register_lock); + + memset(&dc->sb.set_uuid, 0, 16); + SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); + + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + + bcache_device_detach(&dc->disk); + list_move(&dc->list, &uncached_devices); + + mutex_unlock(&bch_register_lock); + + pr_info("Caching disabled for %s", bdevname(dc->bdev, buf)); + + /* Drop ref we took in cached_dev_detach() */ + closure_put(&dc->disk.cl); +} + +void bch_cached_dev_detach(struct cached_dev *dc) +{ + lockdep_assert_held(&bch_register_lock); + + if (atomic_read(&dc->disk.closing)) + return; + + if (atomic_xchg(&dc->disk.detaching, 1)) + return; + + /* + * Block the device from being closed and freed until we're finished + * detaching + */ + closure_get(&dc->disk.cl); + + bch_writeback_queue(dc); + cached_dev_put(dc); +} + +int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) +{ + uint32_t rtime = cpu_to_le32(get_seconds()); + struct uuid_entry *u; + char buf[BDEVNAME_SIZE]; + + bdevname(dc->bdev, buf); + + if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)) + return -ENOENT; + + if (dc->disk.c) { + pr_err("Can't attach %s: already attached", buf); + return -EINVAL; + } + + if (test_bit(CACHE_SET_STOPPING, &c->flags)) { + pr_err("Can't attach %s: shutting 
down", buf); + return -EINVAL; + } + + if (dc->sb.block_size < c->sb.block_size) { + /* Will die */ + pr_err("Couldn't attach %s: block size " + "less than set's block size", buf); + return -EINVAL; + } + + u = uuid_find(c, dc->sb.uuid); + + if (u && + (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE || + BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) { + memcpy(u->uuid, invalid_uuid, 16); + u->invalidated = cpu_to_le32(get_seconds()); + u = NULL; + } + + if (!u) { + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { + pr_err("Couldn't find uuid for %s in set", buf); + return -ENOENT; + } + + u = uuid_find_empty(c); + if (!u) { + pr_err("Not caching %s, no room for UUID", buf); + return -EINVAL; + } + } + + /* Deadlocks since we're called via sysfs... + sysfs_remove_file(&dc->kobj, &sysfs_attach); + */ + + if (is_zero(u->uuid, 16)) { + struct closure cl; + closure_init_stack(&cl); + + memcpy(u->uuid, dc->sb.uuid, 16); + memcpy(u->label, dc->sb.label, SB_LABEL_SIZE); + u->first_reg = u->last_reg = rtime; + bch_uuid_write(c); + + memcpy(dc->sb.set_uuid, c->sb.set_uuid, 16); + SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); + + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + } else { + u->last_reg = rtime; + bch_uuid_write(c); + } + + bcache_device_attach(&dc->disk, c, u - c->uuids); + bcache_device_link(&dc->disk, c, "bdev"); + list_move(&dc->list, &c->cached_devs); + calc_cached_dev_sectors(c); + + smp_wmb(); + /* + * dc->c must be set before dc->count != 0 - paired with the mb in + * cached_dev_get() + */ + atomic_set(&dc->count, 1); + + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { + atomic_set(&dc->has_dirty, 1); + atomic_inc(&dc->count); + bch_writeback_queue(dc); + } + + bch_cached_dev_run(dc); + + pr_info("Caching %s as %s on set %pU", + bdevname(dc->bdev, buf), dc->disk.disk->disk_name, + dc->disk.c->sb.set_uuid); + return 0; +} + +void bch_cached_dev_release(struct kobject *kobj) +{ + struct cached_dev *dc = container_of(kobj, struct cached_dev, + disk.kobj); + kfree(dc); + module_put(THIS_MODULE); +} + +static void cached_dev_free(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); + + cancel_delayed_work_sync(&dc->writeback_rate_update); + + mutex_lock(&bch_register_lock); + + bcache_device_free(&dc->disk); + list_del(&dc->list); + + mutex_unlock(&bch_register_lock); + + if (!IS_ERR_OR_NULL(dc->bdev)) { + blk_sync_queue(bdev_get_queue(dc->bdev)); + blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + } + + wake_up(&unregister_wait); + + kobject_put(&dc->disk.kobj); +} + +static void cached_dev_flush(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); + struct bcache_device *d = &dc->disk; + + bch_cache_accounting_destroy(&dc->accounting); + kobject_del(&d->kobj); + + continue_at(cl, cached_dev_free, system_wq); +} + +static int cached_dev_init(struct cached_dev *dc, unsigned block_size) +{ + int err; + struct io *io; + + closure_init(&dc->disk.cl, NULL); + set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); + + __module_get(THIS_MODULE); + INIT_LIST_HEAD(&dc->list); + kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); + + bch_cache_accounting_init(&dc->accounting, &dc->disk.cl); + + err = bcache_device_init(&dc->disk, block_size); + if (err) + goto err; + + spin_lock_init(&dc->io_lock); + closure_init_unlocked(&dc->sb_write); + INIT_WORK(&dc->detach, cached_dev_detach_finish); + + dc->sequential_merge = true; + dc->sequential_cutoff = 4 << 20; + + INIT_LIST_HEAD(&dc->io_lru); + 
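+	/*
+	 * io_lru/io_hash remember the last RECENT_IO IOs so that
+	 * check_should_skip() can recognize sequential streams. The loop
+	 * below parks every entry on the extra hash bucket at index
+	 * RECENT_IO - presumably one past the buckets iohash() can hand
+	 * out - so entries are effectively unhashed until first used.
+	 */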
dc->sb_bio.bi_max_vecs = 1; + dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs; + + for (io = dc->io; io < dc->io + RECENT_IO; io++) { + list_add(&io->lru, &dc->io_lru); + hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); + } + + bch_writeback_init_cached_dev(dc); + return 0; +err: + bcache_device_stop(&dc->disk); + return err; +} + +/* Cached device - bcache superblock */ + +static const char *register_bdev(struct cache_sb *sb, struct page *sb_page, + struct block_device *bdev, + struct cached_dev *dc) +{ + char name[BDEVNAME_SIZE]; + const char *err = "cannot allocate memory"; + struct gendisk *g; + struct cache_set *c; + + if (!dc || cached_dev_init(dc, sb->block_size << 9) != 0) + return err; + + memcpy(&dc->sb, sb, sizeof(struct cache_sb)); + dc->sb_bio.bi_io_vec[0].bv_page = sb_page; + dc->bdev = bdev; + dc->bdev->bd_holder = dc; + + g = dc->disk.disk; + + set_capacity(g, dc->bdev->bd_part->nr_sects - 16); + + bch_cached_dev_request_init(dc); + + err = "error creating kobject"; + if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, + "bcache")) + goto err; + if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) + goto err; + + list_add(&dc->list, &uncached_devices); + list_for_each_entry(c, &bch_cache_sets, list) + bch_cached_dev_attach(dc, c); + + if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || + BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) + bch_cached_dev_run(dc); + + return NULL; +err: + kobject_put(&dc->disk.kobj); + pr_notice("error opening %s: %s", bdevname(bdev, name), err); + /* + * Return NULL instead of an error because kobject_put() cleans + * everything up + */ + return NULL; +} + +/* Flash only volumes */ + +void bch_flash_dev_release(struct kobject *kobj) +{ + struct bcache_device *d = container_of(kobj, struct bcache_device, + kobj); + kfree(d); +} + +static void flash_dev_free(struct closure *cl) +{ + struct bcache_device *d = container_of(cl, struct bcache_device, cl); + bcache_device_free(d); + kobject_put(&d->kobj); +} + +static void flash_dev_flush(struct closure *cl) +{ + struct bcache_device *d = container_of(cl, struct bcache_device, cl); + + sysfs_remove_link(&d->c->kobj, d->name); + sysfs_remove_link(&d->kobj, "cache"); + kobject_del(&d->kobj); + continue_at(cl, flash_dev_free, system_wq); +} + +static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) +{ + struct bcache_device *d = kzalloc(sizeof(struct bcache_device), + GFP_KERNEL); + if (!d) + return -ENOMEM; + + closure_init(&d->cl, NULL); + set_closure_fn(&d->cl, flash_dev_flush, system_wq); + + kobject_init(&d->kobj, &bch_flash_dev_ktype); + + if (bcache_device_init(d, block_bytes(c))) + goto err; + + bcache_device_attach(d, c, u - c->uuids); + set_capacity(d->disk, u->sectors); + bch_flash_dev_request_init(d); + add_disk(d->disk); + + if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache")) + goto err; + + bcache_device_link(d, c, "volume"); + + return 0; +err: + kobject_put(&d->kobj); + return -ENOMEM; +} + +static int flash_devs_run(struct cache_set *c) +{ + int ret = 0; + struct uuid_entry *u; + + for (u = c->uuids; + u < c->uuids + c->nr_uuids && !ret; + u++) + if (UUID_FLASH_ONLY(u)) + ret = flash_dev_run(c, u); + + return ret; +} + +int bch_flash_dev_create(struct cache_set *c, uint64_t size) +{ + struct uuid_entry *u; + + if (test_bit(CACHE_SET_STOPPING, &c->flags)) + return -EINTR; + + u = uuid_find_empty(c); + if (!u) { + pr_err("Can't create volume, no room for UUID"); + return -EINVAL; + } + + get_random_bytes(u->uuid, 16); + memset(u->label, 0, 
32); + u->first_reg = u->last_reg = cpu_to_le32(get_seconds()); + + SET_UUID_FLASH_ONLY(u, 1); + u->sectors = size >> 9; + + bch_uuid_write(c); + + return flash_dev_run(c, u); +} + +/* Cache set */ + +__printf(2, 3) +bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) +{ + va_list args; + + if (test_bit(CACHE_SET_STOPPING, &c->flags)) + return false; + + /* XXX: we can be called from atomic context + acquire_console_sem(); + */ + + printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid); + + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + + printk(", disabling caching\n"); + + bch_cache_set_unregister(c); + return true; +} + +void bch_cache_set_release(struct kobject *kobj) +{ + struct cache_set *c = container_of(kobj, struct cache_set, kobj); + kfree(c); + module_put(THIS_MODULE); +} + +static void cache_set_free(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, cl); + struct cache *ca; + unsigned i; + + if (!IS_ERR_OR_NULL(c->debug)) + debugfs_remove(c->debug); + + bch_open_buckets_free(c); + bch_btree_cache_free(c); + bch_journal_free(c); + + for_each_cache(ca, c, i) + if (ca) + kobject_put(&ca->kobj); + + free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); + free_pages((unsigned long) c->sort, ilog2(bucket_pages(c))); + + kfree(c->fill_iter); + if (c->bio_split) + bioset_free(c->bio_split); + if (c->bio_meta) + mempool_destroy(c->bio_meta); + if (c->search) + mempool_destroy(c->search); + kfree(c->devices); + + mutex_lock(&bch_register_lock); + list_del(&c->list); + mutex_unlock(&bch_register_lock); + + pr_info("Cache set %pU unregistered", c->sb.set_uuid); + wake_up(&unregister_wait); + + closure_debug_destroy(&c->cl); + kobject_put(&c->kobj); +} + +static void cache_set_flush(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, caching); + struct btree *b; + + /* Shut down allocator threads */ + set_bit(CACHE_SET_STOPPING_2, &c->flags); + wake_up(&c->alloc_wait); + + bch_cache_accounting_destroy(&c->accounting); + + kobject_put(&c->internal); + kobject_del(&c->kobj); + + if (!IS_ERR_OR_NULL(c->root)) + list_add(&c->root->list, &c->btree_cache); + + /* Should skip this if we're unregistering because of an error */ + list_for_each_entry(b, &c->btree_cache, list) + if (btree_node_dirty(b)) + bch_btree_write(b, true, NULL); + + closure_return(cl); +} + +static void __cache_set_unregister(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, caching); + struct cached_dev *dc, *t; + size_t i; + + mutex_lock(&bch_register_lock); + + if (test_bit(CACHE_SET_UNREGISTERING, &c->flags)) + list_for_each_entry_safe(dc, t, &c->cached_devs, list) + bch_cached_dev_detach(dc); + + for (i = 0; i < c->nr_uuids; i++) + if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i])) + bcache_device_stop(c->devices[i]); + + mutex_unlock(&bch_register_lock); + + continue_at(cl, cache_set_flush, system_wq); +} + +void bch_cache_set_stop(struct cache_set *c) +{ + if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) + closure_queue(&c->caching); +} + +void bch_cache_set_unregister(struct cache_set *c) +{ + set_bit(CACHE_SET_UNREGISTERING, &c->flags); + bch_cache_set_stop(c); +} + +#define alloc_bucket_pages(gfp, c) \ + ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(c)))) + +struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) +{ + int iter_size; + struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); + if (!c) + return NULL; + + __module_get(THIS_MODULE); + 
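+	/*
+	 * Lifetime sketch: c->cl (torn down by cache_set_free()) is the
+	 * outermost reference on the cache set, and c->caching is a child
+	 * closure that attached devices take a ref on via closure_get() in
+	 * bcache_device_attach(); both are initialized just below.
+	 */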
closure_init(&c->cl, NULL); + set_closure_fn(&c->cl, cache_set_free, system_wq); + + closure_init(&c->caching, &c->cl); + set_closure_fn(&c->caching, __cache_set_unregister, system_wq); + + /* Maybe create continue_at_noreturn() and use it here? */ + closure_set_stopped(&c->cl); + closure_put(&c->cl); + + kobject_init(&c->kobj, &bch_cache_set_ktype); + kobject_init(&c->internal, &bch_cache_set_internal_ktype); + + bch_cache_accounting_init(&c->accounting, &c->cl); + + memcpy(c->sb.set_uuid, sb->set_uuid, 16); + c->sb.block_size = sb->block_size; + c->sb.bucket_size = sb->bucket_size; + c->sb.nr_in_set = sb->nr_in_set; + c->sb.last_mount = sb->last_mount; + c->bucket_bits = ilog2(sb->bucket_size); + c->block_bits = ilog2(sb->block_size); + c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); + + c->btree_pages = c->sb.bucket_size / PAGE_SECTORS; + if (c->btree_pages > BTREE_MAX_PAGES) + c->btree_pages = max_t(int, c->btree_pages / 4, + BTREE_MAX_PAGES); + + init_waitqueue_head(&c->alloc_wait); + mutex_init(&c->bucket_lock); + mutex_init(&c->fill_lock); + mutex_init(&c->sort_lock); + spin_lock_init(&c->sort_time_lock); + closure_init_unlocked(&c->sb_write); + closure_init_unlocked(&c->uuid_write); + spin_lock_init(&c->btree_read_time_lock); + bch_moving_init_cache_set(c); + + INIT_LIST_HEAD(&c->list); + INIT_LIST_HEAD(&c->cached_devs); + INIT_LIST_HEAD(&c->btree_cache); + INIT_LIST_HEAD(&c->btree_cache_freeable); + INIT_LIST_HEAD(&c->btree_cache_freed); + INIT_LIST_HEAD(&c->data_buckets); + + c->search = mempool_create_slab_pool(32, bch_search_cache); + if (!c->search) + goto err; + + iter_size = (sb->bucket_size / sb->block_size + 1) * + sizeof(struct btree_iter_set); + + if (!(c->devices = kzalloc(c->nr_uuids * sizeof(void *), GFP_KERNEL)) || + !(c->bio_meta = mempool_create_kmalloc_pool(2, + sizeof(struct bbio) + sizeof(struct bio_vec) * + bucket_pages(c))) || + !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || + !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) || + !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) || + !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || + bch_journal_alloc(c) || + bch_btree_cache_alloc(c) || + bch_open_buckets_alloc(c)) + goto err; + + c->fill_iter->size = sb->bucket_size / sb->block_size; + + c->congested_read_threshold_us = 2000; + c->congested_write_threshold_us = 20000; + c->error_limit = 8 << IO_ERROR_SHIFT; + + return c; +err: + bch_cache_set_unregister(c); + return NULL; +} + +static void run_cache_set(struct cache_set *c) +{ + const char *err = "cannot allocate memory"; + struct cached_dev *dc, *t; + struct cache *ca; + unsigned i; + + struct btree_op op; + bch_btree_op_init_stack(&op); + op.lock = SHRT_MAX; + + for_each_cache(ca, c, i) + c->nbuckets += ca->sb.nbuckets; + + if (CACHE_SYNC(&c->sb)) { + LIST_HEAD(journal); + struct bkey *k; + struct jset *j; + + err = "cannot allocate memory for journal"; + if (bch_journal_read(c, &journal, &op)) + goto err; + + pr_debug("btree_journal_read() done"); + + err = "no journal entries found"; + if (list_empty(&journal)) + goto err; + + j = &list_entry(journal.prev, struct journal_replay, list)->j; + + err = "IO error reading priorities"; + for_each_cache(ca, c, i) + prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]); + + /* + * If prio_read() fails it'll call cache_set_error and we'll + * tear everything down right away, but if we perhaps checked + * sooner we could avoid journal replay. 
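+		 * (prio_read() itself only pr_warn()s on a bad checksum or
+		 * bad magic; a hard failure surfaces from the IO path via
+		 * prio_io() -> prio_endio() -> cache_set_err_on().)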
+		 */
+
+		k = &j->btree_root;
+
+		err = "bad btree root";
+		if (__bch_ptr_invalid(c, j->btree_level + 1, k))
+			goto err;
+
+		err = "error reading btree root";
+		c->root = bch_btree_node_get(c, k, j->btree_level, &op);
+		if (IS_ERR_OR_NULL(c->root))
+			goto err;
+
+		list_del_init(&c->root->list);
+		rw_unlock(true, c->root);
+
+		err = uuid_read(c, j, &op.cl);
+		if (err)
+			goto err;
+
+		err = "error in recovery";
+		if (bch_btree_check(c, &op))
+			goto err;
+
+		bch_journal_mark(c, &journal);
+		bch_btree_gc_finish(c);
+		pr_debug("btree_check() done");
+
+		/*
+		 * bcache_journal_next() can't happen sooner, or
+		 * btree_gc_finish() will give spurious errors about last_gc >
+		 * gc_gen - this is a hack but oh well.
+		 */
+		bch_journal_next(&c->journal);
+
+		for_each_cache(ca, c, i)
+			closure_call(&ca->alloc, bch_allocator_thread,
+				     system_wq, &c->cl);
+
+		/*
+		 * First place it's safe to allocate: btree_check() and
+		 * btree_gc_finish() have to run before we have buckets to
+		 * allocate, and bch_bucket_alloc_set() might cause a journal
+		 * entry to be written so bcache_journal_next() has to be called
+		 * first.
+		 *
+		 * If the uuids were in the old format we have to rewrite them
+		 * before the next journal entry is written:
+		 */
+		if (j->version < BCACHE_JSET_VERSION_UUID)
+			__uuid_write(c);
+
+		bch_journal_replay(c, &journal, &op);
+	} else {
+		pr_notice("invalidating existing data");
+		/* Don't want invalidate_buckets() to queue a gc yet */
+		closure_lock(&c->gc, NULL);
+
+		for_each_cache(ca, c, i) {
+			unsigned j;
+
+			ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
+					      2, SB_JOURNAL_BUCKETS);
+
+			for (j = 0; j < ca->sb.keys; j++)
+				ca->sb.d[j] = ca->sb.first_bucket + j;
+		}
+
+		bch_btree_gc_finish(c);
+
+		for_each_cache(ca, c, i)
+			closure_call(&ca->alloc, bch_allocator_thread,
+				     ca->alloc_workqueue, &c->cl);
+
+		mutex_lock(&c->bucket_lock);
+		for_each_cache(ca, c, i)
+			bch_prio_write(ca);
+		mutex_unlock(&c->bucket_lock);
+
+		wake_up(&c->alloc_wait);
+
+		err = "cannot allocate new UUID bucket";
+		if (__uuid_write(c))
+			goto err_unlock_gc;
+
+		err = "cannot allocate new btree root";
+		c->root = bch_btree_node_alloc(c, 0, &op.cl);
+		if (IS_ERR_OR_NULL(c->root))
+			goto err_unlock_gc;
+
+		bkey_copy_key(&c->root->key, &MAX_KEY);
+		bch_btree_write(c->root, true, &op);
+
+		bch_btree_set_root(c->root);
+		rw_unlock(true, c->root);
+
+		/*
+		 * We don't want to write the first journal entry until
+		 * everything is set up - fortunately journal entries won't be
+		 * written until the SET_CACHE_SYNC() here:
+		 */
+		SET_CACHE_SYNC(&c->sb, true);
+
+		bch_journal_next(&c->journal);
+		bch_journal_meta(c, &op.cl);
+
+		/* Unlock */
+		closure_set_stopped(&c->gc.cl);
+		closure_put(&c->gc.cl);
+	}
+
+	closure_sync(&op.cl);
+	c->sb.last_mount = get_seconds();
+	bcache_write_super(c);
+
+	list_for_each_entry_safe(dc, t, &uncached_devices, list)
+		bch_cached_dev_attach(dc, c);
+
+	flash_devs_run(c);
+
+	return;
+err_unlock_gc:
+	closure_set_stopped(&c->gc.cl);
+	closure_put(&c->gc.cl);
+err:
+	closure_sync(&op.cl);
+	/* XXX: test this, it's broken */
+	bch_cache_set_error(c, err);
+}
+
+static bool can_attach_cache(struct cache *ca, struct cache_set *c)
+{
+	return ca->sb.block_size == c->sb.block_size &&
+	       ca->sb.bucket_size == c->sb.bucket_size &&
+	       ca->sb.nr_in_set == c->sb.nr_in_set;
+}
+
+static const char *register_cache_set(struct cache *ca)
+{
+	char buf[12];
+	const char *err = "cannot allocate memory";
+	struct cache_set *c;
+
+	list_for_each_entry(c, &bch_cache_sets, list)
+		if (!memcmp(c->sb.set_uuid, ca->sb.set_uuid,
16)) { + if (c->cache[ca->sb.nr_this_dev]) + return "duplicate cache set member"; + + if (!can_attach_cache(ca, c)) + return "cache sb does not match set"; + + if (!CACHE_SYNC(&ca->sb)) + SET_CACHE_SYNC(&c->sb, false); + + goto found; + } + + c = bch_cache_set_alloc(&ca->sb); + if (!c) + return err; + + err = "error creating kobject"; + if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->sb.set_uuid) || + kobject_add(&c->internal, &c->kobj, "internal")) + goto err; + + if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) + goto err; + + bch_debug_init_cache_set(c); + + list_add(&c->list, &bch_cache_sets); +found: + sprintf(buf, "cache%i", ca->sb.nr_this_dev); + if (sysfs_create_link(&ca->kobj, &c->kobj, "set") || + sysfs_create_link(&c->kobj, &ca->kobj, buf)) + goto err; + + if (ca->sb.seq > c->sb.seq) { + c->sb.version = ca->sb.version; + memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); + c->sb.flags = ca->sb.flags; + c->sb.seq = ca->sb.seq; + pr_debug("set version = %llu", c->sb.version); + } + + ca->set = c; + ca->set->cache[ca->sb.nr_this_dev] = ca; + c->cache_by_alloc[c->caches_loaded++] = ca; + + if (c->caches_loaded == c->sb.nr_in_set) + run_cache_set(c); + + return NULL; +err: + bch_cache_set_unregister(c); + return err; +} + +/* Cache device */ + +void bch_cache_release(struct kobject *kobj) +{ + struct cache *ca = container_of(kobj, struct cache, kobj); + + if (ca->set) + ca->set->cache[ca->sb.nr_this_dev] = NULL; + + bch_cache_allocator_exit(ca); + + bio_split_pool_free(&ca->bio_split_hook); + + if (ca->alloc_workqueue) + destroy_workqueue(ca->alloc_workqueue); + + free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); + kfree(ca->prio_buckets); + vfree(ca->buckets); + + free_heap(&ca->heap); + free_fifo(&ca->unused); + free_fifo(&ca->free_inc); + free_fifo(&ca->free); + + if (ca->sb_bio.bi_inline_vecs[0].bv_page) + put_page(ca->sb_bio.bi_io_vec[0].bv_page); + + if (!IS_ERR_OR_NULL(ca->bdev)) { + blk_sync_queue(bdev_get_queue(ca->bdev)); + blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + } + + kfree(ca); + module_put(THIS_MODULE); +} + +static int cache_alloc(struct cache_sb *sb, struct cache *ca) +{ + size_t free; + struct bucket *b; + + if (!ca) + return -ENOMEM; + + __module_get(THIS_MODULE); + kobject_init(&ca->kobj, &bch_cache_ktype); + + memcpy(&ca->sb, sb, sizeof(struct cache_sb)); + + INIT_LIST_HEAD(&ca->discards); + + bio_init(&ca->sb_bio); + ca->sb_bio.bi_max_vecs = 1; + ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs; + + bio_init(&ca->journal.bio); + ca->journal.bio.bi_max_vecs = 8; + ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; + + free = roundup_pow_of_two(ca->sb.nbuckets) >> 9; + free = max_t(size_t, free, (prio_buckets(ca) + 8) * 2); + + if (!init_fifo(&ca->free, free, GFP_KERNEL) || + !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || + !init_fifo(&ca->unused, free << 2, GFP_KERNEL) || + !init_heap(&ca->heap, free << 3, GFP_KERNEL) || + !(ca->buckets = vmalloc(sizeof(struct bucket) * + ca->sb.nbuckets)) || + !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * + 2, GFP_KERNEL)) || + !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || + !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) || + bio_split_pool_init(&ca->bio_split_hook)) + goto err; + + ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); + + memset(ca->buckets, 0, ca->sb.nbuckets * sizeof(struct bucket)); + for_each_bucket(b, ca) + atomic_set(&b->pin, 0); + + if (bch_cache_allocator_init(ca)) + goto err; + + 
return 0; +err: + kobject_put(&ca->kobj); + return -ENOMEM; +} + +static const char *register_cache(struct cache_sb *sb, struct page *sb_page, + struct block_device *bdev, struct cache *ca) +{ + char name[BDEVNAME_SIZE]; + const char *err = "cannot allocate memory"; + + if (cache_alloc(sb, ca) != 0) + return err; + + ca->sb_bio.bi_io_vec[0].bv_page = sb_page; + ca->bdev = bdev; + ca->bdev->bd_holder = ca; + + if (blk_queue_discard(bdev_get_queue(ca->bdev))) + ca->discard = CACHE_DISCARD(&ca->sb); + + err = "error creating kobject"; + if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) + goto err; + + err = register_cache_set(ca); + if (err) + goto err; + + pr_info("registered cache device %s", bdevname(bdev, name)); + + return NULL; +err: + kobject_put(&ca->kobj); + pr_info("error opening %s: %s", bdevname(bdev, name), err); + /* Return NULL instead of an error because kobject_put() cleans + * everything up + */ + return NULL; +} + +/* Global interfaces/init */ + +static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, + const char *, size_t); + +kobj_attribute_write(register, register_bcache); +kobj_attribute_write(register_quiet, register_bcache); + +static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + const char *buffer, size_t size) +{ + ssize_t ret = size; + const char *err = "cannot allocate memory"; + char *path = NULL; + struct cache_sb *sb = NULL; + struct block_device *bdev = NULL; + struct page *sb_page = NULL; + + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + + mutex_lock(&bch_register_lock); + + if (!(path = kstrndup(buffer, size, GFP_KERNEL)) || + !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL))) + goto err; + + err = "failed to open device"; + bdev = blkdev_get_by_path(strim(path), + FMODE_READ|FMODE_WRITE|FMODE_EXCL, + sb); + if (bdev == ERR_PTR(-EBUSY)) + err = "device busy"; + + if (IS_ERR(bdev) || + set_blocksize(bdev, 4096)) + goto err; + + err = read_super(sb, bdev, &sb_page); + if (err) + goto err_close; + + if (sb->version == CACHE_BACKING_DEV) { + struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); + + err = register_bdev(sb, sb_page, bdev, dc); + } else { + struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); + + err = register_cache(sb, sb_page, bdev, ca); + } + + if (err) { + /* register_(bdev|cache) will only return an error if they + * didn't get far enough to create the kobject - if they did, + * the kobject destructor will do this cleanup. + */ + put_page(sb_page); +err_close: + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); +err: + if (attr != &ksysfs_register_quiet) + pr_info("error opening %s: %s", path, err); + ret = -EINVAL; + } + + kfree(sb); + kfree(path); + mutex_unlock(&bch_register_lock); + module_put(THIS_MODULE); + return ret; +} + +static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) +{ + if (code == SYS_DOWN || + code == SYS_HALT || + code == SYS_POWER_OFF) { + DEFINE_WAIT(wait); + unsigned long start = jiffies; + bool stopped = false; + + struct cache_set *c, *tc; + struct cached_dev *dc, *tdc; + + mutex_lock(&bch_register_lock); + + if (list_empty(&bch_cache_sets) && + list_empty(&uncached_devices)) + goto out; + + pr_info("Stopping all devices:"); + + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) + bch_cache_set_stop(c); + + list_for_each_entry_safe(dc, tdc, &uncached_devices, list) + bcache_device_stop(&dc->disk); + + /* What's a condition variable? 
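+		 * Roughly: the loop below open-codes
+		 * wait_event_timeout(unregister_wait, stopped, 2 * HZ) by
+		 * hand, dropping bch_register_lock around each
+		 * schedule_timeout() so the devices being stopped can finish
+		 * tearing down.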
*/ + while (1) { + long timeout = start + 2 * HZ - jiffies; + + stopped = list_empty(&bch_cache_sets) && + list_empty(&uncached_devices); + + if (timeout < 0 || stopped) + break; + + prepare_to_wait(&unregister_wait, &wait, + TASK_UNINTERRUPTIBLE); + + mutex_unlock(&bch_register_lock); + schedule_timeout(timeout); + mutex_lock(&bch_register_lock); + } + + finish_wait(&unregister_wait, &wait); + + if (stopped) + pr_info("All devices stopped"); + else + pr_notice("Timeout waiting for devices to be closed"); +out: + mutex_unlock(&bch_register_lock); + } + + return NOTIFY_DONE; +} + +static struct notifier_block reboot = { + .notifier_call = bcache_reboot, + .priority = INT_MAX, /* before any real devices */ +}; + +static void bcache_exit(void) +{ + bch_debug_exit(); + bch_writeback_exit(); + bch_request_exit(); + bch_btree_exit(); + if (bcache_kobj) + kobject_put(bcache_kobj); + if (bcache_wq) + destroy_workqueue(bcache_wq); + unregister_blkdev(bcache_major, "bcache"); + unregister_reboot_notifier(&reboot); +} + +static int __init bcache_init(void) +{ + static const struct attribute *files[] = { + &ksysfs_register.attr, + &ksysfs_register_quiet.attr, + NULL + }; + + mutex_init(&bch_register_lock); + init_waitqueue_head(&unregister_wait); + register_reboot_notifier(&reboot); + + bcache_major = register_blkdev(0, "bcache"); + if (bcache_major < 0) + return bcache_major; + + if (!(bcache_wq = create_workqueue("bcache")) || + !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || + sysfs_create_files(bcache_kobj, files) || + bch_btree_init() || + bch_request_init() || + bch_writeback_init() || + bch_debug_init(bcache_kobj)) + goto err; + + return 0; +err: + bcache_exit(); + return -ENOMEM; +} + +module_exit(bcache_exit); +module_init(bcache_init); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c new file mode 100644 index 000000000000..5c7e77073b1f --- /dev/null +++ b/drivers/md/bcache/sysfs.c @@ -0,0 +1,817 @@ +/* + * bcache sysfs interfaces + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. 
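+ *
+ * Each attribute is declared once via read_attribute()/write_attribute()/
+ * rw_attribute(); the SHOW()/STORE() bodies below then dispatch by comparing
+ * the attr pointer against the resulting sysfs_<name> objects instead of
+ * using per-attribute callbacks.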
+ */
+
+#include "bcache.h"
+#include "sysfs.h"
+#include "btree.h"
+#include "request.h"
+
+#include <linux/sort.h>
+
+static const char * const cache_replacement_policies[] = {
+	"lru",
+	"fifo",
+	"random",
+	NULL
+};
+
+write_attribute(attach);
+write_attribute(detach);
+write_attribute(unregister);
+write_attribute(stop);
+write_attribute(clear_stats);
+write_attribute(trigger_gc);
+write_attribute(prune_cache);
+write_attribute(flash_vol_create);
+
+read_attribute(bucket_size);
+read_attribute(block_size);
+read_attribute(nbuckets);
+read_attribute(tree_depth);
+read_attribute(root_usage_percent);
+read_attribute(priority_stats);
+read_attribute(btree_cache_size);
+read_attribute(btree_cache_max_chain);
+read_attribute(cache_available_percent);
+read_attribute(written);
+read_attribute(btree_written);
+read_attribute(metadata_written);
+read_attribute(active_journal_entries);
+
+sysfs_time_stats_attribute(btree_gc, sec, ms);
+sysfs_time_stats_attribute(btree_split, sec, us);
+sysfs_time_stats_attribute(btree_sort, ms, us);
+sysfs_time_stats_attribute(btree_read, ms, us);
+sysfs_time_stats_attribute(try_harder, ms, us);
+
+read_attribute(btree_nodes);
+read_attribute(btree_used_percent);
+read_attribute(average_key_size);
+read_attribute(dirty_data);
+read_attribute(bset_tree_stats);
+
+read_attribute(state);
+read_attribute(cache_read_races);
+read_attribute(writeback_keys_done);
+read_attribute(writeback_keys_failed);
+read_attribute(io_errors);
+read_attribute(congested);
+rw_attribute(congested_read_threshold_us);
+rw_attribute(congested_write_threshold_us);
+
+rw_attribute(sequential_cutoff);
+rw_attribute(sequential_merge);
+rw_attribute(data_csum);
+rw_attribute(cache_mode);
+rw_attribute(writeback_metadata);
+rw_attribute(writeback_running);
+rw_attribute(writeback_percent);
+rw_attribute(writeback_delay);
+rw_attribute(writeback_rate);
+
+rw_attribute(writeback_rate_update_seconds);
+rw_attribute(writeback_rate_d_term);
+rw_attribute(writeback_rate_p_term_inverse);
+rw_attribute(writeback_rate_d_smooth);
+read_attribute(writeback_rate_debug);
+
+rw_attribute(synchronous);
+rw_attribute(journal_delay_ms);
+rw_attribute(discard);
+rw_attribute(running);
+rw_attribute(label);
+rw_attribute(readahead);
+rw_attribute(io_error_limit);
+rw_attribute(io_error_halflife);
+rw_attribute(verify);
+rw_attribute(key_merging_disabled);
+rw_attribute(gc_always_rewrite);
+rw_attribute(freelist_percent);
+rw_attribute(cache_replacement_policy);
+rw_attribute(btree_shrinker_disabled);
+rw_attribute(copy_gc_enabled);
+rw_attribute(size);
+
+SHOW(__bch_cached_dev)
+{
+	struct cached_dev *dc = container_of(kobj, struct cached_dev,
+					     disk.kobj);
+	const char *states[] = { "no cache", "clean", "dirty", "inconsistent" };
+
+#define var(stat)		(dc->stat)
+
+	if (attr == &sysfs_cache_mode)
+		return snprint_string_list(buf, PAGE_SIZE,
+					   bch_cache_modes + 1,
+					   BDEV_CACHE_MODE(&dc->sb));
+
+	sysfs_printf(data_csum,	"%i", dc->disk.data_csum);
+	var_printf(verify,	"%i");
+	var_printf(writeback_metadata, "%i");
+	var_printf(writeback_running,	"%i");
+	var_print(writeback_delay);
+	var_print(writeback_percent);
+	sysfs_print(writeback_rate,	dc->writeback_rate.rate);
+
+	var_print(writeback_rate_update_seconds);
+	var_print(writeback_rate_d_term);
+	var_print(writeback_rate_p_term_inverse);
+	var_print(writeback_rate_d_smooth);
+
+	if (attr == &sysfs_writeback_rate_debug) {
+		char dirty[20];
+		char derivative[20];
+		char target[20];
+		hprint(dirty,
+		       atomic_long_read(&dc->disk.sectors_dirty) << 9);
+		hprint(derivative,
+		       dc->writeback_rate_derivative << 9);
+		hprint(target, dc->writeback_rate_target << 9);
+
+		return sprintf(buf,
+			       "rate:\t\t%u\n"
+			       "change:\t\t%i\n"
+			       "dirty:\t\t%s\n"
+			       "derivative:\t%s\n"
+			       "target:\t\t%s\n",
+			       dc->writeback_rate.rate,
+			       dc->writeback_rate_change,
+			       dirty, derivative, target);
+	}
+
+	sysfs_hprint(dirty_data,
+		     atomic_long_read(&dc->disk.sectors_dirty) << 9);
+
+	var_printf(sequential_merge,	"%i");
+	var_hprint(sequential_cutoff);
+	var_hprint(readahead);
+
+	sysfs_print(running,		atomic_read(&dc->running));
+	sysfs_print(state,		states[BDEV_STATE(&dc->sb)]);
+
+	if (attr == &sysfs_label) {
+		memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
+		buf[SB_LABEL_SIZE] = '\0';
+		strcat(buf, "\n");
+		return strlen(buf);
+	}
+
+#undef var
+	return 0;
+}
+SHOW_LOCKED(bch_cached_dev)
+
+STORE(__cached_dev)
+{
+	struct cached_dev *dc = container_of(kobj, struct cached_dev,
+					     disk.kobj);
+	unsigned v = size;
+	struct cache_set *c;
+
+#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
+#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)
+
+	sysfs_strtoul(data_csum,	dc->disk.data_csum);
+	d_strtoul(verify);
+	d_strtoul(writeback_metadata);
+	d_strtoul(writeback_running);
+	d_strtoul(writeback_delay);
+	sysfs_strtoul_clamp(writeback_rate,
+			    dc->writeback_rate.rate, 1, 1000000);
+	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
+
+	d_strtoul(writeback_rate_update_seconds);
+	d_strtoul(writeback_rate_d_term);
+	d_strtoul(writeback_rate_p_term_inverse);
+	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
+			    dc->writeback_rate_p_term_inverse, 1, INT_MAX);
+	d_strtoul(writeback_rate_d_smooth);
+
+	d_strtoul(sequential_merge);
+	d_strtoi_h(sequential_cutoff);
+	d_strtoi_h(readahead);
+
+	if (attr == &sysfs_clear_stats)
+		bch_cache_accounting_clear(&dc->accounting);
+
+	if (attr == &sysfs_running &&
+	    strtoul_or_return(buf))
+		bch_cached_dev_run(dc);
+
+	if (attr == &sysfs_cache_mode) {
+		ssize_t v = read_string_list(buf, bch_cache_modes + 1);
+
+		if (v < 0)
+			return v;
+
+		if ((unsigned) v != BDEV_CACHE_MODE(&dc->sb)) {
+			SET_BDEV_CACHE_MODE(&dc->sb, v);
+			bch_write_bdev_super(dc, NULL);
+		}
+	}
+
+	if (attr == &sysfs_label) {
+		memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
+		bch_write_bdev_super(dc, NULL);
+		if (dc->disk.c) {
+			memcpy(dc->disk.c->uuids[dc->disk.id].label,
+			       buf, SB_LABEL_SIZE);
+			bch_uuid_write(dc->disk.c);
+		}
+	}
+
+	if (attr == &sysfs_attach) {
+		if (parse_uuid(buf, dc->sb.set_uuid) < 16)
+			return -EINVAL;
+
+		list_for_each_entry(c, &bch_cache_sets, list) {
+			v = bch_cached_dev_attach(dc, c);
+			if (!v)
+				return size;
+		}
+
+		pr_err("Can't attach %s: cache set not found", buf);
+		size = v;
+	}
+
+	if (attr == &sysfs_detach && dc->disk.c)
+		bch_cached_dev_detach(dc);
+
+	if (attr == &sysfs_stop)
+		bcache_device_stop(&dc->disk);
+
+	return size;
+}
+
+STORE(bch_cached_dev)
+{
+	struct cached_dev *dc = container_of(kobj, struct cached_dev,
+					     disk.kobj);
+
+	mutex_lock(&bch_register_lock);
+	size = __cached_dev_store(kobj, attr, buf, size);
+
+	if (attr == &sysfs_writeback_running)
+		bch_writeback_queue(dc);
+
+	if (attr == &sysfs_writeback_percent)
+		schedule_delayed_work(&dc->writeback_rate_update,
+				      dc->writeback_rate_update_seconds * HZ);
+
+	mutex_unlock(&bch_register_lock);
+	return size;
+}
+
+static struct attribute *bch_cached_dev_files[] = {
+	&sysfs_attach,
+	&sysfs_detach,
+	&sysfs_stop,
+#if 0
+	&sysfs_data_csum,
+#endif
+	&sysfs_cache_mode,
+	&sysfs_writeback_metadata,
+	&sysfs_writeback_running,
+	&sysfs_writeback_delay,
+	&sysfs_writeback_percent,
+	
&sysfs_writeback_rate,
+	&sysfs_writeback_rate_update_seconds,
+	&sysfs_writeback_rate_d_term,
+	&sysfs_writeback_rate_p_term_inverse,
+	&sysfs_writeback_rate_d_smooth,
+	&sysfs_writeback_rate_debug,
+	&sysfs_dirty_data,
+	&sysfs_sequential_cutoff,
+	&sysfs_sequential_merge,
+	&sysfs_clear_stats,
+	&sysfs_running,
+	&sysfs_state,
+	&sysfs_label,
+	&sysfs_readahead,
+#ifdef CONFIG_BCACHE_DEBUG
+	&sysfs_verify,
+#endif
+	NULL
+};
+KTYPE(bch_cached_dev);
+
+SHOW(bch_flash_dev)
+{
+	struct bcache_device *d = container_of(kobj, struct bcache_device,
+					       kobj);
+	struct uuid_entry *u = &d->c->uuids[d->id];
+
+	sysfs_printf(data_csum,	"%i", d->data_csum);
+	sysfs_hprint(size,	u->sectors << 9);
+
+	if (attr == &sysfs_label) {
+		memcpy(buf, u->label, SB_LABEL_SIZE);
+		buf[SB_LABEL_SIZE] = '\0';
+		strcat(buf, "\n");
+		return strlen(buf);
+	}
+
+	return 0;
+}
+
+STORE(__bch_flash_dev)
+{
+	struct bcache_device *d = container_of(kobj, struct bcache_device,
+					       kobj);
+	struct uuid_entry *u = &d->c->uuids[d->id];
+
+	sysfs_strtoul(data_csum, d->data_csum);
+
+	if (attr == &sysfs_size) {
+		uint64_t v;
+		strtoi_h_or_return(buf, v);
+
+		u->sectors = v >> 9;
+		bch_uuid_write(d->c);
+		set_capacity(d->disk, u->sectors);
+	}
+
+	if (attr == &sysfs_label) {
+		memcpy(u->label, buf, SB_LABEL_SIZE);
+		bch_uuid_write(d->c);
+	}
+
+	if (attr == &sysfs_unregister) {
+		atomic_set(&d->detaching, 1);
+		bcache_device_stop(d);
+	}
+
+	return size;
+}
+STORE_LOCKED(bch_flash_dev)
+
+static struct attribute *bch_flash_dev_files[] = {
+	&sysfs_unregister,
+#if 0
+	&sysfs_data_csum,
+#endif
+	&sysfs_label,
+	&sysfs_size,
+	NULL
+};
+KTYPE(bch_flash_dev);
+
+SHOW(__bch_cache_set)
+{
+	unsigned root_usage(struct cache_set *c)
+	{
+		unsigned bytes = 0;
+		struct bkey *k;
+		struct btree *b;
+		struct btree_iter iter;
+
+		goto lock_root;
+
+		do {
+			rw_unlock(false, b);
+lock_root:
+			b = c->root;
+			rw_lock(false, b, b->level);
+		} while (b != c->root);
+
+		for_each_key_filter(b, k, &iter, bch_ptr_bad)
+			bytes += bkey_bytes(k);
+
+		rw_unlock(false, b);
+
+		return (bytes * 100) / btree_bytes(c);
+	}
+
+	size_t cache_size(struct cache_set *c)
+	{
+		size_t ret = 0;
+		struct btree *b;
+
+		mutex_lock(&c->bucket_lock);
+		list_for_each_entry(b, &c->btree_cache, list)
+			ret += 1 << (b->page_order + PAGE_SHIFT);
+
+		mutex_unlock(&c->bucket_lock);
+		return ret;
+	}
+
+	unsigned cache_max_chain(struct cache_set *c)
+	{
+		unsigned ret = 0;
+		struct hlist_head *h;
+
+		mutex_lock(&c->bucket_lock);
+
+		for (h = c->bucket_hash;
+		     h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
+		     h++) {
+			unsigned i = 0;
+			struct hlist_node *p;
+
+			hlist_for_each(p, h)
+				i++;
+
+			ret = max(ret, i);
+		}
+
+		mutex_unlock(&c->bucket_lock);
+		return ret;
+	}
+
+	unsigned btree_used(struct cache_set *c)
+	{
+		return div64_u64(c->gc_stats.key_bytes * 100,
+				 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
+	}
+
+	unsigned average_key_size(struct cache_set *c)
+	{
+		return c->gc_stats.nkeys
+			? 
div64_u64(c->gc_stats.data, c->gc_stats.nkeys) + : 0; + } + + struct cache_set *c = container_of(kobj, struct cache_set, kobj); + + sysfs_print(synchronous, CACHE_SYNC(&c->sb)); + sysfs_print(journal_delay_ms, c->journal_delay_ms); + sysfs_hprint(bucket_size, bucket_bytes(c)); + sysfs_hprint(block_size, block_bytes(c)); + sysfs_print(tree_depth, c->root->level); + sysfs_print(root_usage_percent, root_usage(c)); + + sysfs_hprint(btree_cache_size, cache_size(c)); + sysfs_print(btree_cache_max_chain, cache_max_chain(c)); + sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use); + + sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); + sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); + sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us); + sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); + sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); + + sysfs_print(btree_used_percent, btree_used(c)); + sysfs_print(btree_nodes, c->gc_stats.nodes); + sysfs_hprint(dirty_data, c->gc_stats.dirty); + sysfs_hprint(average_key_size, average_key_size(c)); + + sysfs_print(cache_read_races, + atomic_long_read(&c->cache_read_races)); + + sysfs_print(writeback_keys_done, + atomic_long_read(&c->writeback_keys_done)); + sysfs_print(writeback_keys_failed, + atomic_long_read(&c->writeback_keys_failed)); + + /* See count_io_errors for why 88 */ + sysfs_print(io_error_halflife, c->error_decay * 88); + sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT); + + sysfs_hprint(congested, + ((uint64_t) bch_get_congested(c)) << 9); + sysfs_print(congested_read_threshold_us, + c->congested_read_threshold_us); + sysfs_print(congested_write_threshold_us, + c->congested_write_threshold_us); + + sysfs_print(active_journal_entries, fifo_used(&c->journal.pin)); + sysfs_printf(verify, "%i", c->verify); + sysfs_printf(key_merging_disabled, "%i", c->key_merging_disabled); + sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); + sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); + sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); + + if (attr == &sysfs_bset_tree_stats) + return bch_bset_print_stats(c, buf); + + return 0; +} +SHOW_LOCKED(bch_cache_set) + +STORE(__bch_cache_set) +{ + struct cache_set *c = container_of(kobj, struct cache_set, kobj); + + if (attr == &sysfs_unregister) + bch_cache_set_unregister(c); + + if (attr == &sysfs_stop) + bch_cache_set_stop(c); + + if (attr == &sysfs_synchronous) { + bool sync = strtoul_or_return(buf); + + if (sync != CACHE_SYNC(&c->sb)) { + SET_CACHE_SYNC(&c->sb, sync); + bcache_write_super(c); + } + } + + if (attr == &sysfs_flash_vol_create) { + int r; + uint64_t v; + strtoi_h_or_return(buf, v); + + r = bch_flash_dev_create(c, v); + if (r) + return r; + } + + if (attr == &sysfs_clear_stats) { + atomic_long_set(&c->writeback_keys_done, 0); + atomic_long_set(&c->writeback_keys_failed, 0); + + memset(&c->gc_stats, 0, sizeof(struct gc_stat)); + bch_cache_accounting_clear(&c->accounting); + } + + if (attr == &sysfs_trigger_gc) + bch_queue_gc(c); + + if (attr == &sysfs_prune_cache) { + struct shrink_control sc; + sc.gfp_mask = GFP_KERNEL; + sc.nr_to_scan = strtoul_or_return(buf); + c->shrink.shrink(&c->shrink, &sc); + } + + sysfs_strtoul(congested_read_threshold_us, + c->congested_read_threshold_us); + sysfs_strtoul(congested_write_threshold_us, + c->congested_write_threshold_us); + + if (attr == &sysfs_io_error_limit) + c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT; + + /* See 
count_io_errors() for why 88 */ + if (attr == &sysfs_io_error_halflife) + c->error_decay = strtoul_or_return(buf) / 88; + + sysfs_strtoul(journal_delay_ms, c->journal_delay_ms); + sysfs_strtoul(verify, c->verify); + sysfs_strtoul(key_merging_disabled, c->key_merging_disabled); + sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite); + sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled); + sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled); + + return size; +} +STORE_LOCKED(bch_cache_set) + +SHOW(bch_cache_set_internal) +{ + struct cache_set *c = container_of(kobj, struct cache_set, internal); + return bch_cache_set_show(&c->kobj, attr, buf); +} + +STORE(bch_cache_set_internal) +{ + struct cache_set *c = container_of(kobj, struct cache_set, internal); + return bch_cache_set_store(&c->kobj, attr, buf, size); +} + +static void bch_cache_set_internal_release(struct kobject *k) +{ +} + +static struct attribute *bch_cache_set_files[] = { + &sysfs_unregister, + &sysfs_stop, + &sysfs_synchronous, + &sysfs_journal_delay_ms, + &sysfs_flash_vol_create, + + &sysfs_bucket_size, + &sysfs_block_size, + &sysfs_tree_depth, + &sysfs_root_usage_percent, + &sysfs_btree_cache_size, + &sysfs_cache_available_percent, + + &sysfs_average_key_size, + &sysfs_dirty_data, + + &sysfs_io_error_limit, + &sysfs_io_error_halflife, + &sysfs_congested, + &sysfs_congested_read_threshold_us, + &sysfs_congested_write_threshold_us, + &sysfs_clear_stats, + NULL +}; +KTYPE(bch_cache_set); + +static struct attribute *bch_cache_set_internal_files[] = { + &sysfs_active_journal_entries, + + sysfs_time_stats_attribute_list(btree_gc, sec, ms) + sysfs_time_stats_attribute_list(btree_split, sec, us) + sysfs_time_stats_attribute_list(btree_sort, ms, us) + sysfs_time_stats_attribute_list(btree_read, ms, us) + sysfs_time_stats_attribute_list(try_harder, ms, us) + + &sysfs_btree_nodes, + &sysfs_btree_used_percent, + &sysfs_btree_cache_max_chain, + + &sysfs_bset_tree_stats, + &sysfs_cache_read_races, + &sysfs_writeback_keys_done, + &sysfs_writeback_keys_failed, + + &sysfs_trigger_gc, + &sysfs_prune_cache, +#ifdef CONFIG_BCACHE_DEBUG + &sysfs_verify, + &sysfs_key_merging_disabled, +#endif + &sysfs_gc_always_rewrite, + &sysfs_btree_shrinker_disabled, + &sysfs_copy_gc_enabled, + NULL +}; +KTYPE(bch_cache_set_internal); + +SHOW(__bch_cache) +{ + struct cache *ca = container_of(kobj, struct cache, kobj); + + sysfs_hprint(bucket_size, bucket_bytes(ca)); + sysfs_hprint(block_size, block_bytes(ca)); + sysfs_print(nbuckets, ca->sb.nbuckets); + sysfs_print(discard, ca->discard); + sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9); + sysfs_hprint(btree_written, + atomic_long_read(&ca->btree_sectors_written) << 9); + sysfs_hprint(metadata_written, + (atomic_long_read(&ca->meta_sectors_written) + + atomic_long_read(&ca->btree_sectors_written)) << 9); + + sysfs_print(io_errors, + atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT); + + sysfs_print(freelist_percent, ca->free.size * 100 / + ((size_t) ca->sb.nbuckets)); + + if (attr == &sysfs_cache_replacement_policy) + return snprint_string_list(buf, PAGE_SIZE, + cache_replacement_policies, + CACHE_REPLACEMENT(&ca->sb)); + + if (attr == &sysfs_priority_stats) { + int cmp(const void *l, const void *r) + { return *((uint16_t *) r) - *((uint16_t *) l); } + + /* Number of quantiles we compute */ + const unsigned nq = 31; + + size_t n = ca->sb.nbuckets, i, unused, btree; + uint64_t sum = 0; + uint16_t q[nq], *p, *cached; + ssize_t ret; + + cached = p = vmalloc(ca->sb.nbuckets * 
sizeof(uint16_t)); + if (!p) + return -ENOMEM; + + mutex_lock(&ca->set->bucket_lock); + for (i = ca->sb.first_bucket; i < n; i++) + p[i] = ca->buckets[i].prio; + mutex_unlock(&ca->set->bucket_lock); + + sort(p, n, sizeof(uint16_t), cmp, NULL); + + while (n && + !cached[n - 1]) + --n; + + unused = ca->sb.nbuckets - n; + + while (cached < p + n && + *cached == BTREE_PRIO) + cached++; + + btree = cached - p; + n -= btree; + + for (i = 0; i < n; i++) + sum += INITIAL_PRIO - cached[i]; + + if (n) + do_div(sum, n); + + for (i = 0; i < nq; i++) + q[i] = INITIAL_PRIO - cached[n * (i + 1) / (nq + 1)]; + + vfree(p); + + ret = snprintf(buf, PAGE_SIZE, + "Unused: %zu%%\n" + "Metadata: %zu%%\n" + "Average: %llu\n" + "Sectors per Q: %zu\n" + "Quantiles: [", + unused * 100 / (size_t) ca->sb.nbuckets, + btree * 100 / (size_t) ca->sb.nbuckets, sum, + n * ca->sb.bucket_size / (nq + 1)); + + for (i = 0; i < nq && ret < (ssize_t) PAGE_SIZE; i++) + ret += snprintf(buf + ret, PAGE_SIZE - ret, + i < nq - 1 ? "%u " : "%u]\n", q[i]); + + buf[PAGE_SIZE - 1] = '\0'; + return ret; + } + + return 0; +} +SHOW_LOCKED(bch_cache) + +STORE(__bch_cache) +{ + struct cache *ca = container_of(kobj, struct cache, kobj); + + if (attr == &sysfs_discard) { + bool v = strtoul_or_return(buf); + + if (blk_queue_discard(bdev_get_queue(ca->bdev))) + ca->discard = v; + + if (v != CACHE_DISCARD(&ca->sb)) { + SET_CACHE_DISCARD(&ca->sb, v); + bcache_write_super(ca->set); + } + } + + if (attr == &sysfs_cache_replacement_policy) { + ssize_t v = read_string_list(buf, cache_replacement_policies); + + if (v < 0) + return v; + + if ((unsigned) v != CACHE_REPLACEMENT(&ca->sb)) { + mutex_lock(&ca->set->bucket_lock); + SET_CACHE_REPLACEMENT(&ca->sb, v); + mutex_unlock(&ca->set->bucket_lock); + + bcache_write_super(ca->set); + } + } + + if (attr == &sysfs_freelist_percent) { + DECLARE_FIFO(long, free); + long i; + size_t p = strtoul_or_return(buf); + + p = clamp_t(size_t, + ((size_t) ca->sb.nbuckets * p) / 100, + roundup_pow_of_two(ca->sb.nbuckets) >> 9, + ca->sb.nbuckets / 2); + + if (!init_fifo_exact(&free, p, GFP_KERNEL)) + return -ENOMEM; + + mutex_lock(&ca->set->bucket_lock); + + fifo_move(&free, &ca->free); + fifo_swap(&free, &ca->free); + + mutex_unlock(&ca->set->bucket_lock); + + while (fifo_pop(&free, i)) + atomic_dec(&ca->buckets[i].pin); + + free_fifo(&free); + } + + if (attr == &sysfs_clear_stats) { + atomic_long_set(&ca->sectors_written, 0); + atomic_long_set(&ca->btree_sectors_written, 0); + atomic_long_set(&ca->meta_sectors_written, 0); + atomic_set(&ca->io_count, 0); + atomic_set(&ca->io_errors, 0); + } + + return size; +} +STORE_LOCKED(bch_cache) + +static struct attribute *bch_cache_files[] = { + &sysfs_bucket_size, + &sysfs_block_size, + &sysfs_nbuckets, + &sysfs_priority_stats, + &sysfs_discard, + &sysfs_written, + &sysfs_btree_written, + &sysfs_metadata_written, + &sysfs_io_errors, + &sysfs_clear_stats, + &sysfs_freelist_percent, + &sysfs_cache_replacement_policy, + NULL +}; +KTYPE(bch_cache); diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h new file mode 100644 index 000000000000..34e4ba1184fe --- /dev/null +++ b/drivers/md/bcache/sysfs.h @@ -0,0 +1,110 @@ +#ifndef _BCACHE_SYSFS_H_ +#define _BCACHE_SYSFS_H_ + +#define KTYPE(type) \ +struct kobj_type type ## _ktype = { \ + .release = type ## _release, \ + .sysfs_ops = &((const struct sysfs_ops) { \ + .show = type ## _show, \ + .store = type ## _store \ + }), \ + .default_attrs = type ## _files \ +} + +#define SHOW(fn) \ +static ssize_t fn ## _show(struct 
kobject *kobj, struct attribute *attr,\
+			   char *buf)					\
+
+#define STORE(fn)							\
+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
+			    const char *buf, size_t size)		\
+
+#define SHOW_LOCKED(fn)							\
+SHOW(fn)								\
+{									\
+	ssize_t ret;							\
+	mutex_lock(&bch_register_lock);					\
+	ret = __ ## fn ## _show(kobj, attr, buf);			\
+	mutex_unlock(&bch_register_lock);				\
+	return ret;							\
+}
+
+#define STORE_LOCKED(fn)						\
+STORE(fn)								\
+{									\
+	ssize_t ret;							\
+	mutex_lock(&bch_register_lock);					\
+	ret = __ ## fn ## _store(kobj, attr, buf, size);		\
+	mutex_unlock(&bch_register_lock);				\
+	return ret;							\
+}
+
+#define __sysfs_attribute(_name, _mode)					\
+	static struct attribute sysfs_##_name =				\
+		{ .name = #_name, .mode = _mode }
+
+#define write_attribute(n)	__sysfs_attribute(n, S_IWUSR)
+#define read_attribute(n)	__sysfs_attribute(n, S_IRUGO)
+#define rw_attribute(n)		__sysfs_attribute(n, S_IRUGO|S_IWUSR)
+
+#define sysfs_printf(file, fmt, ...)					\
+do {									\
+	if (attr == &sysfs_ ## file)					\
+		return snprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);	\
+} while (0)
+
+#define sysfs_print(file, var)						\
+do {									\
+	if (attr == &sysfs_ ## file)					\
+		return snprint(buf, PAGE_SIZE, var);			\
+} while (0)
+
+#define sysfs_hprint(file, val)						\
+do {									\
+	if (attr == &sysfs_ ## file) {					\
+		ssize_t ret = hprint(buf, val);				\
+		strcat(buf, "\n");					\
+		return ret + 1;						\
+	}								\
+} while (0)
+
+#define var_printf(_var, fmt)	sysfs_printf(_var, fmt, var(_var))
+#define var_print(_var)		sysfs_print(_var, var(_var))
+#define var_hprint(_var)	sysfs_hprint(_var, var(_var))
+
+#define sysfs_strtoul(file, var)					\
+do {									\
+	if (attr == &sysfs_ ## file)					\
+		return strtoul_safe(buf, var) ?: (ssize_t) size;	\
+} while (0)
+
+#define sysfs_strtoul_clamp(file, var, min, max)			\
+do {									\
+	if (attr == &sysfs_ ## file)					\
+		return strtoul_safe_clamp(buf, var, min, max)		\
+			?: (ssize_t) size;				\
+} while (0)
+
+#define strtoul_or_return(cp)						\
+({									\
+	unsigned long _v;						\
+	int _r = kstrtoul(cp, 10, &_v);					\
+	if (_r)								\
+		return _r;						\
+	_v;								\
+})
+
+#define strtoi_h_or_return(cp, v)					\
+do {									\
+	int _r = strtoi_h(cp, &v);					\
+	if (_r)								\
+		return _r;						\
+} while (0)
+
+#define sysfs_hatoi(file, var)						\
+do {									\
+	if (attr == &sysfs_ ## file)					\
+		return strtoi_h(buf, &var) ?: (ssize_t) size;		\
+} while (0)
+
+#endif /* _BCACHE_SYSFS_H_ */
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
new file mode 100644
index 000000000000..983f9bb411bc
--- /dev/null
+++ b/drivers/md/bcache/trace.c
@@ -0,0 +1,26 @@
+#include "bcache.h"
+#include "btree.h"
+#include "request.h"
+
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/bcache.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
new file mode 100644
index 
000000000000..dcec2e4f84ad
--- /dev/null
+++ b/drivers/md/bcache/util.c
@@ -0,0 +1,389 @@
+/*
+ * random utility code, for bcache but in theory not specific to bcache
+ *
+ * Copyright 2010, 2011 Kent Overstreet
+ * Copyright 2012 Google, Inc.
+ */
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
+
+#include "util.h"
+
+#define simple_strtoint(c, end, base)	simple_strtol(c, end, base)
+#define simple_strtouint(c, end, base)	simple_strtoul(c, end, base)
+
+#define STRTO_H(name, type)					\
+int name ## _h(const char *cp, type *res)			\
+{								\
+	int u = 0;						\
+	char *e;						\
+	type i = simple_ ## name(cp, &e, 10);			\
+								\
+	switch (tolower(*e)) {					\
+	default:						\
+		return -EINVAL;					\
+	case 'y':						\
+	case 'z':						\
+		u++;						\
+	case 'e':						\
+		u++;						\
+	case 'p':						\
+		u++;						\
+	case 't':						\
+		u++;						\
+	case 'g':						\
+		u++;						\
+	case 'm':						\
+		u++;						\
+	case 'k':						\
+		u++;						\
+		if (e++ == cp)					\
+			return -EINVAL;				\
+	case '\n':						\
+	case '\0':						\
+		if (*e == '\n')					\
+			e++;					\
+	}							\
+								\
+	if (*e)							\
+		return -EINVAL;					\
+								\
+	while (u--) {						\
+		if ((type) ~0 > 0 &&				\
+		    (type) ~0 / 1024 <= i)			\
+			return -EINVAL;				\
+		if ((i > 0 && ANYSINT_MAX(type) / 1024 < i) ||	\
+		    (i < 0 && -ANYSINT_MAX(type) / 1024 > i))	\
+			return -EINVAL;				\
+		i *= 1024;					\
+	}							\
+								\
+	*res = i;						\
+	return 0;						\
+}								\
+EXPORT_SYMBOL_GPL(name ## _h);
+
+STRTO_H(strtoint, int)
+STRTO_H(strtouint, unsigned int)
+STRTO_H(strtoll, long long)
+STRTO_H(strtoull, unsigned long long)
+
+ssize_t hprint(char *buf, int64_t v)
+{
+	static const char units[] = "?kMGTPEZY";
+	char dec[3] = "";
+	int u, t = 0;
+
+	for (u = 0; v >= 1024 || v <= -1024; u++) {
+		t = v & ~(~0 << 10);
+		v >>= 10;
+	}
+
+	if (!u)
+		return sprintf(buf, "%lli", v);
+
+	if (v < 100 && v > -100)
+		sprintf(dec, ".%i", t / 100);
+
+	return sprintf(buf, "%lli%s%c", v, dec, units[u]);
+}
+EXPORT_SYMBOL_GPL(hprint);
+
+ssize_t snprint_string_list(char *buf, size_t size, const char * const list[],
+			    size_t selected)
+{
+	char *out = buf;
+	size_t i;
+
+	for (i = 0; list[i]; i++)
+		out += snprintf(out, buf + size - out,
+				i == selected ? "[%s] " : "%s ", list[i]);
+
+	out[-1] = '\n';
+	return out - buf;
+}
+EXPORT_SYMBOL_GPL(snprint_string_list);
+
+ssize_t read_string_list(const char *buf, const char * const list[])
+{
+	size_t i;
+	char *s, *d = kstrndup(buf, PAGE_SIZE - 1, GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	s = strim(d);
+
+	for (i = 0; list[i]; i++)
+		if (!strcmp(list[i], s))
+			break;
+
+	kfree(d);
+
+	if (!list[i])
+		return -EINVAL;
+
+	return i;
+}
+EXPORT_SYMBOL_GPL(read_string_list);
+
+bool is_zero(const char *p, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		if (p[i])
+			return false;
+	return true;
+}
+EXPORT_SYMBOL_GPL(is_zero);
+
+int parse_uuid(const char *s, char *uuid)
+{
+	size_t i, j, x;
+	memset(uuid, 0, 16);
+
+	for (i = 0, j = 0;
+	     i < strspn(s, "-0123456789:ABCDEFabcdef") && j < 32;
+	     i++) {
+		x = s[i] | 32;
+
+		switch (x) {
+		case '0'...'9':
+			x -= '0';
+			break;
+		case 'a'...'f':
+			x -= 'a' - 10;
+			break;
+		default:
+			continue;
+		}
+
+		if (!(j & 1))
+			x <<= 4;
+		uuid[j++ >> 1] |= x;
+	}
+	return i;
+}
+EXPORT_SYMBOL_GPL(parse_uuid);
+
+void time_stats_update(struct time_stats *stats, uint64_t start_time)
+{
+	uint64_t now		= local_clock();
+	uint64_t duration	= time_after64(now, start_time)
+		? now - start_time : 0;
+	uint64_t last		= time_after64(now, stats->last)
+		? 
now - stats->last : 0; + + stats->max_duration = max(stats->max_duration, duration); + + if (stats->last) { + ewma_add(stats->average_duration, duration, 8, 8); + + if (stats->average_frequency) + ewma_add(stats->average_frequency, last, 8, 8); + else + stats->average_frequency = last << 8; + } else { + stats->average_duration = duration << 8; + } + + stats->last = now ?: 1; +} +EXPORT_SYMBOL_GPL(time_stats_update); + +unsigned next_delay(struct ratelimit *d, uint64_t done) +{ + uint64_t now = local_clock(); + + d->next += div_u64(done, d->rate); + + return time_after64(d->next, now) + ? div_u64(d->next - now, NSEC_PER_SEC / HZ) + : 0; +} +EXPORT_SYMBOL_GPL(next_delay); + +void bio_map(struct bio *bio, void *base) +{ + size_t size = bio->bi_size; + struct bio_vec *bv = bio->bi_io_vec; + + BUG_ON(!bio->bi_size); + BUG_ON(bio->bi_vcnt); + + bv->bv_offset = base ? ((unsigned long) base) % PAGE_SIZE : 0; + goto start; + + for (; size; bio->bi_vcnt++, bv++) { + bv->bv_offset = 0; +start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, + size); + if (base) { + bv->bv_page = is_vmalloc_addr(base) + ? vmalloc_to_page(base) + : virt_to_page(base); + + base += bv->bv_len; + } + + size -= bv->bv_len; + } +} +EXPORT_SYMBOL_GPL(bio_map); + +int bio_alloc_pages(struct bio *bio, gfp_t gfp) +{ + int i; + struct bio_vec *bv; + + bio_for_each_segment(bv, bio, i) { + bv->bv_page = alloc_page(gfp); + if (!bv->bv_page) { + while (bv-- != bio->bi_io_vec + bio->bi_idx) + __free_page(bv->bv_page); + return -ENOMEM; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(bio_alloc_pages); + +/* + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any + * use permitted, subject to terms of PostgreSQL license; see.) + + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the + * usual sort of implementation. (See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) + * If we have no working 64-bit type, then fake it with two 32-bit registers. + * + * The present implementation is a normal (not "reflected", in Williams' + * terms) 64-bit CRC, using initial all-ones register contents and a final + * bit inversion. 
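+ * The update step is the standard byte-at-a-time table walk over that
+ * polynomial, i.e. for each input byte:
+ *
+ *	crc = crc_table[((crc >> 56) ^ *data++) & 0xff] ^ (crc << 8);
+ *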
The chosen polynomial is borrowed from the DLT1 spec + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): + * + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x + 1 +*/ + +static const uint64_t crc_table[256] = { + 0x0000000000000000, 0x42F0E1EBA9EA3693, 0x85E1C3D753D46D26, + 0xC711223CFA3E5BB5, 0x493366450E42ECDF, 0x0BC387AEA7A8DA4C, + 0xCCD2A5925D9681F9, 0x8E224479F47CB76A, 0x9266CC8A1C85D9BE, + 0xD0962D61B56FEF2D, 0x17870F5D4F51B498, 0x5577EEB6E6BB820B, + 0xDB55AACF12C73561, 0x99A54B24BB2D03F2, 0x5EB4691841135847, + 0x1C4488F3E8F96ED4, 0x663D78FF90E185EF, 0x24CD9914390BB37C, + 0xE3DCBB28C335E8C9, 0xA12C5AC36ADFDE5A, 0x2F0E1EBA9EA36930, + 0x6DFEFF5137495FA3, 0xAAEFDD6DCD770416, 0xE81F3C86649D3285, + 0xF45BB4758C645C51, 0xB6AB559E258E6AC2, 0x71BA77A2DFB03177, + 0x334A9649765A07E4, 0xBD68D2308226B08E, 0xFF9833DB2BCC861D, + 0x388911E7D1F2DDA8, 0x7A79F00C7818EB3B, 0xCC7AF1FF21C30BDE, + 0x8E8A101488293D4D, 0x499B3228721766F8, 0x0B6BD3C3DBFD506B, + 0x854997BA2F81E701, 0xC7B97651866BD192, 0x00A8546D7C558A27, + 0x4258B586D5BFBCB4, 0x5E1C3D753D46D260, 0x1CECDC9E94ACE4F3, + 0xDBFDFEA26E92BF46, 0x990D1F49C77889D5, 0x172F5B3033043EBF, + 0x55DFBADB9AEE082C, 0x92CE98E760D05399, 0xD03E790CC93A650A, + 0xAA478900B1228E31, 0xE8B768EB18C8B8A2, 0x2FA64AD7E2F6E317, + 0x6D56AB3C4B1CD584, 0xE374EF45BF6062EE, 0xA1840EAE168A547D, + 0x66952C92ECB40FC8, 0x2465CD79455E395B, 0x3821458AADA7578F, + 0x7AD1A461044D611C, 0xBDC0865DFE733AA9, 0xFF3067B657990C3A, + 0x711223CFA3E5BB50, 0x33E2C2240A0F8DC3, 0xF4F3E018F031D676, + 0xB60301F359DBE0E5, 0xDA050215EA6C212F, 0x98F5E3FE438617BC, + 0x5FE4C1C2B9B84C09, 0x1D14202910527A9A, 0x93366450E42ECDF0, + 0xD1C685BB4DC4FB63, 0x16D7A787B7FAA0D6, 0x5427466C1E109645, + 0x4863CE9FF6E9F891, 0x0A932F745F03CE02, 0xCD820D48A53D95B7, + 0x8F72ECA30CD7A324, 0x0150A8DAF8AB144E, 0x43A04931514122DD, + 0x84B16B0DAB7F7968, 0xC6418AE602954FFB, 0xBC387AEA7A8DA4C0, + 0xFEC89B01D3679253, 0x39D9B93D2959C9E6, 0x7B2958D680B3FF75, + 0xF50B1CAF74CF481F, 0xB7FBFD44DD257E8C, 0x70EADF78271B2539, + 0x321A3E938EF113AA, 0x2E5EB66066087D7E, 0x6CAE578BCFE24BED, + 0xABBF75B735DC1058, 0xE94F945C9C3626CB, 0x676DD025684A91A1, + 0x259D31CEC1A0A732, 0xE28C13F23B9EFC87, 0xA07CF2199274CA14, + 0x167FF3EACBAF2AF1, 0x548F120162451C62, 0x939E303D987B47D7, + 0xD16ED1D631917144, 0x5F4C95AFC5EDC62E, 0x1DBC74446C07F0BD, + 0xDAAD56789639AB08, 0x985DB7933FD39D9B, 0x84193F60D72AF34F, + 0xC6E9DE8B7EC0C5DC, 0x01F8FCB784FE9E69, 0x43081D5C2D14A8FA, + 0xCD2A5925D9681F90, 0x8FDAB8CE70822903, 0x48CB9AF28ABC72B6, + 0x0A3B7B1923564425, 0x70428B155B4EAF1E, 0x32B26AFEF2A4998D, + 0xF5A348C2089AC238, 0xB753A929A170F4AB, 0x3971ED50550C43C1, + 0x7B810CBBFCE67552, 0xBC902E8706D82EE7, 0xFE60CF6CAF321874, + 0xE224479F47CB76A0, 0xA0D4A674EE214033, 0x67C58448141F1B86, + 0x253565A3BDF52D15, 0xAB1721DA49899A7F, 0xE9E7C031E063ACEC, + 0x2EF6E20D1A5DF759, 0x6C0603E6B3B7C1CA, 0xF6FAE5C07D3274CD, + 0xB40A042BD4D8425E, 0x731B26172EE619EB, 0x31EBC7FC870C2F78, + 0xBFC9838573709812, 0xFD39626EDA9AAE81, 0x3A28405220A4F534, + 0x78D8A1B9894EC3A7, 0x649C294A61B7AD73, 0x266CC8A1C85D9BE0, + 0xE17DEA9D3263C055, 0xA38D0B769B89F6C6, 0x2DAF4F0F6FF541AC, + 0x6F5FAEE4C61F773F, 0xA84E8CD83C212C8A, 0xEABE6D3395CB1A19, + 0x90C79D3FEDD3F122, 0xD2377CD44439C7B1, 0x15265EE8BE079C04, + 0x57D6BF0317EDAA97, 0xD9F4FB7AE3911DFD, 0x9B041A914A7B2B6E, + 0x5C1538ADB04570DB, 
0x1EE5D94619AF4648, 0x02A151B5F156289C, + 0x4051B05E58BC1E0F, 0x87409262A28245BA, 0xC5B073890B687329, + 0x4B9237F0FF14C443, 0x0962D61B56FEF2D0, 0xCE73F427ACC0A965, + 0x8C8315CC052A9FF6, 0x3A80143F5CF17F13, 0x7870F5D4F51B4980, + 0xBF61D7E80F251235, 0xFD913603A6CF24A6, 0x73B3727A52B393CC, + 0x31439391FB59A55F, 0xF652B1AD0167FEEA, 0xB4A25046A88DC879, + 0xA8E6D8B54074A6AD, 0xEA16395EE99E903E, 0x2D071B6213A0CB8B, + 0x6FF7FA89BA4AFD18, 0xE1D5BEF04E364A72, 0xA3255F1BE7DC7CE1, + 0x64347D271DE22754, 0x26C49CCCB40811C7, 0x5CBD6CC0CC10FAFC, + 0x1E4D8D2B65FACC6F, 0xD95CAF179FC497DA, 0x9BAC4EFC362EA149, + 0x158E0A85C2521623, 0x577EEB6E6BB820B0, 0x906FC95291867B05, + 0xD29F28B9386C4D96, 0xCEDBA04AD0952342, 0x8C2B41A1797F15D1, + 0x4B3A639D83414E64, 0x09CA82762AAB78F7, 0x87E8C60FDED7CF9D, + 0xC51827E4773DF90E, 0x020905D88D03A2BB, 0x40F9E43324E99428, + 0x2CFFE7D5975E55E2, 0x6E0F063E3EB46371, 0xA91E2402C48A38C4, + 0xEBEEC5E96D600E57, 0x65CC8190991CB93D, 0x273C607B30F68FAE, + 0xE02D4247CAC8D41B, 0xA2DDA3AC6322E288, 0xBE992B5F8BDB8C5C, + 0xFC69CAB42231BACF, 0x3B78E888D80FE17A, 0x7988096371E5D7E9, + 0xF7AA4D1A85996083, 0xB55AACF12C735610, 0x724B8ECDD64D0DA5, + 0x30BB6F267FA73B36, 0x4AC29F2A07BFD00D, 0x08327EC1AE55E69E, + 0xCF235CFD546BBD2B, 0x8DD3BD16FD818BB8, 0x03F1F96F09FD3CD2, + 0x41011884A0170A41, 0x86103AB85A2951F4, 0xC4E0DB53F3C36767, + 0xD8A453A01B3A09B3, 0x9A54B24BB2D03F20, 0x5D45907748EE6495, + 0x1FB5719CE1045206, 0x919735E51578E56C, 0xD367D40EBC92D3FF, + 0x1476F63246AC884A, 0x568617D9EF46BED9, 0xE085162AB69D5E3C, + 0xA275F7C11F7768AF, 0x6564D5FDE549331A, 0x279434164CA30589, + 0xA9B6706FB8DFB2E3, 0xEB46918411358470, 0x2C57B3B8EB0BDFC5, + 0x6EA7525342E1E956, 0x72E3DAA0AA188782, 0x30133B4B03F2B111, + 0xF7021977F9CCEAA4, 0xB5F2F89C5026DC37, 0x3BD0BCE5A45A6B5D, + 0x79205D0E0DB05DCE, 0xBE317F32F78E067B, 0xFCC19ED95E6430E8, + 0x86B86ED5267CDBD3, 0xC4488F3E8F96ED40, 0x0359AD0275A8B6F5, + 0x41A94CE9DC428066, 0xCF8B0890283E370C, 0x8D7BE97B81D4019F, + 0x4A6ACB477BEA5A2A, 0x089A2AACD2006CB9, 0x14DEA25F3AF9026D, + 0x562E43B4931334FE, 0x913F6188692D6F4B, 0xD3CF8063C0C759D8, + 0x5DEDC41A34BBEEB2, 0x1F1D25F19D51D821, 0xD80C07CD676F8394, + 0x9AFCE626CE85B507 +}; + +uint64_t crc64_update(uint64_t crc, const void *_data, size_t len) +{ + const unsigned char *data = _data; + + while (len--) { + int i = ((int) (crc >> 56) ^ *data++) & 0xFF; + crc = crc_table[i] ^ (crc << 8); + } + + return crc; +} +EXPORT_SYMBOL(crc64_update); + +uint64_t crc64(const void *data, size_t len) +{ + uint64_t crc = 0xffffffffffffffff; + + crc = crc64_update(crc, data, len); + + return crc ^ 0xffffffffffffffff; +} +EXPORT_SYMBOL(crc64); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h new file mode 100644 index 000000000000..56705fdcc149 --- /dev/null +++ b/drivers/md/bcache/util.h @@ -0,0 +1,589 @@ + +#ifndef _BCACHE_UTIL_H +#define _BCACHE_UTIL_H + +#include +#include +#include +#include +#include +#include + +#include "closure.h" + +#define PAGE_SECTORS (PAGE_SIZE / 512) + +struct closure; + +#include + +#ifdef CONFIG_BCACHE_EDEBUG + +#define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) +#define atomic_inc_bug(v, i) BUG_ON(atomic_inc_return(v) <= i) + +#else /* EDEBUG */ + +#define atomic_dec_bug(v) atomic_dec(v) +#define atomic_inc_bug(v, i) atomic_inc(v) + +#endif + +#define BITMASK(name, type, field, offset, size) \ +static inline uint64_t name(const type *k) \ +{ return (k->field >> offset) & ~(((uint64_t) ~0) << size); } \ + \ +static inline void SET_##name(type *k, uint64_t v) \ +{ \ + k->field &= ~(~((uint64_t) 
~0 << size) << offset); \ + k->field |= v << offset; \ +} + +#define DECLARE_HEAP(type, name) \ + struct { \ + size_t size, used; \ + type *data; \ + } name + +#define init_heap(heap, _size, gfp) \ +({ \ + size_t _bytes; \ + (heap)->used = 0; \ + (heap)->size = (_size); \ + _bytes = (heap)->size * sizeof(*(heap)->data); \ + (heap)->data = NULL; \ + if (_bytes < KMALLOC_MAX_SIZE) \ + (heap)->data = kmalloc(_bytes, (gfp)); \ + if ((!(heap)->data) && ((gfp) & GFP_KERNEL)) \ + (heap)->data = vmalloc(_bytes); \ + (heap)->data; \ +}) + +#define free_heap(heap) \ +do { \ + if (is_vmalloc_addr((heap)->data)) \ + vfree((heap)->data); \ + else \ + kfree((heap)->data); \ + (heap)->data = NULL; \ +} while (0) + +#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) + +#define heap_sift(h, i, cmp) \ +do { \ + size_t _r, _j = i; \ + \ + for (; _j * 2 + 1 < (h)->used; _j = _r) { \ + _r = _j * 2 + 1; \ + if (_r + 1 < (h)->used && \ + cmp((h)->data[_r], (h)->data[_r + 1])) \ + _r++; \ + \ + if (cmp((h)->data[_r], (h)->data[_j])) \ + break; \ + heap_swap(h, _r, _j); \ + } \ +} while (0) + +#define heap_sift_down(h, i, cmp) \ +do { \ + while (i) { \ + size_t p = (i - 1) / 2; \ + if (cmp((h)->data[i], (h)->data[p])) \ + break; \ + heap_swap(h, i, p); \ + i = p; \ + } \ +} while (0) + +#define heap_add(h, d, cmp) \ +({ \ + bool _r = !heap_full(h); \ + if (_r) { \ + size_t _i = (h)->used++; \ + (h)->data[_i] = d; \ + \ + heap_sift_down(h, _i, cmp); \ + heap_sift(h, _i, cmp); \ + } \ + _r; \ +}) + +#define heap_pop(h, d, cmp) \ +({ \ + bool _r = (h)->used; \ + if (_r) { \ + (d) = (h)->data[0]; \ + (h)->used--; \ + heap_swap(h, 0, (h)->used); \ + heap_sift(h, 0, cmp); \ + } \ + _r; \ +}) + +#define heap_peek(h) ((h)->size ? (h)->data[0] : NULL) + +#define heap_full(h) ((h)->used == (h)->size) + +#define DECLARE_FIFO(type, name) \ + struct { \ + size_t front, back, size, mask; \ + type *data; \ + } name + +#define fifo_for_each(c, fifo, iter) \ + for (iter = (fifo)->front; \ + c = (fifo)->data[iter], iter != (fifo)->back; \ + iter = (iter + 1) & (fifo)->mask) + +#define __init_fifo(fifo, gfp) \ +({ \ + size_t _allocated_size, _bytes; \ + BUG_ON(!(fifo)->size); \ + \ + _allocated_size = roundup_pow_of_two((fifo)->size + 1); \ + _bytes = _allocated_size * sizeof(*(fifo)->data); \ + \ + (fifo)->mask = _allocated_size - 1; \ + (fifo)->front = (fifo)->back = 0; \ + (fifo)->data = NULL; \ + \ + if (_bytes < KMALLOC_MAX_SIZE) \ + (fifo)->data = kmalloc(_bytes, (gfp)); \ + if ((!(fifo)->data) && ((gfp) & GFP_KERNEL)) \ + (fifo)->data = vmalloc(_bytes); \ + (fifo)->data; \ +}) + +#define init_fifo_exact(fifo, _size, gfp) \ +({ \ + (fifo)->size = (_size); \ + __init_fifo(fifo, gfp); \ +}) + +#define init_fifo(fifo, _size, gfp) \ +({ \ + (fifo)->size = (_size); \ + if ((fifo)->size > 4) \ + (fifo)->size = roundup_pow_of_two((fifo)->size) - 1; \ + __init_fifo(fifo, gfp); \ +}) + +#define free_fifo(fifo) \ +do { \ + if (is_vmalloc_addr((fifo)->data)) \ + vfree((fifo)->data); \ + else \ + kfree((fifo)->data); \ + (fifo)->data = NULL; \ +} while (0) + +#define fifo_used(fifo) (((fifo)->back - (fifo)->front) & (fifo)->mask) +#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo)) + +#define fifo_empty(fifo) (!fifo_used(fifo)) +#define fifo_full(fifo) (!fifo_free(fifo)) + +#define fifo_front(fifo) ((fifo)->data[(fifo)->front]) +#define fifo_back(fifo) \ + ((fifo)->data[((fifo)->back - 1) & (fifo)->mask]) + +#define fifo_idx(fifo, p) (((p) - &fifo_front(fifo)) & (fifo)->mask) + +#define fifo_push_back(fifo, i) \ +({ \ + bool 
_r = !fifo_full((fifo));					\
+	if (_r) {							\
+		(fifo)->data[(fifo)->back++] = (i);			\
+		(fifo)->back &= (fifo)->mask;				\
+	}								\
+	_r;								\
+})
+
+#define fifo_pop_front(fifo, i)						\
+({									\
+	bool _r = !fifo_empty((fifo));					\
+	if (_r) {							\
+		(i) = (fifo)->data[(fifo)->front++];			\
+		(fifo)->front &= (fifo)->mask;				\
+	}								\
+	_r;								\
+})
+
+#define fifo_push_front(fifo, i)					\
+({									\
+	bool _r = !fifo_full((fifo));					\
+	if (_r) {							\
+		--(fifo)->front;					\
+		(fifo)->front &= (fifo)->mask;				\
+		(fifo)->data[(fifo)->front] = (i);			\
+	}								\
+	_r;								\
+})
+
+#define fifo_pop_back(fifo, i)						\
+({									\
+	bool _r = !fifo_empty((fifo));					\
+	if (_r) {							\
+		--(fifo)->back;						\
+		(fifo)->back &= (fifo)->mask;				\
+		(i) = (fifo)->data[(fifo)->back];			\
+	}								\
+	_r;								\
+})
+
+#define fifo_push(fifo, i)	fifo_push_back(fifo, (i))
+#define fifo_pop(fifo, i)	fifo_pop_front(fifo, (i))
+
+#define fifo_swap(l, r)							\
+do {									\
+	swap((l)->front, (r)->front);					\
+	swap((l)->back, (r)->back);					\
+	swap((l)->size, (r)->size);					\
+	swap((l)->mask, (r)->mask);					\
+	swap((l)->data, (r)->data);					\
+} while (0)
+
+#define fifo_move(dest, src)						\
+do {									\
+	typeof(*((dest)->data)) _t;					\
+	while (!fifo_full(dest) &&					\
+	       fifo_pop(src, _t))					\
+		fifo_push(dest, _t);					\
+} while (0)
+
+/*
+ * Simple array based allocator - preallocates a number of elements and you can
+ * never allocate more than that, also has no locking.
+ *
+ * Handy because if you know you only need a fixed number of elements you don't
+ * have to worry about memory allocation failure, and sometimes a mempool isn't
+ * what you want.
+ *
+ * We treat the free elements as entries in a singly linked list, and the
+ * freelist as a stack - allocating and freeing push and pop off the freelist.
+ */
+
+#define DECLARE_ARRAY_ALLOCATOR(type, name, size)			\
+	struct {							\
+		type	*freelist;					\
+		type	data[size];					\
+	} name
+
+#define array_alloc(array)						\
+({									\
+	typeof((array)->freelist) _ret = (array)->freelist;		\
+									\
+	if (_ret)							\
+		(array)->freelist = *((typeof((array)->freelist) *) _ret);\
+									\
+	_ret;								\
+})
+
+#define array_free(array, ptr)						\
+do {									\
+	typeof((array)->freelist) _ptr = ptr;				\
+									\
+	*((typeof((array)->freelist) *) _ptr) = (array)->freelist;	\
+	(array)->freelist = _ptr;					\
+} while (0)
+
+#define array_allocator_init(array)					\
+do {									\
+	typeof((array)->freelist) _i;					\
+									\
+	BUILD_BUG_ON(sizeof((array)->data[0]) < sizeof(void *));	\
+	(array)->freelist = NULL;					\
+									\
+	for (_i = (array)->data;					\
+	     _i < (array)->data + ARRAY_SIZE((array)->data);		\
+	     _i++)							\
+		array_free(array, _i);					\
+} while (0)
+
+#define array_freelist_empty(array)	((array)->freelist == NULL)
+
+#define ANYSINT_MAX(t)							\
+	((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
+
+int strtoint_h(const char *, int *);
+int strtouint_h(const char *, unsigned int *);
+int strtoll_h(const char *, long long *);
+int strtoull_h(const char *, unsigned long long *);
+
+static inline int strtol_h(const char *cp, long *res)
+{
+#if BITS_PER_LONG == 32
+	return strtoint_h(cp, (int *) res);
+#else
+	return strtoll_h(cp, (long long *) res);
+#endif
+}
+
+static inline int strtoul_h(const char *cp, long *res)
+{
+#if BITS_PER_LONG == 32
+	return strtouint_h(cp, (unsigned int *) res);
+#else
+	return strtoull_h(cp, (unsigned long long *) res);
+#endif
+}
+
+#define strtoi_h(cp, res)						\
+	(__builtin_types_compatible_p(typeof(*res), int)		\
+	? strtoint_h(cp, (void *) res)					\
+	: __builtin_types_compatible_p(typeof(*res), long)		\
+	? strtol_h(cp, (void *) res)					\
+	: __builtin_types_compatible_p(typeof(*res), long long)	\
+	? 
strtoll_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), unsigned int) \ + ? strtouint_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), unsigned long) \ + ? strtoul_h(cp, (void *) res) \ + : __builtin_types_compatible_p(typeof(*res), unsigned long long)\ + ? strtoull_h(cp, (void *) res) : -EINVAL) + +#define strtoul_safe(cp, var) \ +({ \ + unsigned long _v; \ + int _r = kstrtoul(cp, 10, &_v); \ + if (!_r) \ + var = _v; \ + _r; \ +}) + +#define strtoul_safe_clamp(cp, var, min, max) \ +({ \ + unsigned long _v; \ + int _r = kstrtoul(cp, 10, &_v); \ + if (!_r) \ + var = clamp_t(typeof(var), _v, min, max); \ + _r; \ +}) + +#define snprint(buf, size, var) \ + snprintf(buf, size, \ + __builtin_types_compatible_p(typeof(var), int) \ + ? "%i\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned) \ + ? "%u\n" : \ + __builtin_types_compatible_p(typeof(var), long) \ + ? "%li\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned long)\ + ? "%lu\n" : \ + __builtin_types_compatible_p(typeof(var), int64_t) \ + ? "%lli\n" : \ + __builtin_types_compatible_p(typeof(var), uint64_t) \ + ? "%llu\n" : \ + __builtin_types_compatible_p(typeof(var), const char *) \ + ? "%s\n" : "%i\n", var) + +ssize_t hprint(char *buf, int64_t v); + +bool is_zero(const char *p, size_t n); +int parse_uuid(const char *s, char *uuid); + +ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], + size_t selected); + +ssize_t read_string_list(const char *buf, const char * const list[]); + +struct time_stats { + /* + * all fields are in nanoseconds, averages are ewmas stored left shifted + * by 8 + */ + uint64_t max_duration; + uint64_t average_duration; + uint64_t average_frequency; + uint64_t last; +}; + +void time_stats_update(struct time_stats *stats, uint64_t time); + +#define NSEC_PER_ns 1L +#define NSEC_PER_us NSEC_PER_USEC +#define NSEC_PER_ms NSEC_PER_MSEC +#define NSEC_PER_sec NSEC_PER_SEC + +#define __print_time_stat(stats, name, stat, units) \ + sysfs_print(name ## _ ## stat ## _ ## units, \ + div_u64((stats)->stat >> 8, NSEC_PER_ ## units)) + +#define sysfs_print_time_stats(stats, name, \ + frequency_units, \ + duration_units) \ +do { \ + __print_time_stat(stats, name, \ + average_frequency, frequency_units); \ + __print_time_stat(stats, name, \ + average_duration, duration_units); \ + __print_time_stat(stats, name, \ + max_duration, duration_units); \ + \ + sysfs_print(name ## _last_ ## frequency_units, (stats)->last \ + ? 
div_s64(local_clock() - (stats)->last, \ + NSEC_PER_ ## frequency_units) \ + : -1LL); \ +} while (0) + +#define sysfs_time_stats_attribute(name, \ + frequency_units, \ + duration_units) \ +read_attribute(name ## _average_frequency_ ## frequency_units); \ +read_attribute(name ## _average_duration_ ## duration_units); \ +read_attribute(name ## _max_duration_ ## duration_units); \ +read_attribute(name ## _last_ ## frequency_units) + +#define sysfs_time_stats_attribute_list(name, \ + frequency_units, \ + duration_units) \ +&sysfs_ ## name ## _average_frequency_ ## frequency_units, \ +&sysfs_ ## name ## _average_duration_ ## duration_units, \ +&sysfs_ ## name ## _max_duration_ ## duration_units, \ +&sysfs_ ## name ## _last_ ## frequency_units, + +#define ewma_add(ewma, val, weight, factor) \ +({ \ + (ewma) *= (weight) - 1; \ + (ewma) += (val) << factor; \ + (ewma) /= (weight); \ + (ewma) >> factor; \ +}) + +struct ratelimit { + uint64_t next; + unsigned rate; +}; + +static inline void ratelimit_reset(struct ratelimit *d) +{ + d->next = local_clock(); +} + +unsigned next_delay(struct ratelimit *d, uint64_t done); + +#define __DIV_SAFE(n, d, zero) \ +({ \ + typeof(n) _n = (n); \ + typeof(d) _d = (d); \ + _d ? _n / _d : zero; \ +}) + +#define DIV_SAFE(n, d) __DIV_SAFE(n, d, 0) + +#define container_of_or_null(ptr, type, member) \ +({ \ + typeof(ptr) _ptr = ptr; \ + _ptr ? container_of(_ptr, type, member) : NULL; \ +}) + +#define RB_INSERT(root, new, member, cmp) \ +({ \ + __label__ dup; \ + struct rb_node **n = &(root)->rb_node, *parent = NULL; \ + typeof(new) this; \ + int res, ret = -1; \ + \ + while (*n) { \ + parent = *n; \ + this = container_of(*n, typeof(*(new)), member); \ + res = cmp(new, this); \ + if (!res) \ + goto dup; \ + n = res < 0 \ + ? &(*n)->rb_left \ + : &(*n)->rb_right; \ + } \ + \ + rb_link_node(&(new)->member, parent, n); \ + rb_insert_color(&(new)->member, root); \ + ret = 0; \ +dup: \ + ret; \ +}) + +#define RB_SEARCH(root, search, member, cmp) \ +({ \ + struct rb_node *n = (root)->rb_node; \ + typeof(&(search)) this, ret = NULL; \ + int res; \ + \ + while (n) { \ + this = container_of(n, typeof(search), member); \ + res = cmp(&(search), this); \ + if (!res) { \ + ret = this; \ + break; \ + } \ + n = res < 0 \ + ? 
n->rb_left \ + : n->rb_right; \ + } \ + ret; \ +}) + +#define RB_GREATER(root, search, member, cmp) \ +({ \ + struct rb_node *n = (root)->rb_node; \ + typeof(&(search)) this, ret = NULL; \ + int res; \ + \ + while (n) { \ + this = container_of(n, typeof(search), member); \ + res = cmp(&(search), this); \ + if (res < 0) { \ + ret = this; \ + n = n->rb_left; \ + } else \ + n = n->rb_right; \ + } \ + ret; \ +}) + +#define RB_FIRST(root, type, member) \ + container_of_or_null(rb_first(root), type, member) + +#define RB_LAST(root, type, member) \ + container_of_or_null(rb_last(root), type, member) + +#define RB_NEXT(ptr, member) \ + container_of_or_null(rb_next(&(ptr)->member), typeof(*ptr), member) + +#define RB_PREV(ptr, member) \ + container_of_or_null(rb_prev(&(ptr)->member), typeof(*ptr), member) + +/* Does linear interpolation between powers of two */ +static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) +{ + unsigned fract = x & ~(~0 << fract_bits); + + x >>= fract_bits; + x = 1 << x; + x += (x * fract) >> fract_bits; + + return x; +} + +#define bio_end(bio) ((bio)->bi_sector + bio_sectors(bio)) + +void bio_map(struct bio *bio, void *base); + +int bio_alloc_pages(struct bio *bio, gfp_t gfp); + +static inline sector_t bdev_sectors(struct block_device *bdev) +{ + return bdev->bd_inode->i_size >> 9; +} + +#define closure_bio_submit(bio, cl, dev) \ +do { \ + closure_get(cl); \ + bch_generic_make_request(bio, &(dev)->bio_split_hook); \ +} while (0) + +uint64_t crc64_update(uint64_t, const void *, size_t); +uint64_t crc64(const void *, size_t); + +#endif /* _BCACHE_UTIL_H */ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c new file mode 100644 index 000000000000..a80ee5373fd8 --- /dev/null +++ b/drivers/md/bcache/writeback.c @@ -0,0 +1,414 @@ +/* + * background writeback - scan btree for dirty data and write it to the backing + * device + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. 
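+ *
+ * Flow: refill_dirty() scans the btree for this device's dirty keys,
+ * read_dirty() pulls the data out of the cache, write_dirty() writes it
+ * to the backing device, and write_dirty_finish() rewrites the key as
+ * clean. The write rate is set by a simple PD controller that aims for
+ * writeback_percent of the cache to be dirty.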
+ */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" + +static struct workqueue_struct *dirty_wq; + +static void read_dirty(struct closure *); + +struct dirty_io { + struct closure cl; + struct cached_dev *dc; + struct bio bio; +}; + +/* Rate limiting */ + +static void __update_writeback_rate(struct cached_dev *dc) +{ + struct cache_set *c = dc->disk.c; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_dirty_target = + div_u64(cache_sectors * dc->writeback_percent, 100); + + int64_t target = div64_u64(cache_dirty_target * bdev_sectors(dc->bdev), + c->cached_dev_sectors); + + /* PD controller */ + + int change = 0; + int64_t error; + int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); + int64_t derivative = dirty - dc->disk.sectors_dirty_last; + + dc->disk.sectors_dirty_last = dirty; + + derivative *= dc->writeback_rate_d_term; + derivative = clamp(derivative, -dirty, dirty); + + derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative, + dc->writeback_rate_d_smooth, 0); + + /* Avoid divide by zero */ + if (!target) + goto out; + + error = div64_s64((dirty + derivative - target) << 8, target); + + change = div_s64((dc->writeback_rate.rate * error) >> 8, + dc->writeback_rate_p_term_inverse); + + /* Don't increase writeback rate if the device isn't keeping up */ + if (change > 0 && + time_after64(local_clock(), + dc->writeback_rate.next + 10 * NSEC_PER_MSEC)) + change = 0; + + dc->writeback_rate.rate = + clamp_t(int64_t, dc->writeback_rate.rate + change, + 1, NSEC_PER_MSEC); +out: + dc->writeback_rate_derivative = derivative; + dc->writeback_rate_change = change; + dc->writeback_rate_target = target; + + schedule_delayed_work(&dc->writeback_rate_update, + dc->writeback_rate_update_seconds * HZ); +} + +static void update_writeback_rate(struct work_struct *work) +{ + struct cached_dev *dc = container_of(to_delayed_work(work), + struct cached_dev, + writeback_rate_update); + + down_read(&dc->writeback_lock); + + if (atomic_read(&dc->has_dirty) && + dc->writeback_percent) + __update_writeback_rate(dc); + + up_read(&dc->writeback_lock); +} + +static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) +{ + if (atomic_read(&dc->disk.detaching) || + !dc->writeback_percent) + return 0; + + return next_delay(&dc->writeback_rate, sectors * 10000000ULL); +} + +/* Background writeback */ + +static bool dirty_pred(struct keybuf *buf, struct bkey *k) +{ + return KEY_DIRTY(k); +} + +static void dirty_init(struct keybuf_key *w) +{ + struct dirty_io *io = w->private; + struct bio *bio = &io->bio; + + bio_init(bio); + if (!io->dc->writeback_percent) + bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); + + bio->bi_size = KEY_SIZE(&w->key) << 9; + bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS); + bio->bi_private = w; + bio->bi_io_vec = bio->bi_inline_vecs; + bio_map(bio, NULL); +} + +static void refill_dirty(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, + writeback.cl); + struct keybuf *buf = &dc->writeback_keys; + bool searched_from_start = false; + struct bkey end = MAX_KEY; + SET_KEY_INODE(&end, dc->disk.id); + + if (!atomic_read(&dc->disk.detaching) && + !dc->writeback_running) + closure_return(cl); + + down_write(&dc->writeback_lock); + + if (!atomic_read(&dc->has_dirty)) { + SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); + bch_write_bdev_super(dc, NULL); + + up_write(&dc->writeback_lock); + closure_return(cl); + } + + if (bkey_cmp(&buf->last_scanned, &end) >= 0) { + 
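+		/*
+		 * Scanned past the end of this device's keyspace: wrap
+		 * last_scanned back to the start, and note that we did so
+		 * hitting the end again means we've seen the whole btree.
+		 */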
buf->last_scanned = KEY(dc->disk.id, 0, 0); + searched_from_start = true; + } + + bch_refill_keybuf(dc->disk.c, buf, &end); + + if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { + /* Searched the entire btree - delay awhile */ + + if (RB_EMPTY_ROOT(&buf->keys)) { + atomic_set(&dc->has_dirty, 0); + cached_dev_put(dc); + } + + if (!atomic_read(&dc->disk.detaching)) + closure_delay(&dc->writeback, dc->writeback_delay * HZ); + } + + up_write(&dc->writeback_lock); + + ratelimit_reset(&dc->writeback_rate); + + /* Punt to workqueue only so we don't recurse and blow the stack */ + continue_at(cl, read_dirty, dirty_wq); +} + +void bch_writeback_queue(struct cached_dev *dc) +{ + if (closure_trylock(&dc->writeback.cl, &dc->disk.cl)) { + if (!atomic_read(&dc->disk.detaching)) + closure_delay(&dc->writeback, dc->writeback_delay * HZ); + + continue_at(&dc->writeback.cl, refill_dirty, dirty_wq); + } +} + +void bch_writeback_add(struct cached_dev *dc, unsigned sectors) +{ + atomic_long_add(sectors, &dc->disk.sectors_dirty); + + if (!atomic_read(&dc->has_dirty) && + !atomic_xchg(&dc->has_dirty, 1)) { + atomic_inc(&dc->count); + + if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) { + SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY); + /* XXX: should do this synchronously */ + bch_write_bdev_super(dc, NULL); + } + + bch_writeback_queue(dc); + + if (dc->writeback_percent) + schedule_delayed_work(&dc->writeback_rate_update, + dc->writeback_rate_update_seconds * HZ); + } +} + +/* Background writeback - IO loop */ + +static void dirty_io_destructor(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + kfree(io); +} + +static void write_dirty_finish(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + struct keybuf_key *w = io->bio.bi_private; + struct cached_dev *dc = io->dc; + struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt); + + while (bv-- != io->bio.bi_io_vec) + __free_page(bv->bv_page); + + /* This is kind of a dumb way of signalling errors. */ + if (KEY_DIRTY(&w->key)) { + unsigned i; + struct btree_op op; + bch_btree_op_init_stack(&op); + + op.type = BTREE_REPLACE; + bkey_copy(&op.replace, &w->key); + + SET_KEY_DIRTY(&w->key, false); + bch_keylist_add(&op.keys, &w->key); + + for (i = 0; i < KEY_PTRS(&w->key); i++) + atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); + + pr_debug("clearing %s", pkey(&w->key)); + bch_btree_insert(&op, dc->disk.c); + closure_sync(&op.cl); + + atomic_long_inc(op.insert_collision + ? 
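+			/* a collision means the key was changed (e.g.
+			 * redirtied) while we were writing it back, so the
+			 * clean version we tried to insert lost the race */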
&dc->disk.c->writeback_keys_failed + : &dc->disk.c->writeback_keys_done); + } + + bch_keybuf_del(&dc->writeback_keys, w); + atomic_dec_bug(&dc->in_flight); + + closure_wake_up(&dc->writeback_wait); + + closure_return_with_destructor(cl, dirty_io_destructor); +} + +static void dirty_endio(struct bio *bio, int error) +{ + struct keybuf_key *w = bio->bi_private; + struct dirty_io *io = w->private; + + if (error) + SET_KEY_DIRTY(&w->key, false); + + closure_put(&io->cl); +} + +static void write_dirty(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + struct keybuf_key *w = io->bio.bi_private; + + dirty_init(w); + io->bio.bi_rw = WRITE; + io->bio.bi_sector = KEY_START(&w->key); + io->bio.bi_bdev = io->dc->bdev; + io->bio.bi_end_io = dirty_endio; + + trace_bcache_write_dirty(&io->bio); + closure_bio_submit(&io->bio, cl, &io->dc->disk); + + continue_at(cl, write_dirty_finish, dirty_wq); +} + +static void read_dirty_endio(struct bio *bio, int error) +{ + struct keybuf_key *w = bio->bi_private; + struct dirty_io *io = w->private; + + bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), + error, "reading dirty data from cache"); + + dirty_endio(bio, error); +} + +static void read_dirty_submit(struct closure *cl) +{ + struct dirty_io *io = container_of(cl, struct dirty_io, cl); + + trace_bcache_read_dirty(&io->bio); + closure_bio_submit(&io->bio, cl, &io->dc->disk); + + continue_at(cl, write_dirty, dirty_wq); +} + +static void read_dirty(struct closure *cl) +{ + struct cached_dev *dc = container_of(cl, struct cached_dev, + writeback.cl); + unsigned delay = writeback_delay(dc, 0); + struct keybuf_key *w; + struct dirty_io *io; + + /* + * XXX: if we error, background writeback just spins. Should use some + * mempools. + */ + + while (1) { + w = bch_keybuf_next(&dc->writeback_keys); + if (!w) + break; + + BUG_ON(ptr_stale(dc->disk.c, &w->key, 0)); + + if (delay > 0 && + (KEY_START(&w->key) != dc->last_read || + jiffies_to_msecs(delay) > 50)) { + w->private = NULL; + + closure_delay(&dc->writeback, delay); + continue_at(cl, read_dirty, dirty_wq); + } + + dc->last_read = KEY_OFFSET(&w->key); + + io = kzalloc(sizeof(struct dirty_io) + sizeof(struct bio_vec) + * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), + GFP_KERNEL); + if (!io) + goto err; + + w->private = io; + io->dc = dc; + + dirty_init(w); + io->bio.bi_sector = PTR_OFFSET(&w->key, 0); + io->bio.bi_bdev = PTR_CACHE(dc->disk.c, + &w->key, 0)->bdev; + io->bio.bi_rw = READ; + io->bio.bi_end_io = read_dirty_endio; + + if (bio_alloc_pages(&io->bio, GFP_KERNEL)) + goto err_free; + + pr_debug("%s", pkey(&w->key)); + + closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); + + delay = writeback_delay(dc, KEY_SIZE(&w->key)); + + atomic_inc(&dc->in_flight); + + if (!closure_wait_event(&dc->writeback_wait, cl, + atomic_read(&dc->in_flight) < 64)) + continue_at(cl, read_dirty, dirty_wq); + } + + if (0) { +err_free: + kfree(w->private); +err: + bch_keybuf_del(&dc->writeback_keys, w); + } + + refill_dirty(cl); +} + +void bch_writeback_init_cached_dev(struct cached_dev *dc) +{ + closure_init_unlocked(&dc->writeback); + init_rwsem(&dc->writeback_lock); + + bch_keybuf_init(&dc->writeback_keys, dirty_pred); + + dc->writeback_metadata = true; + dc->writeback_running = true; + dc->writeback_percent = 10; + dc->writeback_delay = 30; + dc->writeback_rate.rate = 1024; + + dc->writeback_rate_update_seconds = 30; + dc->writeback_rate_d_term = 16; + dc->writeback_rate_p_term_inverse = 64; + dc->writeback_rate_d_smooth = 8; + 
+ INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); + schedule_delayed_work(&dc->writeback_rate_update, + dc->writeback_rate_update_seconds * HZ); +} + +void bch_writeback_exit(void) +{ + if (dirty_wq) + destroy_workqueue(dirty_wq); +} + +int __init bch_writeback_init(void) +{ + dirty_wq = create_singlethread_workqueue("bcache_writeback"); + if (!dirty_wq) + return -ENOMEM; + + return 0; +} diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index f204a7a9cf38..6e7ec64b69ab 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -78,3 +78,9 @@ SUBSYS(hugetlb) #endif /* */ + +#ifdef CONFIG_CGROUP_BCACHE +SUBSYS(bcache) +#endif + +/* */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..a8482d063bc3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1576,6 +1576,10 @@ struct task_struct { #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif +#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) + unsigned int sequential_io; + unsigned int sequential_io_avg; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h new file mode 100644 index 000000000000..3cc5a0b278c3 --- /dev/null +++ b/include/trace/events/bcache.h @@ -0,0 +1,271 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bcache + +#if !defined(_TRACE_BCACHE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BCACHE_H + +#include + +struct search; + +DECLARE_EVENT_CLASS(bcache_request, + + TP_PROTO(struct search *s, struct bio *bio), + + TP_ARGS(s, bio), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(unsigned int, orig_major ) + __field(unsigned int, orig_minor ) + __field(sector_t, sector ) + __field(dev_t, orig_sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __array(char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->orig_major = s->d->disk->major; + __entry->orig_minor = s->d->disk->first_minor; + __entry->sector = bio->bi_sector; + __entry->orig_sector = bio->bi_sector - 16; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm, + __entry->orig_major, __entry->orig_minor, + (unsigned long long)__entry->orig_sector) +); + +DEFINE_EVENT(bcache_request, bcache_request_start, + + TP_PROTO(struct search *s, struct bio *bio), + + TP_ARGS(s, bio) +); + +DEFINE_EVENT(bcache_request, bcache_request_end, + + TP_PROTO(struct search *s, struct bio *bio), + + TP_ARGS(s, bio) +); + +DECLARE_EVENT_CLASS(bcache_bio, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __array(char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, 
__entry->comm) +); + + +DEFINE_EVENT(bcache_bio, bcache_passthrough, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_cache_hit, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_cache_miss, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_read_retry, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_writethrough, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_writeback, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_write_skip, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_btree_read, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_btree_write, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_write_dirty, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_read_dirty, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_write_moving, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_read_moving, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DEFINE_EVENT(bcache_bio, bcache_journal_write, + + TP_PROTO(struct bio *bio), + + TP_ARGS(bio) +); + +DECLARE_EVENT_CLASS(bcache_cache_bio, + + TP_PROTO(struct bio *bio, + sector_t orig_sector, + struct block_device* orig_bdev), + + TP_ARGS(bio, orig_sector, orig_bdev), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(dev_t, orig_dev ) + __field(sector_t, sector ) + __field(sector_t, orig_sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __array(char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->orig_dev = orig_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->orig_sector = orig_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d %llu)", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm, + MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev), + (unsigned long long)__entry->orig_sector) +); + +DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert, + + TP_PROTO(struct bio *bio, + sector_t orig_sector, + struct block_device *orig_bdev), + + TP_ARGS(bio, orig_sector, orig_bdev) +); + +DECLARE_EVENT_CLASS(bcache_gc, + + TP_PROTO(uint8_t *uuid), + + TP_ARGS(uuid), + + TP_STRUCT__entry( + __field(uint8_t *, uuid) + ), + + TP_fast_assign( + __entry->uuid = uuid; + ), + + TP_printk("%pU", __entry->uuid) +); + + +DEFINE_EVENT(bcache_gc, bcache_gc_start, + + TP_PROTO(uint8_t *uuid), + + TP_ARGS(uuid) +); + +DEFINE_EVENT(bcache_gc, bcache_gc_end, + + TP_PROTO(uint8_t *uuid), + + TP_ARGS(uuid) +); + +#endif /* _TRACE_BCACHE_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/fork.c b/kernel/fork.c index 1766d324d5e3..7b54fb62332c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1303,6 +1303,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif +#ifdef CONFIG_BCACHE + p->sequential_io = 0; + p->sequential_io_avg = 0; +#endif /* Perform scheduler related setup. 
Assign this task to a CPU. */ sched_fork(p); -- GitLab From 8b7719e08a42079d333f902fdbf5823ea9636d65 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 25 Mar 2013 15:41:55 +0100 Subject: [PATCH 0186/3163] EDAC, mc_sysfs.c: Fix string array pointer types Those should be const ptr to a const string, fix them. Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc_sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 5899a76eec3b..f6e81b285b4a 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -87,7 +87,7 @@ static struct device *mci_pdev; /* * various constants for Memory Controllers */ -static const char *mem_types[] = { +static const char * const mem_types[] = { [MEM_EMPTY] = "Empty", [MEM_RESERVED] = "Reserved", [MEM_UNKNOWN] = "Unknown", @@ -107,7 +107,7 @@ static const char *mem_types[] = { [MEM_RDDR3] = "Registered-DDR3" }; -static const char *dev_types[] = { +static const char * const dev_types[] = { [DEV_UNKNOWN] = "Unknown", [DEV_X1] = "x1", [DEV_X2] = "x2", @@ -118,7 +118,7 @@ static const char *dev_types[] = { [DEV_X64] = "x64" }; -static const char *edac_caps[] = { +static const char * const edac_caps[] = { [EDAC_UNKNOWN] = "Unknown", [EDAC_NONE] = "None", [EDAC_RESERVED] = "Reserved", -- GitLab From 07e86ccb543bb1e748f32d6f0f18913d3f58d988 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Mar 2013 11:46:43 -0700 Subject: [PATCH 0187/3163] bcache: Build fixes from test robot config: make ARCH=i386 allmodconfig All error/warnings: drivers/md/bcache/bset.c: In function 'bch_ptr_bad': >> drivers/md/bcache/bset.c:164:2: warning: format '%li' expects argument of type 'long int', but argument 4 has type 'size_t' [-Wformat] -- drivers/md/bcache/debug.c: In function 'bch_pbtree': >> drivers/md/bcache/debug.c:86:4: warning: format '%li' expects argument of type 'long int', but argument 4 has type 'size_t' [-Wformat] -- drivers/md/bcache/btree.c: In function 'bch_btree_read_done': >> drivers/md/bcache/btree.c:245:8: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'size_t' [-Wformat] -- drivers/md/bcache/closure.o: In function `closure_debug_init': >> (.init.text+0x0): multiple definition of `init_module' >> drivers/md/bcache/super.o:super.c:(.init.text+0x0): first defined here Signed-off-by: Kent Overstreet Cc: Fengguang Wu Cc: linux-bcache@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/bset.c | 2 +- drivers/md/bcache/btree.c | 2 +- drivers/md/bcache/closure.c | 4 +--- drivers/md/bcache/closure.h | 2 ++ drivers/md/bcache/super.c | 1 + 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index bb0f7ae14b3c..79fe1f0190f7 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -161,7 +161,7 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) #ifdef CONFIG_BCACHE_EDEBUG bug: mutex_unlock(&b->c->bucket_lock); - btree_bug(b, "inconsistent pointer %s: bucket %li pin %i " + btree_bug(b, "inconsistent pointer %s: bucket %zu pin %i " "prio %i gen %i last_gc %i mark %llu gc_gen %i", pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e7bc917ef0d7..af9ea4a96330 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -240,7 +240,7 @@ void bch_btree_read_done(struct closure 
*cl) closure_return(cl); err: set_btree_node_io_error(b); - bch_cache_set_error(b->c, "%s at bucket %lu, block %zu, %u keys", + bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys", err, PTR_BUCKET_NR(b->c, &b->key, 0), index(i, b), i->keys); goto out; diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index d6fbec0f8484..f54f7c9981d4 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -334,14 +334,12 @@ static const struct file_operations debug_ops = { .release = single_release }; -int __init closure_debug_init(void) +void __init closure_debug_init(void) { debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops); return 0; } -module_init(closure_debug_init); - #endif MODULE_AUTHOR("Kent Overstreet "); diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 3f31d599ea56..00039924ea9d 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -313,11 +313,13 @@ void __closure_flush_sync(struct closure *cl, struct timer_list *timer); #ifdef CONFIG_BCACHE_CLOSURES_DEBUG +void closure_debug_init(void); void closure_debug_create(struct closure *cl); void closure_debug_destroy(struct closure *cl); #else +static inline void closure_debug_init(void) {} static inline void closure_debug_create(struct closure *cl) {} static inline void closure_debug_destroy(struct closure *cl) {} diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 31ef47f1f3b6..7b8efc770871 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1917,6 +1917,7 @@ static int __init bcache_init(void) mutex_init(&bch_register_lock); init_waitqueue_head(&unregister_wait); register_reboot_notifier(&reboot); + closure_debug_init(); bcache_major = register_blkdev(0, "bcache"); if (bcache_major < 0) -- GitLab From b1a67b0f4c747ca10c96ebb24f04e2a74b3c298d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Mar 2013 11:46:44 -0700 Subject: [PATCH 0188/3163] bcache: Style/checkpatch fixes Took out some nested functions, and fixed some more checkpatch complaints. 
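For illustration, the shape of the nested-function removal as a minimal sketch (hypothetical code, not taken from bcache): nested functions are a GCC extension that checkpatch flags, so the helpers move to file scope.

/* Before: nested helper, GCC-only */
static int sum_squares(int *v, int n)
{
	int square(int x) { return x * x; }	/* nested: non-portable */
	int i, s = 0;

	for (i = 0; i < n; i++)
		s += square(v[i]);
	return s;
}

/* After: plain ISO C, helper hoisted to file scope */
static int square(int x)
{
	return x * x;
}

static int sum_squares(int *v, int n)
{
	int i, s = 0;

	for (i = 0; i < n; i++)
		s += square(v[i]);
	return s;
}

The same hoisting is applied below to bucket_prio()/bucket_cmp() in alloc.c and movinggc.c, and to iohash() in request.c.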
Signed-off-by: Kent Overstreet Cc: linux-bcache@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 22 ++++++---------------- drivers/md/bcache/bcache.h | 10 +++++----- drivers/md/bcache/bset.c | 9 +++++---- drivers/md/bcache/btree.c | 4 ++-- drivers/md/bcache/debug.c | 8 ++++---- drivers/md/bcache/journal.c | 8 ++++---- drivers/md/bcache/movinggc.c | 24 ++++++++++++------------ drivers/md/bcache/request.c | 12 +++++++----- drivers/md/bcache/stats.c | 3 ++- drivers/md/bcache/super.c | 7 ++++--- 10 files changed, 51 insertions(+), 56 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ed18115e078e..2879487d036a 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -229,24 +229,14 @@ static void invalidate_one_bucket(struct cache *ca, struct bucket *b) fifo_push(&ca->free_inc, b - ca->buckets); } -static void invalidate_buckets_lru(struct cache *ca) -{ - unsigned bucket_prio(struct bucket *b) - { - return ((unsigned) (b->prio - ca->set->min_prio)) * - GC_SECTORS_USED(b); - } - - bool bucket_max_cmp(struct bucket *l, struct bucket *r) - { - return bucket_prio(l) < bucket_prio(r); - } +#define bucket_prio(b) \ + (((unsigned) (b->prio - ca->set->min_prio)) * GC_SECTORS_USED(b)) - bool bucket_min_cmp(struct bucket *l, struct bucket *r) - { - return bucket_prio(l) > bucket_prio(r); - } +#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) +#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) +static void invalidate_buckets_lru(struct cache *ca) +{ struct bucket *b; ssize_t i; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d01a553f63f3..b2846e70149b 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -644,8 +644,8 @@ struct gc_stat { * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). * - * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down the - * allocation thread. + * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down + * the allocation thread. */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 @@ -1012,11 +1012,11 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c, * searches - it told you where a key started. It's not used anymore, * and can probably be safely dropped. 
*/ -#define KEY(dev, sector, len) (struct bkey) \ -{ \ +#define KEY(dev, sector, len) \ +((struct bkey) { \ .high = (1ULL << 63) | ((uint64_t) (len) << 20) | (dev), \ .low = (sector) \ -} +}) static inline void bkey_init(struct bkey *k) { diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 79fe1f0190f7..4dc9cb4efacb 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -161,9 +161,9 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) #ifdef CONFIG_BCACHE_EDEBUG bug: mutex_unlock(&b->c->bucket_lock); - btree_bug(b, "inconsistent pointer %s: bucket %zu pin %i " - "prio %i gen %i last_gc %i mark %llu gc_gen %i", pkey(k), - PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), + btree_bug(b, +"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", + pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); return true; #endif @@ -1049,7 +1049,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) for (i = start; i <= b->nsets; i++) keys += b->sets[i].data->keys; - order = roundup_pow_of_two(__set_bytes(b->sets->data, keys)) / PAGE_SIZE; + order = roundup_pow_of_two(__set_bytes(b->sets->data, + keys)) / PAGE_SIZE; if (order) order = ilog2(order); } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index af9ea4a96330..24b678059091 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1021,8 +1021,8 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, goto err_free; if (!b) { - cache_bug(c, "Tried to allocate bucket" - " that was in btree cache"); + cache_bug(c, + "Tried to allocate bucket that was in btree cache"); __bkey_put(c, &k.key); goto retry; } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 4b37ef2b80e5..141a5cac11ad 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -217,8 +217,8 @@ void bch_data_verify(struct search *s) if (memcmp(p1 + bv->bv_offset, p2 + bv->bv_offset, bv->bv_len)) - printk(KERN_ERR "bcache (%s): verify failed" - " at sector %llu\n", + printk(KERN_ERR + "bcache (%s): verify failed at sector %llu\n", bdevname(dc->bdev, name), (uint64_t) s->orig_bio->bi_sector); @@ -525,8 +525,8 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, k = bkey_next(k), l = bkey_next(l)) if (bkey_cmp(k, l) || KEY_SIZE(k) != KEY_SIZE(l)) - pr_err("key %zi differs: %s " - "!= %s", (uint64_t *) k - i->d, + pr_err("key %zi differs: %s != %s", + (uint64_t *) k - i->d, pkey(k), pkey(l)); for (j = 0; j < 3; j++) { diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index c871ffaabbb0..21fd1010cf5d 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -293,9 +293,9 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list, BUG_ON(i->pin && atomic_read(i->pin) != 1); if (n != i->j.seq) - pr_err("journal entries %llu-%llu " - "missing! (replaying %llu-%llu)\n", - n, i->j.seq - 1, start, end); + pr_err( + "journal entries %llu-%llu missing! 
(replaying %llu-%llu)\n", + n, i->j.seq - 1, start, end); for (k = i->j.start; k < end(&i->j); @@ -439,7 +439,7 @@ static void do_journal_discard(struct cache *ca) bio_init(bio); bio->bi_sector = bucket_to_sector(ca->set, - ca->sb.d[ja->discard_idx]); + ca->sb.d[ja->discard_idx]); bio->bi_bdev = ca->bdev; bio->bi_rw = REQ_WRITE|REQ_DISCARD; bio->bi_max_vecs = 1; diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index c69fc92b02cf..e3ec0a550b00 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -183,6 +183,16 @@ err: if (!IS_ERR_OR_NULL(w->private)) closure_return(cl); } +static bool bucket_cmp(struct bucket *l, struct bucket *r) +{ + return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); +} + +static unsigned bucket_heap_top(struct cache *ca) +{ + return GC_SECTORS_USED(heap_peek(&ca->heap)); +} + void bch_moving_gc(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, gc.cl); @@ -190,16 +200,6 @@ void bch_moving_gc(struct closure *cl) struct bucket *b; unsigned i; - bool bucket_cmp(struct bucket *l, struct bucket *r) - { - return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); - } - - unsigned top(struct cache *ca) - { - return GC_SECTORS_USED(heap_peek(&ca->heap)); - } - if (!c->copy_gc_enabled) closure_return(cl); @@ -220,7 +220,7 @@ void bch_moving_gc(struct closure *cl) sectors_to_move += GC_SECTORS_USED(b); heap_add(&ca->heap, b, bucket_cmp); } else if (bucket_cmp(b, heap_peek(&ca->heap))) { - sectors_to_move -= top(ca); + sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; @@ -233,7 +233,7 @@ void bch_moving_gc(struct closure *cl) sectors_to_move -= GC_SECTORS_USED(b); } - ca->gc_move_threshold = top(ca); + ca->gc_move_threshold = bucket_heap_top(ca); pr_debug("threshold %u", ca->gc_move_threshold); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4f552de49aaa..dbda9646ef38 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1117,11 +1117,13 @@ static void add_sequential(struct task_struct *t) t->sequential_io = 0; } -static void check_should_skip(struct cached_dev *dc, struct search *s) +static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) { - struct hlist_head *iohash(uint64_t k) - { return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; } + return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; +} +static void check_should_skip(struct cached_dev *dc, struct search *s) +{ struct cache_set *c = s->op.c; struct bio *bio = &s->bio.bio; @@ -1162,7 +1164,7 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) spin_lock(&dc->io_lock); - hlist_for_each_entry(i, iohash(bio->bi_sector), hash) + hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash) if (i->last == bio->bi_sector && time_before(jiffies, i->jiffies)) goto found; @@ -1180,7 +1182,7 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) s->task->sequential_io = i->sequential; hlist_del(&i->hash); - hlist_add_head(&i->hash, iohash(i->last)); + hlist_add_head(&i->hash, iohash(dc, i->last)); list_move_tail(&i->lru, &dc->io_lru); spin_unlock(&dc->io_lock); diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index bf6cf9518c89..64e679449c2a 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -95,7 +95,8 @@ static KTYPE(bch_stats); static void scale_accounting(unsigned long data); -void bch_cache_accounting_init(struct cache_accounting *acc, struct closure *parent) +void 
bch_cache_accounting_init(struct cache_accounting *acc, + struct closure *parent) { kobject_init(&acc->total.kobj, &bch_stats_ktype); kobject_init(&acc->five_minute.kobj, &bch_stats_ktype); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7b8efc770871..484ae6c8f43a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -526,7 +526,8 @@ void bch_prio_write(struct cache *ca) for (i = prio_buckets(ca) - 1; i >= 0; --i) { long bucket; struct prio_set *p = ca->disk_buckets; - struct bucket_disk *d = p->data, *end = d + prios_per_bucket(ca); + struct bucket_disk *d = p->data; + struct bucket_disk *end = d + prios_per_bucket(ca); for (b = ca->buckets + i * prios_per_bucket(ca); b < ca->buckets + ca->sb.nbuckets && d < end; @@ -865,8 +866,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) if (dc->sb.block_size < c->sb.block_size) { /* Will die */ - pr_err("Couldn't attach %s: block size " - "less than set's block size", buf); + pr_err("Couldn't attach %s: block size less than set's block size", + buf); return -EINVAL; } -- GitLab From f1fb3449efd5c49b48e35746bc7283eb9c73e3a0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Mar 2013 13:46:14 -0600 Subject: [PATCH 0189/3163] aoe: Fix uninitialized var usage Commit 4f2ac93c175c4922bdddbfec6cad94b32cea0070 (block: Remove bi_idx references) accidentally removed the bit that set bv - re-add that. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: "Ed L. Cashin" Cc: fengguang.wu@intel.com Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 8188ad131b97..af96ca171238 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -927,7 +927,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio) buf->resid = bio->bi_size; buf->sector = bio->bi_sector; bio_pageinc(bio); - buf->bv = bio_iovec(bio); + buf->bv = bv = bio_iovec(bio); buf->bv_resid = bv->bv_len; WARN_ON(buf->bv_resid == 0); } -- GitLab From 2124469efa6079e6c325165fb1926159356b15c3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Mar 2013 15:27:26 -0600 Subject: [PATCH 0190/3163] aoe: get rid of cached bv variable in bufinit() Less error prone if we just kill it, it's only used once anyway. Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 25ef5c014fca..67d216c716da 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -919,16 +919,14 @@ bio_pagedec(struct bio *bio) static void bufinit(struct buf *buf, struct request *rq, struct bio *bio) { - struct bio_vec *bv; - memset(buf, 0, sizeof(*buf)); buf->rq = rq; buf->bio = bio; buf->resid = bio->bi_size; buf->sector = bio->bi_sector; bio_pageinc(bio); - buf->bv = bv = &bio->bi_io_vec[bio->bi_idx]; - buf->bv_resid = bv->bv_len; + buf->bv = &bio->bi_io_vec[bio->bi_idx]; + buf->bv_resid = buf->bv->bv_len; WARN_ON(buf->bv_resid == 0); } -- GitLab From 29177b896676496dbefa27bd35f19c64e47cf0d3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Mar 2013 19:36:39 -0600 Subject: [PATCH 0191/3163] bcache: Fix for the build fixes Commit 82a84eaf7e51ba3da0c36cbc401034a4e943492d left a return 0 in closure_debug_init(). Whoops.
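Reduced to a minimal sketch (hypothetical function, not the driver code), the leftover bug looks like this:

void debug_init(void)
{
	/* ... create debugfs entries ... */
	return 0;	/* warning: 'return' with a value, in function returning void */
}

Once the function was converted from int to void, the stray return value became a warning that -Werror turns into a build failure; the fix below simply drops it.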
Signed-off-by: Kent Overstreet Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index f54f7c9981d4..bd05a9a8c7cf 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -337,7 +337,6 @@ static const struct file_operations debug_ops = { void __init closure_debug_init(void) { debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops); - return 0; } #endif -- GitLab From fc35b35cbe24ef021ea9acfba21e54da958df747 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 8 Feb 2013 13:09:32 +0800 Subject: [PATCH 0192/3163] Thermal: cpufreq cooling: fix parsing per_cpu cpufreq_frequency_table cpufreq cooling uses different frequencies as different cooling states. But the per_cpu cpufreq_frequency_table may contain duplicate, invalid entries, and it may be in either ascending or descending order. And currently, code for parsing the per_cpu cpufreq_frequency_table is used in several places and inconsistent. Now introduce new code to 1. get the maximum cooling states 2. translate cooling state to cpu frequency 3. translate cpu frequency to cooling state in one place, with the correct logic of handling per_cpu cpufreq_frequency_table. Signed-off-by: Zhang Rui Tested-by: Amit Daniel kachhap --- drivers/thermal/cpu_cooling.c | 143 ++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 50 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 8dc44cbb3e09..9e208d300647 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -108,54 +108,109 @@ static int is_cpufreq_valid(int cpu) return !cpufreq_get_policy(&policy, cpu); } -/** - * get_cpu_frequency - get the absolute value of frequency from level. - * @cpu: cpu for which frequency is fetched. - * @level: level of frequency, equals cooling state of cpu cooling device - * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc - */ -static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level) +enum cpufreq_cooling_property { + GET_LEVEL, + GET_FREQ, + GET_MAXL, +}; + +/* + * this is the common function to + * 1. get maximum cpu cooling states + * 2. translate frequency to cooling state + * 3. translate cooling state to frequency + * Note that the code may be not in good shape + * but it is written in this way in order to: + * a) reduce duplicate code as most of the code can be shared. + * b) make sure the logic is consistent when translating between + * cooling states and frequencies. 
+*/ +static int get_property(unsigned int cpu, unsigned long input, + unsigned int* output, enum cpufreq_cooling_property property) { - int ret = 0, i = 0; - unsigned long level_index; - bool descend = false; + int i, j; + unsigned long max_level = 0, level; + unsigned int freq = CPUFREQ_ENTRY_INVALID; + int descend = -1; struct cpufreq_frequency_table *table = cpufreq_frequency_get_table(cpu); + + if (!output) + return -EINVAL; + if (!table) - return ret; + return -EINVAL; - while (table[i].frequency != CPUFREQ_TABLE_END) { + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + /* ignore invalid entries */ if (table[i].frequency == CPUFREQ_ENTRY_INVALID) continue; - /*check if table in ascending or descending order*/ - if ((table[i + 1].frequency != CPUFREQ_TABLE_END) && - (table[i + 1].frequency < table[i].frequency) - && !descend) { - descend = true; - } + /* ignore duplicate entry */ + if (freq == table[i].frequency) + continue; + + /* get the frequency order */ + if (freq != CPUFREQ_ENTRY_INVALID && descend != -1) + descend = !!(freq > table[i].frequency); - /*return if level matched and table in descending order*/ - if (descend && i == level) - return table[i].frequency; - i++; + freq = table[i].frequency; + max_level++; } - i--; - if (level > i || descend) - return ret; - level_index = i - level; + /* get max level */ + if (property == GET_MAXL) { + *output = (unsigned int)max_level; + return 0; + } - /*Scan the table in reverse order and match the level*/ - while (i >= 0) { + if (property == GET_FREQ) + level = descend ? input : (max_level - input -1); + + + for (i = 0, j = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + /* ignore invalid entry */ if (table[i].frequency == CPUFREQ_ENTRY_INVALID) continue; - /*return if level matched*/ - if (i == level_index) - return table[i].frequency; - i--; + + /* ignore duplicate entry */ + if (freq == table[i].frequency) + continue; + + /* now we have a valid frequency entry */ + freq = table[i].frequency; + + if (property == GET_LEVEL && (unsigned int)input == freq) { + /* get level by frequency */ + *output = descend ? j : (max_level - j - 1); + return 0; + } + if (property == GET_FREQ && level == j) { + /* get frequency by level */ + *output = freq; + return 0; + } + j++; } - return ret; + return -EINVAL; +} + +/** + * get_cpu_frequency - get the absolute value of frequency from level. + * @cpu: cpu for which frequency is fetched. + * @level: level of frequency, equals cooling state of cpu cooling device + * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... 
etc + */ +static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level) +{ + int ret = 0; + unsigned int freq; + + ret = get_property(cpu, level, &freq, GET_FREQ); + if (ret) + return 0; + return freq; } /** @@ -237,29 +292,17 @@ static int cpufreq_get_max_state(struct thermal_cooling_device *cdev, struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; struct cpumask *maskPtr = &cpufreq_device->allowed_cpus; unsigned int cpu; - struct cpufreq_frequency_table *table; unsigned long count = 0; - int i = 0; + int ret; cpu = cpumask_any(maskPtr); - table = cpufreq_frequency_get_table(cpu); - if (!table) { - *state = 0; - return 0; - } - for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { - if (table[i].frequency == CPUFREQ_ENTRY_INVALID) - continue; - count++; - } + ret = get_property(cpu, 0, (unsigned int *)&count, GET_MAXL); - if (count > 0) { - *state = --count; - return 0; - } + if (count > 0) + *state = count; - return -EINVAL; + return ret; } /** -- GitLab From 57df8106932b57427df1eaaa13871857f75b1194 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 8 Feb 2013 14:52:06 +0800 Subject: [PATCH 0193/3163] Thermal: exynos: fix cooling state translation Signed-off-by: Zhang Rui Tested-by: Amit Daniel kachhap --- drivers/thermal/cpu_cooling.c | 11 +++++++++++ drivers/thermal/exynos_thermal.c | 24 ++---------------------- include/linux/cpu_cooling.h | 7 +++++++ include/linux/thermal.h | 5 ++++- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 9e208d300647..e03891b03c9b 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -196,6 +196,17 @@ static int get_property(unsigned int cpu, unsigned long input, return -EINVAL; } +unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) +{ + unsigned int val; + + if (get_property(cpu, (unsigned long)freq, &val, GET_LEVEL)) + return THERMAL_CSTATE_INVALID; + return (unsigned long)val; +} + +EXPORT_SYMBOL(cpufreq_cooling_get_level); + /** * get_cpu_frequency - get the absolute value of frequency from level. * @cpu: cpu for which frequency is fetched. 
diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index 46568c078dee..541257888c3e 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -242,26 +242,6 @@ static int exynos_get_crit_temp(struct thermal_zone_device *thermal, return ret; } -static int exynos_get_frequency_level(unsigned int cpu, unsigned int freq) -{ - int i = 0, ret = -EINVAL; - struct cpufreq_frequency_table *table = NULL; -#ifdef CONFIG_CPU_FREQ - table = cpufreq_frequency_get_table(cpu); -#endif - if (!table) - return ret; - - while (table[i].frequency != CPUFREQ_TABLE_END) { - if (table[i].frequency == CPUFREQ_ENTRY_INVALID) - continue; - if (table[i].frequency == freq) - return i; - i++; - } - return ret; -} - /* Bind callback functions for thermal zone */ static int exynos_bind(struct thermal_zone_device *thermal, struct thermal_cooling_device *cdev) @@ -288,8 +268,8 @@ static int exynos_bind(struct thermal_zone_device *thermal, /* Bind the thermal zone to the cpufreq cooling device */ for (i = 0; i < tab_size; i++) { clip_data = (struct freq_clip_table *)&(tab_ptr[i]); - level = exynos_get_frequency_level(0, clip_data->freq_clip_max); - if (level < 0) + level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max); + if (level == THERMAL_CSTATE_INVALID) return 0; switch (GET_ZONE(i)) { case MONITOR_ZONE: diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 40b4ef54cc7d..bc479b1e0fd9 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -42,6 +42,8 @@ struct thermal_cooling_device *cpufreq_cooling_register( * @cdev: thermal cooling device pointer. */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); + +unsigned long cpufreq_cooling_get_level(unsigned int, unsigned int); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device *cpufreq_cooling_register( const struct cpumask *clip_cpus) @@ -53,6 +55,11 @@ static inline void cpufreq_cooling_unregister( { return; } +static inline unsigned long cpufreq_cooling_get_level(unsigned int, + unsigned int) +{ + return THERMAL_CSTATE_INVALID; +} #endif /* CONFIG_CPU_THERMAL */ #endif /* __CPU_COOLING_H__ */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f0bd7f90a90d..5a3b428daaab 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -33,8 +33,11 @@ #define THERMAL_MAX_TRIPS 12 #define THERMAL_NAME_LENGTH 20 +/* invalid cooling state */ +#define THERMAL_CSTATE_INVALID -1UL + /* No upper/lower limit requirement */ -#define THERMAL_NO_LIMIT -1UL +#define THERMAL_NO_LIMIT THERMAL_CSTATE_INVALID /* Unit conversion macros */ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? 
\ -- GitLab From bde00663098db4d6a25681351ffa4a87eff3d0b4 Mon Sep 17 00:00:00 2001 From: "Laurent Navet [Mali]" Date: Tue, 12 Mar 2013 10:47:50 +0000 Subject: [PATCH 0194/3163] drivers: thermal: cpu_cooling: fix checkpatch warning - WARNING: Avoid CamelCase: Signed-off-by: Laurent Navet Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 8dc44cbb3e09..be2e6b0e5349 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -168,8 +168,8 @@ static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device, unsigned long cooling_state) { unsigned int cpuid, clip_freq; - struct cpumask *maskPtr = &cpufreq_device->allowed_cpus; - unsigned int cpu = cpumask_any(maskPtr); + struct cpumask *mask = &cpufreq_device->allowed_cpus; + unsigned int cpu = cpumask_any(mask); /* Check if the old cooling action is same as new cooling action */ @@ -184,7 +184,7 @@ static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device, cpufreq_device->cpufreq_val = clip_freq; notify_device = cpufreq_device; - for_each_cpu(cpuid, maskPtr) { + for_each_cpu(cpuid, mask) { if (is_cpufreq_valid(cpuid)) cpufreq_update_policy(cpuid); } @@ -235,13 +235,13 @@ static int cpufreq_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) { struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; - struct cpumask *maskPtr = &cpufreq_device->allowed_cpus; + struct cpumask *mask = &cpufreq_device->allowed_cpus; unsigned int cpu; struct cpufreq_frequency_table *table; unsigned long count = 0; int i = 0; - cpu = cpumask_any(maskPtr); + cpu = cpumask_any(mask); table = cpufreq_frequency_get_table(cpu); if (!table) { *state = 0; -- GitLab From f534e9bf8074ca8c258a1ce0e5224372298976f9 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Mar 2013 15:42:20 +0000 Subject: [PATCH 0195/3163] thermal: db8500: Fix checking return value of thermal_zone_device_register thermal_zone_device_register() returns ERR_PTR on error, thus use IS_ERR rather than IS_ERR_OR_NULL to check return value. 
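The underlying convention, as a minimal sketch (my_probe, my_ops and "my_zone" are hypothetical placeholders): an API that reports failure via ERR_PTR() never returns NULL, so IS_ERR() is the right check and PTR_ERR() recovers the errno.

static int my_probe(struct platform_device *pdev)
{
	struct thermal_zone_device *tz;

	tz = thermal_zone_device_register("my_zone", 0, 0, NULL,
					  &my_ops, NULL, 0, 0);
	if (IS_ERR(tz))			/* ERR_PTR(-E...), never NULL */
		return PTR_ERR(tz);	/* propagate the encoded errno */

	platform_set_drvdata(pdev, tz);
	return 0;
}

IS_ERR_OR_NULL() is reserved for interfaces that may legitimately return NULL as a third state.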
Signed-off-by: Axel Lin Signed-off-by: Zhang Rui --- drivers/thermal/db8500_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 61ce60a35921..6bdcec474fb1 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -447,7 +447,7 @@ static int db8500_thermal_probe(struct platform_device *pdev) pzone->therm_dev = thermal_zone_device_register("db8500_thermal_zone", ptrips->num_trips, 0, pzone, &thdev_ops, NULL, 0, 0); - if (IS_ERR_OR_NULL(pzone->therm_dev)) { + if (IS_ERR(pzone->therm_dev)) { dev_err(&pdev->dev, "Register thermal zone device failed.\n"); return PTR_ERR(pzone->therm_dev); } -- GitLab From 4c7fa83aa5f8444662744dba82577075f11673ae Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Mar 2013 15:43:29 +0000 Subject: [PATCH 0196/3163] thermal: db8500: Fix missing mutex_unlock() in probe error paths Signed-off-by: Axel Lin Signed-off-by: Zhang Rui --- drivers/thermal/db8500_thermal.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 6bdcec474fb1..1e3b3bf9f993 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -419,7 +419,8 @@ static int db8500_thermal_probe(struct platform_device *pdev) low_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_LOW"); if (low_irq < 0) { dev_err(&pdev->dev, "Get IRQ_HOTMON_LOW failed.\n"); - return low_irq; + ret = low_irq; + goto out_unlock; } ret = devm_request_threaded_irq(&pdev->dev, low_irq, NULL, @@ -427,13 +428,14 @@ static int db8500_thermal_probe(struct platform_device *pdev) "dbx500_temp_low", pzone); if (ret < 0) { dev_err(&pdev->dev, "Failed to allocate temp low irq.\n"); - return ret; + goto out_unlock; } high_irq = platform_get_irq_byname(pdev, "IRQ_HOTMON_HIGH"); if (high_irq < 0) { dev_err(&pdev->dev, "Get IRQ_HOTMON_HIGH failed.\n"); - return high_irq; + ret = high_irq; + goto out_unlock; } ret = devm_request_threaded_irq(&pdev->dev, high_irq, NULL, @@ -441,7 +443,7 @@ static int db8500_thermal_probe(struct platform_device *pdev) "dbx500_temp_high", pzone); if (ret < 0) { dev_err(&pdev->dev, "Failed to allocate temp high irq.\n"); - return ret; + goto out_unlock; } pzone->therm_dev = thermal_zone_device_register("db8500_thermal_zone", @@ -449,7 +451,8 @@ static int db8500_thermal_probe(struct platform_device *pdev) if (IS_ERR(pzone->therm_dev)) { dev_err(&pdev->dev, "Register thermal zone device failed.\n"); - return PTR_ERR(pzone->therm_dev); + ret = PTR_ERR(pzone->therm_dev); + goto out_unlock; } dev_info(&pdev->dev, "Thermal zone device registered.\n"); @@ -461,9 +464,11 @@ static int db8500_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pzone); pzone->mode = THERMAL_DEVICE_ENABLED; + +out_unlock: mutex_unlock(&pzone->th_lock); - return 0; + return ret; } static int db8500_thermal_remove(struct platform_device *pdev) -- GitLab From f8b587055a793c7719f0d4f41b7b4aeeef43aa2d Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 20 Mar 2013 21:38:07 +0000 Subject: [PATCH 0197/3163] thermal: Fix compiler warning The following warning is obtained when CONFIG_NET is not defined: In file included from drivers/thermal/mvebu_thermal.c:27:0: include/linux/thermal.h:254:12: warning: 'thermal_generate_netlink_event' defined but not used [-Wunused-function] This patch fixes the warning by properly inlining thermal_generate_netlink_event(). 
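The warning mechanics, as a minimal sketch (hypothetical header): a plain static function defined in a header is emitted in every translation unit that includes it, so any includer that never calls it trips -Wunused-function.

/* in a shared header */
static int foo(void) { return 0; }		/* warns in every non-caller */

/* the fix: inline stubs are exempt and cost nothing unless called */
static inline int foo(void) { return 0; }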
Signed-off-by: Ezequiel Garcia Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f0bd7f90a90d..fd7b8f3e6f42 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -251,7 +251,7 @@ void thermal_unregister_governor(struct thermal_governor *); extern int thermal_generate_netlink_event(struct thermal_zone_device *tz, enum events event); #else -static int thermal_generate_netlink_event(struct thermal_zone_device *tz, +static inline int thermal_generate_netlink_event(struct thermal_zone_device *tz, enum events event) { return 0; -- GitLab From 02519d3397b57bf723f6df69c92b0b66ecafb11a Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 21 Mar 2013 17:42:07 -0300 Subject: [PATCH 0198/3163] thermal: kirkwood: Fix valid check for thermal register The correct value is obtained by first shifting the register by the offset, then applying the valid mask, and finally inverting the result. This check was lacking an extra parenthesis to be strictly correct. Signed-off-by: Ezequiel Garcia Signed-off-by: Zhang Rui --- drivers/thermal/kirkwood_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c index e5500edb5285..d2e05eec722a 100644 --- a/drivers/thermal/kirkwood_thermal.c +++ b/drivers/thermal/kirkwood_thermal.c @@ -41,8 +41,8 @@ static int kirkwood_get_temp(struct thermal_zone_device *thermal, reg = readl_relaxed(priv->sensor); /* Valid check */ - if (!(reg >> KIRKWOOD_THERMAL_VALID_OFFSET) & - KIRKWOOD_THERMAL_VALID_MASK) { + if (!((reg >> KIRKWOOD_THERMAL_VALID_OFFSET) & + KIRKWOOD_THERMAL_VALID_MASK)) { dev_err(&thermal->device, "Temperature sensor reading not valid\n"); return -EIO; -- GitLab From 696b6075afa97b42b3f59f96809ed586eb691c96 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 22 Mar 2013 09:23:02 -0300 Subject: [PATCH 0199/3163] thermal: kirkwood: Fix thermal sensor formula The current formula was taken from the 88AP510 SoC datasheet, which is not exactly correct. The correct value for the temperature in Celsius of the sensor present in this SoC is: Celsius = (322-reg)/1.3625 Signed-off-by: Lior Amsalem Signed-off-by: Ezequiel Garcia Acked-by: Andrew Lunn Signed-off-by: Zhang Rui --- drivers/thermal/kirkwood_thermal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c index d2e05eec722a..dfeceaffbc03 100644 --- a/drivers/thermal/kirkwood_thermal.c +++ b/drivers/thermal/kirkwood_thermal.c @@ -49,13 +49,13 @@ static int kirkwood_get_temp(struct thermal_zone_device *thermal, } /* - * Calculate temperature. See Section 8.10.1 of the 88AP510, - * datasheet, which has the same sensor. - * Documentation/arm/Marvell/README + * Calculate temperature. According to Marvell internal + * documentation the formula for this is: + * Celsius = (322-reg)/1.3625 */ reg = (reg >> KIRKWOOD_THERMAL_TEMP_OFFSET) & KIRKWOOD_THERMAL_TEMP_MASK; - *temp = ((2281638UL - (7298*reg)) / 10); + *temp = ((3220000000UL - (10000000UL * reg)) / 13625); return 0; } -- GitLab From 2fd1db8819fbf73b5f74b4b4a205ab7be0957944 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 22 Mar 2013 09:23:03 -0300 Subject: [PATCH 0200/3163] thermal: dove: Fix thermal sensor formula The current formula was taken from the 88AP510 SoC datasheet, which is not exactly correct.
The correct value for the temperature in Celsius of the sensor present in this SoC is: Celsius = (322-reg)/1.3625 Signed-off-by: Lior Amsalem Signed-off-by: Ezequiel Garcia Acked-by: Andrew Lunn Signed-off-by: Zhang Rui --- drivers/thermal/dove_thermal.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c index 3078c403b42d..4b15a5f270dc 100644 --- a/drivers/thermal/dove_thermal.c +++ b/drivers/thermal/dove_thermal.c @@ -107,12 +107,13 @@ static int dove_get_temp(struct thermal_zone_device *thermal, } /* - * Calculate temperature. See Section 8.10.1 of 88AP510, - * Documentation/arm/Marvell/README + * Calculate temperature. According to Marvell internal + * documentation the formula for this is: + * Celsius = (322-reg)/1.3625 */ reg = readl_relaxed(priv->sensor); reg = (reg >> DOVE_THERMAL_TEMP_OFFSET) & DOVE_THERMAL_TEMP_MASK; - *temp = ((2281638UL - (7298*reg)) / 10); + *temp = ((3220000000UL - (10000000UL * reg)) / 13625); return 0; } -- GitLab From 622fc5d4452a20aab7b8f5107c2931c5610e5753 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 26 Mar 2013 22:34:50 +0800 Subject: [PATCH 0201/3163] pwm: ab8500: Fix trivial typo in dev_err message Signed-off-by: Axel Lin Signed-off-by: Thierry Reding --- drivers/pwm/pwm-ab8500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index 93af1bbf88ce..3beb2b52bd2e 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -66,7 +66,7 @@ static int ab8500_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG, 1 << (chip->base - 1), ENABLE_PWM); if (ret < 0) - dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n", + dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n", pwm->label, ret); return ret; } -- GitLab From a38c9898574967c5a8ab670f1b27d9ecf71d32cc Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 26 Mar 2013 22:54:58 +0800 Subject: [PATCH 0202/3163] pwm: tiecap: Staticize non-exported symbols Both ecap_pwm_save_context() and ecap_pwm_restore_context() are only used in this file, make them static. Signed-off-by: Axel Lin Signed-off-by: Thierry Reding --- drivers/pwm/pwm-tiecap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index f024db06e6c4..0d65fb2e02c7 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -295,7 +295,7 @@ static int ecap_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&pc->chip); } -void ecap_pwm_save_context(struct ecap_pwm_chip *pc) +static void ecap_pwm_save_context(struct ecap_pwm_chip *pc) { pm_runtime_get_sync(pc->chip.dev); pc->ctx.ecctl2 = readw(pc->mmio_base + ECCTL2); @@ -304,7 +304,7 @@ void ecap_pwm_save_context(struct ecap_pwm_chip *pc) pm_runtime_put_sync(pc->chip.dev); } -void ecap_pwm_restore_context(struct ecap_pwm_chip *pc) +static void ecap_pwm_restore_context(struct ecap_pwm_chip *pc) { writel(pc->ctx.cap3, pc->mmio_base + CAP3); writel(pc->ctx.cap4, pc->mmio_base + CAP4); -- GitLab From b343a1887ea4f798758d0023838d16a428f728d5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 26 Mar 2013 22:55:57 +0800 Subject: [PATCH 0203/3163] pwm: tiehrpwm: Staticize non-exported symbols Both ehrpwm_pwm_save_context() and ehrpwm_pwm_restore_context() are only used in this file, make them static.
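The rationale, sketched with a hypothetical symbol: without static the helper gets external linkage, pollutes the global namespace, and sparse flags it.

/* sparse: warning: symbol 'save_ctx' was not declared. Should it be static? */
void save_ctx(void) { }

/* correct for a symbol used only within this file */
static void save_ctx(void) { }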
Signed-off-by: Axel Lin Signed-off-by: Thierry Reding --- drivers/pwm/pwm-tiehrpwm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index d058ba90845b..6a217596942f 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -533,7 +533,7 @@ static int ehrpwm_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&pc->chip); } -void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc) +static void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc) { pm_runtime_get_sync(pc->chip.dev); pc->ctx.tbctl = ehrpwm_read(pc->mmio_base, TBCTL); @@ -547,7 +547,7 @@ void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc) pm_runtime_put_sync(pc->chip.dev); } -void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc) +static void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc) { ehrpwm_write(pc->mmio_base, TBPRD, pc->ctx.tbprd); ehrpwm_write(pc->mmio_base, CMPA, pc->ctx.cmpa); -- GitLab From bd5cdad0c8e0f0adcd6e9c582abe4c4233c19b03 Mon Sep 17 00:00:00 2001 From: "Li, Zhen-Hua" Date: Mon, 25 Mar 2013 16:20:52 +0800 Subject: [PATCH 0204/3163] iommu/vt-d: dmar_fault should only clear PPF/PFO field. When there is a dmar irq, dmar_fault is called and all of the fields in FSTS are cleared. But ICE/IQE/ITE should not be cleared here, they need to be processed and cleared in function qi_check_fault. [Minor cleanup by Joerg Roedel] Signed-off-by: Li, Zhen-Hua Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index e5cdaf87822c..9f8aa07360ba 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1204,7 +1204,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) - goto clear_rest; + goto unlock_exit; fault_index = dma_fsts_fault_record_index(fault_status); reg = cap_fault_reg_offset(iommu->cap); @@ -1245,11 +1245,10 @@ irqreturn_t dmar_fault(int irq, void *dev_id) fault_index = 0; raw_spin_lock_irqsave(&iommu->register_lock, flag); } -clear_rest: - /* clear all the other faults */ - fault_status = readl(iommu->reg + DMAR_FSTS_REG); - writel(fault_status, iommu->reg + DMAR_FSTS_REG); + writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); + +unlock_exit: raw_spin_unlock_irqrestore(&iommu->register_lock, flag); return IRQ_HANDLED; } @@ -1297,6 +1296,7 @@ int __init enable_drhd_fault_handling(void) for_each_drhd_unit(drhd) { int ret; struct intel_iommu *iommu = drhd->iommu; + u32 fault_status; ret = dmar_set_interrupt(iommu); if (ret) { @@ -1309,6 +1309,8 @@ int __init enable_drhd_fault_handling(void) * Clear any previous faults. */ dmar_fault(iommu->irq, iommu); + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + writel(fault_status, iommu->reg + DMAR_FSTS_REG); } return 0; -- GitLab From 0e5e4f0e56aca0df1d5648db0be9028bd573b25c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 9 Nov 2012 16:33:05 -0700 Subject: [PATCH 0205/3163] NVMe: Add discard support for capable devices This adds discard support to block queues if the nvme device is capable of deallocating blocks as indicated by the controller's optional command support. A discard flagged bio request will submit an NVMe deallocate Data Set Management command for the requested blocks. 
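The unit conversion behind the deallocate range, as a worked sketch (standalone code with hypothetical values): bio fields count 512-byte sectors, while the namespace uses logical blocks of 1 << lba_shift bytes.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned lba_shift = 12;	/* 4 KiB logical blocks */
	uint64_t bi_sector = 2048;	/* bio start, in 512 B sectors */
	uint32_t bi_size = 1 << 20;	/* 1 MiB discard request */

	/* mirrors range->slba and range->nlb in the patch below */
	uint64_t slba = bi_sector >> (lba_shift - 9);	/* 2048 / 8 = 256 */
	uint32_t nlb = bi_size >> lba_shift;		/* 1 MiB / 4 KiB = 256 */

	printf("slba=%llu nlb=%u\n", (unsigned long long)slba, nlb);
	return 0;
}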
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 60 +++++++++++++++++++++++++++++++++++++++++++- include/linux/nvme.h | 32 +++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 9dcefe40380b..26e266072079 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -80,6 +80,7 @@ struct nvme_dev { char model[40]; char firmware_rev[8]; u32 max_hw_sectors; + u16 oncs; }; /* @@ -510,6 +511,44 @@ static int nvme_map_bio(struct device *dev, struct nvme_iod *iod, return length; } +/* + * We reuse the small pool to allocate the 16-byte range here as it is not + * worth having a special pool for these or additional cases to handle freeing + * the iod. + */ +static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, + struct bio *bio, struct nvme_iod *iod, int cmdid) +{ + struct nvme_dsm_range *range; + struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; + + range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC, + &iod->first_dma); + if (!range) + return -ENOMEM; + + iod_list(iod)[0] = (__le64 *)range; + iod->npages = 0; + + range->cattr = cpu_to_le32(0); + range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift); + range->slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9)); + + memset(cmnd, 0, sizeof(*cmnd)); + cmnd->dsm.opcode = nvme_cmd_dsm; + cmnd->dsm.command_id = cmdid; + cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); + cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma); + cmnd->dsm.nr = 0; + cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); + + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + writel(nvmeq->sq_tail, nvmeq->q_db); + + return 0; +} + static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, int cmdid) { @@ -567,6 +606,12 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, if (unlikely(cmdid < 0)) goto free_iod; + if (bio->bi_rw & REQ_DISCARD) { + result = nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); + if (result) + goto free_cmdid; + return result; + } if ((bio->bi_rw & REQ_FLUSH) && !psegs) return nvme_submit_flush(nvmeq, ns, cmdid); @@ -1347,6 +1392,16 @@ static void nvme_put_ns_idx(int index) spin_unlock(&dev_list_lock); } +static void nvme_config_discard(struct nvme_ns *ns) +{ + u32 logical_block_size = queue_logical_block_size(ns->queue); + ns->queue->limits.discard_zeroes_data = 0; + ns->queue->limits.discard_alignment = logical_block_size; + ns->queue->limits.discard_granularity = logical_block_size; + ns->queue->limits.max_discard_sectors = 0xffffffff; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); +} + static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, struct nvme_id_ns *id, struct nvme_lba_range_type *rt) { @@ -1366,7 +1421,6 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, ns->queue->queue_flags = QUEUE_FLAG_DEFAULT; queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); -/* queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); */ blk_queue_make_request(ns->queue, nvme_make_request); ns->dev = dev; ns->queue->queuedata = ns; @@ -1392,6 +1446,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid); set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); + if (dev->oncs & NVME_CTRL_ONCS_DSM) + nvme_config_discard(ns); + return ns; out_free_queue: @@ -1520,6 
+1577,7 @@ static int nvme_dev_add(struct nvme_dev *dev) ctrl = mem; nn = le32_to_cpup(&ctrl->nn); + dev->oncs = le16_to_cpup(&ctrl->oncs); memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fa3b0b9b071..bde44c1fd213 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -107,6 +107,12 @@ struct nvme_id_ctrl { __u8 vs[1024]; }; +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, +}; + struct nvme_lbaf { __le16 ms; __u8 ds; @@ -246,6 +252,31 @@ enum { NVME_RW_DSM_COMPRESSED = 1 << 7, }; +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -372,6 +403,7 @@ struct nvme_command { struct nvme_create_sq create_sq; struct nvme_delete_queue delete_queue; struct nvme_download_firmware dlfw; + struct nvme_dsm_cmd dsm; }; }; -- GitLab From 729dd1bd802acb973eec9c73ccb87d3143c13937 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 4 Mar 2013 18:40:56 -0700 Subject: [PATCH 0206/3163] NVMe: Rename nvme.c to nvme-core.c In preparation for adding nvme-scsi.c. It is preferable to retain the module name 'nvme'. Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/Makefile | 1 + drivers/block/{nvme.c => nvme-core.c} | 0 2 files changed, 1 insertion(+) rename drivers/block/{nvme.c => nvme-core.c} (100%) diff --git a/drivers/block/Makefile b/drivers/block/Makefile index a3b40232c6ab..2a41c86d3ad9 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -42,4 +42,5 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ +nvme-y := nvme-core.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/nvme.c b/drivers/block/nvme-core.c similarity index 100% rename from drivers/block/nvme.c rename to drivers/block/nvme-core.c -- GitLab From 6ead114232f786e3ef7a034c8617f2a4df8e5226 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 20 Mar 2013 19:01:06 +0900 Subject: [PATCH 0207/3163] f2fs: fix the recovery flow to handle errors correctly We should handle errors during the recovery flow correctly. For example, if we get -ENOMEM, we should report a mount failure instead of continuing with the remaining mount procedure.
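The conversion below follows the standard kernel error-propagation idiom: every helper returns 0 or a negative errno, and partially completed work is unwound through goto labels. A minimal sketch of the shape, with hypothetical step_one()/step_two() helpers standing in for the real recovery routines:

/* Illustrative only; the helpers are hypothetical. */
static int do_recovery_step(void)
{
	int err;

	err = step_one();
	if (err)
		return err;		/* nothing to unwind yet */

	err = step_two();
	if (err)
		goto undo_one;		/* clean up, then propagate */

	return 0;

undo_one:
	undo_step_one();
	return err;			/* caller sees the original errno */
}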
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/recovery.c | 46 ++++++++++++++++++++++++++++------------------ fs/f2fs/super.c | 9 +++++++-- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5bb87e0216f5..109e12d21a36 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1027,7 +1027,7 @@ void destroy_gc_caches(void); /* * recovery.c */ -void recover_fsync_data(struct f2fs_sb_info *); +int recover_fsync_data(struct f2fs_sb_info *); bool space_for_roll_forward(struct f2fs_sb_info *); /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2d86eb26c493..61bdaa755906 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -118,10 +118,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) lock_page(page); - if (cp_ver != cpver_of_node(page)) { - err = -EINVAL; + if (cp_ver != cpver_of_node(page)) goto unlock_out; - } if (!is_fsync_dnode(page)) goto next; @@ -134,10 +132,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) FI_INC_LINK); } else { if (IS_INODE(page) && is_dent_dnode(page)) { - if (recover_inode_page(sbi, page)) { - err = -ENOMEM; + err = recover_inode_page(sbi, page); + if (err) goto unlock_out; - } } /* add this fsync inode to the list */ @@ -237,13 +234,14 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, iput(inode); } -static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, +static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, block_t blkaddr) { unsigned int start, end; struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; + int err = 0; start = start_bidx_of_node(ofs_of_node(page)); if (IS_INODE(page)) @@ -252,8 +250,9 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, end = start + ADDRS_PER_BLOCK; set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, start, ALLOC_NODE)) - return; + err = get_dnode_of_data(&dn, start, ALLOC_NODE); + if (err) + return err; wait_on_page_writeback(dn.node_page); @@ -298,14 +297,16 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); f2fs_put_dnode(&dn); + return 0; } -static void recover_data(struct f2fs_sb_info *sbi, +static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head, int type) { unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); struct curseg_info *curseg; struct page *page; + int err = 0; block_t blkaddr; /* get node pages in the current segment */ @@ -315,13 +316,15 @@ static void recover_data(struct f2fs_sb_info *sbi, /* read node page */ page = alloc_page(GFP_NOFS | __GFP_ZERO); if (IS_ERR(page)) - return; + return -ENOMEM; + lock_page(page); while (1) { struct fsync_inode_entry *entry; - if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) goto out; lock_page(page); @@ -333,7 +336,9 @@ static void recover_data(struct f2fs_sb_info *sbi, if (!entry) goto next; - do_recover_data(sbi, entry->inode, page, blkaddr); + err = do_recover_data(sbi, entry->inode, page, blkaddr); + if (err) + goto out; if (entry->blkaddr == blkaddr) { iput(entry->inode); @@ -349,22 +354,26 @@ static void recover_data(struct f2fs_sb_info *sbi, out: __free_pages(page, 0); - allocate_new_segments(sbi); + if (!err) + allocate_new_segments(sbi); + return err; } -void recover_fsync_data(struct 
f2fs_sb_info *sbi) +int recover_fsync_data(struct f2fs_sb_info *sbi) { struct list_head inode_list; + int err; fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry), NULL); if (unlikely(!fsync_entry_slab)) - return; + return -ENOMEM; INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ - if (find_fsync_dnodes(sbi, &inode_list)) + err = find_fsync_dnodes(sbi, &inode_list); + if (err) goto out; if (list_empty(&inode_list)) @@ -372,11 +381,12 @@ void recover_fsync_data(struct f2fs_sb_info *sbi) /* step #2: recover data */ sbi->por_doing = 1; - recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); sbi->por_doing = 0; BUG_ON(!list_empty(&inode_list)); out: destroy_fsync_dnodes(sbi, &inode_list); kmem_cache_destroy(fsync_entry_slab); write_checkpoint(sbi, false); + return err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c9ef88da0723..252890ef8dbc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -642,8 +642,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) - recover_fsync_data(sbi); + if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + err = recover_fsync_data(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, "Failed to recover fsync data"); + goto free_root_inode; + } + } /* After POR, we can run background GC thread */ err = start_gc_thread(sbi); -- GitLab From 0ff153a2f1fa7ef31d6d9bc9ce6c3815dede55e6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 20 Mar 2013 14:58:38 +0900 Subject: [PATCH 0208/3163] f2fs: do not skip writing file meta during fsync This patch removes the data_version check flow from the fsync call. The original purpose of data_version was to avoid writing inode pages redundantly across repeated fsync calls. However, when a user modifies file metadata and then calls fsync, we should not skip the fsync procedure. So, let's remove this condition check and trust the user to trigger fsync in the right manner.
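For context, the early return that survives this patch is the generic VFS dirty-state check, visible in the f2fs_sync_file() hunk below; it is safe because it keys off the inode's I_DIRTY flags rather than a private version counter:

/* Retained fast path (quoted from the hunk that follows): */
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
	goto out;	/* fdatasync() with no data-critical metadata dirty */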
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 --- fs/f2fs/f2fs.h | 1 - fs/f2fs/file.c | 10 ---------- fs/f2fs/inode.c | 1 - 4 files changed, 15 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ea8be6fc38f1..47a2d7c87ea9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -435,7 +435,6 @@ static int f2fs_read_data_pages(struct file *file, int do_write_data_page(struct page *page) { struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); block_t old_blk_addr, new_blk_addr; struct dnode_of_data dn; int err = 0; @@ -465,8 +464,6 @@ int do_write_data_page(struct page *page) write_data_page(inode, page, &dn, old_blk_addr, &new_blk_addr); update_extent_cache(new_blk_addr, &dn); - F2FS_I(inode)->data_version = - le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); } out_writepage: f2fs_put_dnode(&dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 109e12d21a36..380e2b3cdac7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -159,7 +159,6 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags; /* use to pass per-file flags */ - unsigned long long data_version;/* latest version of data for fsync */ atomic_t dirty_dents; /* # of dirty dentry pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ff018a42e435..d65fcad578c5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -124,7 +124,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - unsigned long long cur_version; int ret = 0; bool need_cp = false; struct writeback_control wbc = { @@ -148,15 +147,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) goto out; - mutex_lock(&sbi->cp_mutex); - cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); - mutex_unlock(&sbi->cp_mutex); - - if (F2FS_I(inode)->data_version != cur_version && - !(inode->i_state & I_DIRTY)) - goto out; - F2FS_I(inode)->data_version--; - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e0e8308594a5..f798ddf2c8a8 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -80,7 +80,6 @@ static int do_read_inode(struct inode *inode) fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); fi->flags = 0; - fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); get_extent_info(&fi->ext, ri->i_ext); -- GitLab From fa37241743ac26ba0ac6f54579158c2fae310a5c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Mar 2013 12:53:19 +0900 Subject: [PATCH 0209/3163] f2fs: retain nat cache entries for further free nid allocation In the checkpoint flow, f2fs examines all of the nat cache entries. Previously, if an entry has NULL_ADDR, f2fs drops the entry and adds the obsolete nid to the free nid list. However, this free nid will be reused soon, and looking it up again then misses in the nat cache. In order to avoid this, we don't need to drop the nat cache entry at this moment.
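Distilled from the hunk below: the cached nat entry is now dropped only when the obsolete nid could not be queued on the free-nid list; if it was queued, the nid will be reallocated soon and the retained entry spares a cache miss. In outline (locking elided):

if (nat_get_blkaddr(ne) == NULL_ADDR && !add_free_nid(NM_I(sbi), nid))
	__del_from_nat_cache(nm_i, ne);		/* nid not queued for reuse */
else
	__clear_nat_cache_dirty(nm_i, ne);	/* keep the entry cached */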
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f7b03ba9c0d7..0177f9434c25 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1621,11 +1621,11 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_in_journal(sum, offset) = cpu_to_le32(nid); } - if (nat_get_blkaddr(ne) == NULL_ADDR) { + if (nat_get_blkaddr(ne) == NULL_ADDR && + !add_free_nid(NM_I(sbi), nid)) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); - add_free_nid(NM_I(sbi), nid); } else { write_lock(&nm_i->nat_tree_lock); __clear_nat_cache_dirty(nm_i, ne); -- GitLab From 953a3e27e10fc6acb480801ea47197d0270d735e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Mar 2013 15:21:57 +0900 Subject: [PATCH 0210/3163] f2fs: fix to give correct parent inode number for roll forward When we recover fsync'ed data after power-off recovery, we should guarantee that the parent inode number is correct for each direct node block. So, let's make the following rules. - The fsync should trigger a checkpoint for all inodes that have experienced hard links. - So, only normal files can be recovered by roll-forward. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 22 ++-------------------- fs/f2fs/namei.c | 14 ++++++++++---- fs/f2fs/node.h | 15 +++++++++++++++ 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 380e2b3cdac7..77e2eb061bfa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -148,6 +148,7 @@ struct extent_info { * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. */ #define FADVISE_COLD_BIT 0x01 +#define FADVISE_CP_BIT 0x02 struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ @@ -825,7 +826,6 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ - FI_NEED_CP, /* need to do checkpoint during fsync */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d65fcad578c5..e031f570df79 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -103,23 +103,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { .remap_pages = generic_file_remap_pages, }; -static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode) -{ - struct dentry *dentry; - nid_t pino; - - inode = igrab(inode); - dentry = d_find_any_alias(inode); - if (!dentry) { - iput(inode); - return 0; - } - pino = dentry->d_parent->d_inode->i_ino; - dput(dentry); - iput(inode); - return !is_checkpointed_node(sbi, pino); -} - int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -149,17 +132,16 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; - else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) + else if (is_cp_file(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) need_cp = true; - else if (need_to_sync_dir(sbi, inode)) + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) need_cp = true; if (need_cp) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - clear_inode_flag(F2FS_I(inode),
FI_NEED_CP); } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d4a171b1a68b..7c6e219a479c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -15,6 +15,7 @@ #include #include "f2fs.h" +#include "node.h" #include "xattr.h" #include "acl.h" @@ -99,7 +100,7 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) /* * Set multimedia files as cold files for hot/cold data separation */ -static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode, +static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { int i; @@ -108,7 +109,7 @@ static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode, int count = le32_to_cpu(sbi->raw_super->extension_count); for (i = 0; i < count; i++) { if (!is_multimedia_file(name, extlist[i])) { - F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; + set_cold_file(inode); break; } } @@ -130,7 +131,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_file(sbi, inode, dentry->d_name.name); + set_cold_files(sbi, inode, dentry->d_name.name); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -173,6 +174,12 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (err) goto out; + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on. + */ + set_cp_file(inode); + d_instantiate(dentry, inode); return 0; out: @@ -425,7 +432,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } old_inode->i_ctime = CURRENT_TIME; - set_inode_flag(F2FS_I(old_inode), FI_NEED_CP); mark_inode_dirty(old_inode); f2fs_delete_entry(old_entry, old_page, NULL); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index afdb130f782e..d009cdfd2679 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -277,6 +277,21 @@ static inline int is_cold_file(struct inode *inode) return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; } +static inline void set_cold_file(struct inode *inode) +{ + F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; +} + +static inline int is_cp_file(struct inode *inode) +{ + return F2FS_I(inode)->i_advise & FADVISE_CP_BIT; +} + +static inline void set_cp_file(struct inode *inode) +{ + F2FS_I(inode)->i_advise |= FADVISE_CP_BIT; +} + static inline int is_cold_data(struct page *page) { return PageChecked(page); -- GitLab From 25f27db48e9d741cf3494f36e287d3c34a0fdba3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 5 Mar 2013 21:22:38 +0100 Subject: [PATCH 0211/3163] gpio-ich: Check for pin availability at request time Stop checking for pin availability in direction and get functions. These functions can be called repeatedly, so checking every time is bad for performance. Now that requesting GPIO pins is no longer optional, checking for availability at pin request time is enough. 
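The pattern is worth spelling out: validate a pin once in the gpio_chip request() hook and let the hot-path accessors trust the result. A minimal sketch of the idea, with foo_* as illustrative stand-ins for the driver's helpers:

static int foo_gpio_request(struct gpio_chip *chip, unsigned nr)
{
	if (!foo_check_available(chip, nr))
		return -ENXIO;	/* unusable pins rejected up front */
	return 0;
}

static int foo_gpio_get(struct gpio_chip *chip, unsigned nr)
{
	/* No availability check: the pin could not have been
	 * requested unless it was available. */
	return foo_read_bit(chip, nr);
}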
Signed-off-by: Jean Delvare Cc: Peter Tyser Cc: Grant Likely Cc: Linus Walleij Signed-off-by: Linus Walleij --- drivers/gpio/gpio-ich.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index f9dbd503fc40..31682178c59b 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -135,9 +135,6 @@ static bool ichx_gpio_check_available(struct gpio_chip *gpio, unsigned nr) static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) { - if (!ichx_gpio_check_available(gpio, nr)) - return -ENXIO; - /* * Try setting pin as an input and verify it worked since many pins * are output-only. @@ -151,9 +148,6 @@ static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, int val) { - if (!ichx_gpio_check_available(gpio, nr)) - return -ENXIO; - /* Set GPIO output value. */ ichx_write_bit(GPIO_LVL, nr, val, 0); @@ -169,9 +163,6 @@ static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, static int ichx_gpio_get(struct gpio_chip *chip, unsigned nr) { - if (!ichx_gpio_check_available(chip, nr)) - return -ENXIO; - return ichx_read_bit(GPIO_LVL, nr); } @@ -180,9 +171,6 @@ static int ich6_gpio_get(struct gpio_chip *chip, unsigned nr) unsigned long flags; u32 data; - if (!ichx_gpio_check_available(chip, nr)) - return -ENXIO; - /* * GPI 0 - 15 need to be read from the power management registers on * a ICH6/3100 bridge. @@ -207,6 +195,9 @@ static int ich6_gpio_get(struct gpio_chip *chip, unsigned nr) static int ichx_gpio_request(struct gpio_chip *chip, unsigned nr) { + if (!ichx_gpio_check_available(chip, nr)) + return -ENXIO; + /* * Note we assume the BIOS properly set a bridge's USE value. Some * chips (eg Intel 3100) have bogus USE values though, so first see if -- GitLab From 5c97d146f20bb6a82a621009c0c6f368e40a905f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 5 Mar 2013 20:21:56 +0800 Subject: [PATCH 0212/3163] gpio: viperboard: Remove duplicate code to set gpio->gpiob_val Set it once is enough, and it's done in vprbrd_gpiob_set() which is called by vprbrd_gpiob_direction_output(). Signed-off-by: Axel Lin Tested-by: Lars Poeschel Acked-by: Lars Poeschel Signed-off-by: Linus Walleij --- drivers/gpio/gpio-viperboard.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c index 59d72391de26..095ab14cea4d 100644 --- a/drivers/gpio/gpio-viperboard.c +++ b/drivers/gpio/gpio-viperboard.c @@ -380,10 +380,6 @@ static int vprbrd_gpiob_direction_output(struct gpio_chip *chip, struct vprbrd *vb = gpio->vb; gpio->gpiob_out |= (1 << offset); - if (value) - gpio->gpiob_val |= (1 << offset); - else - gpio->gpiob_val &= ~(1 << offset); mutex_lock(&vb->lock); -- GitLab From 13c3b0fcc8e33ba49f252378f6e7290b146042af Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 4 Mar 2013 18:40:57 -0700 Subject: [PATCH 0213/3163] NVMe: Move structures & definitions to header file nvme-scsi.c uses several data structures and definitions that were previously private to nvme-core.c. Move the definitions to nvme.h, protected by __KERNEL__. 
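The guard in question is the conventional split of a header that userspace also sees: the ABI (ioctls and wire structures) stays unconditional, while kernel-internal types go behind #ifdef __KERNEL__. A minimal sketch with hypothetical foo_* names:

#ifndef _LINUX_FOO_H
#define _LINUX_FOO_H

#include <linux/ioctl.h>
#include <linux/types.h>

struct foo_user_cmd {		/* shared with userspace */
	__u32 opcode;
	__u32 len;
};

#define FOO_IOCTL_SUBMIT	_IOWR('F', 0x01, struct foo_user_cmd)

#ifdef __KERNEL__
/* Kernel-only state; never compiled into userspace programs. */
struct foo_dev {
	struct list_head node;
};
#endif

#endif /* _LINUX_FOO_H */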
Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 55 ----------------------------------- include/linux/nvme.h | 60 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 55 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 26e266072079..1f98040cf677 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -46,7 +46,6 @@ #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) #define NVME_MINORS 64 -#define NVME_IO_TIMEOUT (5 * HZ) #define ADMIN_TIMEOUT (60 * HZ) static int nvme_major; @@ -59,44 +58,6 @@ static DEFINE_SPINLOCK(dev_list_lock); static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; -/* - * Represents an NVM Express device. Each nvme_dev is a PCI function. - */ -struct nvme_dev { - struct list_head node; - struct nvme_queue **queues; - u32 __iomem *dbs; - struct pci_dev *pci_dev; - struct dma_pool *prp_page_pool; - struct dma_pool *prp_small_pool; - int instance; - int queue_count; - int db_stride; - u32 ctrl_config; - struct msix_entry *entry; - struct nvme_bar __iomem *bar; - struct list_head namespaces; - char serial[20]; - char model[40]; - char firmware_rev[8]; - u32 max_hw_sectors; - u16 oncs; -}; - -/* - * An NVM Express namespace is equivalent to a SCSI LUN - */ -struct nvme_ns { - struct list_head list; - - struct nvme_dev *dev; - struct request_queue *queue; - struct gendisk *disk; - - int ns_id; - int lba_shift; -}; - /* * An NVM Express queue. Each device has at least two (one for admin * commands and one for I/O commands). @@ -295,22 +256,6 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) return 0; } -/* - * The nvme_iod describes the data in an I/O, including the list of PRP - * entries. You can't see it in this data structure because C doesn't let - * me express that. Use nvme_alloc_iod to ensure there's enough space - * allocated to store the PRP list. - */ -struct nvme_iod { - void *private; /* For the use of the submitter of the I/O */ - int npages; /* In the PRP list. 0 means small pool in use */ - int offset; /* Of PRP list */ - int nents; /* Used in scatterlist */ - int length; /* Of data, in bytes */ - dma_addr_t first_dma; - struct scatterlist sg[0]; -}; - static __le64 **iod_list(struct nvme_iod *iod) { return ((void *)iod) + iod->offset; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bde44c1fd213..6f899add14ab 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -493,4 +493,64 @@ struct nvme_admin_cmd { #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) +#ifdef __KERNEL__ +#include + +#define NVME_IO_TIMEOUT (5 * HZ) + +/* + * Represents an NVM Express device. Each nvme_dev is a PCI function. 
+ */ +struct nvme_dev { + struct list_head node; + struct nvme_queue **queues; + u32 __iomem *dbs; + struct pci_dev *pci_dev; + struct dma_pool *prp_page_pool; + struct dma_pool *prp_small_pool; + int instance; + int queue_count; + int db_stride; + u32 ctrl_config; + struct msix_entry *entry; + struct nvme_bar __iomem *bar; + struct list_head namespaces; + char serial[20]; + char model[40]; + char firmware_rev[8]; + u32 max_hw_sectors; + u16 oncs; +}; + +/* + * An NVM Express namespace is equivalent to a SCSI LUN + */ +struct nvme_ns { + struct list_head list; + + struct nvme_dev *dev; + struct request_queue *queue; + struct gendisk *disk; + + int ns_id; + int lba_shift; +}; + +/* + * The nvme_iod describes the data in an I/O, including the list of PRP + * entries. You can't see it in this data structure because C doesn't let + * me express that. Use nvme_alloc_iod to ensure there's enough space + * allocated to store the PRP list. + */ +struct nvme_iod { + void *private; /* For the use of the submitter of the I/O */ + int npages; /* In the PRP list. 0 means small pool in use */ + int offset; /* Of PRP list */ + int nents; /* Used in scatterlist */ + int length; /* Of data, in bytes */ + dma_addr_t first_dma; + struct scatterlist sg[0]; +}; +#endif + #endif /* _LINUX_NVME_H */ -- GitLab From f8ebf8409abfdaeeb8c847381629a2a8b8e3d816 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 27 Mar 2013 07:13:41 -0400 Subject: [PATCH 0214/3163] NVMe: Add definitions for format command The SCSI emulation has the ability to send format commands, so we need to add the definition of the command. Also add a missing error code. Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 1 + include/linux/nvme.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 1f98040cf677..d0cfb85d5582 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -93,6 +93,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(sizeof(struct nvme_features) != 64); + BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6f899add14ab..f1974cab60cf 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -393,6 +393,16 @@ struct nvme_download_firmware { __u32 rsvd12[4]; }; +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -403,6 +413,7 @@ struct nvme_command { struct nvme_create_sq create_sq; struct nvme_delete_queue delete_queue; struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; struct nvme_dsm_cmd dsm; }; }; @@ -420,6 +431,7 @@ enum { NVME_SC_FUSED_FAIL = 0x9, NVME_SC_FUSED_MISSING = 0xa, NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, NVME_SC_NS_NOT_READY = 0x82, -- GitLab From 987d0c8e9ded8d43843d602b6ab890d14745690f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Sat, 23 Mar 2013 14:58:20 +0400 Subject: [PATCH 0215/3163] scripts/tags.sh: Add magic for OFFSET and DEFINE Add rules for definitions which is generally used in asm-offsets 
files. Signed-off-by: Kirill V Tkhai CC: Michal Marek CC: Andrew Morton Signed-off-by: Michal Marek --- scripts/tags.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 26a87e68afed..74f02e4dddd2 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -199,7 +199,9 @@ exuberant() --regex-c='/DEFINE_PER_CPU_SHARED_ALIGNED\(([^,]*,\s*)(\w*).*\)/\2/v/' \ --regex-c='/DECLARE_WAIT_QUEUE_HEAD\((\w*)/\1/v/' \ --regex-c='/DECLARE_(TASKLET|WORK|DELAYED_WORK)\((\w*)/\2/v/' \ - --regex-c='/DEFINE_PCI_DEVICE_TABLE\((\w*)/\1/v/' + --regex-c='/DEFINE_PCI_DEVICE_TABLE\((\w*)/\1/v/' \ + --regex-c='/(^\s)OFFSET\((\w*)/\2/v/' \ + --regex-c='/(^\s)DEFINE\((\w*)/\2/v/' all_kconfigs | xargs $1 -a \ --langdef=kconfig --language-force=kconfig \ -- GitLab From 021304907bd06a92cee362c605bd4d9a83bb1927 Mon Sep 17 00:00:00 2001 From: Nikolay Balandin Date: Wed, 6 Mar 2013 15:06:44 +0400 Subject: [PATCH 0216/3163] Add TI TCA9554 to supported devices table Signed-off-by: Nikolay Balandin Acked-by: Kuninori Morimoto Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pcf857x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index a19b7457a726..a965620c1c2a 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -45,6 +45,7 @@ static const struct i2c_device_id pcf857x_id[] = { { "pca9675", 16 }, { "max7328", 8 }, { "max7329", 8 }, + { "tca9554", 8 }, { } }; MODULE_DEVICE_TABLE(i2c, pcf857x_id); -- GitLab From 3cbf1822b5fd98eccb641c94c8cd2455fdad9221 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 6 Mar 2013 13:49:36 -0800 Subject: [PATCH 0217/3163] gpio-sch: Allow for more than 8 lines in the resume well The E6xx (TunnelCreek) CPUs have 9 GPIO lines in the resume well. Update the resume functions to allow for more than 8 GPIO lines, using the core functions as a template. 
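The generalisation below is the usual divide/modulo banking trick: each I/O-port byte carries 8 GPIOs, so a pin number splits into a register-byte offset and a bit position. A distilled sketch of the read path (RGLV and gpio_ba are the driver's register offset and I/O base):

static int resume_well_get(unsigned gpio_num)
{
	unsigned short offset = RGLV + gpio_num / 8;	/* register byte */
	unsigned short bit = gpio_num % 8;		/* bit within it */

	return !!(inb(gpio_ba + offset) & (1 << bit));
}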
Cc: Grant Likely Cc: Linus Walleij Signed-off-by: Darren Hart Signed-off-by: Linus Walleij --- drivers/gpio/gpio-sch.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c index edae963f4625..7e7b52be6e29 100644 --- a/drivers/gpio/gpio-sch.c +++ b/drivers/gpio/gpio-sch.c @@ -125,13 +125,17 @@ static int sch_gpio_resume_direction_in(struct gpio_chip *gc, unsigned gpio_num) { u8 curr_dirs; + unsigned short offset, bit; spin_lock(&gpio_lock); - curr_dirs = inb(gpio_ba + RGIO); + offset = RGIO + gpio_num / 8; + bit = gpio_num % 8; + + curr_dirs = inb(gpio_ba + offset); - if (!(curr_dirs & (1 << gpio_num))) - outb(curr_dirs | (1 << gpio_num) , gpio_ba + RGIO); + if (!(curr_dirs & (1 << bit))) + outb(curr_dirs | (1 << bit), gpio_ba + offset); spin_unlock(&gpio_lock); return 0; @@ -139,22 +143,31 @@ static int sch_gpio_resume_direction_in(struct gpio_chip *gc, static int sch_gpio_resume_get(struct gpio_chip *gc, unsigned gpio_num) { - return !!(inb(gpio_ba + RGLV) & (1 << gpio_num)); + unsigned short offset, bit; + + offset = RGLV + gpio_num / 8; + bit = gpio_num % 8; + + return !!(inb(gpio_ba + offset) & (1 << bit)); } static void sch_gpio_resume_set(struct gpio_chip *gc, unsigned gpio_num, int val) { u8 curr_vals; + unsigned short offset, bit; spin_lock(&gpio_lock); - curr_vals = inb(gpio_ba + RGLV); + offset = RGLV + gpio_num / 8; + bit = gpio_num % 8; + + curr_vals = inb(gpio_ba + offset); if (val) - outb(curr_vals | (1 << gpio_num), gpio_ba + RGLV); + outb(curr_vals | (1 << bit), gpio_ba + offset); else - outb((curr_vals & ~(1 << gpio_num)), gpio_ba + RGLV); + outb((curr_vals & ~(1 << bit)), gpio_ba + offset); spin_unlock(&gpio_lock); } @@ -163,14 +176,18 @@ static int sch_gpio_resume_direction_out(struct gpio_chip *gc, unsigned gpio_num, int val) { u8 curr_dirs; + unsigned short offset, bit; sch_gpio_resume_set(gc, gpio_num, val); + offset = RGIO + gpio_num / 8; + bit = gpio_num % 8; + spin_lock(&gpio_lock); - curr_dirs = inb(gpio_ba + RGIO); - if (curr_dirs & (1 << gpio_num)) - outb(curr_dirs & ~(1 << gpio_num), gpio_ba + RGIO); + curr_dirs = inb(gpio_ba + offset); + if (curr_dirs & (1 << bit)) + outb(curr_dirs & ~(1 << bit), gpio_ba + offset); spin_unlock(&gpio_lock); return 0; -- GitLab From 61d793bbfb76ea6740dd1c1a4f2cdac57a0c1c5c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 7 Mar 2013 10:48:19 +0200 Subject: [PATCH 0218/3163] gpio/gpio-ich: make ichx_gpio_check_available() return a pure boolean value It is more readable for humans to use double-bang (!!) to convert the value to pure boolean before it is returned. Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpio-ich.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index 31682178c59b..2829d6d50026 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -130,7 +130,7 @@ static int ichx_read_bit(int reg, unsigned nr) static bool ichx_gpio_check_available(struct gpio_chip *gpio, unsigned nr) { - return ichx_priv.use_gpio & (1 << (nr / 32)); + return !!(ichx_priv.use_gpio & (1 << (nr / 32))); } static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) -- GitLab From 977d16b87a78844f090af0565cbd2e3a94fd6337 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 8 Mar 2013 14:38:12 +0200 Subject: [PATCH 0219/3163] gpio-lynxpoint: Add X86 dependency and io-port handling header. 
Lynxpoint gpio driver uses X86 specific io-ports to control gpios Signed-off-by: Mathias Nyman Reviewed-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-lynxpoint.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 93aaadf99f28..704d01d67522 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -303,7 +303,7 @@ config GPIO_GE_FPGA config GPIO_LYNXPOINT bool "Intel Lynxpoint GPIO support" - depends on ACPI + depends on ACPI && X86 select IRQ_DOMAIN help driver for GPIO functionality on Intel Lynxpoint PCH chipset diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 3472b05ac512..86c17de87692 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -32,6 +32,7 @@ #include #include #include +#include /* LynxPoint chipset has support for 94 gpio pins */ -- GitLab From 1cfe6f8cb1078039a0f8dfc833a4ae752220d2db Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Mar 2013 20:06:30 +0900 Subject: [PATCH 0220/3163] gpio: em: Make use of devm functions Update the Emma Mobile GPIO driver to make use of devm functions. This simplifies the error handling and makes the code more compact. Signed-off-by: Magnus Damm Signed-off-by: Linus Walleij --- drivers/gpio/gpio-em.c | 53 +++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index d05369735857..5cba855638bf 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -245,7 +245,7 @@ static int em_gio_probe(struct platform_device *pdev) const char *name = dev_name(&pdev->dev); int ret; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); if (!p) { dev_err(&pdev->dev, "failed to allocate driver data\n"); ret = -ENOMEM; @@ -264,21 +264,23 @@ static int em_gio_probe(struct platform_device *pdev) if (!io[0] || !io[1] || !irq[0] || !irq[1]) { dev_err(&pdev->dev, "missing IRQ or IOMEM\n"); ret = -EINVAL; - goto err1; + goto err0; } - p->base0 = ioremap_nocache(io[0]->start, resource_size(io[0])); + p->base0 = devm_ioremap_nocache(&pdev->dev, io[0]->start, + resource_size(io[0])); if (!p->base0) { dev_err(&pdev->dev, "failed to remap low I/O memory\n"); ret = -ENXIO; - goto err1; + goto err0; } - p->base1 = ioremap_nocache(io[1]->start, resource_size(io[1])); + p->base1 = devm_ioremap_nocache(&pdev->dev, io[1]->start, + resource_size(io[1])); if (!p->base1) { dev_err(&pdev->dev, "failed to remap high I/O memory\n"); ret = -ENXIO; - goto err2; + goto err0; } if (!pdata) { @@ -289,13 +291,13 @@ static int em_gio_probe(struct platform_device *pdev) &pdata->number_of_pins)) { dev_err(&pdev->dev, "Missing ngpios OF property\n"); ret = -EINVAL; - goto err3; + goto err0; } ret = of_alias_get_id(pdev->dev.of_node, "gpio"); if (ret < 0) { dev_err(&pdev->dev, "Couldn't get OF id\n"); - goto err3; + goto err0; } pdata->gpio_base = ret * 32; /* 32 GPIOs per instance */ } @@ -327,40 +329,32 @@ static int em_gio_probe(struct platform_device *pdev) if (!p->irq_domain) { ret = -ENXIO; dev_err(&pdev->dev, "cannot initialize irq domain\n"); - goto err3; + goto err0; } - if (request_irq(irq[0]->start, em_gio_irq_handler, 0, name, p)) { + if (devm_request_irq(&pdev->dev, irq[0]->start, + em_gio_irq_handler, 0, name, p)) { dev_err(&pdev->dev, "failed to request low IRQ\n"); ret = -ENOENT; - goto err4; + goto err1; } - if (request_irq(irq[1]->start, 
em_gio_irq_handler, 0, name, p)) { + if (devm_request_irq(&pdev->dev, irq[1]->start, + em_gio_irq_handler, 0, name, p)) { dev_err(&pdev->dev, "failed to request high IRQ\n"); ret = -ENOENT; - goto err5; + goto err1; } ret = gpiochip_add(gpio_chip); if (ret) { dev_err(&pdev->dev, "failed to add GPIO controller\n"); - goto err6; + goto err1; } return 0; -err6: - free_irq(irq[1]->start, pdev); -err5: - free_irq(irq[0]->start, pdev); -err4: - irq_domain_remove(p->irq_domain); -err3: - iounmap(p->base1); -err2: - iounmap(p->base0); err1: - kfree(p); + irq_domain_remove(p->irq_domain); err0: return ret; } @@ -368,22 +362,13 @@ static int em_gio_probe(struct platform_device *pdev) static int em_gio_remove(struct platform_device *pdev) { struct em_gio_priv *p = platform_get_drvdata(pdev); - struct resource *irq[2]; int ret; ret = gpiochip_remove(&p->gpio_chip); if (ret) return ret; - irq[0] = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - irq[1] = platform_get_resource(pdev, IORESOURCE_IRQ, 1); - - free_irq(irq[1]->start, pdev); - free_irq(irq[0]->start, pdev); irq_domain_remove(p->irq_domain); - iounmap(p->base1); - iounmap(p->base0); - kfree(p); return 0; } -- GitLab From 808c513ee64be7cec6a7dcfff1687bc0995362bc Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 4 Mar 2013 14:39:39 +0530 Subject: [PATCH 0221/3163] gpio/vt8500: Convert to devm_ioremap_resource() Use the newly introduced devm_ioremap_resource() instead of devm_request_and_ioremap() which provides more consistent error handling. devm_ioremap_resource() provides its own error messages; so all explicit error messages can be removed from the failure code paths. Signed-off-by: Sachin Kamat Reviewed-by: Thierry Reding Acked-by: Tony Prisk Signed-off-by: Linus Walleij --- drivers/gpio/gpio-vt8500.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-vt8500.c b/drivers/gpio/gpio-vt8500.c index 81683ca35ac1..b2d8d6f0c522 100644 --- a/drivers/gpio/gpio-vt8500.c +++ b/drivers/gpio/gpio-vt8500.c @@ -309,11 +309,9 @@ static int vt8500_gpio_probe(struct platform_device *pdev) return -ENODEV; } - gpio_base = devm_request_and_ioremap(&pdev->dev, res); - if (!gpio_base) { - dev_err(&pdev->dev, "Unable to map GPIO registers\n"); - return -ENOMEM; - } + gpio_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(gpio_base)) + return PTR_ERR(gpio_base); ret = vt8500_add_chips(pdev, gpio_base, of_id->data); -- GitLab From 24bb3813d525322e007c47134cd476f04dbb554f Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:14:01 +0900 Subject: [PATCH 0222/3163] gpio: adp5520: use devm_kzalloc() Use devm_kzalloc() to make cleanup paths simpler. 
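Since the same recipe repeats across the next several conversions, here is the devm pattern in one place: resources are bound to the device's lifetime, so error paths and remove() lose their freeing boilerplate. A generic sketch, with foo_* as a placeholder driver:

struct foo_priv {
	void __iomem *base;
};

static int foo_probe(struct platform_device *pdev)
{
	struct resource *res;
	struct foo_priv *priv;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;			/* nothing to unwind */

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	priv->base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(priv->base))
		return PTR_ERR(priv->base);	/* error already logged */

	platform_set_drvdata(pdev, priv);
	return 0;	/* both resources released automatically on detach */
}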
Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-adp5520.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-adp5520.c b/drivers/gpio/gpio-adp5520.c index 8afa95f831b1..f33f78dcadaa 100644 --- a/drivers/gpio/gpio-adp5520.c +++ b/drivers/gpio/gpio-adp5520.c @@ -105,7 +105,7 @@ static int adp5520_gpio_probe(struct platform_device *pdev) return -ENODEV; } - dev = kzalloc(sizeof(*dev), GFP_KERNEL); + dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); if (dev == NULL) { dev_err(&pdev->dev, "failed to alloc memory\n"); return -ENOMEM; @@ -163,7 +163,6 @@ static int adp5520_gpio_probe(struct platform_device *pdev) return 0; err: - kfree(dev); return ret; } @@ -180,7 +179,6 @@ static int adp5520_gpio_remove(struct platform_device *pdev) return ret; } - kfree(dev); return 0; } -- GitLab From c3fe2bf4916e85fabc66690c8784bbce3ef4d2b4 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:14:46 +0900 Subject: [PATCH 0223/3163] gpio: max7300: use devm_kzalloc() Use devm_kzalloc() to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-max7300.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-max7300.c b/drivers/gpio/gpio-max7300.c index 4b6b9a04e326..40ab6dfb6021 100644 --- a/drivers/gpio/gpio-max7300.c +++ b/drivers/gpio/gpio-max7300.c @@ -41,7 +41,7 @@ static int max7300_probe(struct i2c_client *client, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; - ts = kzalloc(sizeof(struct max7301), GFP_KERNEL); + ts = devm_kzalloc(&client->dev, sizeof(struct max7301), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -50,8 +50,6 @@ static int max7300_probe(struct i2c_client *client, ts->dev = &client->dev; ret = __max730x_probe(ts); - if (ret) - kfree(ts); return ret; } -- GitLab From 4cb06cd58c23f6520dc00d67e62267ef17d69206 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:15:06 +0900 Subject: [PATCH 0224/3163] gpio: max7301: use devm_kzalloc() Use devm_kzalloc() to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-max7301.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index c6c535c1310e..6e1c984a75d4 100644 --- a/drivers/gpio/gpio-max7301.c +++ b/drivers/gpio/gpio-max7301.c @@ -61,7 +61,7 @@ static int max7301_probe(struct spi_device *spi) if (ret < 0) return ret; - ts = kzalloc(sizeof(struct max7301), GFP_KERNEL); + ts = devm_kzalloc(&spi->dev, sizeof(struct max7301), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -70,8 +70,6 @@ static int max7301_probe(struct spi_device *spi) ts->dev = &spi->dev; ret = __max730x_probe(ts); - if (ret) - kfree(ts); return ret; } -- GitLab From b09638a4d6b8032f6569e6ec8a5cc373ced045aa Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:15:28 +0900 Subject: [PATCH 0225/3163] gpio: max732x: use devm_kzalloc() Use devm_kzalloc() to make cleanup paths simpler. 
Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-max732x.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c index 1e0467ce4c37..d4b51b163b03 100644 --- a/drivers/gpio/gpio-max732x.c +++ b/drivers/gpio/gpio-max732x.c @@ -589,7 +589,8 @@ static int max732x_probe(struct i2c_client *client, return -EINVAL; } - chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL); + chip = devm_kzalloc(&client->dev, sizeof(struct max732x_chip), + GFP_KERNEL); if (chip == NULL) return -ENOMEM; chip->client = client; @@ -647,7 +648,6 @@ static int max732x_probe(struct i2c_client *client, out_failed: max732x_irq_teardown(chip); - kfree(chip); return ret; } @@ -680,7 +680,6 @@ static int max732x_remove(struct i2c_client *client) if (chip->client_dummy) i2c_unregister_device(chip->client_dummy); - kfree(chip); return 0; } -- GitLab From 632d8e55d1f0304579315669bfadcab7352973fd Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:15:49 +0900 Subject: [PATCH 0226/3163] gpio: mc33880: use devm_kzalloc() Use devm_kzalloc() to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mc33880.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index 6a8fdc26ae6a..b16148913a78 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -101,7 +101,7 @@ static int mc33880_probe(struct spi_device *spi) if (ret < 0) return ret; - mc = kzalloc(sizeof(struct mc33880), GFP_KERNEL); + mc = devm_kzalloc(&spi->dev, sizeof(struct mc33880), GFP_KERNEL); if (!mc) return -ENOMEM; @@ -143,7 +143,6 @@ static int mc33880_probe(struct spi_device *spi) exit_destroy: dev_set_drvdata(&spi->dev, NULL); mutex_destroy(&mc->lock); - kfree(mc); return ret; } @@ -159,10 +158,9 @@ static int mc33880_remove(struct spi_device *spi) dev_set_drvdata(&spi->dev, NULL); ret = gpiochip_remove(&mc->chip); - if (!ret) { + if (!ret) mutex_destroy(&mc->lock); - kfree(mc); - } else + else dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n", ret); -- GitLab From f39f54af032ce900815d0d718df5f1717eed50fe Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:16:11 +0900 Subject: [PATCH 0227/3163] gpio: pcf857x: use devm_kzalloc() Use devm_kzalloc() to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pcf857x.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index a965620c1c2a..e8faf53f3875 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -268,7 +268,7 @@ static int pcf857x_probe(struct i2c_client *client, } /* Allocate, initialize, and register this gpio_chip. 
*/ - gpio = kzalloc(sizeof *gpio, GFP_KERNEL); + gpio = devm_kzalloc(&client->dev, sizeof(*gpio), GFP_KERNEL); if (!gpio) return -ENOMEM; @@ -391,7 +391,6 @@ static int pcf857x_probe(struct i2c_client *client, if (pdata && client->irq) pcf857x_irq_domain_cleanup(gpio); - kfree(gpio); return status; } @@ -416,9 +415,7 @@ static int pcf857x_remove(struct i2c_client *client) pcf857x_irq_domain_cleanup(gpio); status = gpiochip_remove(&gpio->chip); - if (status == 0) - kfree(gpio); - else + if (status) dev_err(&client->dev, "%s --> %d\n", "remove", status); return status; } -- GitLab From 30db2bd1c88cedf73e1eb753225249130cc00970 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:16:49 +0900 Subject: [PATCH 0228/3163] gpio: mc33880: use dev_err() instead of printk() dev_err() is more preferred than printk(). Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mc33880.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index b16148913a78..33958799bd3d 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -130,7 +130,8 @@ static int mc33880_probe(struct spi_device *spi) ret = mc33880_write_config(mc); if (ret) { - printk(KERN_ERR "Failed writing to " DRIVER_NAME ": %d\n", ret); + dev_err(&spi->dev, "Failed writing to " DRIVER_NAME ": %d\n", + ret); goto exit_destroy; } -- GitLab From 6c0cf42be3f8e1039d3f31c2e8e16b4d375527c7 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:17:18 +0900 Subject: [PATCH 0229/3163] gpio: 74x164: use spi_get_drvdata() and spi_set_drvdata() Use the wrapper functions for getting and setting the driver data using spi_device instead of using dev_{get|set}_drvdata with &spi->dev, so we can directly pass a struct spi_device. Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-74x164.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 464be961f605..721607904d0a 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -137,7 +137,7 @@ static int gen_74x164_probe(struct spi_device *spi) mutex_init(&chip->lock); - dev_set_drvdata(&spi->dev, chip); + spi_set_drvdata(spi, chip); chip->spi = spi; @@ -176,7 +176,7 @@ static int gen_74x164_probe(struct spi_device *spi) return ret; exit_destroy: - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); mutex_destroy(&chip->lock); return ret; } @@ -186,11 +186,11 @@ static int gen_74x164_remove(struct spi_device *spi) struct gen_74x164_chip *chip; int ret; - chip = dev_get_drvdata(&spi->dev); + chip = spi_get_drvdata(spi); if (chip == NULL) return -ENODEV; - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); ret = gpiochip_remove(&chip->gpio_chip); if (!ret) -- GitLab From 493294d4a82470c44bf7ac9b21b901fb3e56dc3b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 15 Mar 2013 18:17:54 +0900 Subject: [PATCH 0230/3163] gpio: mc33880: use spi_get_drvdata() and spi_set_drvdata() Use the wrapper functions for getting and setting the driver data using spi_device instead of using dev_{get|set}_drvdata with &spi->dev, so we can directly pass a struct spi_device. 
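The wrapper pair is purely cosmetic but reads better; it stores to the same per-device slot that dev_set_drvdata() uses. Side by side (fragment, using the driver's mc33880 state):

spi_set_drvdata(spi, mc);	/* was: dev_set_drvdata(&spi->dev, mc) */
mc = spi_get_drvdata(spi);	/* was: dev_get_drvdata(&spi->dev) */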
Signed-off-by: Jingoo Han Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mc33880.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index 33958799bd3d..63a7a1bfb2d9 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -107,7 +107,7 @@ static int mc33880_probe(struct spi_device *spi) mutex_init(&mc->lock); - dev_set_drvdata(&spi->dev, mc); + spi_set_drvdata(spi, mc); mc->spi = spi; @@ -142,7 +142,7 @@ static int mc33880_probe(struct spi_device *spi) return ret; exit_destroy: - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); mutex_destroy(&mc->lock); return ret; } @@ -152,11 +152,11 @@ static int mc33880_remove(struct spi_device *spi) struct mc33880 *mc; int ret; - mc = dev_get_drvdata(&spi->dev); + mc = spi_get_drvdata(spi); if (mc == NULL) return -ENODEV; - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); ret = gpiochip_remove(&mc->chip); if (!ret) -- GitLab From 9ccb1a26cedf0a03a59f70f270565f3884ec08f6 Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:15:55 +0100 Subject: [PATCH 0231/3163] gpio: gpiolib-of.c: fix checkpatch error Fix : gpio/gpiolib-of.c:64: ERROR: code indent should use tabs where possible Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index a71a54a3e3f7..8940793912d9 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -61,7 +61,7 @@ static int of_gpiochip_find_and_xlate(struct gpio_chip *gc, void *data) * in flags for the GPIO. */ int of_get_named_gpio_flags(struct device_node *np, const char *propname, - int index, enum of_gpio_flags *flags) + int index, enum of_gpio_flags *flags) { /* Return -EPROBE_DEFER to support probe() functions to be called * later when the GPIO actually becomes available -- GitLab From f4dcd2d9417c2909362d2b42f038ecf1cdf86834 Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:15:56 +0100 Subject: [PATCH 0232/3163] gpio: gpio-mvebu.c: fix checkpatch errors Fix : gpio/gpio-mvebu.c:120: ERROR: space required before the open parenthesis '(' gpio/gpio-mvebu.c:136: ERROR: space required before the open parenthesis '(' gpio/gpio-mvebu.c:154: ERROR: space required before the open parenthesis '(' gpio/gpio-mvebu.c:404: ERROR: space required before the open parenthesis '(' gpio/gpio-mvebu.c:476: ERROR: "(foo*)" should be "(foo *)" gpio/gpio-mvebu.c:480: ERROR: "(foo*)" should be "(foo *)" gpio/gpio-mvebu.c:484: ERROR: "(foo*)" should be "(foo *)" gpio/gpio-mvebu.c:512: ERROR: space prohibited after that '!' (ctx:BxW) gpio/gpio-mvebu.c:518: ERROR: space prohibited after that '!' (ctx:BxW) gpio/gpio-mvebu.c:518: ERROR: space required before the open brace '{' gpio/gpio-mvebu.c:563: ERROR: space prohibited after that '!' (ctx:BxW) gpio/gpio-mvebu.c:570: ERROR: trailing whitespace gpio/gpio-mvebu.c:577: ERROR: space required before the open parenthesis '(' gpio/gpio-mvebu.c:635: ERROR: space prohibited after that '!' 
(ctx:BxW) Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mvebu.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 7472182967ce..474823e403fa 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -116,7 +116,7 @@ static inline void __iomem *mvebu_gpioreg_edge_cause(struct mvebu_gpio_chip *mvc { int cpu; - switch(mvchip->soc_variant) { + switch (mvchip->soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: case MVEBU_GPIO_SOC_VARIANT_MV78200: return mvchip->membase + GPIO_EDGE_CAUSE_OFF; @@ -132,7 +132,7 @@ static inline void __iomem *mvebu_gpioreg_edge_mask(struct mvebu_gpio_chip *mvch { int cpu; - switch(mvchip->soc_variant) { + switch (mvchip->soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: return mvchip->membase + GPIO_EDGE_MASK_OFF; case MVEBU_GPIO_SOC_VARIANT_MV78200: @@ -150,7 +150,7 @@ static void __iomem *mvebu_gpioreg_level_mask(struct mvebu_gpio_chip *mvchip) { int cpu; - switch(mvchip->soc_variant) { + switch (mvchip->soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: return mvchip->membase + GPIO_LEVEL_MASK_OFF; case MVEBU_GPIO_SOC_VARIANT_MV78200: @@ -400,7 +400,7 @@ static int mvebu_gpio_irq_set_type(struct irq_data *d, unsigned int type) /* * Configure interrupt polarity. */ - switch(type) { + switch (type) { case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_LEVEL_HIGH: u = readl_relaxed(mvebu_gpioreg_in_pol(mvchip)); @@ -472,15 +472,15 @@ static void mvebu_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) static struct of_device_id mvebu_gpio_of_match[] = { { .compatible = "marvell,orion-gpio", - .data = (void*) MVEBU_GPIO_SOC_VARIANT_ORION, + .data = (void *) MVEBU_GPIO_SOC_VARIANT_ORION, }, { .compatible = "marvell,mv78200-gpio", - .data = (void*) MVEBU_GPIO_SOC_VARIANT_MV78200, + .data = (void *) MVEBU_GPIO_SOC_VARIANT_MV78200, }, { .compatible = "marvell,armadaxp-gpio", - .data = (void*) MVEBU_GPIO_SOC_VARIANT_ARMADAXP, + .data = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP, }, { /* sentinel */ @@ -507,13 +507,13 @@ static int mvebu_gpio_probe(struct platform_device *pdev) soc_variant = MVEBU_GPIO_SOC_VARIANT_ORION; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (! res) { + if (!res) { dev_err(&pdev->dev, "Cannot get memory resource\n"); return -ENODEV; } mvchip = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_gpio_chip), GFP_KERNEL); - if (! mvchip){ + if (!mvchip) { dev_err(&pdev->dev, "Cannot allocate memory\n"); return -ENOMEM; } @@ -553,21 +553,21 @@ static int mvebu_gpio_probe(struct platform_device *pdev) * per-CPU registers */ if (soc_variant == MVEBU_GPIO_SOC_VARIANT_ARMADAXP) { res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (! res) { + if (!res) { dev_err(&pdev->dev, "Cannot get memory resource\n"); return -ENODEV; } mvchip->percpu_membase = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mvchip->percpu_membase)) + if (IS_ERR(mvchip->percpu_membase)) return PTR_ERR(mvchip->percpu_membase); } /* * Mask and clear GPIO interrupts. */ - switch(soc_variant) { + switch (soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: writel_relaxed(0, mvchip->membase + GPIO_EDGE_CAUSE_OFF); writel_relaxed(0, mvchip->membase + GPIO_EDGE_MASK_OFF); @@ -625,7 +625,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) gc = irq_alloc_generic_chip("mvebu_gpio_irq", 2, mvchip->irqbase, mvchip->membase, handle_level_irq); - if (! 
gc) { + if (!gc) { dev_err(&pdev->dev, "Cannot allocate generic irq_chip\n"); return -ENOMEM; } -- GitLab From e83507b763541cbbdf5a9e047c69755fec52aed9 Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:15:57 +0100 Subject: [PATCH 0233/3163] gpio: gpio-omap.c: fix checkpatch error Fix : gpio/gpio-omap.c:697: ERROR: space required before the open parenthesis '(' Signed-off-by: Laurent Navet Acked-by: Santosh Shilimkar Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 0d30c7acf0c7..352f9949c054 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -703,7 +703,7 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) if (WARN_ON(!isr_reg)) goto exit; - while(1) { + while (1) { u32 isr_saved, level_mask = 0; u32 enabled; -- GitLab From 50e44430c6c1872a761c57161338dea069dc36db Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:15:58 +0100 Subject: [PATCH 0234/3163] gpio: gpio-pca953x.c: fix checkpatch error Fix : gpio/gpio-pca953x.c:150: ERROR: else should follow close brace '}' Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 24059462c87f..15dbc36340b8 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -146,8 +146,7 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val) ret = i2c_smbus_write_i2c_block_data(chip->client, (reg << bank_shift) | REG_ADDR_AI, NBANK(chip), val); - } - else { + } else { switch (chip->chip_type) { case PCA953X_TYPE: ret = i2c_smbus_write_word_data(chip->client, -- GitLab From e37f4af762125c87749cda0efb6c18199e49f0ed Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:15:59 +0100 Subject: [PATCH 0235/3163] gpio: gpio-pxa.c: fix checkpatch errors Fix : gpio/gpio-pxa.c:605: ERROR: space required after that ',' (ctx:VxV) gpio/gpio-pxa.c:672: ERROR: space prohibited after that open parenthesis '(' Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pxa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 9cc108d2b770..6d01914538e3 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -602,7 +602,7 @@ static int pxa_gpio_probe(struct platform_device *pdev) for_each_gpio_chip(gpio, c) { writel_relaxed(0, c->regbase + GFER_OFFSET); writel_relaxed(0, c->regbase + GRER_OFFSET); - writel_relaxed(~0,c->regbase + GEDR_OFFSET); + writel_relaxed(~0, c->regbase + GEDR_OFFSET); /* unmask GPIO edge detect for AP side */ if (gpio_is_mmp_type(gpio_type)) writel_relaxed(~0, c->regbase + ED_MASK_OFFSET); @@ -669,7 +669,7 @@ static void pxa_gpio_resume(void) for_each_gpio_chip(gpio, c) { /* restore level with set/clear */ - writel_relaxed( c->saved_gplr, c->regbase + GPSR_OFFSET); + writel_relaxed(c->saved_gplr, c->regbase + GPSR_OFFSET); writel_relaxed(~c->saved_gplr, c->regbase + GPCR_OFFSET); writel_relaxed(c->saved_grer, c->regbase + GRER_OFFSET); -- GitLab From be41cf589b0b7613c845802878afabc7e214cb79 Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:16:00 +0100 Subject: [PATCH 0236/3163] gpio: gpio-sch.c: fix checkpatch error Fix : gpio/gpio-sch.c:206: ERROR: switch and case should be at the same indent 
Also remove blank lines Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-sch.c | 74 +++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c index 7e7b52be6e29..1e4de16ceb41 100644 --- a/drivers/gpio/gpio-sch.c +++ b/drivers/gpio/gpio-sch.c @@ -221,45 +221,41 @@ static int sch_gpio_probe(struct platform_device *pdev) gpio_ba = res->start; switch (id) { - case PCI_DEVICE_ID_INTEL_SCH_LPC: - sch_gpio_core.base = 0; - sch_gpio_core.ngpio = 10; - - sch_gpio_resume.base = 10; - sch_gpio_resume.ngpio = 4; - - /* - * GPIO[6:0] enabled by default - * GPIO7 is configured by the CMC as SLPIOVR - * Enable GPIO[9:8] core powered gpios explicitly - */ - outb(0x3, gpio_ba + CGEN + 1); - /* - * SUS_GPIO[2:0] enabled by default - * Enable SUS_GPIO3 resume powered gpio explicitly - */ - outb(0x8, gpio_ba + RGEN); - break; - - case PCI_DEVICE_ID_INTEL_ITC_LPC: - sch_gpio_core.base = 0; - sch_gpio_core.ngpio = 5; - - sch_gpio_resume.base = 5; - sch_gpio_resume.ngpio = 9; - break; - - case PCI_DEVICE_ID_INTEL_CENTERTON_ILB: - sch_gpio_core.base = 0; - sch_gpio_core.ngpio = 21; - - sch_gpio_resume.base = 21; - sch_gpio_resume.ngpio = 9; - break; - - default: - err = -ENODEV; - goto err_sch_gpio_core; + case PCI_DEVICE_ID_INTEL_SCH_LPC: + sch_gpio_core.base = 0; + sch_gpio_core.ngpio = 10; + sch_gpio_resume.base = 10; + sch_gpio_resume.ngpio = 4; + /* + * GPIO[6:0] enabled by default + * GPIO7 is configured by the CMC as SLPIOVR + * Enable GPIO[9:8] core powered gpios explicitly + */ + outb(0x3, gpio_ba + CGEN + 1); + /* + * SUS_GPIO[2:0] enabled by default + * Enable SUS_GPIO3 resume powered gpio explicitly + */ + outb(0x8, gpio_ba + RGEN); + break; + + case PCI_DEVICE_ID_INTEL_ITC_LPC: + sch_gpio_core.base = 0; + sch_gpio_core.ngpio = 5; + sch_gpio_resume.base = 5; + sch_gpio_resume.ngpio = 9; + break; + + case PCI_DEVICE_ID_INTEL_CENTERTON_ILB: + sch_gpio_core.base = 0; + sch_gpio_core.ngpio = 21; + sch_gpio_resume.base = 21; + sch_gpio_resume.ngpio = 9; + break; + + default: + err = -ENODEV; + goto err_sch_gpio_core; } sch_gpio_core.dev = &pdev->dev; -- GitLab From 8ab2a6d20ec65e9607254f718bd295dd3361d6c3 Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:16:01 +0100 Subject: [PATCH 0237/3163] gpio: gpio-stp-xway.c: fix checkpatch error Fix : gpio/gpio-stp-xway.c:220: ERROR: trailing whitespace Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-stp-xway.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-stp-xway.c b/drivers/gpio/gpio-stp-xway.c index c20e05151212..04882a911b65 100644 --- a/drivers/gpio/gpio-stp-xway.c +++ b/drivers/gpio/gpio-stp-xway.c @@ -217,7 +217,7 @@ static int xway_stp_probe(struct platform_device *pdev) chip->virt = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(chip->virt)) return PTR_ERR(chip->virt); - + chip->gc.dev = &pdev->dev; chip->gc.label = "stp-xway"; chip->gc.direction_output = xway_stp_dir_out; -- GitLab From e90c636be6afbbd46779d180594bda60bc01a821 Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:16:02 +0100 Subject: [PATCH 0238/3163] gpio: gpio-tc3589x.c: fix checkpatch errors Fix : gpio/gpio-tc3589x.c:285: ERROR: code indent should use tabs where possible gpio/gpio-tc3589x.c:286: ERROR: code indent should use tabs where possible gpio/gpio-tc3589x.c:287: ERROR: code indent should use tabs where possible gpio/gpio-tc3589x.c:347: ERROR: 
code indent should use tabs where possible Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tc3589x.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index c0595bbf3268..d34d80dfb083 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -282,9 +282,9 @@ static void tc3589x_gpio_irq_unmap(struct irq_domain *d, unsigned int virq) } static struct irq_domain_ops tc3589x_irq_ops = { - .map = tc3589x_gpio_irq_map, - .unmap = tc3589x_gpio_irq_unmap, - .xlate = irq_domain_xlate_twocell, + .map = tc3589x_gpio_irq_map, + .unmap = tc3589x_gpio_irq_unmap, + .xlate = irq_domain_xlate_twocell, }; static int tc3589x_gpio_irq_init(struct tc3589x_gpio *tc3589x_gpio, @@ -344,7 +344,7 @@ static int tc3589x_gpio_probe(struct platform_device *pdev) tc3589x_gpio->chip.base = (pdata) ? pdata->gpio_base : -1; #ifdef CONFIG_OF_GPIO - tc3589x_gpio->chip.of_node = np; + tc3589x_gpio->chip.of_node = np; #endif tc3589x_gpio->irq_base = tc3589x->irq_base ? -- GitLab From 8a29a409671f82a21a56aeb42835f99795d0deb3 Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:16:03 +0100 Subject: [PATCH 0239/3163] gpio: gpio-timberdale.c: fix checkpatch error Fix : gpio/gpio-timberdale.c:171: ERROR: else should follow close brace '}' Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-timberdale.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index 702cca9284f1..43774058b693 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -167,8 +167,7 @@ static int timbgpio_irq_type(struct irq_data *d, unsigned trigger) if (ver < 3) { ret = -EINVAL; goto out; - } - else { + } else { flr |= 1 << offset; bflr |= 1 << offset; } -- GitLab From 195c65e83ccc0094bc4143176a1b21de2ba9237b Mon Sep 17 00:00:00 2001 From: Laurent Navet Date: Wed, 20 Mar 2013 13:16:04 +0100 Subject: [PATCH 0240/3163] gpio: gpio-tps65910.c: fix checkpatch error Fix : gpio/gpio-tps65910.c:136: ERROR: space required before the open parenthesis '(' Signed-off-by: Laurent Navet Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tps65910.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c index 5083825a0348..06146219d9d2 100644 --- a/drivers/gpio/gpio-tps65910.c +++ b/drivers/gpio/gpio-tps65910.c @@ -133,7 +133,7 @@ static int tps65910_gpio_probe(struct platform_device *pdev) tps65910_gpio->gpio_chip.owner = THIS_MODULE; tps65910_gpio->gpio_chip.label = tps65910->i2c_client->name; - switch(tps65910_chip_id(tps65910)) { + switch (tps65910_chip_id(tps65910)) { case TPS65910: tps65910_gpio->gpio_chip.ngpio = TPS65910_NUM_GPIO; break; -- GitLab From 047b93a35961f7a6561e6f5dcb040738f822b892 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 22 Mar 2013 13:10:09 +0100 Subject: [PATCH 0241/3163] MAX7301 GPIO: Do not force SPI speed when using OF Platform The bits_per_word can be set in the OF device tree, so there is no need to force it, as is done with the platform_data, when using the OF platform. Signed-off-by: Patrick Vasseur Signed-off-by: Christophe Leroy Signed-off-by: Linus Walleij --- drivers/gpio/gpio-max7301.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index 6e1c984a75d4..3b16ab701630 100644 --- a/drivers/gpio/gpio-max7301.c +++ 
b/drivers/gpio/gpio-max7301.c @@ -56,7 +56,8 @@ static int max7301_probe(struct spi_device *spi) int ret; /* bits_per_word cannot be configured in platform data */ - spi->bits_per_word = 16; + if (spi->dev.platform_data) + spi->bits_per_word = 16; ret = spi_setup(spi); if (ret < 0) return ret; -- GitLab From a4ba5e1b92b69b9c7a15529657bb1cebed4539f3 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Sun, 24 Mar 2013 15:45:29 +0100 Subject: [PATCH 0242/3163] gpio: mvebu: add dbg_show function This patch adds a dedicated dbg_show function to the gpio-mvebu driver. In addition to the generic gpiolib information, this function displays information related to the specific Marvell registers (blink enable, data in polarity, interrupt masks and cause). Signed-off-by: Simon Guinot Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mvebu.c | 59 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 474823e403fa..f7db3b336343 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -469,6 +469,64 @@ static void mvebu_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) } } +#ifdef CONFIG_DEBUG_FS +#include + +static void mvebu_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) +{ + struct mvebu_gpio_chip *mvchip = + container_of(chip, struct mvebu_gpio_chip, chip); + u32 out, io_conf, blink, in_pol, data_in, cause, edg_msk, lvl_msk; + int i; + + out = readl_relaxed(mvebu_gpioreg_out(mvchip)); + io_conf = readl_relaxed(mvebu_gpioreg_io_conf(mvchip)); + blink = readl_relaxed(mvebu_gpioreg_blink(mvchip)); + in_pol = readl_relaxed(mvebu_gpioreg_in_pol(mvchip)); + data_in = readl_relaxed(mvebu_gpioreg_data_in(mvchip)); + cause = readl_relaxed(mvebu_gpioreg_edge_cause(mvchip)); + edg_msk = readl_relaxed(mvebu_gpioreg_edge_mask(mvchip)); + lvl_msk = readl_relaxed(mvebu_gpioreg_level_mask(mvchip)); + + for (i = 0; i < chip->ngpio; i++) { + const char *label; + u32 msk; + bool is_out; + + label = gpiochip_is_requested(chip, i); + if (!label) + continue; + + msk = 1 << i; + is_out = !(io_conf & msk); + + seq_printf(s, " gpio-%-3d (%-20.20s)", chip->base + i, label); + + if (is_out) { + seq_printf(s, " out %s %s\n", + out & msk ? "hi" : "lo", + blink & msk ? "(blink )" : ""); + continue; + } + + seq_printf(s, " in %s (act %s) - IRQ", + (data_in ^ in_pol) & msk ? "hi" : "lo", + in_pol & msk ? "lo" : "hi"); + if (!((edg_msk | lvl_msk) & msk)) { + seq_printf(s, " disabled\n"); + continue; + } + if (edg_msk & msk) + seq_printf(s, " edge "); + if (lvl_msk & msk) + seq_printf(s, " level"); + seq_printf(s, " (%s)\n", cause & msk ? "pending" : "clear "); + } +} +#else +#define mvebu_gpio_dbg_show NULL +#endif + static struct of_device_id mvebu_gpio_of_match[] = { { .compatible = "marvell,orion-gpio", @@ -543,6 +601,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip->chip.ngpio = ngpios; mvchip->chip.can_sleep = 0; mvchip->chip.of_node = np; + mvchip->chip.dbg_show = mvebu_gpio_dbg_show; spin_lock_init(&mvchip->lock); mvchip->membase = devm_ioremap_resource(&pdev->dev, res); -- GitLab From 43158441934fd9f1d2a2434c9eec1b682391a49b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 16 Mar 2013 12:21:12 +0800 Subject: [PATCH 0243/3163] gpio: samsung: Add terminating entry for exynos_pinctrl_ids The of_device_id table is supposed to be zero-terminated. 
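As a minimal sketch of what a properly terminated match table looks like (compatible strings here are hypothetical): the OF core walks the array entry by entry and stops only at an all-zero sentinel, so a table without one lets the walk run past the end of the array.

static const struct of_device_id example_pinctrl_ids[] = {
	{ .compatible = "vendor,example-pinctrl-a", },
	{ .compatible = "vendor,example-pinctrl-b", },
	{ }	/* sentinel: the all-zero entry terminates the walk */
};
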
Signed-off-by: Axel Lin Acked-by: Grant Likely Acked-by: Kukjin Kim Signed-off-by: Linus Walleij --- drivers/gpio/gpio-samsung.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c index b3643ff007e4..c4b51d820417 100644 --- a/drivers/gpio/gpio-samsung.c +++ b/drivers/gpio/gpio-samsung.c @@ -3025,6 +3025,7 @@ static __init int samsung_gpiolib_init(void) { .compatible = "samsung,exynos4210-pinctrl", }, { .compatible = "samsung,exynos4x12-pinctrl", }, { .compatible = "samsung,exynos5440-pinctrl", }, + { } }; for_each_matching_node(pctrl_np, exynos_pinctrl_ids) if (pctrl_np && of_device_is_available(pctrl_np)) -- GitLab From b949be5857a4033e00fed67b707774f52619ce60 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Wed, 27 Mar 2013 14:08:33 +0100 Subject: [PATCH 0244/3163] idr: document exit conditions on idr_for_each_entry better And some manual common subexpression elimination which may help the compiler produce smaller code. Signed-off-by: George Spelvin Signed-off-by: Philipp Reisner Signed-off-by: Jens Axboe --- include/linux/idr.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/idr.h b/include/linux/idr.h index 2640c7e99e51..6ece0583362a 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -122,11 +122,13 @@ static inline void *idr_find(struct idr *idr, int id) * @idp: idr handle * @entry: the type * to use as cursor * @id: id entry's key + * + * @entry and @id do not need to be initialized before the loop, and + * after normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ - entry != NULL; \ - ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) /* * Don't use the following functions. These exist only to suppress -- GitLab From 2d56a974f36ffd1d00aa897bd55e28079aa9e5b7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:34 +0100 Subject: [PATCH 0245/3163] drbd: reset ap_in_flight counter for new connections Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1921871ca9a8..cd172b490a95 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -849,6 +849,7 @@ int drbd_connected(struct drbd_conf *mdev) err = drbd_send_current_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); + atomic_set(&mdev->ap_in_flight, 0); mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ return err; } -- GitLab From a700471bf335965e7603273fd51034415553246a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:35 +0100 Subject: [PATCH 0246/3163] drbd: abort start of resync early, if it raced with connection breakage We've seen a spurious full resync, because a connection breakage raced with drbd_start_resync(mdev, C_SYNC_TARGET), and the resulting state change request intended to start the resync ended up looking like a local invalidate. Fix: Double-check the state inside the lock, and don't even request that state change if we had connection or IO problems. 
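In outline, the recheck-under-lock pattern the fix applies (an abridged sketch of the change below; cleanup paths trimmed):

	write_lock_irq(&global_state_lock);
	/* State sampled before taking the lock may be stale by now:
	 * re-check it under the lock and bail out rather than issue a
	 * state change request that would look like a local invalidate. */
	if (mdev->state.conn < C_CONNECTED ||
	    !get_ldev_if_state(mdev, D_NEGOTIATING)) {
		write_unlock_irq(&global_state_lock);
		return;
	}
	/* ... safe to continue setting up the resync ... */
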
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_worker.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f41e224caa7c..7f51f88b0a80 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1653,7 +1653,9 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) clear_bit(B_RS_H_DONE, &mdev->flags); write_lock_irq(&global_state_lock); - if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { + /* Did some connection breakage or IO error race with us? */ + if (mdev->state.conn < C_CONNECTED + || !get_ldev_if_state(mdev, D_NEGOTIATING)) { write_unlock_irq(&global_state_lock); mutex_unlock(mdev->state_mutex); return; -- GitLab From 9376d9f8b97f20df5d30c83713652c3118b31534 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:36 +0100 Subject: [PATCH 0247/3163] drbd: move invalidating the whole bitmap out of after_state_ch() To prevent other state change requests, after passing through sanitize_state(), from being mistaken for an invalidate, move the "set all bits as out-of-sync" into the invalidate path. Make invalidate and invalidate-remote behave consistently wrt. current connection state (need either an established replication link, or really be disconnected). Also mention that in the documentation. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 36 ++++++++++++++++--------------- drivers/block/drbd/drbd_state.c | 7 ------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 42fda4ae2f87..c49bda7918b3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2448,19 +2448,23 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); - if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); - - while (retcode == SS_NEED_CONNECTION) { - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.conn < C_CONNECTED) - retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); - - if (retcode != SS_NEED_CONNECTION) - break; - - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); + /* If that did not work, try again, + * but log failures this time (implicit CS_VERBOSE). + * + * If we happen to be C_STANDALONE R_SECONDARY, + * just change to D_INCONSISTENT, and set all bits in the bitmap. + * Otherwise, we just fail, to avoid races with the resync handshake. 
+ */ + if (retcode < SS_SUCCESS) { + if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { + retcode = drbd_request_state(mdev, NS(disk, D_INCONSISTENT)); + if (retcode >= SS_SUCCESS) { + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, + "set_n_write from invalidate", BM_LOCKED_MASK)) + retcode = ERR_IO_MD_DISK; + } + } else + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } drbd_resume_io(mdev); @@ -2517,9 +2521,9 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); if (retcode < SS_SUCCESS) { - if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) { - /* The peer will get a resync upon connect anyways. - * Just make that into a full resync. */ + if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_PRIMARY) { + /* The peer will get a resync upon connect anyways. Just make that + into a full resync. */ retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0fe220cfb9e9..3bc686f48b53 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1377,13 +1377,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); - /* We are invalidating our self... */ - if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && - os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) - /* other bitmap operation expected during this phase */ - drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, - "set_n_write from invalidate", BM_LOCKED_MASK); - /* first half of local IO error, failure to attach, * or administrative detach */ if (os.disk != D_FAILED && ns.disk == D_FAILED) { -- GitLab From 5c4f13d991e69cb715ddc2b6a9bbecead7b02c9e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:37 +0100 Subject: [PATCH 0248/3163] drbd: fix effective error returned when refusing an invalidate Since commit "drbd: Disallow the peer_disk_state to be D_OUTDATED while connected", trying to invalidate a disconnected Primary returned an error code that did not really match the situation: "Refusing to be Outdated while Connected". Insert two more specific conditions into is_valid_state(), changing the message to "Need access to UpToDate data" and "Need a connection to start verify or resync", respectively. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_state.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 3bc686f48b53..22e259f34370 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -570,6 +570,13 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) mdev->tconn->agreed_pro_version < 88) rv = SS_NOT_SUPPORTED; + else if (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && + ns.pdsk == D_UNKNOWN) + rv = SS_NEED_CONNECTION; + else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) rv = SS_CONNECTED_OUTDATES; -- GitLab From 0b2dafcd9f8fd38d00398dd3da88225ad1e99726 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:38 +0100 Subject: [PATCH 0249/3163] drbd: drop now useless duplicate state request from invalidate Patch best viewed with git diff --ignore-space-change. Now that we attempt the fallback to local bitmap operation only when disconnected, we can safely drop the extra "silent" state request from both invalidate and invalidate-remote. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 62 ++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c49bda7918b3..56bafdcd943e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2446,26 +2446,19 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); drbd_flush_workqueue(mdev); - retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); - - /* If that did not work, try again, - * but log failures this time (implicit CS_VERBOSE). - * - * If we happen to be C_STANDALONE R_SECONDARY, - * just change to D_INCONSISTENT, and set all bits in the bitmap. - * Otherwise, we just fail, to avoid races with the resync handshake. + /* If we happen to be C_STANDALONE R_SECONDARY, just change to + * D_INCONSISTENT, and set all bits in the bitmap. Otherwise, + * try to start a resync handshake as sync target for full sync. 
*/ - if (retcode < SS_SUCCESS) { - if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { - retcode = drbd_request_state(mdev, NS(disk, D_INCONSISTENT)); - if (retcode >= SS_SUCCESS) { - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, - "set_n_write from invalidate", BM_LOCKED_MASK)) - retcode = ERR_IO_MD_DISK; - } - } else - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); - } + if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { + retcode = drbd_request_state(mdev, NS(disk, D_INCONSISTENT)); + if (retcode >= SS_SUCCESS) { + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, + "set_n_write from invalidate", BM_LOCKED_MASK)) + retcode = ERR_IO_MD_DISK; + } + } else + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); drbd_resume_io(mdev); out: @@ -2519,21 +2512,22 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); drbd_flush_workqueue(mdev); - retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); - if (retcode < SS_SUCCESS) { - if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_PRIMARY) { - /* The peer will get a resync upon connect anyways. Just make that - into a full resync. */ - retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); - if (retcode >= SS_SUCCESS) { - if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, - "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) - retcode = ERR_IO_MD_DISK; - } - } else - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); - } + /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits + * in the bitmap. Otherwise, try to start a resync handshake + * as sync source for full sync. + */ + if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_PRIMARY) { + /* The peer will get a resync upon connect anyways. Just make that + into a full resync. */ + retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); + if (retcode >= SS_SUCCESS) { + if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, + "set_n_write from invalidate_peer", + BM_LOCKED_SET_ALLOWED)) + retcode = ERR_IO_MD_DISK; + } + } else + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); drbd_resume_io(mdev); out: -- GitLab From bb45185de2e90af63a7bc48855de6f870cc216fc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:39 +0100 Subject: [PATCH 0250/3163] drbd: fix spurious warning about bitmap being locked from detach Introduced in "drbd: always write bitmap on detach", the bitmap bulk writeout on detach was indicating it expected exclusive bitmap access. Where I meant to say: expect no more modifications, but testing/counting is still allowed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a150b59897a0..67d2bb3bb533 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3412,8 +3412,12 @@ static int w_go_diskless(struct drbd_work *w, int unused) * end up here after a failed attach, before ldev was even assigned. */ if (mdev->bitmap && mdev->ldev) { + /* An interrupted resync or similar is allowed to recount bits + * while we detach. + * Any modifications would not be expected anymore, though. 
+ */ + if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write, - "detach", BM_LOCKED_MASK)) { + "detach", BM_LOCKED_TEST_ALLOWED)) { if (test_bit(WAS_READ_ERROR, &mdev->flags)) { drbd_md_set_flag(mdev, MDF_FULL_SYNC); drbd_md_sync(mdev); -- GitLab From 2bd5ed5d6713594eb2b4d234d01217d506279c7d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:40 +0100 Subject: [PATCH 0251/3163] drbd: Fix disconnect to keep the peer disk state if connection breaks during operation The issue was that if the connection broke while we did the graceful state change to C_DISCONNECTING (C_TEARDOWN), then we returned a success code (SS_CW_NO_NEED) from the state engine. The result of that is that we failed to call the fence-peer script in such a case. Fixed that by introducing a new error code (SS_OUTDATE_WO_CONN). This one should never reach back into user space. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 7 +++++-- drivers/block/drbd/drbd_state.c | 14 +++++++------- drivers/block/drbd/drbd_strings.c | 1 + include/linux/drbd.h | 3 ++- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 56bafdcd943e..39e9a91a8f31 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2198,8 +2198,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for return SS_SUCCESS; case SS_PRIMARY_NOP: /* Our state checking code wants to see the peer outdated. */ - rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED), CS_VERBOSE); + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0); + + if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */ + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_VERBOSE); + break; case SS_CW_FAILED_BY_PEER: /* The peer probably wants to see us outdated. */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 22e259f34370..90c5be2b1d30 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -642,6 +642,10 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_t && os.conn < C_WF_REPORT_PARAMS) rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ + if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED && + os.conn < C_CONNECTED && os.pdsk > D_OUTDATED) + rv = SS_OUTDATE_WO_CONN; + return rv; } @@ -1748,13 +1752,9 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) return SS_CW_FAILED_BY_PEER; - rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; - - if (rv == SS_UNKNOWN_ERROR) - rv = conn_is_valid_transition(tconn, mask, val, 0); - - if (rv == SS_SUCCESS) - rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. 
*/ + rv = conn_is_valid_transition(tconn, mask, val, 0); + if (rv == SS_SUCCESS && tconn->cstate == C_WF_REPORT_PARAMS) + rv = SS_UNKNOWN_ERROR; /* continue waiting */ return rv; } diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 9a664bd27404..58e08ff2b2ce 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = { [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", + [-SS_OUTDATE_WO_CONN] = "Need a connection for a graceful disconnect/outdate peer", [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", }; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0c5a18ec322c..316330705fd7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -319,7 +319,8 @@ enum drbd_state_rv { SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ SS_O_VOL_PEER_PRI = -20, - SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ + SS_OUTDATE_WO_CONN = -21, + SS_AFTER_LAST_ERROR = -22, /* Keep this at bottom */ }; /* from drbd_strings.c */ -- GitLab From 7074e4a745799d521b17775f6d076d84dc7f8c50 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Mar 2013 14:08:41 +0100 Subject: [PATCH 0252/3163] drbd: only fail empty flushes if no good data is reachable We completed empty flushes (blkdev_issue_flush()) with IO error if we lost the local disk, even if we still have an established replication link to a healthy remote disk. Fix this to only report errors to upper layers, if neither local nor remote data is reachable. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 12 ++++++++---- drivers/block/drbd/drbd_req.h | 8 ++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 9f7ff1cb46ff..beefe65764ff 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -263,8 +263,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) else root = &mdev->read_requests; drbd_remove_request_interval(root, req); - } else if (!(s & RQ_POSTPONED)) - D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); + } /* Before we can signal completion to the upper layers, * we may need to close the current transfer log epoch. @@ -755,6 +754,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE); break; + + case QUEUE_AS_DRBD_BARRIER: + start_new_tl_epoch(mdev->tconn); + mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); + break; }; return rv; @@ -975,8 +979,8 @@ static int drbd_process_write_request(struct drbd_request *req) /* The only size==0 bios we expect are empty flushes. 
*/ D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH); if (remote) - start_new_tl_epoch(mdev->tconn); - return 0; + _req_mod(req, QUEUE_AS_DRBD_BARRIER); + return remote; } if (!remote && !send_oos) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index c08d22964d06..978cb1addc98 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -88,6 +88,14 @@ enum drbd_req_event { QUEUE_FOR_NET_READ, QUEUE_FOR_SEND_OOS, + /* An empty flush is queued as P_BARRIER, + * which will cause it to complete "successfully", + * even if the local disk flush failed. + * + * Just like "real" requests, empty flushes (blkdev_issue_flush()) will + * only see an error if neither local nor remote data is reachable. */ + QUEUE_AS_DRBD_BARRIER, + SEND_CANCELED, SEND_FAILED, HANDED_OVER_TO_NETWORK, -- GitLab From 94ad0a101415978be04945b2787be1e8e8a874db Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Mar 2013 14:08:42 +0100 Subject: [PATCH 0253/3163] drbd: fix memory leak We forgot to free the disk_conf, so for each attach/detach cycle we leaked 336 bytes. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 67d2bb3bb533..1b93a7262ef7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2819,6 +2819,7 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + kfree(ldev->disk_conf); kfree(ldev); } -- GitLab From a3f8f7dc7ad652cd84c12cb5efa0f7722dff4786 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Mar 2013 14:08:43 +0100 Subject: [PATCH 0254/3163] drbd: validate resync_after dependency on attach already We validated resync_after dependencies when changed via disk-options. But we did not validate them when first created via attach. We also did not check or clean up dependencies that used to be correct, but now point to meanwhile removed minor devices. If the drbd_resync_after_valid() validation in disk-options tried to follow a dependency chain in this way, this could lead to a NULL pointer dereference. Validate resync_after settings in drbd_adm_attach() already, as well as in drbd_adm_disk_opts(), and only reject dependency loops. Depending on non-existing disks is allowed and equivalent to no dependency. 
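As a sketch of that loop check (simplified; next_resync_after() is a hypothetical stand-in for the rcu-protected disk_conf lookup the driver actually performs):

static bool resync_after_has_loop(int self_minor, int proposed_after)
{
	int m = proposed_after;

	while (m != -1) {
		if (m == self_minor)
			return true;	/* chain leads back to us: reject */
		/* hypothetical helper: next minor in the chain, or -1 for
		 * diskless/missing minors, where the chain simply ends */
		m = next_resync_after(m);
	}
	return false;	/* chain terminates: dependency is acceptable */
}
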
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 6 ++++++ drivers/block/drbd/drbd_worker.c | 15 ++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 39e9a91a8f31..9e3f441e7e84 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1381,6 +1381,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } + write_lock_irq(&global_state_lock); + retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); + write_unlock_irq(&global_state_lock); + if (retcode != NO_ERROR) + goto fail; + rcu_read_lock(); nc = rcu_dereference(mdev->tconn->net_conf); if (nc) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 7f51f88b0a80..891c0ecaa292 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1426,7 +1426,7 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) int resync_after; while (1) { - if (!odev->ldev) + if (!odev->ldev || odev->state.disk == D_DISKLESS) return 1; rcu_read_lock(); resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; @@ -1434,7 +1434,7 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) if (resync_after == -1) return 1; odev = minor_to_mdev(resync_after); - if (!expect(odev)) + if (!odev) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && odev->state.conn <= C_PAUSED_SYNC_T) || @@ -1516,7 +1516,7 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) if (o_minor == -1) return NO_ERROR; - if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) + if (o_minor < -1 || o_minor > MINORMASK) return ERR_RESYNC_AFTER; /* check for loops */ @@ -1525,6 +1525,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) if (odev == mdev) return ERR_RESYNC_AFTER_CYCLE; + /* You are free to depend on diskless, non-existing, + * or not yet/no longer existing minors. + * We only reject dependency loops. + * We cannot follow the dependency chain beyond a detached or + * missing minor. 
+ */ + if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS) + return NO_ERROR; + rcu_read_lock(); resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); -- GitLab From ef57f9e6bb9278720c8a5278728f252ab85d7ac6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:44 +0100 Subject: [PATCH 0255/3163] drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cd172b490a95..7af0cc77aa60 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4660,8 +4660,8 @@ static int drbd_do_features(struct drbd_tconn *tconn) #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) static int drbd_do_auth(struct drbd_tconn *tconn) { - dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); - dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); + conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); + conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); return -1; } #else -- GitLab From 607f25e56ee0a31e451f6bd8a7109fa1f5dcbe29 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Mar 2013 14:08:45 +0100 Subject: [PATCH 0256/3163] drbd: fix drbd epoch write count for ahead/behind mode The sanity check when receiving P_BARRIER_ACK does expect all write requests with a given req->epoch to have been either all replicated, or all not replicated. Because req->epoch was assigned before calling maybe_pull_ahead(), this expectation was not met, leading to an off-by-one in the sanity check, and further to a "Protocol Error". Fix: move the call to maybe_pull_ahead() a few lines up, and assign req->epoch only after that. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index beefe65764ff..c24379ffd4e3 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -865,8 +865,10 @@ static void maybe_pull_ahead(struct drbd_conf *mdev) bool congested = false; enum drbd_on_congestion on_congestion; + rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); on_congestion = nc ? nc->on_congestion : OC_BLOCK; + rcu_read_unlock(); if (on_congestion == OC_BLOCK || tconn->agreed_pro_version < 96) return; @@ -960,14 +962,8 @@ static int drbd_process_write_request(struct drbd_request *req) struct drbd_conf *mdev = req->w.mdev; int remote, send_oos; - rcu_read_lock(); remote = drbd_should_do_remote(mdev->state); - if (remote) { - maybe_pull_ahead(mdev); - remote = drbd_should_do_remote(mdev->state); - } send_oos = drbd_should_send_out_of_sync(mdev->state); - rcu_read_unlock(); /* Need to replicate writes. Unless it is an empty flush, * which is better mapped to a DRBD P_BARRIER packet, @@ -1087,9 +1083,13 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *re * but will re-aquire it before it returns here. * Needs to be before the check on drbd_suspended() */ complete_conflicting_writes(req); + /* no more giving up req_lock from now on! 
*/ + + /* check for congestion, and potentially stop sending + * full data updates, but start sending "dirty bits" only. */ + maybe_pull_ahead(mdev); } - /* no more giving up req_lock from now on! */ if (drbd_suspended(mdev)) { /* push back and retry: */ -- GitLab From 193d01532a730a53cbc74462799dbc43968b97fd Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 27 Mar 2013 14:08:46 +0100 Subject: [PATCH 0257/3163] drbd: add module_put() on error path in drbd_proc_open() If single_open() fails in drbd_proc_open(), module refcount is left incremented. The patch adds module_put() on the error path. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 56672a61eb94..30fe0a57f5a0 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -313,8 +313,14 @@ static int drbd_seq_show(struct seq_file *seq, void *v) static int drbd_proc_open(struct inode *inode, struct file *file) { - if (try_module_get(THIS_MODULE)) - return single_open(file, drbd_seq_show, PDE(inode)->data); + int err; + + if (try_module_get(THIS_MODULE)) { + err = single_open(file, drbd_seq_show, PDE(inode)->data); + if (err) + module_put(THIS_MODULE); + return err; + } return -ENODEV; } -- GitLab From 7c689e63a847316c1b2500f86891b0a574ce7e69 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:47 +0100 Subject: [PATCH 0258/3163] drbd: fix for deadlock when using automatic split-brain-recovery With an automatic after split-brain recovery policy of "after-sb-1pri call-pri-lost-after-sb", when trying to drbd_set_role() to R_SECONDARY, we run into a deadlock. This was first recognized and supposedly fixed by 2009-06-10 "Fixed a deadlock when using automatic split brain recovery when both nodes are" replacing drbd_set_role() with drbd_change_state() in that code-path, but the first hunk of that patch forgets to remove the drbd_set_role(). We apparently only ever tested the "two primaries" case. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7af0cc77aa60..a75c0b134856 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2662,7 +2662,6 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) if (hg == -1 && mdev->state.role == R_PRIMARY) { enum drbd_state_rv rv2; - drbd_set_role(mdev, R_SECONDARY, 0); /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, * we might be here in C_WF_REPORT_PARAMS which is transient. * we do not need to wait for the after state change work either. */ -- GitLab From 3990e04df085e0561ab34f84731dc5929585c526 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Mar 2013 14:08:48 +0100 Subject: [PATCH 0259/3163] drbd: use sched_setscheduler() It was unnoticed for some time that assigning to current->policy is no longer sufficient to set a real time priority for a kernel thread. 
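For reference, the replacement pattern in minimal form (mirroring the change below): the policy and priority are requested through sched_setscheduler(), whose return value must be checked because the request can fail.

	struct sched_param param = { .sched_priority = 2 };
	int err = sched_setscheduler(current, SCHED_RR, &param);
	if (err < 0)
		printk(KERN_ERR "could not set realtime priority, err=%d\n", err);
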
Reported-by: Charlie Suffin Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 6 ++++-- include/linux/drbd.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a75c0b134856..0f449bbf0edf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5257,9 +5257,11 @@ int drbd_asender(struct drbd_thread *thi) bool ping_timeout_active = false; struct net_conf *nc; int ping_timeo, tcp_cork, ping_int; + struct sched_param param = { .sched_priority = 2 }; - current->policy = SCHED_RR; /* Make this a realtime task! */ - current->rt_priority = 2; /* more important than all other tasks */ + rv = sched_setscheduler(current, SCHED_RR, ¶m); + if (rv < 0) + conn_err(tconn, "drbd_asender: ERROR set priority, ret=%d\n", rv); while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 316330705fd7..1b4d4ee1168f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,7 +52,7 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.2" +#define REL_VERSION "8.4.3" #define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- GitLab From 0b6ef4164f50698eee536903d69d086add1a7889 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 27 Mar 2013 14:08:49 +0100 Subject: [PATCH 0260/3163] drbd: fix if(); found by kbuild test robot Recently introduced al_begin_io_nonblock() was returning -EBUSY, even when it should return -EWOULDBLOCK. Impact: A few spurious wake_up() calls in prepare_al_transaction_nonblock(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 6afe173d5c2b..6608076dc39e 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -389,7 +389,7 @@ int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i) if (unlikely(tmp != NULL)) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { - if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags)); + if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags)) return -EBUSY; return -EWOULDBLOCK; } -- GitLab From 5d0f6131a79adfa1fb51309c5f81a2a4ef879dd4 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 4 Mar 2013 18:40:58 -0700 Subject: [PATCH 0261/3163] NVMe: Add nvme-scsi.c Translates SCSI commands in SG_IO ioctl to NVMe commands. Uses the scsi-nvme translation spec from nvmexpress.org as reference. 
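A hypothetical userspace sketch of exercising this path (not part of the patch; it assumes only the standard <scsi/sg.h> SG_IO interface): a SCSI INQUIRY issued against an NVMe block device is translated by nvme-scsi.c into the corresponding NVMe command.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

static int scsi_inquiry_via_sg_io(const char *dev)
{
	unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };	/* INQUIRY, 96-byte allocation length */
	unsigned char buf[96], sense[32];
	struct sg_io_hdr hdr;
	int fd, ret;

	fd = open(dev, O_RDONLY);
	if (fd < 0)
		return -1;

	memset(&hdr, 0, sizeof(hdr));
	hdr.interface_id = 'S';
	hdr.cmdp = cdb;
	hdr.cmd_len = sizeof(cdb);
	hdr.dxferp = buf;
	hdr.dxfer_len = sizeof(buf);
	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
	hdr.sbp = sense;
	hdr.mx_sb_len = sizeof(sense);
	hdr.timeout = 5000;	/* milliseconds */

	ret = ioctl(fd, SG_IO, &hdr);	/* dispatched to nvme_sg_io() */
	close(fd);
	return ret;
}
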
Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/Makefile | 2 +- drivers/block/nvme-core.c | 37 +- drivers/block/nvme-scsi.c | 2941 +++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 35 + 4 files changed, 2997 insertions(+), 18 deletions(-) create mode 100644 drivers/block/nvme-scsi.c diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 2a41c86d3ad9..ca07399a8d99 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -42,5 +42,5 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ -nvme-y := nvme-core.o +nvme-y := nvme-core.o nvme-scsi.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index d0cfb85d5582..a89f7dbefba0 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -39,7 +39,7 @@ #include #include #include - +#include #include #define NVME_Q_DEPTH 1024 @@ -224,12 +224,12 @@ static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid, return ctx; } -static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) +struct nvme_queue *get_nvmeq(struct nvme_dev *dev) { return dev->queues[get_cpu() + 1]; } -static void put_nvmeq(struct nvme_queue *nvmeq) +void put_nvmeq(struct nvme_queue *nvmeq) { put_cpu(); } @@ -290,7 +290,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) return iod; } -static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) +void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) { const int last_prp = PAGE_SIZE / 8 - 1; int i; @@ -339,9 +339,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, } /* length is in bytes. gfp flags indicates whether we may sleep. */ -static int nvme_setup_prps(struct nvme_dev *dev, - struct nvme_common_command *cmd, struct nvme_iod *iod, - int total_len, gfp_t gfp) +int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, + struct nvme_iod *iod, int total_len, gfp_t gfp) { struct dma_pool *pool; int length = total_len; @@ -512,7 +511,7 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; } -static int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) +int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) { int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH, special_completion, NVME_IO_TIMEOUT); @@ -715,8 +714,8 @@ static void sync_completion(struct nvme_dev *dev, void *ctx, * Returns 0 on success. 
If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, - struct nvme_command *cmd, u32 *result, unsigned timeout) +int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, + u32 *result, unsigned timeout) { int cmdid; struct sync_cmd_info cmdinfo; @@ -745,7 +744,7 @@ static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, return cmdinfo.status; } -static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, +int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, u32 *result) { return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); @@ -818,7 +817,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); } -static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, +int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, dma_addr_t dma_addr) { struct nvme_command c; @@ -832,7 +831,7 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, return nvme_submit_admin_cmd(dev, &c, NULL); } -static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result) { struct nvme_command c; @@ -846,8 +845,8 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, return nvme_submit_admin_cmd(dev, &c, result); } -static int nvme_set_features(struct nvme_dev *dev, unsigned fid, - unsigned dword11, dma_addr_t dma_addr, u32 *result) +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result) { struct nvme_command c; @@ -1065,7 +1064,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, +struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length) { int i, err, count, nents, offset; @@ -1121,7 +1120,7 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, return ERR_PTR(err); } -static void nvme_unmap_user_pages(struct nvme_dev *dev, int write, +void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod) { int i; @@ -1257,6 +1256,10 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, return nvme_user_admin_cmd(ns->dev, (void __user *)arg); case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, (void __user *)arg); + case SG_GET_VERSION_NUM: + return nvme_sg_get_version_num((void __user *)arg); + case SG_IO: + return nvme_sg_io(ns, (void __user *)arg); default: return -ENOTTY; } diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c new file mode 100644 index 000000000000..483af3585c92 --- /dev/null +++ b/drivers/block/nvme-scsi.c @@ -0,0 +1,2941 @@ +/* + * NVM Express device driver + * Copyright (c) 2011, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * Refer to the SCSI-NVMe Translation spec for details on how + * each command is translated. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static int sg_version_num = 30534; /* 2 digits for each component */ + +#define SNTI_TRANSLATION_SUCCESS 0 +#define SNTI_INTERNAL_ERROR 1 + +/* VPD Page Codes */ +#define VPD_SUPPORTED_PAGES 0x00 +#define VPD_SERIAL_NUMBER 0x80 +#define VPD_DEVICE_IDENTIFIERS 0x83 +#define VPD_EXTENDED_INQUIRY 0x86 +#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1 + +/* CDB offsets */ +#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET 6 +#define REPORT_LUNS_SR_OFFSET 2 +#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET 10 +#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET 4 +#define REQUEST_SENSE_DESC_OFFSET 1 +#define REQUEST_SENSE_DESC_MASK 0x01 +#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE 1 +#define INQUIRY_EVPD_BYTE_OFFSET 1 +#define INQUIRY_PAGE_CODE_BYTE_OFFSET 2 +#define INQUIRY_EVPD_BIT_MASK 1 +#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET 3 +#define START_STOP_UNIT_CDB_IMMED_OFFSET 1 +#define START_STOP_UNIT_CDB_IMMED_MASK 0x1 +#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET 3 +#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK 0xF +#define START_STOP_UNIT_CDB_POWER_COND_OFFSET 4 +#define START_STOP_UNIT_CDB_POWER_COND_MASK 0xF0 +#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET 4 +#define START_STOP_UNIT_CDB_NO_FLUSH_MASK 0x4 +#define START_STOP_UNIT_CDB_START_OFFSET 4 +#define START_STOP_UNIT_CDB_START_MASK 0x1 +#define WRITE_BUFFER_CDB_MODE_OFFSET 1 +#define WRITE_BUFFER_CDB_MODE_MASK 0x1F +#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET 2 +#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET 3 +#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET 6 +#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET 1 +#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK 0xC0 +#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT 6 +#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET 1 +#define FORMAT_UNIT_CDB_LONG_LIST_MASK 0x20 +#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET 1 +#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK 0x10 +#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4 +#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8 +#define FORMAT_UNIT_PROT_INT_OFFSET 3 +#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0 +#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07 + +/* Misc. 
defines */ +#define NIBBLE_SHIFT 4 +#define FIXED_SENSE_DATA 0x70 +#define DESC_FORMAT_SENSE_DATA 0x72 +#define FIXED_SENSE_DATA_ADD_LENGTH 10 +#define LUN_ENTRY_SIZE 8 +#define LUN_DATA_HEADER_SIZE 8 +#define ALL_LUNS_RETURNED 0x02 +#define ALL_WELL_KNOWN_LUNS_RETURNED 0x01 +#define RESTRICTED_LUNS_RETURNED 0x00 +#define NVME_POWER_STATE_START_VALID 0x00 +#define NVME_POWER_STATE_ACTIVE 0x01 +#define NVME_POWER_STATE_IDLE 0x02 +#define NVME_POWER_STATE_STANDBY 0x03 +#define NVME_POWER_STATE_LU_CONTROL 0x07 +#define POWER_STATE_0 0 +#define POWER_STATE_1 1 +#define POWER_STATE_2 2 +#define POWER_STATE_3 3 +#define DOWNLOAD_SAVE_ACTIVATE 0x05 +#define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E +#define ACTIVATE_DEFERRED_MICROCODE 0x0F +#define FORMAT_UNIT_IMMED_MASK 0x2 +#define FORMAT_UNIT_IMMED_OFFSET 1 +#define KELVIN_TEMP_FACTOR 273 +#define FIXED_FMT_SENSE_DATA_SIZE 18 +#define DESC_FMT_SENSE_DATA_SIZE 8 + +/* SCSI/NVMe defines and bit masks */ +#define INQ_STANDARD_INQUIRY_PAGE 0x00 +#define INQ_SUPPORTED_VPD_PAGES_PAGE 0x00 +#define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80 +#define INQ_DEVICE_IDENTIFICATION_PAGE 0x83 +#define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86 +#define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1 +#define INQ_SERIAL_NUMBER_LENGTH 0x14 +#define INQ_NUM_SUPPORTED_VPD_PAGES 5 +#define VERSION_SPC_4 0x06 +#define ACA_UNSUPPORTED 0 +#define STANDARD_INQUIRY_LENGTH 36 +#define ADDITIONAL_STD_INQ_LENGTH 31 +#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C +#define RESERVED_FIELD 0 + +/* SCSI READ/WRITE Defines */ +#define IO_CDB_WP_MASK 0xE0 +#define IO_CDB_WP_SHIFT 5 +#define IO_CDB_FUA_MASK 0x8 +#define IO_6_CDB_LBA_OFFSET 0 +#define IO_6_CDB_LBA_MASK 0x001FFFFF +#define IO_6_CDB_TX_LEN_OFFSET 4 +#define IO_6_DEFAULT_TX_LEN 256 +#define IO_10_CDB_LBA_OFFSET 2 +#define IO_10_CDB_TX_LEN_OFFSET 7 +#define IO_10_CDB_WP_OFFSET 1 +#define IO_10_CDB_FUA_OFFSET 1 +#define IO_12_CDB_LBA_OFFSET 2 +#define IO_12_CDB_TX_LEN_OFFSET 6 +#define IO_12_CDB_WP_OFFSET 1 +#define IO_12_CDB_FUA_OFFSET 1 +#define IO_16_CDB_FUA_OFFSET 1 +#define IO_16_CDB_WP_OFFSET 1 +#define IO_16_CDB_LBA_OFFSET 2 +#define IO_16_CDB_TX_LEN_OFFSET 10 + +/* Mode Sense/Select defines */ +#define MODE_PAGE_INFO_EXCEP 0x1C +#define MODE_PAGE_CACHING 0x08 +#define MODE_PAGE_CONTROL 0x0A +#define MODE_PAGE_POWER_CONDITION 0x1A +#define MODE_PAGE_RETURN_ALL 0x3F +#define MODE_PAGE_BLK_DES_LEN 0x08 +#define MODE_PAGE_LLBAA_BLK_DES_LEN 0x10 +#define MODE_PAGE_CACHING_LEN 0x14 +#define MODE_PAGE_CONTROL_LEN 0x0C +#define MODE_PAGE_POW_CND_LEN 0x28 +#define MODE_PAGE_INF_EXC_LEN 0x0C +#define MODE_PAGE_ALL_LEN 0x54 +#define MODE_SENSE6_MPH_SIZE 4 +#define MODE_SENSE6_ALLOC_LEN_OFFSET 4 +#define MODE_SENSE_PAGE_CONTROL_OFFSET 2 +#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0 +#define MODE_SENSE_PAGE_CODE_OFFSET 2 +#define MODE_SENSE_PAGE_CODE_MASK 0x3F +#define MODE_SENSE_LLBAA_OFFSET 1 +#define MODE_SENSE_LLBAA_MASK 0x10 +#define MODE_SENSE_LLBAA_SHIFT 4 +#define MODE_SENSE_DBD_OFFSET 1 +#define MODE_SENSE_DBD_MASK 8 +#define MODE_SENSE_DBD_SHIFT 3 +#define MODE_SENSE10_MPH_SIZE 8 +#define MODE_SENSE10_ALLOC_LEN_OFFSET 7 +#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET 1 +#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET 1 +#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET 4 +#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET 7 +#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10 +#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1 +#define MODE_SELECT_6_BD_OFFSET 3 +#define MODE_SELECT_10_BD_OFFSET 6 +#define MODE_SELECT_10_LLBAA_OFFSET 4 +#define MODE_SELECT_10_LLBAA_MASK 1 
+#define MODE_SELECT_6_MPH_SIZE 4 +#define MODE_SELECT_10_MPH_SIZE 8 +#define CACHING_MODE_PAGE_WCE_MASK 0x04 +#define MODE_SENSE_BLK_DESC_ENABLED 0 +#define MODE_SENSE_BLK_DESC_COUNT 1 +#define MODE_SELECT_PAGE_CODE_MASK 0x3F +#define SHORT_DESC_BLOCK 8 +#define LONG_DESC_BLOCK 16 +#define MODE_PAGE_POW_CND_LEN_FIELD 0x26 +#define MODE_PAGE_INF_EXC_LEN_FIELD 0x0A +#define MODE_PAGE_CACHING_LEN_FIELD 0x12 +#define MODE_PAGE_CONTROL_LEN_FIELD 0x0A +#define MODE_SENSE_PC_CURRENT_VALUES 0 + +/* Log Sense defines */ +#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE 0x00 +#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07 +#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F +#define LOG_PAGE_TEMPERATURE_PAGE 0x0D +#define LOG_SENSE_CDB_SP_OFFSET 1 +#define LOG_SENSE_CDB_SP_NOT_ENABLED 0 +#define LOG_SENSE_CDB_PC_OFFSET 2 +#define LOG_SENSE_CDB_PC_MASK 0xC0 +#define LOG_SENSE_CDB_PC_SHIFT 6 +#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1 +#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F +#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET 7 +#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8 +#define LOG_INFO_EXCP_PAGE_LENGTH 0xC +#define REMAINING_TEMP_PAGE_LENGTH 0xC +#define LOG_TEMP_PAGE_LENGTH 0x10 +#define LOG_TEMP_UNKNOWN 0xFF +#define SUPPORTED_LOG_PAGES_PAGE_LENGTH 0x3 + +/* Read Capacity defines */ +#define READ_CAP_10_RESP_SIZE 8 +#define READ_CAP_16_RESP_SIZE 32 + +/* NVMe Namespace and Command Defines */ +#define NVME_GET_SMART_LOG_PAGE 0x02 +#define NVME_GET_FEAT_TEMP_THRESH 0x04 +#define BYTES_TO_DWORDS 4 +#define NVME_MAX_FIRMWARE_SLOT 7 + +/* Report LUNs defines */ +#define REPORT_LUNS_FIRST_LUN_OFFSET 8 + +/* SCSI ADDITIONAL SENSE Codes */ + +#define SCSI_ASC_NO_SENSE 0x00 +#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT 0x03 +#define SCSI_ASC_LUN_NOT_READY 0x04 +#define SCSI_ASC_WARNING 0x0B +#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED 0x10 +#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED 0x10 +#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED 0x10 +#define SCSI_ASC_UNRECOVERED_READ_ERROR 0x11 +#define SCSI_ASC_MISCOMPARE_DURING_VERIFY 0x1D +#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID 0x20 +#define SCSI_ASC_ILLEGAL_COMMAND 0x20 +#define SCSI_ASC_ILLEGAL_BLOCK 0x21 +#define SCSI_ASC_INVALID_CDB 0x24 +#define SCSI_ASC_INVALID_LUN 0x25 +#define SCSI_ASC_INVALID_PARAMETER 0x26 +#define SCSI_ASC_FORMAT_COMMAND_FAILED 0x31 +#define SCSI_ASC_INTERNAL_TARGET_FAILURE 0x44 + +/* SCSI ADDITIONAL SENSE Code Qualifiers */ + +#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE 0x00 +#define SCSI_ASCQ_FORMAT_COMMAND_FAILED 0x01 +#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED 0x01 +#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED 0x02 +#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED 0x03 +#define SCSI_ASCQ_FORMAT_IN_PROGRESS 0x04 +#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08 +#define SCSI_ASCQ_INVALID_LUN_ID 0x09 + +/** + * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to + * enable DPOFUA support type 0x10 value. 
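+ * It is defined as zero below, i.e. the DPOFUA bit (0x10) is not set in
+ * the headers this driver returns.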
+ */ +#define DEVICE_SPECIFIC_PARAMETER 0 +#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR) + +/* MACROs to extract information from CDBs */ + +#define GET_OPCODE(cdb) cdb[0] + +#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0) + +#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0)) + +#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \ +(cdb[index + 1] << 8) | \ +(cdb[index + 2] << 0)) + +#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \ +(cdb[index + 1] << 16) | \ +(cdb[index + 2] << 8) | \ +(cdb[index + 3] << 0)) + +#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \ +(((u64)cdb[index + 1]) << 48) | \ +(((u64)cdb[index + 2]) << 40) | \ +(((u64)cdb[index + 3]) << 32) | \ +(((u64)cdb[index + 4]) << 24) | \ +(((u64)cdb[index + 5]) << 16) | \ +(((u64)cdb[index + 6]) << 8) | \ +(((u64)cdb[index + 7]) << 0)) + +/* Inquiry Helper Macros */ +#define GET_INQ_EVPD_BIT(cdb) \ +((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) & \ +INQUIRY_EVPD_BIT_MASK) ? 1 : 0) + +#define GET_INQ_PAGE_CODE(cdb) \ +(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET)) + +#define GET_INQ_ALLOC_LENGTH(cdb) \ +(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET)) + +/* Report LUNs Helper Macros */ +#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb) \ +(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET)) + +/* Read Capacity Helper Macros */ +#define GET_READ_CAP_16_ALLOC_LENGTH(cdb) \ +(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET)) + +#define IS_READ_CAP_16(cdb) \ +((cdb[0] == SERVICE_ACTION_IN && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0) + +/* Request Sense Helper Macros */ +#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb) \ +(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET)) + +/* Mode Sense Helper Macros */ +#define GET_MODE_SENSE_DBD(cdb) \ +((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >> \ +MODE_SENSE_DBD_SHIFT) + +#define GET_MODE_SENSE_LLBAA(cdb) \ +((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) & \ +MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT) + +#define GET_MODE_SENSE_MPH_SIZE(cdb10) \ +(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE) + + +/* Struct to gather data that needs to be extracted from a SCSI CDB. + Not conforming to any particular CDB variant, but compatible with all. 
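The fields are sized for the largest (16-byte) CDB variant, i.e. a 64-bit LBA and a 32-bit transfer length, so the smaller CDB variants simply leave the upper bits zero. 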
*/ + +struct nvme_trans_io_cdb { + u8 fua; + u8 prot_info; + u64 lba; + u32 xfer_len; +}; + + +/* Internal Helper Functions */ + + +/* Copy data to userspace memory */ + +static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, + unsigned long n) +{ + int res = SNTI_TRANSLATION_SUCCESS; + unsigned long not_copied; + int i; + void *index = from; + size_t remaining = n; + size_t xfer_len; + + if (hdr->iovec_count > 0) { + struct sg_iovec *sgl = hdr->dxferp; + + for (i = 0; i < hdr->iovec_count; i++) { + xfer_len = min(remaining, sgl[i].iov_len); + not_copied = copy_to_user(__user sgl[i].iov_base, index, + xfer_len); + if (not_copied) { + res = -EFAULT; + break; + } + index += xfer_len; + remaining -= xfer_len; + if (remaining == 0) + break; + } + return res; + } + not_copied = copy_to_user(__user hdr->dxferp, from, n); + if (not_copied) + res = -EFAULT; + return res; +} + +/* Copy data from userspace memory */ + +static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, + unsigned long n) +{ + int res = SNTI_TRANSLATION_SUCCESS; + unsigned long not_copied; + int i; + void *index = to; + size_t remaining = n; + size_t xfer_len; + + if (hdr->iovec_count > 0) { + struct sg_iovec *sgl = hdr->dxferp; + + for (i = 0; i < hdr->iovec_count; i++) { + xfer_len = min(remaining, sgl[i].iov_len); + not_copied = copy_from_user(index, + __user sgl[i].iov_base, xfer_len); + if (not_copied) { + res = -EFAULT; + break; + } + index += xfer_len; + remaining -= xfer_len; + if (remaining == 0) + break; + } + return res; + } + + not_copied = copy_from_user(to, __user hdr->dxferp, n); + if (not_copied) + res = -EFAULT; + return res; +} + +/* Status/Sense Buffer Writeback */ + +static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key, + u8 asc, u8 ascq) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 xfer_len; + u8 resp[DESC_FMT_SENSE_DATA_SIZE]; + + if (scsi_status_is_good(status)) { + hdr->status = SAM_STAT_GOOD; + hdr->masked_status = GOOD; + hdr->host_status = DID_OK; + hdr->driver_status = DRIVER_OK; + hdr->sb_len_wr = 0; + } else { + hdr->status = status; + hdr->masked_status = status >> 1; + hdr->host_status = DID_OK; + hdr->driver_status = DRIVER_OK; + + memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE); + resp[0] = DESC_FORMAT_SENSE_DATA; + resp[1] = sense_key; + resp[2] = asc; + resp[3] = ascq; + + xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE); + hdr->sb_len_wr = xfer_len; + if (copy_to_user(__user hdr->sbp, resp, xfer_len) > 0) + res = -EFAULT; + } + + return res; +} + +static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc) +{ + u8 status, sense_key, asc, ascq; + int res = SNTI_TRANSLATION_SUCCESS; + + /* For non-nvme (Linux) errors, simply return the error code */ + if (nvme_sc < 0) + return nvme_sc; + + /* Mask DNR, More, and reserved fields */ + nvme_sc &= 0x7FF; + + switch (nvme_sc) { + /* Generic Command Status */ + case NVME_SC_SUCCESS: + status = SAM_STAT_GOOD; + sense_key = NO_SENSE; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_INVALID_OPCODE: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ILLEGAL_COMMAND; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_INVALID_FIELD: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_INVALID_CDB; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_DATA_XFER_ERROR: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = 
SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_POWER_LOSS: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_WARNING; + ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED; + break; + case NVME_SC_INTERNAL: + status = SAM_STAT_CHECK_CONDITION; + sense_key = HARDWARE_ERROR; + asc = SCSI_ASC_INTERNAL_TARGET_FAILURE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_ABORT_REQ: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_ABORT_QUEUE: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_FUSED_FAIL: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_FUSED_MISSING: + status = SAM_STAT_TASK_ABORTED; + sense_key = ABORTED_COMMAND; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_INVALID_NS: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID; + ascq = SCSI_ASCQ_INVALID_LUN_ID; + break; + case NVME_SC_LBA_RANGE: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ILLEGAL_BLOCK; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_CAP_EXCEEDED: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_NO_SENSE; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_NS_NOT_READY: + status = SAM_STAT_CHECK_CONDITION; + sense_key = NOT_READY; + asc = SCSI_ASC_LUN_NOT_READY; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + + /* Command Specific Status */ + case NVME_SC_INVALID_FORMAT: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_FORMAT_COMMAND_FAILED; + ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED; + break; + case NVME_SC_BAD_ATTRIBUTES: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_INVALID_CDB; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + + /* Media Errors */ + case NVME_SC_WRITE_FAULT: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_READ_ERROR: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_UNRECOVERED_READ_ERROR; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_GUARD_CHECK: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED; + ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED; + break; + case NVME_SC_APPTAG_CHECK: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED; + ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED; + break; + case NVME_SC_REFTAG_CHECK: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MEDIUM_ERROR; + asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED; + ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED; + break; + case NVME_SC_COMPARE_FAILED: + status = SAM_STAT_CHECK_CONDITION; + sense_key = MISCOMPARE; + asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY; + ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + break; + case NVME_SC_ACCESS_DENIED: + status = SAM_STAT_CHECK_CONDITION; + sense_key = ILLEGAL_REQUEST; + asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID; + ascq = 
SCSI_ASCQ_INVALID_LUN_ID;
+		break;
+
+	/* Unspecified/Default */
+	case NVME_SC_CMDID_CONFLICT:
+	case NVME_SC_CMD_SEQ_ERROR:
+	case NVME_SC_CQ_INVALID:
+	case NVME_SC_QID_INVALID:
+	case NVME_SC_QUEUE_SIZE:
+	case NVME_SC_ABORT_LIMIT:
+	case NVME_SC_ABORT_MISSING:
+	case NVME_SC_ASYNC_LIMIT:
+	case NVME_SC_FIRMWARE_SLOT:
+	case NVME_SC_FIRMWARE_IMAGE:
+	case NVME_SC_INVALID_VECTOR:
+	case NVME_SC_INVALID_LOG_PAGE:
+	default:
+		status = SAM_STAT_CHECK_CONDITION;
+		sense_key = ILLEGAL_REQUEST;
+		asc = SCSI_ASC_NO_SENSE;
+		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
+		break;
+	}
+
+	res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
+
+	return res;
+}
+
+/* INQUIRY Helper Functions */
+
+static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
+					struct sg_io_hdr *hdr, u8 *inq_response,
+					int alloc_len)
+{
+	struct nvme_dev *dev = ns->dev;
+	dma_addr_t dma_addr;
+	void *mem;
+	struct nvme_id_ns *id_ns;
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	int xfer_len;
+	u8 resp_data_format = 0x02;
+	u8 protect;
+	u8 cmdque = 0x01 << 1;
+
+	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+				&dma_addr, GFP_KERNEL);
+	if (mem == NULL) {
+		res = -ENOMEM;
+		goto out_dma;
+	}
+
+	/* nvme ns identify - use DPS value for PROTECT field */
+	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	/*
+	 * If nvme_sc was -ve, res will be -ve here.
+	 * If nvme_sc was +ve, the status will have been translated, and res
+	 * can only be 0 or -ve.
+	 * - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc
+	 * - If -ve, return because it's a Linux error.
+	 */
+	if (res)
+		goto out_free;
+	if (nvme_sc) {
+		res = nvme_sc;
+		goto out_free;
+	}
+	id_ns = mem;
+	(id_ns->dps) ? (protect = 0x01) : (protect = 0);
+
+	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
+	inq_response[2] = VERSION_SPC_4;
+	inq_response[3] = resp_data_format;	/* normaca=0 | hisup=0 */
+	inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
+	inq_response[5] = protect;	/* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
+	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
+	strncpy(&inq_response[8], "NVMe ", 8);
+	strncpy(&inq_response[16], dev->model, 16);
+	strncpy(&inq_response[32], dev->firmware_rev, 4);
+
+	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
+	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
+
+ out_free:
+	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
+			dma_addr);
+ out_dma:
+	return res;
+}
+
+static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
+					struct sg_io_hdr *hdr, u8 *inq_response,
+					int alloc_len)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int xfer_len;
+
+	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
+	inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE;   /* Page Code */
+	inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES;    /* Page Length */
+	inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
+	inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
+	inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
+	inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
+	inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
+
+	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
+	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
+
+	return res;
+}
+
+static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
+					struct sg_io_hdr *hdr, u8 *inq_response,
+					int alloc_len)
+{
+	struct nvme_dev *dev = ns->dev;
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int xfer_len;
+
+	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
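+	/*
+	 * Unit Serial Number VPD page (0x80): byte 1 carries the page code,
+	 * byte 3 the page length, and the ASCII serial number begins at
+	 * byte 4.
+	 */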
+	inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
+	inq_response[3] = INQ_SERIAL_NUMBER_LENGTH;    /* Page Length */
+	strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
+
+	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
+	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
+
+	return res;
+}
+
+static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *inq_response, int alloc_len)
+{
+	struct nvme_dev *dev = ns->dev;
+	dma_addr_t dma_addr;
+	void *mem;
+	struct nvme_id_ctrl *id_ctrl;
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	u8 ieee[4];
+	int xfer_len;
+	u32 tmp_id = cpu_to_be32(ns->ns_id);
+
+	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+					&dma_addr, GFP_KERNEL);
+	if (mem == NULL) {
+		res = -ENOMEM;
+		goto out_dma;
+	}
+
+	/* nvme controller identify */
+	nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		goto out_free;
+	if (nvme_sc) {
+		res = nvme_sc;
+		goto out_free;
+	}
+	id_ctrl = mem;
+
+	/* Since SCSI tried to save 4 bits... [SPC-4(r34) Table 591] */
+	ieee[0] = id_ctrl->ieee[0] << 4;
+	ieee[1] = id_ctrl->ieee[0] >> 4 | id_ctrl->ieee[1] << 4;
+	ieee[2] = id_ctrl->ieee[1] >> 4 | id_ctrl->ieee[2] << 4;
+	ieee[3] = id_ctrl->ieee[2] >> 4;
+
+	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
+	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;    /* Page Code */
+	inq_response[3] = 20;      /* Page Length */
+	/* Designation Descriptor start */
+	inq_response[4] = 0x01;    /* Proto ID=0h | Code set=1h */
+	inq_response[5] = 0x03;    /* PIV=0b | Asso=00b | Designator Type=3h */
+	inq_response[6] = 0x00;    /* Rsvd */
+	inq_response[7] = 16;      /* Designator Length */
+	/* Designator start */
+	inq_response[8] = 0x60 | ieee[3]; /* NAA=6h | IEEE ID MSB, High nibble*/
+	inq_response[9] = ieee[2];  /* IEEE ID */
+	inq_response[10] = ieee[1]; /* IEEE ID */
+	inq_response[11] = ieee[0]; /* IEEE ID| Vendor Specific ID... 
*/ + inq_response[12] = (dev->pci_dev->vendor & 0xFF00) >> 8; + inq_response[13] = (dev->pci_dev->vendor & 0x00FF); + inq_response[14] = dev->serial[0]; + inq_response[15] = dev->serial[1]; + inq_response[16] = dev->model[0]; + inq_response[17] = dev->model[1]; + memcpy(&inq_response[18], &tmp_id, sizeof(u32)); + /* Last 2 bytes are zero */ + + xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); + res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); + + out_free: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out_dma: + return res; +} + +static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + u8 *inq_response; + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ctrl *id_ctrl; + struct nvme_id_ns *id_ns; + int xfer_len; + u8 microcode = 0x80; + u8 spt; + u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7}; + u8 grd_chk, app_chk, ref_chk, protect; + u8 uask_sup = 0x20; + u8 v_sup; + u8 luiclr = 0x01; + + inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); + if (inq_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_free; + if (nvme_sc) { + res = nvme_sc; + goto out_free; + } + id_ns = mem; + spt = spt_lut[(id_ns->dpc) & 0x07] << 3; + (id_ns->dps) ? (protect = 0x01) : (protect = 0); + grd_chk = protect << 2; + app_chk = protect << 1; + ref_chk = protect; + + /* nvme controller identify */ + nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_free; + if (nvme_sc) { + res = nvme_sc; + goto out_free; + } + id_ctrl = mem; + v_sup = id_ctrl->vwc; + + memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */ + inq_response[2] = 0x00; /* Page Length MSB */ + inq_response[3] = 0x3C; /* Page Length LSB */ + inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk; + inq_response[5] = uask_sup; + inq_response[6] = v_sup; + inq_response[7] = luiclr; + inq_response[8] = 0; + inq_response[9] = 0; + + xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); + + out_free: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out_dma: + kfree(inq_response); + out_mem: + return res; +} + +static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + u8 *inq_response; + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + + inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); + if (inq_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + + memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */ + inq_response[2] = 0x00; /* Page Length MSB */ + inq_response[3] = 0x3C; /* Page Length LSB */ + inq_response[4] = 0x00; /* Medium Rotation Rate MSB */ + inq_response[5] = 0x01; /* Medium Rotation Rate LSB */ + inq_response[6] = 0x00; /* Form Factor */ + + xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, 
inq_response, xfer_len); + + kfree(inq_response); + out_mem: + return res; +} + +/* LOG SENSE Helper Functions */ + +static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *log_response; + + log_response = kmalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL); + if (log_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + memset(log_response, 0, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH); + + log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; + /* Subpage=0x00, Page Length MSB=0 */ + log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH; + log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; + log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE; + log_response[6] = LOG_PAGE_TEMPERATURE_PAGE; + + xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH); + res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); + + kfree(log_response); + out_mem: + return res; +} + +static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, + struct sg_io_hdr *hdr, int alloc_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *log_response; + struct nvme_command c; + struct nvme_dev *dev = ns->dev; + struct nvme_smart_log *smart_log; + dma_addr_t dma_addr; + void *mem; + u8 temp_c; + u16 temp_k; + + log_response = kmalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); + if (log_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + memset(log_response, 0, LOG_INFO_EXCP_PAGE_LENGTH); + + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_smart_log), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* Get SMART Log Page */ + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(0xFFFFFFFF); + c.common.prp1 = cpu_to_le64(dma_addr); + c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + res = nvme_submit_admin_cmd(dev, &c, NULL); + if (res != NVME_SC_SUCCESS) { + temp_c = LOG_TEMP_UNKNOWN; + } else { + smart_log = mem; + temp_k = (smart_log->temperature[1] << 8) + + (smart_log->temperature[0]); + temp_c = temp_k - KELVIN_TEMP_FACTOR; + } + + log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE; + /* Subpage=0x00, Page Length MSB=0 */ + log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH; + /* Informational Exceptions Log Parameter 1 Start */ + /* Parameter Code=0x0000 bytes 4,5 */ + log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */ + log_response[7] = 0x04; /* PARAMETER LENGTH */ + /* Add sense Code and qualifier = 0x00 each */ + /* Use Temperature from NVMe Get Log Page, convert to C from K */ + log_response[10] = temp_c; + + xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); + + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + mem, dma_addr); + out_dma: + kfree(log_response); + out_mem: + return res; +} + +static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, + int alloc_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *log_response; + struct nvme_command c; + struct nvme_dev *dev = ns->dev; + struct nvme_smart_log *smart_log; + dma_addr_t dma_addr; + void *mem; + u32 feature_resp; + u8 temp_c_cur, temp_c_thresh; + u16 temp_k; + + log_response = kmalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); + if (log_response == NULL) { + res = -ENOMEM; + goto out_mem; + } + 
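/* Current temperature is read from the SMART log and the threshold from Get Features; NVMe reports both in Kelvin, converted to Celsius below. */
+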
memset(log_response, 0, LOG_TEMP_PAGE_LENGTH); + + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_smart_log), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out_dma; + } + + /* Get SMART Log Page */ + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(0xFFFFFFFF); + c.common.prp1 = cpu_to_le64(dma_addr); + c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + res = nvme_submit_admin_cmd(dev, &c, NULL); + if (res != NVME_SC_SUCCESS) { + temp_c_cur = LOG_TEMP_UNKNOWN; + } else { + smart_log = mem; + temp_k = (smart_log->temperature[1] << 8) + + (smart_log->temperature[0]); + temp_c_cur = temp_k - KELVIN_TEMP_FACTOR; + } + + /* Get Features for Temp Threshold */ + res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0, + &feature_resp); + if (res != NVME_SC_SUCCESS) + temp_c_thresh = LOG_TEMP_UNKNOWN; + else + temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR; + + log_response[0] = LOG_PAGE_TEMPERATURE_PAGE; + /* Subpage=0x00, Page Length MSB=0 */ + log_response[3] = REMAINING_TEMP_PAGE_LENGTH; + /* Temperature Log Parameter 1 (Temperature) Start */ + /* Parameter Code = 0x0000 */ + log_response[6] = 0x01; /* Format and Linking = 01b */ + log_response[7] = 0x02; /* Parameter Length */ + /* Use Temperature from NVMe Get Log Page, convert to C from K */ + log_response[9] = temp_c_cur; + /* Temperature Log Parameter 2 (Reference Temperature) Start */ + log_response[11] = 0x01; /* Parameter Code = 0x0001 */ + log_response[12] = 0x01; /* Format and Linking = 01b */ + log_response[13] = 0x02; /* Parameter Length */ + /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */ + log_response[15] = temp_c_thresh; + + xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH); + res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); + + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + mem, dma_addr); + out_dma: + kfree(log_response); + out_mem: + return res; +} + +/* MODE SENSE Helper Functions */ + +static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa, + u16 mode_data_length, u16 blk_desc_len) +{ + /* Quick check to make sure I don't stomp on my own memory... 
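(the 10-byte CDB path needs an 8-byte header, the 6-byte path a 4-byte one) 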
*/ + if ((cdb10 && len < 8) || (!cdb10 && len < 4)) + return SNTI_INTERNAL_ERROR; + + if (cdb10) { + resp[0] = (mode_data_length & 0xFF00) >> 8; + resp[1] = (mode_data_length & 0x00FF); + /* resp[2] and [3] are zero */ + resp[4] = llbaa; + resp[5] = RESERVED_FIELD; + resp[6] = (blk_desc_len & 0xFF00) >> 8; + resp[7] = (blk_desc_len & 0x00FF); + } else { + resp[0] = (mode_data_length & 0x00FF); + /* resp[1] and [2] are zero */ + resp[3] = (blk_desc_len & 0x00FF); + } + + return SNTI_TRANSLATION_SUCCESS; +} + +static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *resp, int len, u8 llbaa) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 flbas; + u32 lba_length; + + if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN) + return SNTI_INTERNAL_ERROR; + else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) + return SNTI_INTERNAL_ERROR; + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + flbas = (id_ns->flbas) & 0x0F; + lba_length = (1 << (id_ns->lbaf[flbas].ds)); + + if (llbaa == 0) { + u32 tmp_cap = cpu_to_be32(id_ns->ncap); + /* Byte 4 is reserved */ + u32 tmp_len = cpu_to_be32(lba_length) & 0x00FFFFFF; + + memcpy(resp, &tmp_cap, sizeof(u32)); + memcpy(&resp[4], &tmp_len, sizeof(u32)); + } else { + u64 tmp_cap = cpu_to_be64(id_ns->ncap); + u32 tmp_len = cpu_to_be32(lba_length); + + memcpy(resp, &tmp_cap, sizeof(u64)); + /* Bytes 8, 9, 10, 11 are reserved */ + memcpy(&resp[12], &tmp_len, sizeof(u32)); + } + + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out: + return res; +} + +static int nvme_trans_fill_control_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *resp, + int len) +{ + if (len < MODE_PAGE_CONTROL_LEN) + return SNTI_INTERNAL_ERROR; + + resp[0] = MODE_PAGE_CONTROL; + resp[1] = MODE_PAGE_CONTROL_LEN_FIELD; + resp[2] = 0x0E; /* TST=000b, TMF_ONLY=0, DPICZ=1, + * D_SENSE=1, GLTSD=1, RLEC=0 */ + resp[3] = 0x12; /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */ + /* Byte 4: VS=0, RAC=0, UA_INT=0, SWP=0 */ + resp[5] = 0x40; /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */ + /* resp[6] and [7] are obsolete, thus zero */ + resp[8] = 0xFF; /* Busy timeout period = 0xffff */ + resp[9] = 0xFF; + /* Bytes 10,11: Extended selftest completion time = 0x0000 */ + + return SNTI_TRANSLATION_SUCCESS; +} + +static int nvme_trans_fill_caching_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, + u8 *resp, int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + u32 feature_resp; + u8 vwc; + + if (len < MODE_PAGE_CACHING_LEN) + return SNTI_INTERNAL_ERROR; + + nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0, + &feature_resp); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out; + if (nvme_sc) { + res = nvme_sc; + goto out; + } + vwc = feature_resp & 0x00000001; + + resp[0] = MODE_PAGE_CACHING; + resp[1] = MODE_PAGE_CACHING_LEN_FIELD; + resp[2] = vwc << 2; + + out: + return res; +} + +static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *resp, + int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + + 
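/* Power Condition page: only the page code and length fields are filled in; the condition timer fields after them stay zero. */
+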
if (len < MODE_PAGE_POW_CND_LEN) + return SNTI_INTERNAL_ERROR; + + resp[0] = MODE_PAGE_POWER_CONDITION; + resp[1] = MODE_PAGE_POW_CND_LEN_FIELD; + /* All other bytes are zero */ + + return res; +} + +static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *resp, + int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + + if (len < MODE_PAGE_INF_EXC_LEN) + return SNTI_INTERNAL_ERROR; + + resp[0] = MODE_PAGE_INFO_EXCEP; + resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD; + resp[2] = 0x88; + /* All other bytes are zero */ + + return res; +} + +static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *resp, int len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u16 mode_pages_offset_1 = 0; + u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4; + + mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN; + mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN; + mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN; + + res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1], + MODE_PAGE_CACHING_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2], + MODE_PAGE_CONTROL_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3], + MODE_PAGE_POW_CND_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4], + MODE_PAGE_INF_EXC_LEN); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + + out: + return res; +} + +static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa) +{ + if (dbd == MODE_SENSE_BLK_DESC_ENABLED) { + /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */ + return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT; + } else { + return 0; + } +} + +static int nvme_trans_mode_page_create(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *cmd, + u16 alloc_len, u8 cdb10, + int (*mode_page_fill_func) + (struct nvme_ns *, + struct sg_io_hdr *hdr, u8 *, int), + u16 mode_pages_tot_len) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int xfer_len; + u8 *response; + u8 dbd, llbaa; + u16 resp_size; + int mph_size; + u16 mode_pages_offset_1; + u16 blk_desc_len, blk_desc_offset, mode_data_length; + + dbd = GET_MODE_SENSE_DBD(cmd); + llbaa = GET_MODE_SENSE_LLBAA(cmd); + mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10); + blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa); + + resp_size = mph_size + blk_desc_len + mode_pages_tot_len; + /* Refer spc4r34 Table 440 for calculation of Mode data Length field */ + mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len; + + blk_desc_offset = mph_size; + mode_pages_offset_1 = blk_desc_offset + blk_desc_len; + + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res = -ENOMEM; + goto out_mem; + } + memset(response, 0, resp_size); + + res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, + llbaa, mode_data_length, blk_desc_len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_free; + if (blk_desc_len > 0) { + res = nvme_trans_fill_blk_desc(ns, hdr, + &response[blk_desc_offset], + blk_desc_len, llbaa); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_free; + } + res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1], + mode_pages_tot_len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_free; + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, 
response, xfer_len); + + out_free: + kfree(response); + out_mem: + return res; +} + +/* Read Capacity Helper Functions */ + +static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns, + u8 cdb16) +{ + u8 flbas; + u32 lba_length; + u64 rlba; + u8 prot_en; + u8 p_type_lut[4] = {0, 0, 1, 2}; + u64 tmp_rlba; + u32 tmp_rlba_32; + u32 tmp_len; + + flbas = (id_ns->flbas) & 0x0F; + lba_length = (1 << (id_ns->lbaf[flbas].ds)); + rlba = le64_to_cpup(&id_ns->nsze) - 1; + (id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0); + + if (!cdb16) { + if (rlba > 0xFFFFFFFF) + rlba = 0xFFFFFFFF; + tmp_rlba_32 = cpu_to_be32(rlba); + tmp_len = cpu_to_be32(lba_length); + memcpy(response, &tmp_rlba_32, sizeof(u32)); + memcpy(&response[4], &tmp_len, sizeof(u32)); + } else { + tmp_rlba = cpu_to_be64(rlba); + tmp_len = cpu_to_be32(lba_length); + memcpy(response, &tmp_rlba, sizeof(u64)); + memcpy(&response[8], &tmp_len, sizeof(u32)); + response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en; + /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */ + /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */ + /* Bytes 16-31 - Reserved */ + } +} + +/* Start Stop Unit Helper Functions */ + +static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 pc, u8 pcmod, u8 start) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ctrl *id_ctrl; + int lowest_pow_st; /* max npss = lowest power consumption */ + unsigned ps_desired = 0; + + /* NVMe Controller Identify */ + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_id_ctrl), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ctrl = mem; + lowest_pow_st = id_ctrl->npss - 1; + + switch (pc) { + case NVME_POWER_STATE_START_VALID: + /* Action unspecified if POWER CONDITION MODIFIER != 0 */ + if (pcmod == 0 && start == 0x1) + ps_desired = POWER_STATE_0; + if (pcmod == 0 && start == 0x0) + ps_desired = lowest_pow_st; + break; + case NVME_POWER_STATE_ACTIVE: + /* Action unspecified if POWER CONDITION MODIFIER != 0 */ + if (pcmod == 0) + ps_desired = POWER_STATE_0; + break; + case NVME_POWER_STATE_IDLE: + /* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */ + /* min of desired state and (lps-1) because lps is STOP */ + if (pcmod == 0x0) + ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1)); + else if (pcmod == 0x1) + ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1)); + else if (pcmod == 0x2) + ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1)); + break; + case NVME_POWER_STATE_STANDBY: + /* Action unspecified if POWER CONDITION MODIFIER != [0,1] */ + if (pcmod == 0x0) + ps_desired = max(0, (lowest_pow_st - 2)); + else if (pcmod == 0x1) + ps_desired = max(0, (lowest_pow_st - 1)); + break; + case NVME_POWER_STATE_LU_CONTROL: + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0, + NULL); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) + res = nvme_sc; + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, + dma_addr); + out: + return res; +} + +/* Write Buffer Helper Functions */ +/* Also using this for 
Format Unit with hdr passed as NULL, and buffer_id, 0 */
+
+static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 opcode, u32 tot_len, u32 offset,
+					u8 buffer_id)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_command c;
+	struct nvme_iod *iod = NULL;
+	unsigned length;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = opcode;
+	if (opcode == nvme_admin_download_fw) {
+		if (hdr->iovec_count > 0) {
+			/* Assuming SGL is not allowed for this command */
+			res = nvme_trans_completion(hdr,
+						SAM_STAT_CHECK_CONDITION,
+						ILLEGAL_REQUEST,
+						SCSI_ASC_INVALID_CDB,
+						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+			goto out;
+		}
+		iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
+				(unsigned long)hdr->dxferp, tot_len);
+		if (IS_ERR(iod)) {
+			res = PTR_ERR(iod);
+			goto out;
+		}
+		length = nvme_setup_prps(dev, &c.common, iod, tot_len,
+								GFP_KERNEL);
+		if (length != tot_len) {
+			res = -ENOMEM;
+			goto out_unmap;
+		}
+
+		c.dlfw.numd = (tot_len/BYTES_TO_DWORDS) - 1;
+		c.dlfw.offset = offset/BYTES_TO_DWORDS;
+	} else if (opcode == nvme_admin_activate_fw) {
+		c.common.cdw10[0] = buffer_id;
+		/* AA=01b Replace & activate at reset */
+		c.common.cdw10[0] |= 0x00000008;
+	}
+
+	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		goto out_unmap;
+	if (nvme_sc)
+		res = nvme_sc;
+
+ out_unmap:
+	if (opcode == nvme_admin_download_fw) {
+		nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
+		nvme_free_iod(dev, iod);
+	}
+ out:
+	return res;
+}
+
+/* Mode Select Helper Functions */
+
+static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
+						u16 *bd_len, u8 *llbaa)
+{
+	if (cdb10) {
+		/* 10 Byte CDB */
+		*bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
+			parm_list[MODE_SELECT_10_BD_OFFSET + 1];
+		*llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
+				MODE_SELECT_10_LLBAA_MASK;
+	} else {
+		/* 6 Byte CDB */
+		*bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
+	}
+}
+
+static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
+					u16 idx, u16 bd_len, u8 llbaa)
+{
+	u16 bd_num;
+
+	bd_num = bd_len / ((llbaa == 0) ?
+			SHORT_DESC_BLOCK : LONG_DESC_BLOCK);
+	/* Store block descriptor info if a FORMAT UNIT comes later */
+	/* TODO Saving 1st BD info; what to do if multiple BD received? */
+	if (llbaa == 0) {
+		/* Standard Block Descriptor - spc4r34 7.5.5.1 */
+		ns->mode_select_num_blocks =
+				(parm_list[idx + 1] << 16) +
+				(parm_list[idx + 2] << 8) +
+				(parm_list[idx + 3]);
+
+		ns->mode_select_block_len =
+				(parm_list[idx + 5] << 16) +
+				(parm_list[idx + 6] << 8) +
+				(parm_list[idx + 7]);
+	} else {
+		/* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
+		ns->mode_select_num_blocks =
+				(((u64)parm_list[idx + 0]) << 56) +
+				(((u64)parm_list[idx + 1]) << 48) +
+				(((u64)parm_list[idx + 2]) << 40) +
+				(((u64)parm_list[idx + 3]) << 32) +
+				(((u64)parm_list[idx + 4]) << 24) +
+				(((u64)parm_list[idx + 5]) << 16) +
+				(((u64)parm_list[idx + 6]) << 8) +
+				((u64)parm_list[idx + 7]);
+
+		ns->mode_select_block_len =
+				(parm_list[idx + 12] << 24) +
+				(parm_list[idx + 13] << 16) +
+				(parm_list[idx + 14] << 8) +
+				(parm_list[idx + 15]);
+	}
+}
+
+static u16 nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *mode_page, u8 page_code)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	struct nvme_dev *dev = ns->dev;
+	unsigned dword11;
+
+	switch (page_code) {
+	case MODE_PAGE_CACHING:
+		dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 
1 : 0); + nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11, + 0, NULL); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + break; + if (nvme_sc) { + res = nvme_sc; + break; + } + break; + case MODE_PAGE_CONTROL: + break; + case MODE_PAGE_POWER_CONDITION: + /* Verify the OS is not trying to set timers */ + if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_PARAMETER, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + if (!res) + res = SNTI_INTERNAL_ERROR; + break; + } + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + if (!res) + res = SNTI_INTERNAL_ERROR; + break; + } + + return res; +} + +static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd, u16 parm_list_len, u8 pf, + u8 sp, u8 cdb10) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 *parm_list; + u16 bd_len; + u8 llbaa = 0; + u16 index, saved_index; + u8 page_code; + u16 mp_size; + + /* Get parm list from data-in/out buffer */ + parm_list = kmalloc(parm_list_len, GFP_KERNEL); + if (parm_list == NULL) { + res = -ENOMEM; + goto out; + } + + res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_mem; + + nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa); + index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE); + + if (bd_len != 0) { + /* Block Descriptors present, parse */ + nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa); + index += bd_len; + } + saved_index = index; + + /* Multiple mode pages may be present; iterate through all */ + /* In 1st Iteration, don't do NVME Command, only check for CDB errors */ + do { + page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK; + mp_size = parm_list[index + 1] + 2; + if ((page_code != MODE_PAGE_CACHING) && + (page_code != MODE_PAGE_CONTROL) && + (page_code != MODE_PAGE_POWER_CONDITION)) { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_mem; + } + index += mp_size; + } while (index < parm_list_len); + + /* In 2nd Iteration, do the NVME Commands */ + index = saved_index; + do { + page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK; + mp_size = parm_list[index + 1] + 2; + res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index], + page_code); + if (res != SNTI_TRANSLATION_SUCCESS) + break; + index += mp_size; + } while (index < parm_list_len); + + out_mem: + kfree(parm_list); + out: + return res; +} + +/* Format Unit Helper Functions */ + +static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, + struct sg_io_hdr *hdr) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 flbas; + + /* + * SCSI Expects a MODE SELECT would have been issued prior to + * a FORMAT UNIT, and the block size and number would be used + * from the block descriptor in it. If a MODE SELECT had not + * been issued, FORMAT shall use the current values for both. 
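+ * (ns->mode_select_num_blocks and ns->mode_select_block_len stay zero
+ * until a MODE SELECT stores them, which is what the test below checks.)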
+ */ + + if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + + if (ns->mode_select_num_blocks == 0) + ns->mode_select_num_blocks = id_ns->ncap; + if (ns->mode_select_block_len == 0) { + flbas = (id_ns->flbas) & 0x0F; + ns->mode_select_block_len = + (1 << (id_ns->lbaf[flbas].ds)); + } + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem, dma_addr); + } + out: + return res; +} + +static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, + u8 format_prot_info, u8 *nvme_pf_code) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 *parm_list; + u8 pf_usage, pf_code; + + parm_list = kmalloc(len, GFP_KERNEL); + if (parm_list == NULL) { + res = -ENOMEM; + goto out; + } + res = nvme_trans_copy_from_user(hdr, parm_list, len); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out_mem; + + if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] & + FORMAT_UNIT_IMMED_MASK) != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_mem; + } + + if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN && + (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_mem; + } + pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] & + FORMAT_UNIT_PROT_FIELD_USAGE_MASK; + pf_code = (pf_usage << 2) | format_prot_info; + switch (pf_code) { + case 0: + *nvme_pf_code = 0; + break; + case 2: + *nvme_pf_code = 1; + break; + case 3: + *nvme_pf_code = 2; + break; + case 7: + *nvme_pf_code = 3; + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + out_mem: + kfree(parm_list); + out: + return res; +} + +static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 prot_info) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 i; + u8 flbas, nlbaf; + u8 selected_lbaf = 0xFF; + u32 cdw10 = 0; + struct nvme_command c; + + /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + flbas = (id_ns->flbas) & 0x0F; + nlbaf = id_ns->nlbaf; + + for (i = 0; i < nlbaf; i++) { + if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) { + selected_lbaf = i; + break; + } + } + if (selected_lbaf > 0x0F) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } + if (ns->mode_select_num_blocks != id_ns->ncap) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + 
ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
+				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+	}
+
+	cdw10 |= prot_info << 5;
+	cdw10 |= selected_lbaf & 0x0F;
+	memset(&c, 0, sizeof(c));
+	c.format.opcode = nvme_admin_format_nvm;
+	c.format.nsid = ns->ns_id;
+	c.format.cdw10 = cpu_to_le32(cdw10);
+
+	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		goto out_dma;
+	if (nvme_sc)
+		res = nvme_sc;
+
+ out_dma:
+	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
+			dma_addr);
+ out:
+	return res;
+}
+
+/* Read/Write Helper Functions */
+
+static inline void nvme_trans_get_io_cdb6(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = 0;
+	cdb_info->prot_info = 0;
+	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) &
+					IO_6_CDB_LBA_MASK;
+	cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET);
+
+	/* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */
+	if (cdb_info->xfer_len == 0)
+		cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN;
+}
+
+static inline void nvme_trans_get_io_cdb10(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) &
+					IO_CDB_FUA_MASK;
+	cdb_info->prot_info = (GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) &
+					IO_CDB_WP_MASK) >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET);
+	cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET);
+}
+
+static inline void nvme_trans_get_io_cdb12(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) &
+					IO_CDB_FUA_MASK;
+	cdb_info->prot_info = (GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) &
+					IO_CDB_WP_MASK) >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET);
+	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET);
+}
+
+static inline void nvme_trans_get_io_cdb16(u8 *cmd,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) &
+					IO_CDB_FUA_MASK;
+	cdb_info->prot_info = (GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) &
+					IO_CDB_WP_MASK) >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET);
+	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET);
+}
+
+static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
+					struct nvme_trans_io_cdb *cdb_info,
+					u32 max_blocks)
+{
+	/* If using iovecs, send one nvme command per vector */
+	if (hdr->iovec_count > 0)
+		return hdr->iovec_count;
+	else if (cdb_info->xfer_len > max_blocks)
+		return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
+	else
+		return 1;
+}
+
+static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
+					struct nvme_trans_io_cdb *cdb_info)
+{
+	u16 control = 0;
+
+	/* When Protection information support is added, implement here */
+
+	if (cdb_info->fua > 0)
+		control |= NVME_RW_FUA;
+
+	return control;
+}
+
+static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+				struct nvme_trans_io_cdb *cdb_info, u8 is_write)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	int nvme_sc;
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
+	u32 num_cmds;
+	struct nvme_iod *iod;
+	u64 unit_len;
+	u64 unit_num_blocks;	/* Number of blocks to xfer in each nvme cmd */
+	u32 retcode;
+	u32 i = 0;
+	u64 nvme_offset = 0;
+	void *next_mapping_addr;
+	struct nvme_command c;
+	u8 opcode = (is_write ? 
nvme_cmd_write : nvme_cmd_read); + u16 control; + u32 max_blocks = (dev->max_hw_sectors << 9) >> ns->lba_shift; + + num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); + + /* + * This loop handles two cases. + * First, when an SGL is used in the form of an iovec list: + * - Use iov_base as the next mapping address for the nvme command_id + * - Use iov_len as the data transfer length for the command. + * Second, when we have a single buffer + * - If larger than max_blocks, split into chunks, offset + * each nvme command accordingly. + */ + for (i = 0; i < num_cmds; i++) { + memset(&c, 0, sizeof(c)); + if (hdr->iovec_count > 0) { + struct sg_iovec *sgl = hdr->dxferp; + + unit_len = sgl[i].iov_len; + unit_num_blocks = unit_len >> ns->lba_shift; + next_mapping_addr = sgl[i].iov_base; + } else { + unit_num_blocks = min((u64)max_blocks, + (cdb_info->xfer_len - nvme_offset)); + unit_len = unit_num_blocks << ns->lba_shift; + next_mapping_addr = hdr->dxferp + + ((1 << ns->lba_shift) * nvme_offset); + } + + c.rw.opcode = opcode; + c.rw.nsid = cpu_to_le32(ns->ns_id); + c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset); + c.rw.length = cpu_to_le16(unit_num_blocks - 1); + control = nvme_trans_io_get_control(ns, cdb_info); + c.rw.control = cpu_to_le16(control); + + iod = nvme_map_user_pages(dev, + (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + (unsigned long)next_mapping_addr, unit_len); + if (IS_ERR(iod)) { + res = PTR_ERR(iod); + goto out; + } + retcode = nvme_setup_prps(dev, &c.common, iod, unit_len, + GFP_KERNEL); + if (retcode != unit_len) { + nvme_unmap_user_pages(dev, + (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + iod); + nvme_free_iod(dev, iod); + res = -ENOMEM; + goto out; + } + + nvme_offset += unit_num_blocks; + + nvmeq = get_nvmeq(dev); + /* + * Since nvme_submit_sync_cmd sleeps, we can't keep + * preemption disabled. We may be preempted at any + * point, and be rescheduled to a different CPU. That + * will cause cacheline bouncing, but no additional + * races since q_lock already protects against other + * CPUs. + */ + put_nvmeq(nvmeq); + nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, + NVME_IO_TIMEOUT); + if (nvme_sc != NVME_SC_SUCCESS) { + nvme_unmap_user_pages(dev, + (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + iod); + nvme_free_iod(dev, iod); + res = nvme_trans_status_code(hdr, nvme_sc); + goto out; + } + nvme_unmap_user_pages(dev, + (is_write) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE,
+				iod);
+		nvme_free_iod(dev, iod);
+	}
+	res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
+
+ out:
+	return res;
+}
+
+
+/* SCSI Command Translation Functions */
+
+static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
+							u8 *cmd)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	struct nvme_trans_io_cdb cdb_info;
+	u8 opcode = cmd[0];
+	u64 xfer_bytes;
+	u64 sum_iov_len = 0;
+	struct sg_iovec *sgl;
+	int i;
+
+	/* Extract Fields from CDB */
+	switch (opcode) {
+	case WRITE_6:
+	case READ_6:
+		nvme_trans_get_io_cdb6(cmd, &cdb_info);
+		break;
+	case WRITE_10:
+	case READ_10:
+		nvme_trans_get_io_cdb10(cmd, &cdb_info);
+		break;
+	case WRITE_12:
+	case READ_12:
+		nvme_trans_get_io_cdb12(cmd, &cdb_info);
+		break;
+	case WRITE_16:
+	case READ_16:
+		nvme_trans_get_io_cdb16(cmd, &cdb_info);
+		break;
+	default:
+		/* Will never really reach here */
+		res = SNTI_INTERNAL_ERROR;
+		goto out;
+	}
+
+	/* Calculate total length of transfer (in bytes) */
+	if (hdr->iovec_count > 0) {
+		sgl = hdr->dxferp;
+		for (i = 0; i < hdr->iovec_count; i++) {
+			sum_iov_len += sgl[i].iov_len;
+			/* IO vector sizes should be multiples of block size */
+			if (sgl[i].iov_len % (1 << ns->lba_shift) != 0) {
+				res = nvme_trans_completion(hdr,
+						SAM_STAT_CHECK_CONDITION,
+						ILLEGAL_REQUEST,
+						SCSI_ASC_INVALID_PARAMETER,
+						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+				goto out;
+			}
+		}
+	} else {
+		sum_iov_len = hdr->dxfer_len;
+	}
+
+	/* As per sg ioctl howto, if the lengths differ, use the lower one */
+	xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
+
+	/* If block count and actual data buffer size don't match, error out */
+	if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
+		res = -EINVAL;
+		goto out;
+	}
+
+	/* Check for 0 length transfer - it is not illegal */
+	if (cdb_info.xfer_len == 0)
+		goto out;
+
+	/* Send NVMe IO Command(s) */
+	res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
+	if (res != SNTI_TRANSLATION_SUCCESS)
+		goto out;
+
+ out:
+	return res;
+}
+
+static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+							u8 *cmd)
+{
+	int res = SNTI_TRANSLATION_SUCCESS;
+	u8 evpd;
+	u8 page_code;
+	int alloc_len;
+	u8 *inq_response;
+
+	evpd = GET_INQ_EVPD_BIT(cmd);
+	page_code = GET_INQ_PAGE_CODE(cmd);
+	alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
+
+	inq_response = kmalloc(STANDARD_INQUIRY_LENGTH, GFP_KERNEL);
+	if (inq_response == NULL) {
+		res = -ENOMEM;
+		goto out_mem;
+	}
+
+	if (evpd == 0) {
+		if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
+			res = nvme_trans_standard_inquiry_page(ns, hdr,
+						inq_response, alloc_len);
+		} else {
+			res = nvme_trans_completion(hdr,
+						SAM_STAT_CHECK_CONDITION,
+						ILLEGAL_REQUEST,
+						SCSI_ASC_INVALID_CDB,
+						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		}
+	} else {
+		switch (page_code) {
+		case VPD_SUPPORTED_PAGES:
+			res = nvme_trans_supported_vpd_pages(ns, hdr,
+						inq_response, alloc_len);
+			break;
+		case VPD_SERIAL_NUMBER:
+			res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
+						alloc_len);
+			break;
+		case VPD_DEVICE_IDENTIFIERS:
+			res = nvme_trans_device_id_page(ns, hdr, inq_response,
+						alloc_len);
+			break;
+		case VPD_EXTENDED_INQUIRY:
+			res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
+			break;
+		case VPD_BLOCK_DEV_CHARACTERISTICS:
+			res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
+			break;
+		default:
+			res = nvme_trans_completion(hdr,
+						SAM_STAT_CHECK_CONDITION,
+						ILLEGAL_REQUEST,
+						SCSI_ASC_INVALID_CDB,
+						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+			break;
+		}
+	}
+	kfree(inq_response);
+ out_mem:
+	return res;
+}
+
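+/*
+ * Worked example (illustrative): a LOG SENSE CDB with cmd[2] = 0x4D decodes
+ * as PC = (0x4D & 0xC0) >> 6 = 1 (cumulative values, the only PC accepted
+ * below) and page code = 0x4D & 0x3F = 0x0D, the temperature page.
+ */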
+static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u16 alloc_len; + u8 sp; + u8 pc; + u8 page_code; + + sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET); + if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET); + page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK; + pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT; + if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET); + switch (page_code) { + case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE: + res = nvme_trans_log_supp_pages(ns, hdr, alloc_len); + break; + case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE: + res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len); + break; + case LOG_PAGE_TEMPERATURE_PAGE: + res = nvme_trans_log_temperature(ns, hdr, alloc_len); + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + out: + return res; +} + +static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 cdb10 = 0; + u16 parm_list_len; + u8 page_format; + u8 save_pages; + + page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET); + page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK; + + save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET); + save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK; + + if (GET_OPCODE(cmd) == MODE_SELECT) { + parm_list_len = GET_U8_FROM_CDB(cmd, + MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET); + } else { + parm_list_len = GET_U16_FROM_CDB(cmd, + MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET); + cdb10 = 1; + } + + if (parm_list_len != 0) { + /* + * According to SPC-4 r24, a parameter list length field of 0 + * shall not be considered an error + */ + res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len, + page_format, save_pages, cdb10); + } + + return res; +} + +static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u16 alloc_len; + u8 cdb10 = 0; + u8 page_code; + u8 pc; + + if (GET_OPCODE(cmd) == MODE_SENSE) { + alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET); + } else { + alloc_len = GET_U16_FROM_CDB(cmd, + MODE_SENSE10_ALLOC_LEN_OFFSET); + cdb10 = 1; + } + + pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) & + MODE_SENSE_PAGE_CONTROL_MASK; + if (pc != MODE_SENSE_PC_CURRENT_VALUES) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + + page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) & + MODE_SENSE_PAGE_CODE_MASK; + switch (page_code) { + case MODE_PAGE_CACHING: + res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_caching_page, + MODE_PAGE_CACHING_LEN); + break; + case MODE_PAGE_CONTROL: + res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_control_page, + MODE_PAGE_CONTROL_LEN); + break; + case MODE_PAGE_POWER_CONDITION: + res = 
nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_pow_cnd_page, + MODE_PAGE_POW_CND_LEN); + break; + case MODE_PAGE_INFO_EXCEP: + res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_inf_exc_page, + MODE_PAGE_INF_EXC_LEN); + break; + case MODE_PAGE_RETURN_ALL: + res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, + cdb10, + &nvme_trans_fill_all_pages, + MODE_PAGE_ALL_LEN); + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + out: + return res; +} + +static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + u32 alloc_len = READ_CAP_10_RESP_SIZE; + u32 resp_size = READ_CAP_10_RESP_SIZE; + u32 xfer_len; + u8 cdb16; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ns *id_ns; + u8 *response; + + cdb16 = IS_READ_CAP_16(cmd); + if (cdb16) { + alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd); + resp_size = READ_CAP_16_RESP_SIZE; + } + + mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + /* nvme ns identify */ + nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ns = mem; + + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res = -ENOMEM; + goto out_dma; + } + memset(response, 0, resp_size); + nvme_trans_fill_read_cap(response, id_ns, cdb16); + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, response, xfer_len); + + kfree(response); + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, + dma_addr); + out: + return res; +} + +static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + u32 alloc_len, xfer_len, resp_size; + u8 select_report; + u8 *response; + struct nvme_dev *dev = ns->dev; + dma_addr_t dma_addr; + void *mem; + struct nvme_id_ctrl *id_ctrl; + u32 ll_length, lun_id; + u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; + u32 tmp_len; + + alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd); + select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET); + + if ((select_report != ALL_LUNS_RETURNED) && + (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) && + (select_report != RESTRICTED_LUNS_RETURNED)) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } else { + /* NVMe Controller Identify */ + mem = dma_alloc_coherent(&dev->pci_dev->dev, + sizeof(struct nvme_id_ctrl), + &dma_addr, GFP_KERNEL); + if (mem == NULL) { + res = -ENOMEM; + goto out; + } + nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out_dma; + if (nvme_sc) { + res = nvme_sc; + goto out_dma; + } + id_ctrl = mem; + ll_length = id_ctrl->nn * LUN_ENTRY_SIZE; + resp_size = ll_length + LUN_DATA_HEADER_SIZE; + + if (alloc_len < resp_size) { + res = nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out_dma; + } + + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res 
= -ENOMEM; + goto out_dma; + } + memset(response, 0, resp_size); + + /* The first LUN ID will always be 0 per the SAM spec */ + for (lun_id = 0; lun_id < id_ctrl->nn; lun_id++) { + /* + * Set the LUN Id and then increment to the next LUN + * location in the parameter data. + */ + u64 tmp_id = cpu_to_be64(lun_id); + memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64)); + lun_id_offset += LUN_ENTRY_SIZE; + } + tmp_len = cpu_to_be32(ll_length); + memcpy(response, &tmp_len, sizeof(u32)); + } + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, response, xfer_len); + + kfree(response); + out_dma: + dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, + dma_addr); + out: + return res; +} + +static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 alloc_len, xfer_len, resp_size; + u8 desc_format; + u8 *response; + + alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd); + desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET); + desc_format &= REQUEST_SENSE_DESC_MASK; + + resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : + (FIXED_FMT_SENSE_DATA_SIZE)); + response = kmalloc(resp_size, GFP_KERNEL); + if (response == NULL) { + res = -ENOMEM; + goto out; + } + memset(response, 0, resp_size); + + if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { + /* Descriptor Format Sense Data */ + response[0] = DESC_FORMAT_SENSE_DATA; + response[1] = NO_SENSE; + /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */ + response[2] = SCSI_ASC_NO_SENSE; + response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + /* SDAT_OVFL = 0 | Additional Sense Length = 0 */ + } else { + /* Fixed Format Sense Data */ + response[0] = FIXED_SENSE_DATA; + /* Byte 1 = Obsolete */ + response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */ + /* Bytes 3-6 - Information - set to zero */ + response[7] = FIXED_SENSE_DATA_ADD_LENGTH; + /* Bytes 8-11 - Cmd Specific Information - set to zero */ + response[12] = SCSI_ASC_NO_SENSE; + response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; + /* Byte 14 = Field Replaceable Unit Code = 0 */ + /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */ + } + + xfer_len = min(alloc_len, resp_size); + res = nvme_trans_copy_to_user(hdr, response, xfer_len); + + kfree(response); + out: + return res; +} + +static int nvme_trans_security_protocol(struct nvme_ns *ns, + struct sg_io_hdr *hdr, + u8 *cmd) +{ + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); +} + +static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_queue *nvmeq = get_nvmeq(ns->dev); + u8 immed, pcmod, pc, no_flush, start; + + immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET); + pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET); + pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET); + no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET); + start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET); + + immed &= START_STOP_UNIT_CDB_IMMED_MASK; + pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK; + pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT; + no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK; + start &= START_STOP_UNIT_CDB_START_MASK; + + if (immed != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, 
SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } else { + if (no_flush == 0) { + /* Issue NVME FLUSH command prior to START STOP UNIT */ + nvme_sc = nvme_submit_flush_data(nvmeq, ns); + put_nvmeq(nvmeq); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out; + if (nvme_sc) { + res = nvme_sc; + goto out; + } + } + /* Setup the expected power state transition */ + res = nvme_trans_power_state(ns, hdr, pc, pcmod, start); + } + + out: + return res; +} + +static int nvme_trans_synchronize_cache(struct nvme_ns *ns, + struct sg_io_hdr *hdr, u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + int nvme_sc; + struct nvme_queue *nvmeq = get_nvmeq(ns->dev); + put_nvmeq(nvmeq); + nvme_sc = nvme_submit_flush_data(nvmeq, ns); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + goto out; + if (nvme_sc) + res = nvme_sc; + + out: + return res; +} + +static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u8 parm_hdr_len = 0; + u8 nvme_pf_code = 0; + u8 format_prot_info, long_list, format_data; + + format_prot_info = GET_U8_FROM_CDB(cmd, + FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET); + long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET); + format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET); + + format_prot_info = (format_prot_info & + FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >> + FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT; + long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK; + format_data &= FORMAT_UNIT_CDB_FORMAT_DATA_MASK; + + if (format_data != 0) { + if (format_prot_info != 0) { + if (long_list == 0) + parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN; + else + parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN; + } + } else if (format_data == 0 && format_prot_info != 0) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + + /* Get parm header from data-in/out buffer */ + /* + * According to the translation spec, the only fields in the parameter + * list we are concerned with are in the header. So allocate only that. 
+ */ + if (parm_hdr_len > 0) { + res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len, + format_prot_info, &nvme_pf_code); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + } + + /* Attempt to activate any previously downloaded firmware image */ + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0); + + /* Determine Block size and count and send format command */ + res = nvme_trans_fmt_set_blk_size_count(ns, hdr); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + + res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code); + + out: + return res; +} + +static int nvme_trans_test_unit_ready(struct nvme_ns *ns, + struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + struct nvme_dev *dev = ns->dev; + + if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + NOT_READY, SCSI_ASC_LUN_NOT_READY, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + else + res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0); + + return res; +} + +static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + int res = SNTI_TRANSLATION_SUCCESS; + u32 buffer_offset, parm_list_length; + u8 buffer_id, mode; + + parm_list_length = + GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET); + if (parm_list_length % BYTES_TO_DWORDS != 0) { + /* NVMe expects Firmware file to be a whole number of DWORDS */ + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET); + if (buffer_id > NVME_MAX_FIRMWARE_SLOT) { + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + goto out; + } + mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) & + WRITE_BUFFER_CDB_MODE_MASK; + buffer_offset = + GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET); + + switch (mode) { + case DOWNLOAD_SAVE_ACTIVATE: + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + parm_list_length, buffer_offset, + buffer_id); + if (res != SNTI_TRANSLATION_SUCCESS) + goto out; + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, + parm_list_length, buffer_offset, + buffer_id); + break; + case DOWNLOAD_SAVE_DEFER_ACTIVATE: + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + parm_list_length, buffer_offset, + buffer_id); + break; + case ACTIVATE_DEFERRED_MICROCODE: + res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, + parm_list_length, buffer_offset, + buffer_id); + break; + default: + res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + + out: + return res; +} + +static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) +{ + u8 cmd[BLK_MAX_CDB]; + int retcode; + unsigned int opcode; + + if (hdr->cmdp == NULL) + return -EMSGSIZE; + if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) + return -EFAULT; + + opcode = cmd[0]; + + switch (opcode) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + retcode = nvme_trans_io(ns, hdr, 0, cmd); + break; + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + retcode = nvme_trans_io(ns, hdr, 1, cmd); + break; + case INQUIRY: + retcode = nvme_trans_inquiry(ns, hdr, cmd); + break; + case LOG_SENSE: + retcode = nvme_trans_log_sense(ns, hdr, cmd); + break; + case 
MODE_SELECT: + case MODE_SELECT_10: + retcode = nvme_trans_mode_select(ns, hdr, cmd); + break; + case MODE_SENSE: + case MODE_SENSE_10: + retcode = nvme_trans_mode_sense(ns, hdr, cmd); + break; + case READ_CAPACITY: + retcode = nvme_trans_read_capacity(ns, hdr, cmd); + break; + case SERVICE_ACTION_IN: + if (IS_READ_CAP_16(cmd)) + retcode = nvme_trans_read_capacity(ns, hdr, cmd); + else + goto out; + break; + case REPORT_LUNS: + retcode = nvme_trans_report_luns(ns, hdr, cmd); + break; + case REQUEST_SENSE: + retcode = nvme_trans_request_sense(ns, hdr, cmd); + break; + case SECURITY_PROTOCOL_IN: + case SECURITY_PROTOCOL_OUT: + retcode = nvme_trans_security_protocol(ns, hdr, cmd); + break; + case START_STOP: + retcode = nvme_trans_start_stop(ns, hdr, cmd); + break; + case SYNCHRONIZE_CACHE: + retcode = nvme_trans_synchronize_cache(ns, hdr, cmd); + break; + case FORMAT_UNIT: + retcode = nvme_trans_format_unit(ns, hdr, cmd); + break; + case TEST_UNIT_READY: + retcode = nvme_trans_test_unit_ready(ns, hdr, cmd); + break; + case WRITE_BUFFER: + retcode = nvme_trans_write_buffer(ns, hdr, cmd); + break; + default: + out: + retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + break; + } + return retcode; +} + +int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) +{ + struct sg_io_hdr hdr; + int retcode; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&hdr, u_hdr, sizeof(hdr))) + return -EFAULT; + if (hdr.interface_id != 'S') + return -EINVAL; + if (hdr.cmd_len > BLK_MAX_CDB) + return -EINVAL; + + retcode = nvme_scsi_translate(ns, &hdr); + if (retcode < 0) + return retcode; + if (retcode > 0) + retcode = SNTI_TRANSLATION_SUCCESS; + if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0) + return -EFAULT; + + return retcode; +} + +int nvme_sg_get_version_num(int __user *ip) +{ + return put_user(sg_version_num, ip); +} diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f1974cab60cf..aa575033dbe7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -546,6 +546,8 @@ struct nvme_ns { int ns_id; int lba_shift; + u64 mode_select_num_blocks; + u32 mode_select_block_len; }; /* @@ -563,6 +565,39 @@ struct nvme_iod { dma_addr_t first_dma; struct scatterlist sg[0]; }; + +/** + * nvme_free_iod - frees an nvme_iod + * @dev: The device that the I/O was submitted to + * @iod: The memory to free + */ +void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); + +int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, + struct nvme_iod *iod, int total_len, gfp_t gfp); +struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, + unsigned long addr, unsigned length); +void nvme_unmap_user_pages(struct nvme_dev *dev, int write, + struct nvme_iod *iod); +struct nvme_queue *get_nvmeq(struct nvme_dev *dev); +void put_nvmeq(struct nvme_queue *nvmeq); +int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, + u32 *result, unsigned timeout); +int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); +int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, + u32 *result); +int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, + dma_addr_t dma_addr); +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result); +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result); + 
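+/* + * Forward declaration (descriptive note, not in the original patch): this + * header intentionally avoids including <scsi/sg.h>; the SG_IO entry points + * below only pass the pointer through to the SCSI translation layer. + */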
+struct sg_io_hdr; + +int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); +int nvme_sg_get_version_num(int __user *ip); + #endif #endif /* _LINUX_NVME_H */ -- GitLab From 169ef1cf6171d35550fef85645b83b960e241cff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Mar 2013 12:50:55 -0600 Subject: [PATCH 0262/3163] bcache: Don't export utility code, prefix with bch_ Signed-off-by: Kent Overstreet Cc: linux-bcache@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/bset.c | 4 +-- drivers/md/bcache/btree.c | 18 ++++++------ drivers/md/bcache/debug.c | 2 +- drivers/md/bcache/journal.c | 4 +-- drivers/md/bcache/movinggc.c | 4 +-- drivers/md/bcache/request.c | 14 ++++----- drivers/md/bcache/super.c | 18 ++++++------ drivers/md/bcache/sysfs.c | 24 ++++++++-------- drivers/md/bcache/sysfs.h | 2 +- drivers/md/bcache/util.c | 38 +++++++++--------------- drivers/md/bcache/util.h | 54 +++++++++++++++++------------------ drivers/md/bcache/writeback.c | 6 ++-- 13 files changed, 89 insertions(+), 101 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index b2846e70149b..f05723565f17 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -1033,7 +1033,7 @@ static inline void bkey_init(struct bkey *k) * jset: The checksum is _always_ the first 8 bytes of these structs */ #define csum_set(i) \ - crc64(((void *) (i)) + sizeof(uint64_t), \ + bch_crc64(((void *) (i)) + sizeof(uint64_t), \ ((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t))) /* Error handling macros */ diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 4dc9cb4efacb..0b33aac1f146 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1026,7 +1026,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, if (!start) { spin_lock(&b->c->sort_time_lock); - time_stats_update(&b->c->sort_time, start_time); + bch_time_stats_update(&b->c->sort_time, start_time); spin_unlock(&b->c->sort_time_lock); } } @@ -1076,7 +1076,7 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) btree_mergesort(b, new->sets->data, &iter, false, true); spin_lock(&b->c->sort_time_lock); - time_stats_update(&b->c->sort_time, start_time); + bch_time_stats_update(&b->c->sort_time, start_time); spin_unlock(&b->c->sort_time_lock); bkey_copy_key(&new->key, &b->key); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 24b678059091..f2b2c653c5a5 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -129,7 +129,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) uint64_t crc = b->key.ptr[0]; void *data = (void *) i + 8, *end = end(i); - crc = crc64_update(crc, data, end - data); + crc = bch_crc64_update(crc, data, end - data); return crc ^ 0xffffffffffffffff; } @@ -231,7 +231,7 @@ void bch_btree_read_done(struct closure *cl) mutex_unlock(&b->c->fill_lock); spin_lock(&b->c->btree_read_time_lock); - time_stats_update(&b->c->btree_read_time, b->io_start_time); + bch_time_stats_update(&b->c->btree_read_time, b->io_start_time); spin_unlock(&b->c->btree_read_time_lock); smp_wmb(); /* read_done is our write lock */ @@ -259,7 +259,7 @@ void bch_btree_read(struct btree *b) b->bio->bi_rw = REQ_META|READ_SYNC; b->bio->bi_size = KEY_SIZE(&b->key) << 9; - bio_map(b->bio, b->sets[0].data); + bch_bio_map(b->bio, b->sets[0].data); pr_debug("%s", pbtree(b)); trace_bcache_btree_read(b->bio); @@ -327,12 +327,12 @@ static void do_btree_write(struct btree *b) 
btree_bio_init(b); b->bio->bi_rw = REQ_META|WRITE_SYNC; b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); - bio_map(b->bio, i); + bch_bio_map(b->bio, i); bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); - if (!bio_alloc_pages(b->bio, GFP_NOIO)) { + if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); @@ -347,7 +347,7 @@ static void do_btree_write(struct btree *b) continue_at(cl, btree_write_done, NULL); } else { b->bio->bi_vcnt = 0; - bio_map(b->bio, i); + bch_bio_map(b->bio, i); trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); @@ -815,7 +815,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, void bch_cannibalize_unlock(struct cache_set *c, struct closure *cl) { if (c->try_harder == cl) { - time_stats_update(&c->try_harder_time, c->try_harder_start); + bch_time_stats_update(&c->try_harder_time, c->try_harder_start); c->try_harder = NULL; __closure_wake_up(&c->try_wait); } @@ -1536,7 +1536,7 @@ static void bch_btree_gc(struct closure *cl) available = bch_btree_gc_finish(c); - time_stats_update(&c->btree_gc_time, start_time); + bch_time_stats_update(&c->btree_gc_time, start_time); stats.key_bytes *= sizeof(uint64_t); stats.dirty <<= 9; @@ -2007,7 +2007,7 @@ static int btree_split(struct btree *b, struct btree_op *op) rw_unlock(true, n1); btree_node_free(b, op); - time_stats_update(&b->c->btree_split_time, start_time); + bch_time_stats_update(&b->c->btree_split_time, start_time); return 0; err_free2: diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 141a5cac11ad..732234d9ec04 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -200,7 +200,7 @@ void bch_data_verify(struct search *s) if (!check) return; - if (bio_alloc_pages(check, GFP_NOIO)) + if (bch_bio_alloc_pages(check, GFP_NOIO)) goto out_put; check->bi_rw = READ_SYNC; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 21fd1010cf5d..b0a3d0577d13 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -54,7 +54,7 @@ reread: left = ca->sb.bucket_size - offset; bio->bi_end_io = journal_read_endio; bio->bi_private = &op->cl; - bio_map(bio, data); + bch_bio_map(bio, data); closure_bio_submit(bio, &op->cl, ca); closure_sync(&op->cl); @@ -621,7 +621,7 @@ static void journal_write_unlocked(struct closure *cl) bio->bi_end_io = journal_write_endio; bio->bi_private = w; - bio_map(bio, w->data); + bch_bio_map(bio, w->data); trace_bcache_journal_write(bio); bio_list_add(&list, bio); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index e3ec0a550b00..8589512c972e 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -85,7 +85,7 @@ static void moving_init(struct moving_io *io) PAGE_SECTORS); bio->bi_private = &io->s.cl; bio->bi_io_vec = bio->bi_inline_vecs; - bio_map(bio, NULL); + bch_bio_map(bio, NULL); } static void write_moving(struct closure *cl) @@ -159,7 +159,7 @@ static void read_moving(struct closure *cl) bio->bi_rw = READ; bio->bi_end_io = read_moving_endio; - if (bio_alloc_pages(bio, GFP_KERNEL)) + if (bch_bio_alloc_pages(bio, GFP_KERNEL)) goto err; pr_debug("%s", pkey(&w->key)); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index dbda9646ef38..83731dc36f34 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -58,8 +58,8 @@ static ssize_t cache_mode_read(struct 
cgroup *cgrp, struct cftype *cft, char __user *buf, size_t nbytes, loff_t *ppos) { char tmp[1024]; - int len = snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, - cgroup_to_bcache(cgrp)->cache_mode + 1); + int len = bch_snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, + cgroup_to_bcache(cgrp)->cache_mode + 1); if (len < 0) return len; @@ -70,7 +70,7 @@ static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft, const char *buf) { - int v = read_string_list(buf, bch_cache_modes); + int v = bch_read_string_list(buf, bch_cache_modes); if (v < 0) return v; @@ -205,7 +205,7 @@ static void bio_csum(struct bio *bio, struct bkey *k) bio_for_each_segment(bv, bio, i) { void *d = kmap(bv->bv_page) + bv->bv_offset; - csum = crc64_update(csum, d, bv->bv_len); + csum = bch_crc64_update(csum, d, bv->bv_len); kunmap(bv->bv_page); } @@ -835,7 +835,7 @@ static void request_read_done(struct closure *cl) s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; - bio_map(s->op.cache_bio, NULL); + bch_bio_map(s->op.cache_bio, NULL); src = bio_iovec(s->op.cache_bio); dst = bio_iovec(s->cache_miss); @@ -962,8 +962,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, if (!bch_btree_insert_check_key(b, &s->op, s->op.cache_bio)) goto out_put; - bio_map(s->op.cache_bio, NULL); - if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + bch_bio_map(s->op.cache_bio, NULL); + if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) goto out_put; s->cache_miss = miss; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 484ae6c8f43a..f47ecb5cb318 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -142,7 +142,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto err; err = "Bad UUID"; - if (is_zero(sb->uuid, 16)) + if (bch_is_zero(sb->uuid, 16)) goto err; err = "Unsupported superblock version"; @@ -170,7 +170,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto out; err = "Bad UUID"; - if (is_zero(sb->set_uuid, 16)) + if (bch_is_zero(sb->set_uuid, 16)) goto err; err = "Bad cache device number in set"; @@ -218,7 +218,7 @@ static void __write_super(struct cache_sb *sb, struct bio *bio) bio->bi_sector = SB_SECTOR; bio->bi_rw = REQ_SYNC|REQ_META; bio->bi_size = SB_SIZE; - bio_map(bio, NULL); + bch_bio_map(bio, NULL); out->offset = cpu_to_le64(sb->offset); out->version = cpu_to_le64(sb->version); @@ -332,7 +332,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw, bio->bi_end_io = uuid_endio; bio->bi_private = cl; - bio_map(bio, c->uuids); + bch_bio_map(bio, c->uuids); bch_submit_bbio(bio, c, k, i); @@ -344,7 +344,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw, pkey(&c->uuid_bucket)); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) - if (!is_zero(u->uuid, 16)) + if (!bch_is_zero(u->uuid, 16)) pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u", u - c->uuids, u->uuid, u->label, u->first_reg, u->last_reg, u->invalidated); @@ -491,7 +491,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) bio->bi_end_io = prio_endio; bio->bi_private = ca; - bio_map(bio, ca->disk_buckets); + bch_bio_map(bio, ca->disk_buckets); closure_bio_submit(bio, &ca->prio, ca); closure_sync(cl); @@ -538,7 +538,7 @@ void bch_prio_write(struct cache *ca) p->next_bucket = 
ca->prio_buckets[i + 1]; p->magic = pset_magic(ca); - p->csum = crc64(&p->magic, bucket_bytes(ca) - 8); + p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8); bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, &cl); BUG_ON(bucket == -1); @@ -585,7 +585,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) prio_io(ca, bucket, READ_SYNC); - if (p->csum != crc64(&p->magic, bucket_bytes(ca) - 8)) + if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); if (p->magic != pset_magic(ca)) @@ -898,7 +898,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) sysfs_remove_file(&dc->kobj, &sysfs_attach); */ - if (is_zero(u->uuid, 16)) { + if (bch_is_zero(u->uuid, 16)) { struct closure cl; closure_init_stack(&cl); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 5c7e77073b1f..4d9cca47e4c6 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -105,9 +105,9 @@ SHOW(__bch_cached_dev) #define var(stat) (dc->stat) if (attr == &sysfs_cache_mode) - return snprint_string_list(buf, PAGE_SIZE, - bch_cache_modes + 1, - BDEV_CACHE_MODE(&dc->sb)); + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_cache_modes + 1, + BDEV_CACHE_MODE(&dc->sb)); sysfs_printf(data_csum, "%i", dc->disk.data_csum); var_printf(verify, "%i"); @@ -126,10 +126,10 @@ SHOW(__bch_cached_dev) char dirty[20]; char derivative[20]; char target[20]; - hprint(dirty, + bch_hprint(dirty, atomic_long_read(&dc->disk.sectors_dirty) << 9); - hprint(derivative, dc->writeback_rate_derivative << 9); - hprint(target, dc->writeback_rate_target << 9); + bch_hprint(derivative, dc->writeback_rate_derivative << 9); + bch_hprint(target, dc->writeback_rate_target << 9); return sprintf(buf, "rate:\t\t%u\n" @@ -202,7 +202,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = read_string_list(buf, bch_cache_modes + 1); + ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; @@ -224,7 +224,7 @@ STORE(__cached_dev) } if (attr == &sysfs_attach) { - if (parse_uuid(buf, dc->sb.set_uuid) < 16) + if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16) return -EINVAL; list_for_each_entry(c, &bch_cache_sets, list) { @@ -657,9 +657,9 @@ SHOW(__bch_cache) ((size_t) ca->sb.nbuckets)); if (attr == &sysfs_cache_replacement_policy) - return snprint_string_list(buf, PAGE_SIZE, - cache_replacement_policies, - CACHE_REPLACEMENT(&ca->sb)); + return bch_snprint_string_list(buf, PAGE_SIZE, + cache_replacement_policies, + CACHE_REPLACEMENT(&ca->sb)); if (attr == &sysfs_priority_stats) { int cmp(const void *l, const void *r) @@ -747,7 +747,7 @@ STORE(__bch_cache) } if (attr == &sysfs_cache_replacement_policy) { - ssize_t v = read_string_list(buf, cache_replacement_policies); + ssize_t v = bch_read_string_list(buf, cache_replacement_policies); if (v < 0) return v; diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index 34e4ba1184fe..0526fe92a683 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -62,7 +62,7 @@ do { \ #define sysfs_hprint(file, val) \ do { \ if (attr == &sysfs_ ## file) { \ - ssize_t ret = hprint(buf, val); \ + ssize_t ret = bch_hprint(buf, val); \ strcat(buf, "\n"); \ return ret + 1; \ } \ diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dcec2e4f84ad..22324d8b2840 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -19,7 +19,7 @@ #define simple_strtouint(c, end, base) simple_strtoul(c, end, base) #define STRTO_H(name, type) 
\ -int name ## _h(const char *cp, type *res) \ +int bch_ ## name ## _h(const char *cp, type *res) \ { \ int u = 0; \ char *e; \ @@ -67,14 +67,13 @@ int name ## _h(const char *cp, type *res) \ *res = i; \ return 0; \ } \ -EXPORT_SYMBOL_GPL(name ## _h); STRTO_H(strtoint, int) STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) -ssize_t hprint(char *buf, int64_t v) +ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; char dec[3] = ""; @@ -93,9 +92,8 @@ ssize_t hprint(char *buf, int64_t v) return sprintf(buf, "%lli%s%c", v, dec, units[u]); } -EXPORT_SYMBOL_GPL(hprint); -ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], +ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], size_t selected) { char *out = buf; @@ -108,9 +106,8 @@ ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], out[-1] = '\n'; return out - buf; } -EXPORT_SYMBOL_GPL(snprint_string_list); -ssize_t read_string_list(const char *buf, const char * const list[]) +ssize_t bch_read_string_list(const char *buf, const char * const list[]) { size_t i; char *s, *d = kstrndup(buf, PAGE_SIZE - 1, GFP_KERNEL); @@ -130,9 +127,8 @@ ssize_t read_string_list(const char *buf, const char * const list[]) return i; } -EXPORT_SYMBOL_GPL(read_string_list); -bool is_zero(const char *p, size_t n) +bool bch_is_zero(const char *p, size_t n) { size_t i; @@ -141,9 +137,8 @@ bool is_zero(const char *p, size_t n) return false; return true; } -EXPORT_SYMBOL_GPL(is_zero); -int parse_uuid(const char *s, char *uuid) +int bch_parse_uuid(const char *s, char *uuid) { size_t i, j, x; memset(uuid, 0, 16); @@ -170,9 +165,8 @@ int parse_uuid(const char *s, char *uuid) } return i; } -EXPORT_SYMBOL_GPL(parse_uuid); -void time_stats_update(struct time_stats *stats, uint64_t start_time) +void bch_time_stats_update(struct time_stats *stats, uint64_t start_time) { uint64_t now = local_clock(); uint64_t duration = time_after64(now, start_time) @@ -195,9 +189,8 @@ void time_stats_update(struct time_stats *stats, uint64_t start_time) stats->last = now ?: 1; } -EXPORT_SYMBOL_GPL(time_stats_update); -unsigned next_delay(struct ratelimit *d, uint64_t done) +unsigned bch_next_delay(struct ratelimit *d, uint64_t done) { uint64_t now = local_clock(); @@ -207,9 +200,8 @@ unsigned next_delay(struct ratelimit *d, uint64_t done) ? 
div_u64(d->next - now, NSEC_PER_SEC / HZ) : 0; } -EXPORT_SYMBOL_GPL(next_delay); -void bio_map(struct bio *bio, void *base) +void bch_bio_map(struct bio *bio, void *base) { size_t size = bio->bi_size; struct bio_vec *bv = bio->bi_io_vec; @@ -235,9 +227,8 @@ start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, size -= bv->bv_len; } } -EXPORT_SYMBOL_GPL(bio_map); -int bio_alloc_pages(struct bio *bio, gfp_t gfp) +int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp) { int i; struct bio_vec *bv; @@ -253,7 +244,6 @@ int bio_alloc_pages(struct bio *bio, gfp_t gfp) return 0; } -EXPORT_SYMBOL_GPL(bio_alloc_pages); /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any @@ -365,7 +355,7 @@ static const uint64_t crc_table[256] = { 0x9AFCE626CE85B507 }; -uint64_t crc64_update(uint64_t crc, const void *_data, size_t len) +uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len) { const unsigned char *data = _data; @@ -376,14 +366,12 @@ uint64_t crc64_update(uint64_t crc, const void *_data, size_t len) return crc; } -EXPORT_SYMBOL(crc64_update); -uint64_t crc64(const void *data, size_t len) +uint64_t bch_crc64(const void *data, size_t len) { uint64_t crc = 0xffffffffffffffff; - crc = crc64_update(crc, data, len); + crc = bch_crc64_update(crc, data, len); return crc ^ 0xffffffffffffffff; } -EXPORT_SYMBOL(crc64); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 56705fdcc149..577393e38c3a 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -307,42 +307,42 @@ do { \ #define ANYSINT_MAX(t) \ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) -int strtoint_h(const char *, int *); -int strtouint_h(const char *, unsigned int *); -int strtoll_h(const char *, long long *); -int strtoull_h(const char *, unsigned long long *); +int bch_strtoint_h(const char *, int *); +int bch_strtouint_h(const char *, unsigned int *); +int bch_strtoll_h(const char *, long long *); +int bch_strtoull_h(const char *, unsigned long long *); -static inline int strtol_h(const char *cp, long *res) +static inline int bch_strtol_h(const char *cp, long *res) { #if BITS_PER_LONG == 32 - return strtoint_h(cp, (int *) res); + return bch_strtoint_h(cp, (int *) res); #else - return strtoll_h(cp, (long long *) res); + return bch_strtoll_h(cp, (long long *) res); #endif } -static inline int strtoul_h(const char *cp, long *res) +static inline int bch_strtoul_h(const char *cp, long *res) { #if BITS_PER_LONG == 32 - return strtouint_h(cp, (unsigned int *) res); + return bch_strtouint_h(cp, (unsigned int *) res); #else - return strtoull_h(cp, (unsigned long long *) res); + return bch_strtoull_h(cp, (unsigned long long *) res); #endif } #define strtoi_h(cp, res) \ (__builtin_types_compatible_p(typeof(*res), int) \ - ? strtoint_h(cp, (void *) res) \ + ? bch_strtoint_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), long) \ - ? strtol_h(cp, (void *) res) \ + ? bch_strtol_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), long long) \ - ? strtoll_h(cp, (void *) res) \ + ? bch_strtoll_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), unsigned int) \ - ? strtouint_h(cp, (void *) res) \ + ? bch_strtouint_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), unsigned long) \ - ? strtoul_h(cp, (void *) res) \ + ? bch_strtoul_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), unsigned long long)\ - ? strtoull_h(cp, (void *) res) : -EINVAL) + ? 
bch_strtoull_h(cp, (void *) res) : -EINVAL) #define strtoul_safe(cp, var) \ ({ \ @@ -379,15 +379,15 @@ static inline int strtoul_h(const char *cp, long *res) __builtin_types_compatible_p(typeof(var), const char *) \ ? "%s\n" : "%i\n", var) -ssize_t hprint(char *buf, int64_t v); +ssize_t bch_hprint(char *buf, int64_t v); -bool is_zero(const char *p, size_t n); -int parse_uuid(const char *s, char *uuid); +bool bch_is_zero(const char *p, size_t n); +int bch_parse_uuid(const char *s, char *uuid); -ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], +ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], size_t selected); -ssize_t read_string_list(const char *buf, const char * const list[]); +ssize_t bch_read_string_list(const char *buf, const char * const list[]); struct time_stats { /* @@ -400,7 +400,7 @@ struct time_stats { uint64_t last; }; -void time_stats_update(struct time_stats *stats, uint64_t time); +void bch_time_stats_update(struct time_stats *stats, uint64_t time); #define NSEC_PER_ns 1L #define NSEC_PER_us NSEC_PER_USEC @@ -462,7 +462,7 @@ static inline void ratelimit_reset(struct ratelimit *d) d->next = local_clock(); } -unsigned next_delay(struct ratelimit *d, uint64_t done); +unsigned bch_next_delay(struct ratelimit *d, uint64_t done); #define __DIV_SAFE(n, d, zero) \ ({ \ @@ -568,9 +568,9 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) #define bio_end(bio) ((bio)->bi_sector + bio_sectors(bio)) -void bio_map(struct bio *bio, void *base); +void bch_bio_map(struct bio *bio, void *base); -int bio_alloc_pages(struct bio *bio, gfp_t gfp); +int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp); static inline sector_t bdev_sectors(struct block_device *bdev) { @@ -583,7 +583,7 @@ do { \ bch_generic_make_request(bio, &(dev)->bio_split_hook); \ } while (0) -uint64_t crc64_update(uint64_t, const void *, size_t); -uint64_t crc64(const void *, size_t); +uint64_t bch_crc64_update(uint64_t, const void *, size_t); +uint64_t bch_crc64(const void *, size_t); #endif /* _BCACHE_UTIL_H */ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index a80ee5373fd8..93e7e31a4bd3 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -95,7 +95,7 @@ static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) !dc->writeback_percent) return 0; - return next_delay(&dc->writeback_rate, sectors * 10000000ULL); + return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); } /* Background writeback */ @@ -118,7 +118,7 @@ static void dirty_init(struct keybuf_key *w) bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS); bio->bi_private = w; bio->bi_io_vec = bio->bi_inline_vecs; - bio_map(bio, NULL); + bch_bio_map(bio, NULL); } static void refill_dirty(struct closure *cl) @@ -349,7 +349,7 @@ static void read_dirty(struct closure *cl) io->bio.bi_rw = READ; io->bio.bi_end_io = read_dirty_endio; - if (bio_alloc_pages(&io->bio, GFP_KERNEL)) + if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; pr_debug("%s", pkey(&w->key)); -- GitLab From fddddb52a6c4e2438f4514ed979183653ca0732a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 21 Mar 2013 17:59:14 +0100 Subject: [PATCH 0263/3163] bus: introduce a Marvell EBU MBus driver The Marvell EBU SoCs have a configurable physical address space layout: the physical ranges of memory used to address PCI(e) interfaces, NOR flashes, SRAM and various other types of memory are configurable by software, through 
a mechanism of so-called 'address decoding windows'. This new driver mvebu-mbus consolidates the existing code to address the configuration of these memory ranges, which was spread across mach-mvebu, mach-orion5x, mach-mv78xx0, mach-dove and mach-kirkwood. Following patches convert each Marvell EBU SoC family to use this driver, therefore removing the old code that was configuring the address decoding windows. It is worth mentioning that the MVEBU_MBUS Kconfig option is intentionally added as a blind option. The new driver implements and exports the mv_mbus_dram_info() function, which is used by various Marvell drivers throughout the tree to get access to window configuration parameters that they require. This function is also implemented in arch/arm/plat-orion/addr-map.c, which ultimately gets removed at the end of this patch series. So, in order to preserve bisectability, we want to ensure that *either* this new driver, *or* the legacy code in plat-orion/addr-map.c gets compiled in. By making MVEBU_MBUS a blind option, we are sure that only a platform that does 'select MVEBU_MBUS' will get this new driver compiled in. Therefore, throughout the next patches that convert the Marvell sub-architectures one after the other to this new driver, we add the 'select MVEBU_MBUS' and also remove plat-orion/addr-map.c from the build for this specific sub-architecture. This ensures that bisectability is preserved. Earlier versions of this driver had a DT binding, but since those were not yet agreed upon, they were removed. The driver still uses of_device_id to find the SoC-specific details according to the string passed to mvebu_mbus_init(). The plan is to re-introduce a proper DT binding as a follow-up set of patches. Signed-off-by: Thomas Petazzoni Acked-by: Arnd Bergmann Signed-off-by: Jason Cooper --- drivers/bus/Kconfig | 7 + drivers/bus/Makefile | 1 + drivers/bus/mvebu-mbus.c | 867 +++++++++++++++++++++++++++++++++++++++ include/linux/mbus.h | 24 +- 4 files changed, 898 insertions(+), 1 deletion(-) create mode 100644 drivers/bus/mvebu-mbus.c diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 0f51ed687dc8..b05ecab915c4 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -4,6 +4,13 @@ menu "Bus devices" +config MVEBU_MBUS + bool + depends on PLAT_ORION + help + Driver needed for the MBus configuration on Marvell EBU SoCs + (Kirkwood, Dove, Orion5x, MV78XX0 and Armada 370/XP). + config OMAP_OCP2SCP tristate "OMAP OCP2SCP DRIVER" depends on ARCH_OMAP2PLUS diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile index 45d997c85453..3c7b53c12091 100644 --- a/drivers/bus/Makefile +++ b/drivers/bus/Makefile @@ -2,6 +2,7 @@ # Makefile for the bus drivers. # +obj-$(CONFIG_MVEBU_MBUS) += mvebu-mbus.o obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o # Interconnect bus driver for OMAP SoCs. diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c new file mode 100644 index 000000000000..586d03e29e9e --- /dev/null +++ b/drivers/bus/mvebu-mbus.c @@ -0,0 +1,867 @@ +/* + * Address map functions for Marvell EBU SoCs (Kirkwood, Armada + * 370/XP, Dove, Orion5x and MV78xx0) + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * The Marvell EBU SoCs have a configurable physical address space: + * the physical address at which certain devices (PCIe, NOR, NAND, + * etc.) sit can be configured. 
The configuration takes place through + * two sets of registers: + * + * - One to configure the access of the CPU to the devices. Depending + * on the families, there are between 8 and 20 configurable windows, + * each of which can be used to create a physical memory window that maps to a + * specific device. Devices are identified by a tuple (target, + * attribute). + * + * - One to configure the access of the CPU to the SDRAM. There are + * either 2 (for Dove) or 4 (for other families) windows to map the + * SDRAM into the physical address space. + * + * This driver: + * + * - Reads out the SDRAM address decoding windows at initialization + * time, and fills the mvebu_mbus_dram_info structure with this + * information. The exported function mv_mbus_dram_info() allows + * device drivers to get the information related to the SDRAM + * address decoding windows. This is because devices also have their + * own windows (configured through registers that are part of each + * device register space), and therefore the drivers for Marvell + * devices have to configure those device -> SDRAM windows to ensure + * that DMA works properly. + * + * - Provides an API for platform code or device drivers to + * dynamically add or remove address decoding windows for the CPU -> + * device accesses. This API is mvebu_mbus_add_window(), + * mvebu_mbus_add_window_remap_flags() and + * mvebu_mbus_del_window(). Since the (target, attribute) values + * differ from one SoC family to another, the API uses a 'const char + * *' string to identify devices, and this driver is responsible for + * knowing the mapping between the name of a device and its + * corresponding (target, attribute) in the current SoC family. + * + * - Provides a debugfs interface in /sys/kernel/debug/mvebu-mbus/ to + * see the list of CPU -> SDRAM windows and their configuration + * (file 'sdram') and the list of CPU -> devices windows and their + * configuration (file 'devices'). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * DDR target is the same on all platforms. + */ +#define TARGET_DDR 0 + +/* + * CPU Address Decode Windows registers + */ +#define WIN_CTRL_OFF 0x0000 +#define WIN_CTRL_ENABLE BIT(0) +#define WIN_CTRL_TGT_MASK 0xf0 +#define WIN_CTRL_TGT_SHIFT 4 +#define WIN_CTRL_ATTR_MASK 0xff00 +#define WIN_CTRL_ATTR_SHIFT 8 +#define WIN_CTRL_SIZE_MASK 0xffff0000 +#define WIN_CTRL_SIZE_SHIFT 16 +#define WIN_BASE_OFF 0x0004 +#define WIN_BASE_LOW 0xffff0000 +#define WIN_BASE_HIGH 0xf +#define WIN_REMAP_LO_OFF 0x0008 +#define WIN_REMAP_LOW 0xffff0000 +#define WIN_REMAP_HI_OFF 0x000c + +#define ATTR_HW_COHERENCY (0x1 << 4) + +#define DDR_BASE_CS_OFF(n) (0x0000 + ((n) << 3)) +#define DDR_BASE_CS_HIGH_MASK 0xf +#define DDR_BASE_CS_LOW_MASK 0xff000000 +#define DDR_SIZE_CS_OFF(n) (0x0004 + ((n) << 3)) +#define DDR_SIZE_ENABLED BIT(0) +#define DDR_SIZE_CS_MASK 0x1c +#define DDR_SIZE_CS_SHIFT 2 +#define DDR_SIZE_MASK 0xff000000 + +#define DOVE_DDR_BASE_CS_OFF(n) ((n) << 4) + +struct mvebu_mbus_mapping { + const char *name; + u8 target; + u8 attr; + u8 attrmask; +}; + +/* + * Masks used for the 'attrmask' field of mvebu_mbus_mapping. They + * make it possible to recover the real attribute value, discarding the special bits + * used to select a PCI MEM region or a PCI WA region. This allows the + * debugfs code to reverse-match the name of a device from its + * target/attr values. + * + * For all devices except PCI, all bits of 'attr' must be + * considered. 
For most SoCs, only bit 3 should be ignored (it selects + * between PCI MEM and PCI I/O). On Orion5x, however, there + * is a special bit 5 to select a PCI WA region. + */ +#define MAPDEF_NOMASK 0xff +#define MAPDEF_PCIMASK 0xf7 +#define MAPDEF_ORIONPCIMASK 0xd7 + +/* Macro used to define one mvebu_mbus_mapping entry */ +#define MAPDEF(__n, __t, __a, __m) \ + { .name = __n, .target = __t, .attr = __a, .attrmask = __m } + +struct mvebu_mbus_state; + +struct mvebu_mbus_soc_data { + unsigned int num_wins; + unsigned int num_remappable_wins; + unsigned int (*win_cfg_offset)(const int win); + void (*setup_cpu_target)(struct mvebu_mbus_state *s); + int (*show_cpu_target)(struct mvebu_mbus_state *s, + struct seq_file *seq, void *v); + const struct mvebu_mbus_mapping *map; +}; + +struct mvebu_mbus_state { + void __iomem *mbuswins_base; + void __iomem *sdramwins_base; + struct dentry *debugfs_root; + struct dentry *debugfs_sdram; + struct dentry *debugfs_devs; + const struct mvebu_mbus_soc_data *soc; + int hw_io_coherency; +}; + +static struct mvebu_mbus_state mbus_state; + +static struct mbus_dram_target_info mvebu_mbus_dram_info; +const struct mbus_dram_target_info *mv_mbus_dram_info(void) +{ + return &mvebu_mbus_dram_info; +} +EXPORT_SYMBOL_GPL(mv_mbus_dram_info); + +/* + * Functions to manipulate the address decoding windows + */ + +static void mvebu_mbus_read_window(struct mvebu_mbus_state *mbus, + int win, int *enabled, u64 *base, + u32 *size, u8 *target, u8 *attr, + u64 *remap) +{ + void __iomem *addr = mbus->mbuswins_base + + mbus->soc->win_cfg_offset(win); + u32 basereg = readl(addr + WIN_BASE_OFF); + u32 ctrlreg = readl(addr + WIN_CTRL_OFF); + + if (!(ctrlreg & WIN_CTRL_ENABLE)) { + *enabled = 0; + return; + } + + *enabled = 1; + *base = ((u64)basereg & WIN_BASE_HIGH) << 32; + *base |= (basereg & WIN_BASE_LOW); + *size = (ctrlreg | ~WIN_CTRL_SIZE_MASK) + 1; + + if (target) + *target = (ctrlreg & WIN_CTRL_TGT_MASK) >> WIN_CTRL_TGT_SHIFT; + + if (attr) + *attr = (ctrlreg & WIN_CTRL_ATTR_MASK) >> WIN_CTRL_ATTR_SHIFT; + + if (remap) { + if (win < mbus->soc->num_remappable_wins) { + u32 remap_low = readl(addr + WIN_REMAP_LO_OFF); + u32 remap_hi = readl(addr + WIN_REMAP_HI_OFF); + *remap = ((u64)remap_hi << 32) | remap_low; + } else + *remap = 0; + } +} + +static void mvebu_mbus_disable_window(struct mvebu_mbus_state *mbus, + int win) +{ + void __iomem *addr; + + addr = mbus->mbuswins_base + mbus->soc->win_cfg_offset(win); + + writel(0, addr + WIN_BASE_OFF); + writel(0, addr + WIN_CTRL_OFF); + if (win < mbus->soc->num_remappable_wins) { + writel(0, addr + WIN_REMAP_LO_OFF); + writel(0, addr + WIN_REMAP_HI_OFF); + } +} + +/* Checks whether the given window number is available */ +static int mvebu_mbus_window_is_free(struct mvebu_mbus_state *mbus, + const int win) +{ + void __iomem *addr = mbus->mbuswins_base + + mbus->soc->win_cfg_offset(win); + u32 ctrl = readl(addr + WIN_CTRL_OFF); + return !(ctrl & WIN_CTRL_ENABLE); +} + +/* + * Checks whether the given (base, base+size) area doesn't overlap an + * existing region + */ +static int mvebu_mbus_window_conflicts(struct mvebu_mbus_state *mbus, + phys_addr_t base, size_t size, + u8 target, u8 attr) +{ + u64 end = (u64)base + size; + int win; + + for (win = 0; win < mbus->soc->num_wins; win++) { + u64 wbase, wend; + u32 wsize; + u8 wtarget, wattr; + int enabled; + + mvebu_mbus_read_window(mbus, win, + &enabled, &wbase, &wsize, + &wtarget, &wattr, NULL); + + if (!enabled) + continue; + + wend = wbase + wsize; + + /* + * Check if the current 
window overlaps with the + * proposed physical range + */ + if ((u64)base < wend && end > wbase) + return 0; + + /* + * Check if target/attribute conflicts + */ + if (target == wtarget && attr == wattr) + return 0; + } + + return 1; +} + +static int mvebu_mbus_find_window(struct mvebu_mbus_state *mbus, + phys_addr_t base, size_t size) +{ + int win; + + for (win = 0; win < mbus->soc->num_wins; win++) { + u64 wbase; + u32 wsize; + int enabled; + + mvebu_mbus_read_window(mbus, win, + &enabled, &wbase, &wsize, + NULL, NULL, NULL); + + if (!enabled) + continue; + + if (base == wbase && size == wsize) + return win; + } + + return -ENODEV; +} + +static int mvebu_mbus_setup_window(struct mvebu_mbus_state *mbus, + int win, phys_addr_t base, size_t size, + phys_addr_t remap, u8 target, + u8 attr) +{ + void __iomem *addr = mbus->mbuswins_base + + mbus->soc->win_cfg_offset(win); + u32 ctrl, remap_addr; + + ctrl = ((size - 1) & WIN_CTRL_SIZE_MASK) | + (attr << WIN_CTRL_ATTR_SHIFT) | + (target << WIN_CTRL_TGT_SHIFT) | + WIN_CTRL_ENABLE; + + writel(base & WIN_BASE_LOW, addr + WIN_BASE_OFF); + writel(ctrl, addr + WIN_CTRL_OFF); + if (win < mbus->soc->num_remappable_wins) { + if (remap == MVEBU_MBUS_NO_REMAP) + remap_addr = base; + else + remap_addr = remap; + writel(remap_addr & WIN_REMAP_LOW, addr + WIN_REMAP_LO_OFF); + writel(0, addr + WIN_REMAP_HI_OFF); + } + + return 0; +} + +static int mvebu_mbus_alloc_window(struct mvebu_mbus_state *mbus, + phys_addr_t base, size_t size, + phys_addr_t remap, u8 target, + u8 attr) +{ + int win; + + if (remap == MVEBU_MBUS_NO_REMAP) { + for (win = mbus->soc->num_remappable_wins; + win < mbus->soc->num_wins; win++) + if (mvebu_mbus_window_is_free(mbus, win)) + return mvebu_mbus_setup_window(mbus, win, base, + size, remap, + target, attr); + } + + + for (win = 0; win < mbus->soc->num_wins; win++) + if (mvebu_mbus_window_is_free(mbus, win)) + return mvebu_mbus_setup_window(mbus, win, base, size, + remap, target, attr); + + return -ENOMEM; +} + +/* + * Debugfs debugging + */ + +/* Common function used for Dove, Kirkwood, Armada 370/XP and Orion 5x */ +static int mvebu_sdram_debug_show_orion(struct mvebu_mbus_state *mbus, + struct seq_file *seq, void *v) +{ + int i; + + for (i = 0; i < 4; i++) { + u32 basereg = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i)); + u32 sizereg = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i)); + u64 base; + u32 size; + + if (!(sizereg & DDR_SIZE_ENABLED)) { + seq_printf(seq, "[%d] disabled\n", i); + continue; + } + + base = ((u64)basereg & DDR_BASE_CS_HIGH_MASK) << 32; + base |= basereg & DDR_BASE_CS_LOW_MASK; + size = (sizereg | ~DDR_SIZE_MASK); + + seq_printf(seq, "[%d] %016llx - %016llx : cs%d\n", + i, (unsigned long long)base, + (unsigned long long)base + size + 1, + (sizereg & DDR_SIZE_CS_MASK) >> DDR_SIZE_CS_SHIFT); + } + + return 0; +} + +/* Special function for Dove */ +static int mvebu_sdram_debug_show_dove(struct mvebu_mbus_state *mbus, + struct seq_file *seq, void *v) +{ + int i; + + for (i = 0; i < 2; i++) { + u32 map = readl(mbus->sdramwins_base + DOVE_DDR_BASE_CS_OFF(i)); + u64 base; + u32 size; + + if (!(map & 1)) { + seq_printf(seq, "[%d] disabled\n", i); + continue; + } + + base = map & 0xff800000; + size = 0x100000 << (((map & 0x000f0000) >> 16) - 4); + + seq_printf(seq, "[%d] %016llx - %016llx : cs%d\n", + i, (unsigned long long)base, + (unsigned long long)base + size, i); + } + + return 0; +} + +static int mvebu_sdram_debug_show(struct seq_file *seq, void *v) +{ + struct mvebu_mbus_state *mbus = &mbus_state; + return 
mbus->soc->show_cpu_target(mbus, seq, v); +} + +static int mvebu_sdram_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mvebu_sdram_debug_show, inode->i_private); +} + +static const struct file_operations mvebu_sdram_debug_fops = { + .open = mvebu_sdram_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int mvebu_devs_debug_show(struct seq_file *seq, void *v) +{ + struct mvebu_mbus_state *mbus = &mbus_state; + int win; + + for (win = 0; win < mbus->soc->num_wins; win++) { + u64 wbase, wremap; + u32 wsize; + u8 wtarget, wattr; + int enabled, i; + const char *name; + + mvebu_mbus_read_window(mbus, win, + &enabled, &wbase, &wsize, + &wtarget, &wattr, &wremap); + + if (!enabled) { + seq_printf(seq, "[%02d] disabled\n", win); + continue; + } + + + for (i = 0; mbus->soc->map[i].name; i++) + if (mbus->soc->map[i].target == wtarget && + mbus->soc->map[i].attr == + (wattr & mbus->soc->map[i].attrmask)) + break; + + name = mbus->soc->map[i].name ?: "unknown"; + + seq_printf(seq, "[%02d] %016llx - %016llx : %s", + win, (unsigned long long)wbase, + (unsigned long long)(wbase + wsize), name); + + if (win < mbus->soc->num_remappable_wins) { + seq_printf(seq, " (remap %016llx)\n", + (unsigned long long)wremap); + } else + seq_printf(seq, "\n"); + } + + return 0; +} + +static int mvebu_devs_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mvebu_devs_debug_show, inode->i_private); +} + +static const struct file_operations mvebu_devs_debug_fops = { + .open = mvebu_devs_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * SoC-specific functions and definitions + */ + +static unsigned int orion_mbus_win_offset(int win) +{ + return win << 4; +} + +static unsigned int armada_370_xp_mbus_win_offset(int win) +{ + /* The register layout is a bit annoying and the below code + * tries to cope with it. + * - At offset 0x0, there are the registers for the first 8 + * windows, with 4 registers of 32 bits per window (ctrl, + * base, remap low, remap high) + * - Then at offset 0x80, there is a hole of 0x10 bytes for + * the internal registers base address and internal units + * sync barrier register. + * - Then at offset 0x90, there the registers for 12 + * windows, with only 2 registers of 32 bits per window + * (ctrl, base). + */ + if (win < 8) + return win << 4; + else + return 0x90 + ((win - 8) << 3); +} + +static unsigned int mv78xx0_mbus_win_offset(int win) +{ + if (win < 8) + return win << 4; + else + return 0x900 + ((win - 8) << 4); +} + +static void __init +mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus) +{ + int i; + int cs; + + mvebu_mbus_dram_info.mbus_dram_target_id = TARGET_DDR; + + for (i = 0, cs = 0; i < 4; i++) { + u32 base = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i)); + u32 size = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i)); + + /* + * We only take care of entries for which the chip + * select is enabled, and that don't have high base + * address bits set (devices can only access the first + * 32 bits of the memory). 
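+	 * The chip select itself is encoded as a 4-bit, active-low mask
+	 * in the window attribute, which is why the code below uses
+	 * 0xf & ~(1 << i) (CS0 -> 0xe, CS1 -> 0xd, ...).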
+ */ + if ((size & DDR_SIZE_ENABLED) && + !(base & DDR_BASE_CS_HIGH_MASK)) { + struct mbus_dram_window *w; + + w = &mvebu_mbus_dram_info.cs[cs++]; + w->cs_index = i; + w->mbus_attr = 0xf & ~(1 << i); + if (mbus->hw_io_coherency) + w->mbus_attr |= ATTR_HW_COHERENCY; + w->base = base & DDR_BASE_CS_LOW_MASK; + w->size = (size | ~DDR_SIZE_MASK) + 1; + } + } + mvebu_mbus_dram_info.num_cs = cs; +} + +static void __init +mvebu_mbus_dove_setup_cpu_target(struct mvebu_mbus_state *mbus) +{ + int i; + int cs; + + mvebu_mbus_dram_info.mbus_dram_target_id = TARGET_DDR; + + for (i = 0, cs = 0; i < 2; i++) { + u32 map = readl(mbus->sdramwins_base + DOVE_DDR_BASE_CS_OFF(i)); + + /* + * Chip select enabled? + */ + if (map & 1) { + struct mbus_dram_window *w; + + w = &mvebu_mbus_dram_info.cs[cs++]; + w->cs_index = i; + w->mbus_attr = 0; /* CS address decoding done inside */ + /* the DDR controller, no need to */ + /* provide attributes */ + w->base = map & 0xff800000; + w->size = 0x100000 << (((map & 0x000f0000) >> 16) - 4); + } + } + + mvebu_mbus_dram_info.num_cs = cs; +} + +static const struct mvebu_mbus_mapping armada_370_map[] = { + MAPDEF("bootrom", 1, 0xe0, MAPDEF_NOMASK), + MAPDEF("devbus-boot", 1, 0x2f, MAPDEF_NOMASK), + MAPDEF("devbus-cs0", 1, 0x3e, MAPDEF_NOMASK), + MAPDEF("devbus-cs1", 1, 0x3d, MAPDEF_NOMASK), + MAPDEF("devbus-cs2", 1, 0x3b, MAPDEF_NOMASK), + MAPDEF("devbus-cs3", 1, 0x37, MAPDEF_NOMASK), + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data armada_370_mbus_data = { + .num_wins = 20, + .num_remappable_wins = 8, + .win_cfg_offset = armada_370_xp_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = armada_370_map, +}; + +static const struct mvebu_mbus_mapping armada_xp_map[] = { + MAPDEF("bootrom", 1, 0x1d, MAPDEF_NOMASK), + MAPDEF("devbus-boot", 1, 0x2f, MAPDEF_NOMASK), + MAPDEF("devbus-cs0", 1, 0x3e, MAPDEF_NOMASK), + MAPDEF("devbus-cs1", 1, 0x3d, MAPDEF_NOMASK), + MAPDEF("devbus-cs2", 1, 0x3b, MAPDEF_NOMASK), + MAPDEF("devbus-cs3", 1, 0x37, MAPDEF_NOMASK), + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie0.1", 4, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie0.2", 4, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie0.3", 4, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.1", 8, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie1.2", 8, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie1.3", 8, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie2.0", 4, 0xf0, MAPDEF_PCIMASK), + MAPDEF("pcie3.0", 8, 0xf0, MAPDEF_PCIMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data armada_xp_mbus_data = { + .num_wins = 20, + .num_remappable_wins = 8, + .win_cfg_offset = armada_370_xp_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = armada_xp_map, +}; + +static const struct mvebu_mbus_mapping kirkwood_map[] = { + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("sram", 3, 0x01, MAPDEF_NOMASK), + MAPDEF("nand", 1, 0x2f, MAPDEF_NOMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data kirkwood_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 4, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = kirkwood_map, +}; + +static const struct mvebu_mbus_mapping dove_map[] = { + 
MAPDEF("pcie0.0", 0x4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 0x8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("cesa", 0x3, 0x01, MAPDEF_NOMASK), + MAPDEF("bootrom", 0x1, 0xfd, MAPDEF_NOMASK), + MAPDEF("scratchpad", 0xd, 0x0, MAPDEF_NOMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data dove_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 4, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_dove_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_dove, + .map = dove_map, +}; + +static const struct mvebu_mbus_mapping orion5x_map[] = { + MAPDEF("pcie0.0", 4, 0x51, MAPDEF_ORIONPCIMASK), + MAPDEF("pci0.0", 3, 0x51, MAPDEF_ORIONPCIMASK), + MAPDEF("devbus-boot", 1, 0x0f, MAPDEF_NOMASK), + MAPDEF("devbus-cs0", 1, 0x1e, MAPDEF_NOMASK), + MAPDEF("devbus-cs1", 1, 0x1d, MAPDEF_NOMASK), + MAPDEF("devbus-cs2", 1, 0x1b, MAPDEF_NOMASK), + MAPDEF("sram", 0, 0x00, MAPDEF_NOMASK), + {}, +}; + +/* + * Some variants of Orion5x have 4 remappable windows, some other have + * only two of them. + */ +static const struct mvebu_mbus_soc_data orion5x_4win_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 4, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = orion5x_map, +}; + +static const struct mvebu_mbus_soc_data orion5x_2win_mbus_data = { + .num_wins = 8, + .num_remappable_wins = 2, + .win_cfg_offset = orion_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = orion5x_map, +}; + +static const struct mvebu_mbus_mapping mv78xx0_map[] = { + MAPDEF("pcie0.0", 4, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie0.1", 4, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie0.2", 4, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie0.3", 4, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie1.0", 8, 0xe0, MAPDEF_PCIMASK), + MAPDEF("pcie1.1", 8, 0xd0, MAPDEF_PCIMASK), + MAPDEF("pcie1.2", 8, 0xb0, MAPDEF_PCIMASK), + MAPDEF("pcie1.3", 8, 0x70, MAPDEF_PCIMASK), + MAPDEF("pcie2.0", 4, 0xf0, MAPDEF_PCIMASK), + MAPDEF("pcie3.0", 8, 0xf0, MAPDEF_PCIMASK), + {}, +}; + +static const struct mvebu_mbus_soc_data mv78xx0_mbus_data = { + .num_wins = 14, + .num_remappable_wins = 8, + .win_cfg_offset = mv78xx0_mbus_win_offset, + .setup_cpu_target = mvebu_mbus_default_setup_cpu_target, + .show_cpu_target = mvebu_sdram_debug_show_orion, + .map = mv78xx0_map, +}; + +/* + * The driver doesn't yet have a DT binding because the details of + * this DT binding still need to be sorted out. However, as a + * preparation, we already use of_device_id to match a SoC description + * string against the SoC specific details of this driver. 
+ */ +static const struct of_device_id of_mvebu_mbus_ids[] = { + { .compatible = "marvell,armada370-mbus", + .data = &armada_370_mbus_data, }, + { .compatible = "marvell,armadaxp-mbus", + .data = &armada_xp_mbus_data, }, + { .compatible = "marvell,kirkwood-mbus", + .data = &kirkwood_mbus_data, }, + { .compatible = "marvell,dove-mbus", + .data = &dove_mbus_data, }, + { .compatible = "marvell,orion5x-88f5281-mbus", + .data = &orion5x_4win_mbus_data, }, + { .compatible = "marvell,orion5x-88f5182-mbus", + .data = &orion5x_2win_mbus_data, }, + { .compatible = "marvell,orion5x-88f5181-mbus", + .data = &orion5x_2win_mbus_data, }, + { .compatible = "marvell,orion5x-88f6183-mbus", + .data = &orion5x_4win_mbus_data, }, + { .compatible = "marvell,mv78xx0-mbus", + .data = &mv78xx0_mbus_data, }, + { }, +}; + +/* + * Public API of the driver + */ +int mvebu_mbus_add_window_remap_flags(const char *devname, phys_addr_t base, + size_t size, phys_addr_t remap, + unsigned int flags) +{ + struct mvebu_mbus_state *s = &mbus_state; + u8 target, attr; + int i; + + if (!s->soc->map) + return -ENODEV; + + for (i = 0; s->soc->map[i].name; i++) + if (!strcmp(s->soc->map[i].name, devname)) + break; + + if (!s->soc->map[i].name) { + pr_err("mvebu-mbus: unknown device '%s'\n", devname); + return -ENODEV; + } + + target = s->soc->map[i].target; + attr = s->soc->map[i].attr; + + if (flags == MVEBU_MBUS_PCI_MEM) + attr |= 0x8; + else if (flags == MVEBU_MBUS_PCI_WA) + attr |= 0x28; + + if (!mvebu_mbus_window_conflicts(s, base, size, target, attr)) { + pr_err("mvebu-mbus: cannot add window '%s', conflicts with another window\n", + devname); + return -EINVAL; + } + + return mvebu_mbus_alloc_window(s, base, size, remap, target, attr); + +} + +int mvebu_mbus_add_window(const char *devname, phys_addr_t base, size_t size) +{ + return mvebu_mbus_add_window_remap_flags(devname, base, size, + MVEBU_MBUS_NO_REMAP, 0); +} + +int mvebu_mbus_del_window(phys_addr_t base, size_t size) +{ + int win; + + win = mvebu_mbus_find_window(&mbus_state, base, size); + if (win < 0) + return win; + + mvebu_mbus_disable_window(&mbus_state, win); + return 0; +} + +static __init int mvebu_mbus_debugfs_init(void) +{ + struct mvebu_mbus_state *s = &mbus_state; + + /* + * If no base has been initialized, doesn't make sense to + * register the debugfs entries. We may be on a multiplatform + * kernel that isn't running a Marvell EBU SoC. 
+ */ + if (!s->mbuswins_base) + return 0; + + s->debugfs_root = debugfs_create_dir("mvebu-mbus", NULL); + if (s->debugfs_root) { + s->debugfs_sdram = debugfs_create_file("sdram", S_IRUGO, + s->debugfs_root, NULL, + &mvebu_sdram_debug_fops); + s->debugfs_devs = debugfs_create_file("devices", S_IRUGO, + s->debugfs_root, NULL, + &mvebu_devs_debug_fops); + } + + return 0; +} +fs_initcall(mvebu_mbus_debugfs_init); + +int __init mvebu_mbus_init(const char *soc, phys_addr_t mbuswins_phys_base, + size_t mbuswins_size, + phys_addr_t sdramwins_phys_base, + size_t sdramwins_size) +{ + struct mvebu_mbus_state *mbus = &mbus_state; + const struct of_device_id *of_id; + int win; + + for (of_id = of_mvebu_mbus_ids; of_id->compatible; of_id++) + if (!strcmp(of_id->compatible, soc)) + break; + + if (!of_id->compatible) { + pr_err("mvebu-mbus: could not find a matching SoC family\n"); + return -ENODEV; + } + + mbus->soc = of_id->data; + + mbus->mbuswins_base = ioremap(mbuswins_phys_base, mbuswins_size); + if (!mbus->mbuswins_base) + return -ENOMEM; + + mbus->sdramwins_base = ioremap(sdramwins_phys_base, sdramwins_size); + if (!mbus->sdramwins_base) { + iounmap(mbus_state.mbuswins_base); + return -ENOMEM; + } + + for (win = 0; win < mbus->soc->num_wins; win++) + mvebu_mbus_disable_window(mbus, win); + + mbus->soc->setup_cpu_target(mbus); + + return 0; +} diff --git a/include/linux/mbus.h b/include/linux/mbus.h index efa1a6d7aca8..462eb9791012 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -32,6 +32,17 @@ struct mbus_dram_target_info } cs[4]; }; +/* Flags for PCI/PCIe address decoding regions */ +#define MVEBU_MBUS_PCI_IO 0x1 +#define MVEBU_MBUS_PCI_MEM 0x2 +#define MVEBU_MBUS_PCI_WA 0x3 + +/* + * Magic value that explicits that we don't need a remapping-capable + * address decoding window. + */ +#define MVEBU_MBUS_NO_REMAP (0xffffffff) + /* * The Marvell mbus is to be found only on SOCs from the Orion family * at the moment. Provide a dummy stub for other architectures. @@ -44,4 +55,15 @@ static inline const struct mbus_dram_target_info *mv_mbus_dram_info(void) return NULL; } #endif -#endif + +int mvebu_mbus_add_window_remap_flags(const char *devname, phys_addr_t base, + size_t size, phys_addr_t remap, + unsigned int flags); +int mvebu_mbus_add_window(const char *devname, phys_addr_t base, + size_t size); +int mvebu_mbus_del_window(phys_addr_t base, size_t size); +int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base, + size_t mbus_size, phys_addr_t sdram_phys_base, + size_t sdram_size); + +#endif /* __LINUX_MBUS_H */ -- GitLab From 8d007488731981e921346a46997dfe9f08cb8201 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Sun, 24 Mar 2013 15:45:30 +0100 Subject: [PATCH 0264/3163] ARM: Orion: add dbg_show function to gpio-orion driver This patch adds a dedicated dbg_show function to the gpio-mvebu driver. In addition to the generic gpiolib informations, this function displays informations related with the specific Marvell registers (blink enable, data in polarity, interrupt masks and cause). 
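For reference, gpiolib exposes this hook through its generic debugfs file, so the extra register state shows up when reading /sys/kernel/debug/gpio. A hypothetical excerpt matching the format strings used below (labels and numbers depend on the board):

    # cat /sys/kernel/debug/gpio
     gpio-13  (led_green           ) out lo (blink )
     gpio-28  (card_detect         ) in hi (act hi) - IRQ edge  (clear )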
Signed-off-by: Simon Guinot Signed-off-by: Jason Cooper --- arch/arm/plat-orion/gpio.c | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index c29ee7ea200b..e39c2ba6e2fb 100644 --- a/arch/arm/plat-orion/gpio.c +++ b/arch/arm/plat-orion/gpio.c @@ -439,6 +439,64 @@ static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) } } +#ifdef CONFIG_DEBUG_FS +#include + +static void orion_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) +{ + struct orion_gpio_chip *ochip = + container_of(chip, struct orion_gpio_chip, chip); + u32 out, io_conf, blink, in_pol, data_in, cause, edg_msk, lvl_msk; + int i; + + out = readl_relaxed(GPIO_OUT(ochip)); + io_conf = readl_relaxed(GPIO_IO_CONF(ochip)); + blink = readl_relaxed(GPIO_BLINK_EN(ochip)); + in_pol = readl_relaxed(GPIO_IN_POL(ochip)); + data_in = readl_relaxed(GPIO_DATA_IN(ochip)); + cause = readl_relaxed(GPIO_EDGE_CAUSE(ochip)); + edg_msk = readl_relaxed(GPIO_EDGE_MASK(ochip)); + lvl_msk = readl_relaxed(GPIO_LEVEL_MASK(ochip)); + + for (i = 0; i < chip->ngpio; i++) { + const char *label; + u32 msk; + bool is_out; + + label = gpiochip_is_requested(chip, i); + if (!label) + continue; + + msk = 1 << i; + is_out = !(io_conf & msk); + + seq_printf(s, " gpio-%-3d (%-20.20s)", chip->base + i, label); + + if (is_out) { + seq_printf(s, " out %s %s\n", + out & msk ? "hi" : "lo", + blink & msk ? "(blink )" : ""); + continue; + } + + seq_printf(s, " in %s (act %s) - IRQ", + (data_in ^ in_pol) & msk ? "hi" : "lo", + in_pol & msk ? "lo" : "hi"); + if (!((edg_msk | lvl_msk) & msk)) { + seq_printf(s, " disabled\n"); + continue; + } + if (edg_msk & msk) + seq_printf(s, " edge "); + if (lvl_msk & msk) + seq_printf(s, " level"); + seq_printf(s, " (%s)\n", cause & msk ? "pending" : "clear "); + } +} +#else +#define orion_gpio_dbg_show NULL +#endif + void __init orion_gpio_init(struct device_node *np, int gpio_base, int ngpio, void __iomem *base, int mask_offset, @@ -471,6 +529,7 @@ void __init orion_gpio_init(struct device_node *np, #ifdef CONFIG_OF ochip->chip.of_node = np; #endif + ochip->chip.dbg_show = orion_gpio_dbg_show; spin_lock_init(&ochip->lock); ochip->base = (void __iomem *)base; -- GitLab From fe0cd96c009b14cc85d41a97f84a6453e4bbfd7f Mon Sep 17 00:00:00 2001 From: Neil Greatorex Date: Sat, 30 Mar 2013 20:41:20 +0000 Subject: [PATCH 0265/3163] bus: mvebu-mbus: Restore checking for coherency fabric hardware The new mvebu-mbus driver was not checking the device tree for coherency fabric hardware and hence was not setting the hw_io_coherency flag in mbus_state. This prevented the mvsdio driver from operating correctly. This patch restores the check. 
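To see why the flag matters, here is its path through the driver, stitched together from the code earlier in this series (nothing new, just the two relevant hunks side by side):

	/* restored in mvebu_mbus_init(): */
	if (of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric"))
		mbus->hw_io_coherency = 1;

	/* consumed in mvebu_mbus_default_setup_cpu_target(): */
	if (mbus->hw_io_coherency)
		w->mbus_attr |= ATTR_HW_COHERENCY;

Without the first hunk, DRAM windows were never tagged ATTR_HW_COHERENCY, which is what broke mvsdio.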
Signed-off-by: Neil Greatorex Signed-off-by: Jason Cooper --- drivers/bus/mvebu-mbus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index 586d03e29e9e..a0250c65440e 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -858,6 +858,9 @@ int __init mvebu_mbus_init(const char *soc, phys_addr_t mbuswins_phys_base, return -ENOMEM; } + if (of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric")) + mbus->hw_io_coherency = 1; + for (win = 0; win < mbus->soc->num_wins; win++) mvebu_mbus_disable_window(mbus, win); -- GitLab From 79b5793be44d97c0a0e905c221858af08e5ebd85 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghiu Date: Thu, 28 Mar 2013 02:24:53 +0200 Subject: [PATCH 0266/3163] f2fs: use kmemdup Use kmemdup instead of kzalloc and memcpy. Signed-off-by: Alexandru Gheorghiu Acked-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 12 +++++------- fs/f2fs/segment.c | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0177f9434c25..10cbee9dc3d1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1666,19 +1666,17 @@ static int init_node_manager(struct f2fs_sb_info *sbi) spin_lock_init(&nm_i->free_nid_list_lock); rwlock_init(&nm_i->nat_tree_lock); - nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); - - nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL); - if (!nm_i->nat_bitmap) - return -ENOMEM; + nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); if (!version_bitmap) return -EFAULT; - /* copy version bitmap */ - memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size); + nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap) + return -ENOMEM; return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 777f17e496e6..17581495bafb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1403,10 +1403,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi) bitmap_size = __bitmap_size(sbi, SIT_BITMAP); src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); - dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); if (!dst_bitmap) return -ENOMEM; - memcpy(dst_bitmap, src_bitmap, bitmap_size); /* init SIT information */ sit_i->s_ops = &default_salloc_ops; -- GitLab From ff45262a85dbf1bc74463c5dcea1d71a406d4d8e Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Mon, 18 Feb 2013 21:10:14 -0800 Subject: [PATCH 0267/3163] leds: add new LP5562 LED driver LP5562 can drive up to 4 channels, RGB and White. LEDs can be controlled directly via the led class control interface. LP55xx common driver LP5562 is one of LP55xx family device, so LP55xx common code are used. On the other hand, chip specific configuration is defined in the structure 'lp55xx_device_config' LED pattern data LP5562 has also internal program memory which is used for running various LED patterns. LP5562 driver supports the firmware interface and the predefined pattern data as well. LP5562 device attributes: 'led_pattern' and 'engine_mux' A 'led_pattern' is an index code which runs the predefined pattern data. And 'engine_mux' is updated with the firmware interface is activated. Detailed description has been updated in the documentation files, 'leds-lp55xx.txt' and 'leds-lp5562.txt'. 
Changes on the header file LP5562 configurable definitions are added. Pattern RGB data is fixed as constant value. (No side effect on other devices, LP5521 or LP5523.) (cooloney@gmail.com: remove redundant mutex_unlock(). Reported by Dan Carpenter ) Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- Documentation/leds/00-INDEX | 2 + Documentation/leds/leds-lp5562.txt | 135 +++++ Documentation/leds/leds-lp55xx.txt | 46 +- drivers/leds/Kconfig | 14 +- drivers/leds/Makefile | 1 + drivers/leds/leds-lp5562.c | 593 ++++++++++++++++++++++ drivers/leds/leds-lp55xx-common.c | 2 +- include/linux/platform_data/leds-lp55xx.h | 13 +- 8 files changed, 799 insertions(+), 7 deletions(-) create mode 100644 Documentation/leds/leds-lp5562.txt create mode 100644 drivers/leds/leds-lp5562.c diff --git a/Documentation/leds/00-INDEX b/Documentation/leds/00-INDEX index 5246090ef15c..1ecd1596633e 100644 --- a/Documentation/leds/00-INDEX +++ b/Documentation/leds/00-INDEX @@ -6,6 +6,8 @@ leds-lp5521.txt - notes on how to use the leds-lp5521 driver. leds-lp5523.txt - notes on how to use the leds-lp5523 driver. +leds-lp5562.txt + - notes on how to use the leds-lp5562 driver. leds-lp55xx.txt - description about lp55xx common driver. leds-lm3556.txt diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt new file mode 100644 index 000000000000..96061000dd93 --- /dev/null +++ b/Documentation/leds/leds-lp5562.txt @@ -0,0 +1,135 @@ +Kernel driver for LP5562 +======================== + +* TI LP5562 LED Driver + +Author: Milo(Woogyom) Kim + +Description + + LP5562 can drive up to 4 channels. R/G/B and White. + LEDs can be controlled directly via the led class control interface. + + All four channels can be also controlled using the engine micro programs. + LP5562 has the internal program memory for running various LED patterns. + For the details, please refer to 'firmware' section in leds-lp55xx.txt + +Device attribute: engine_mux + + 3 Engines are allocated in LP5562, but the number of channel is 4. + Therefore each channel should be mapped to the engine number. + Value : RGB or W + + This attribute is used for programming LED data with the firmware interface. + Unlike the LP5521/LP5523/55231, LP5562 has unique feature for the engine mux, + so additional sysfs is required. + + LED Map + Red ... Engine 1 (fixed) + Green ... Engine 2 (fixed) + Blue ... Engine 3 (fixed) + White ... Engine 1 or 2 or 3 (selective) + +How to load the program data using engine_mux + + Before loading the LP5562 program data, engine_mux should be written between + the engine selection and loading the firmware. + Engine mux has two different mode, RGB and W. + RGB is used for loading RGB program data, W is used for W program data. 
+ + For example, run blinking green channel pattern, + echo 2 > /sys/bus/i2c/devices/xxxx/select_engine # 2 is for green channel + echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux # engine mux for RGB + echo 1 > /sys/class/firmware/lp5562/loading + echo "4000600040FF6000" > /sys/class/firmware/lp5562/data + echo 0 > /sys/class/firmware/lp5562/loading + echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + + To run a blinking white pattern, + echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine + echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux + echo 1 > /sys/class/firmware/lp5562/loading + echo "4000600040FF6000" > /sys/class/firmware/lp5562/data + echo 0 > /sys/class/firmware/lp5562/loading + echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + +How to load the predefined patterns + + Please refer to 'leds-lp55xx.txt" + +Setting Current of Each Channel + + Like LP5521 and LP5523/55231, LP5562 provides LED current settings. + The 'led_current' and 'max_current' are used. + +(Example of Platform data) + +To configure the platform specific data, lp55xx_platform_data structure is used. + +static struct lp55xx_led_config lp5562_led_config[] = { + { + .name = "R", + .chan_nr = 0, + .led_current = 20, + .max_current = 40, + }, + { + .name = "G", + .chan_nr = 1, + .led_current = 20, + .max_current = 40, + }, + { + .name = "B", + .chan_nr = 2, + .led_current = 20, + .max_current = 40, + }, + { + .name = "W", + .chan_nr = 3, + .led_current = 20, + .max_current = 40, + }, +}; + +static int lp5562_setup(void) +{ + /* setup HW resources */ +} + +static void lp5562_release(void) +{ + /* Release HW resources */ +} + +static void lp5562_enable(bool state) +{ + /* Control of chip enable signal */ +} + +static struct lp55xx_platform_data lp5562_platform_data = { + .led_config = lp5562_led_config, + .num_channels = ARRAY_SIZE(lp5562_led_config), + .setup_resources = lp5562_setup, + .release_resources = lp5562_release, + .enable = lp5562_enable, +}; + +If the current is set to 0 in the platform data, that channel is +disabled and it is not visible in the sysfs. + +The 'update_config' : CONFIG register (ADDR 08h) +This value is platform-specific data. +If update_config is not defined, the CONFIG register is set with +'LP5562_PWRSAVE_EN | LP5562_CLK_AUTO'. +(Enable auto-powersave, set automatic clock source selection) + +#define LP5562_CONFIGS (LP5562_PWM_HF | LP5562_PWRSAVE_EN | \ + LP5562_CLK_SRC_EXT) + +static struct lp55xx_platform_data lp5562_pdata = { + .led_config = lp5562_led_config, + .num_channels = ARRAY_SIZE(lp5562_led_config), + .update_config = LP5562_CONFIGS, +}; diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt index ced41868d2d1..eec8fa2ffe4e 100644 --- a/Documentation/leds/leds-lp55xx.txt +++ b/Documentation/leds/leds-lp55xx.txt @@ -5,7 +5,7 @@ Authors: Milo(Woogyom) Kim Description ----------- -LP5521, LP5523/55231 have common features as below. +LP5521, LP5523/55231 and LP5562 have common features as below. Register access via the I2C Device initialization/deinitialization @@ -116,3 +116,47 @@ To support this, 'run_engine' and 'firmware_cb' are configurable in each driver. run_engine : Control the selected engine firmware_cb : The callback function after loading the firmware is done. Chip specific commands for loading and updating program memory. + +( Predefined pattern data ) + +Without the firmware interface, LP55xx driver provides another method for +loading a LED pattern. That is 'predefined' pattern. 
+A predefined pattern is defined in the platform data and load it(or them) +via the sysfs if needed. +To use the predefined pattern concept, 'patterns' and 'num_patterns' should be +configured. + + Example of predefined pattern data: + + /* mode_1: blinking data */ + static const u8 mode_1[] = { + 0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00, + }; + + /* mode_2: always on */ + static const u8 mode_2[] = { 0x40, 0xFF, }; + + struct lp55xx_predef_pattern board_led_patterns[] = { + { + .r = mode_1, + .size_r = ARRAY_SIZE(mode_1), + }, + { + .b = mode_2, + .size_b = ARRAY_SIZE(mode_2), + }, + } + + struct lp55xx_platform_data lp5562_pdata = { + ... + .patterns = board_led_patterns, + .num_patterns = ARRAY_SIZE(board_led_patterns), + }; + +Then, mode_1 and mode_2 can be run via through the sysfs. + + echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern # red blinking LED pattern + echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern # blue LED always on + +To stop running pattern, + echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index ec50824c02ec..c7f755034375 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -194,8 +194,8 @@ config LEDS_LP3944 module will be called leds-lp3944. config LEDS_LP55XX_COMMON - tristate "Common Driver for TI/National LP5521 and LP5523/55231" - depends on LEDS_LP5521 || LEDS_LP5523 + tristate "Common Driver for TI/National LP5521, LP5523/55231 and LP5562" + depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 select FW_LOADER help This option supports common operations for LP5521 and LP5523/55231 @@ -222,6 +222,16 @@ config LEDS_LP5523 Driver provides direct control via LED class and interface for programming the engines. +config LEDS_LP5562 + tristate "LED Support for TI LP5562 LED driver chip" + depends on LEDS_CLASS && I2C + select LEDS_LP55XX_COMMON + help + If you say yes here you get support for TI LP5562 LED driver. + It is 4 channels chip with programmable engines. + Driver provides direct control via LED class and interface for + programming the engines. + config LEDS_LP8788 tristate "LED support for the TI LP8788 PMIC" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 215e7e3b6173..ab8f5c549ad3 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_LEDS_LP3944) += leds-lp3944.o obj-$(CONFIG_LEDS_LP55XX_COMMON) += leds-lp55xx-common.o obj-$(CONFIG_LEDS_LP5521) += leds-lp5521.o obj-$(CONFIG_LEDS_LP5523) += leds-lp5523.o +obj-$(CONFIG_LEDS_LP5562) += leds-lp5562.o obj-$(CONFIG_LEDS_LP8788) += leds-lp8788.o obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o obj-$(CONFIG_LEDS_CLEVO_MAIL) += leds-clevo-mail.o diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c new file mode 100644 index 000000000000..f8b927788c3a --- /dev/null +++ b/drivers/leds/leds-lp5562.c @@ -0,0 +1,593 @@ +/* + * LP5562 LED driver + * + * Copyright (C) 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "leds-lp55xx-common.h" + +#define LP5562_PROGRAM_LENGTH 32 +#define LP5562_MAX_LEDS 4 + +/* ENABLE Register 00h */ +#define LP5562_REG_ENABLE 0x00 +#define LP5562_EXEC_ENG1_M 0x30 +#define LP5562_EXEC_ENG2_M 0x0C +#define LP5562_EXEC_ENG3_M 0x03 +#define LP5562_EXEC_M 0x3F +#define LP5562_MASTER_ENABLE 0x40 /* Chip master enable */ +#define LP5562_LOGARITHMIC_PWM 0x80 /* Logarithmic PWM adjustment */ +#define LP5562_EXEC_RUN 0x2A +#define LP5562_ENABLE_DEFAULT \ + (LP5562_MASTER_ENABLE | LP5562_LOGARITHMIC_PWM) +#define LP5562_ENABLE_RUN_PROGRAM \ + (LP5562_ENABLE_DEFAULT | LP5562_EXEC_RUN) + +/* OPMODE Register 01h */ +#define LP5562_REG_OP_MODE 0x01 +#define LP5562_MODE_ENG1_M 0x30 +#define LP5562_MODE_ENG2_M 0x0C +#define LP5562_MODE_ENG3_M 0x03 +#define LP5562_LOAD_ENG1 0x10 +#define LP5562_LOAD_ENG2 0x04 +#define LP5562_LOAD_ENG3 0x01 +#define LP5562_RUN_ENG1 0x20 +#define LP5562_RUN_ENG2 0x08 +#define LP5562_RUN_ENG3 0x02 +#define LP5562_ENG1_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG1_M) == LP5562_LOAD_ENG1) +#define LP5562_ENG2_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG2_M) == LP5562_LOAD_ENG2) +#define LP5562_ENG3_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG3_M) == LP5562_LOAD_ENG3) + +/* BRIGHTNESS Registers */ +#define LP5562_REG_R_PWM 0x04 +#define LP5562_REG_G_PWM 0x03 +#define LP5562_REG_B_PWM 0x02 +#define LP5562_REG_W_PWM 0x0E + +/* CURRENT Registers */ +#define LP5562_REG_R_CURRENT 0x07 +#define LP5562_REG_G_CURRENT 0x06 +#define LP5562_REG_B_CURRENT 0x05 +#define LP5562_REG_W_CURRENT 0x0F + +/* CONFIG Register 08h */ +#define LP5562_REG_CONFIG 0x08 +#define LP5562_DEFAULT_CFG \ + (LP5562_PWM_HF | LP5562_PWRSAVE_EN | LP5562_CLK_INT) + +/* RESET Register 0Dh */ +#define LP5562_REG_RESET 0x0D +#define LP5562_RESET 0xFF + +/* PROGRAM ENGINE Registers */ +#define LP5562_REG_PROG_MEM_ENG1 0x10 +#define LP5562_REG_PROG_MEM_ENG2 0x30 +#define LP5562_REG_PROG_MEM_ENG3 0x50 + +/* LEDMAP Register 70h */ +#define LP5562_REG_ENG_SEL 0x70 +#define LP5562_ENG_SEL_PWM 0 +#define LP5562_ENG_FOR_RGB_M 0x3F +#define LP5562_ENG_SEL_RGB 0x1B /* R:ENG1, G:ENG2, B:ENG3 */ +#define LP5562_ENG_FOR_W_M 0xC0 +#define LP5562_ENG1_FOR_W 0x40 /* W:ENG1 */ +#define LP5562_ENG2_FOR_W 0x80 /* W:ENG2 */ +#define LP5562_ENG3_FOR_W 0xC0 /* W:ENG3 */ + +/* Program Commands */ +#define LP5562_CMD_DISABLE 0x00 +#define LP5562_CMD_LOAD 0x15 +#define LP5562_CMD_RUN 0x2A +#define LP5562_CMD_DIRECT 0x3F +#define LP5562_PATTERN_OFF 0 + +static inline void lp5562_wait_opmode_done(void) +{ + /* operation mode change needs to be longer than 153 us */ + usleep_range(200, 300); +} + +static inline void lp5562_wait_enable_done(void) +{ + /* it takes more 488 us to update ENABLE register */ + usleep_range(500, 600); +} + +static void lp5562_set_led_current(struct lp55xx_led *led, u8 led_current) +{ + u8 addr[] = { + LP5562_REG_R_CURRENT, + LP5562_REG_G_CURRENT, + LP5562_REG_B_CURRENT, + LP5562_REG_W_CURRENT, + }; + + led->led_current = led_current; + lp55xx_write(led->chip, addr[led->chan_nr], led_current); +} + +static void lp5562_load_engine(struct lp55xx_chip *chip) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 mask[] = { + [LP55XX_ENGINE_1] = LP5562_MODE_ENG1_M, + [LP55XX_ENGINE_2] = LP5562_MODE_ENG2_M, + [LP55XX_ENGINE_3] = LP5562_MODE_ENG3_M, + }; + + u8 val[] = { + [LP55XX_ENGINE_1] = LP5562_LOAD_ENG1, + [LP55XX_ENGINE_2] = LP5562_LOAD_ENG2, + [LP55XX_ENGINE_3] = LP5562_LOAD_ENG3, + }; + 
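+	/*
+	 * Only the two mode bits of the selected engine are modified;
+	 * the read-modify-write in lp55xx_update_bits() leaves the
+	 * other engines' operation modes untouched.
+	 */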
+ lp55xx_update_bits(chip, LP5562_REG_OP_MODE, mask[idx], val[idx]); + + lp5562_wait_opmode_done(); +} + +static void lp5562_stop_engine(struct lp55xx_chip *chip) +{ + lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DISABLE); + lp5562_wait_opmode_done(); +} + +static void lp5562_run_engine(struct lp55xx_chip *chip, bool start) +{ + int ret; + u8 mode; + u8 exec; + + /* stop engine */ + if (!start) { + lp55xx_write(chip, LP5562_REG_ENABLE, LP5562_ENABLE_DEFAULT); + lp5562_wait_enable_done(); + lp5562_stop_engine(chip); + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM); + lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); + lp5562_wait_opmode_done(); + return; + } + + /* + * To run the engine, + * operation mode and enable register should updated at the same time + */ + + ret = lp55xx_read(chip, LP5562_REG_OP_MODE, &mode); + if (ret) + return; + + ret = lp55xx_read(chip, LP5562_REG_ENABLE, &exec); + if (ret) + return; + + /* change operation mode to RUN only when each engine is loading */ + if (LP5562_ENG1_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG1_M) | LP5562_RUN_ENG1; + exec = (exec & ~LP5562_EXEC_ENG1_M) | LP5562_RUN_ENG1; + } + + if (LP5562_ENG2_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG2_M) | LP5562_RUN_ENG2; + exec = (exec & ~LP5562_EXEC_ENG2_M) | LP5562_RUN_ENG2; + } + + if (LP5562_ENG3_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG3_M) | LP5562_RUN_ENG3; + exec = (exec & ~LP5562_EXEC_ENG3_M) | LP5562_RUN_ENG3; + } + + lp55xx_write(chip, LP5562_REG_OP_MODE, mode); + lp5562_wait_opmode_done(); + + lp55xx_update_bits(chip, LP5562_REG_ENABLE, LP5562_EXEC_M, exec); + lp5562_wait_enable_done(); +} + +static int lp5562_update_firmware(struct lp55xx_chip *chip, + const u8 *data, size_t size) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 pattern[LP5562_PROGRAM_LENGTH] = {0}; + u8 addr[] = { + [LP55XX_ENGINE_1] = LP5562_REG_PROG_MEM_ENG1, + [LP55XX_ENGINE_2] = LP5562_REG_PROG_MEM_ENG2, + [LP55XX_ENGINE_3] = LP5562_REG_PROG_MEM_ENG3, + }; + unsigned cmd; + char c[3]; + int program_size; + int nrchars; + int offset = 0; + int ret; + int i; + + /* clear program memory before updating */ + for (i = 0; i < LP5562_PROGRAM_LENGTH; i++) + lp55xx_write(chip, addr[idx] + i, 0); + + i = 0; + while ((offset < size - 1) && (i < LP5562_PROGRAM_LENGTH)) { + /* separate sscanfs because length is working only for %s */ + ret = sscanf(data + offset, "%2s%n ", c, &nrchars); + if (ret != 1) + goto err; + + ret = sscanf(c, "%2x", &cmd); + if (ret != 1) + goto err; + + pattern[i] = (u8)cmd; + offset += nrchars; + i++; + } + + /* Each instruction is 16bit long. Check that length is even */ + if (i % 2) + goto err; + + program_size = i; + for (i = 0; i < program_size; i++) + lp55xx_write(chip, addr[idx] + i, pattern[i]); + + return 0; + +err: + dev_err(&chip->cl->dev, "wrong pattern format\n"); + return -EINVAL; +} + +static void lp5562_firmware_loaded(struct lp55xx_chip *chip) +{ + const struct firmware *fw = chip->fw; + + if (fw->size > LP5562_PROGRAM_LENGTH) { + dev_err(&chip->cl->dev, "firmware data size overflow: %zu\n", + fw->size); + return; + } + + /* + * Program momery sequence + * 1) set engine mode to "LOAD" + * 2) write firmware data into program memory + */ + + lp5562_load_engine(chip); + lp5562_update_firmware(chip, fw->data, fw->size); +} + +static int lp5562_post_init_device(struct lp55xx_chip *chip) +{ + int ret; + u8 update_cfg = chip->pdata->update_config ? 
: LP5562_DEFAULT_CFG; + + /* Set all PWMs to direct control mode */ + ret = lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); + if (ret) + return ret; + + lp5562_wait_opmode_done(); + + ret = lp55xx_write(chip, LP5562_REG_CONFIG, update_cfg); + if (ret) + return ret; + + /* Initialize all channels PWM to zero -> leds off */ + lp55xx_write(chip, LP5562_REG_R_PWM, 0); + lp55xx_write(chip, LP5562_REG_G_PWM, 0); + lp55xx_write(chip, LP5562_REG_B_PWM, 0); + lp55xx_write(chip, LP5562_REG_W_PWM, 0); + + /* Set LED map as register PWM by default */ + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM); + + return 0; +} + +static void lp5562_led_brightness_work(struct work_struct *work) +{ + struct lp55xx_led *led = container_of(work, struct lp55xx_led, + brightness_work); + struct lp55xx_chip *chip = led->chip; + u8 addr[] = { + LP5562_REG_R_PWM, + LP5562_REG_G_PWM, + LP5562_REG_B_PWM, + LP5562_REG_W_PWM, + }; + + mutex_lock(&chip->lock); + lp55xx_write(chip, addr[led->chan_nr], led->brightness); + mutex_unlock(&chip->lock); +} + +static void lp5562_write_program_memory(struct lp55xx_chip *chip, + u8 base, const u8 *rgb, int size) +{ + int i; + + if (!rgb || size <= 0) + return; + + for (i = 0; i < size; i++) + lp55xx_write(chip, base + i, *(rgb + i)); + + lp55xx_write(chip, base + i, 0); + lp55xx_write(chip, base + i + 1, 0); +} + +/* check the size of program count */ +static inline bool _is_pc_overflow(struct lp55xx_predef_pattern *ptn) +{ + return (ptn->size_r >= LP5562_PROGRAM_LENGTH || + ptn->size_g >= LP5562_PROGRAM_LENGTH || + ptn->size_b >= LP5562_PROGRAM_LENGTH); +} + +static int lp5562_run_predef_led_pattern(struct lp55xx_chip *chip, int mode) +{ + struct lp55xx_predef_pattern *ptn; + int i; + + if (mode == LP5562_PATTERN_OFF) { + lp5562_run_engine(chip, false); + return 0; + } + + ptn = chip->pdata->patterns + (mode - 1); + if (!ptn || _is_pc_overflow(ptn)) { + dev_err(&chip->cl->dev, "invalid pattern data\n"); + return -EINVAL; + } + + lp5562_stop_engine(chip); + + /* Set LED map as RGB */ + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_RGB); + + /* Load engines */ + for (i = LP55XX_ENGINE_1; i <= LP55XX_ENGINE_3; i++) { + chip->engine_idx = i; + lp5562_load_engine(chip); + } + + /* Clear program registers */ + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1 + 1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2 + 1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3 + 1, 0); + + /* Program engines */ + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG1, + ptn->r, ptn->size_r); + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG2, + ptn->g, ptn->size_g); + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG3, + ptn->b, ptn->size_b); + + /* Run engines */ + lp5562_run_engine(chip, true); + + return 0; +} + +static ssize_t lp5562_store_pattern(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_predef_pattern *ptn = chip->pdata->patterns; + int num_patterns = chip->pdata->num_patterns; + unsigned long mode; + int ret; + + ret = kstrtoul(buf, 0, &mode); + if (ret) + return ret; + + if (mode > num_patterns || !ptn) + return -EINVAL; + + mutex_lock(&chip->lock); + ret = lp5562_run_predef_led_pattern(chip, mode); + mutex_unlock(&chip->lock); + + 
if (ret) + return ret; + + return len; +} + +static ssize_t lp5562_store_engine_mux(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + u8 mask; + u8 val; + + /* LED map + * R ... Engine 1 (fixed) + * G ... Engine 2 (fixed) + * B ... Engine 3 (fixed) + * W ... Engine 1 or 2 or 3 + */ + + if (sysfs_streq(buf, "RGB")) { + mask = LP5562_ENG_FOR_RGB_M; + val = LP5562_ENG_SEL_RGB; + } else if (sysfs_streq(buf, "W")) { + enum lp55xx_engine_index idx = chip->engine_idx; + + mask = LP5562_ENG_FOR_W_M; + switch (idx) { + case LP55XX_ENGINE_1: + val = LP5562_ENG1_FOR_W; + break; + case LP55XX_ENGINE_2: + val = LP5562_ENG2_FOR_W; + break; + case LP55XX_ENGINE_3: + val = LP5562_ENG3_FOR_W; + break; + default: + return -EINVAL; + } + + } else { + dev_err(dev, "choose RGB or W\n"); + return -EINVAL; + } + + mutex_lock(&chip->lock); + lp55xx_update_bits(chip, LP5562_REG_ENG_SEL, mask, val); + mutex_unlock(&chip->lock); + + return len; +} + +static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, lp5562_store_pattern); +static DEVICE_ATTR(engine_mux, S_IWUSR, NULL, lp5562_store_engine_mux); + +static struct attribute *lp5562_attributes[] = { + &dev_attr_led_pattern.attr, + &dev_attr_engine_mux.attr, + NULL, +}; + +static const struct attribute_group lp5562_group = { + .attrs = lp5562_attributes, +}; + +/* Chip specific configurations */ +static struct lp55xx_device_config lp5562_cfg = { + .max_channel = LP5562_MAX_LEDS, + .reset = { + .addr = LP5562_REG_RESET, + .val = LP5562_RESET, + }, + .enable = { + .addr = LP5562_REG_ENABLE, + .val = LP5562_ENABLE_DEFAULT, + }, + .post_init_device = lp5562_post_init_device, + .set_led_current = lp5562_set_led_current, + .brightness_work_fn = lp5562_led_brightness_work, + .run_engine = lp5562_run_engine, + .firmware_cb = lp5562_firmware_loaded, + .dev_attr_group = &lp5562_group, +}; + +static int lp5562_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct lp55xx_chip *chip; + struct lp55xx_led *led; + struct lp55xx_platform_data *pdata = client->dev.platform_data; + + if (!pdata) { + dev_err(&client->dev, "no platform data\n"); + return -EINVAL; + } + + chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + led = devm_kzalloc(&client->dev, + sizeof(*led) * pdata->num_channels, GFP_KERNEL); + if (!led) + return -ENOMEM; + + chip->cl = client; + chip->pdata = pdata; + chip->cfg = &lp5562_cfg; + + mutex_init(&chip->lock); + + i2c_set_clientdata(client, led); + + ret = lp55xx_init_device(chip); + if (ret) + goto err_init; + + ret = lp55xx_register_leds(led, chip); + if (ret) + goto err_register_leds; + + ret = lp55xx_register_sysfs(chip); + if (ret) { + dev_err(&client->dev, "registering sysfs failed\n"); + goto err_register_sysfs; + } + + return 0; + +err_register_sysfs: + lp55xx_unregister_leds(led, chip); +err_register_leds: + lp55xx_deinit_device(chip); +err_init: + return ret; +} + +static int lp5562_remove(struct i2c_client *client) +{ + struct lp55xx_led *led = i2c_get_clientdata(client); + struct lp55xx_chip *chip = led->chip; + + lp5562_stop_engine(chip); + + lp55xx_unregister_sysfs(chip); + lp55xx_unregister_leds(led, chip); + lp55xx_deinit_device(chip); + + return 0; +} + +static const struct i2c_device_id lp5562_id[] = { + { "lp5562", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp5562_id); + +static struct i2c_driver lp5562_driver = { + .driver = { + 
.name = "lp5562", + }, + .probe = lp5562_probe, + .remove = lp5562_remove, + .id_table = lp5562_id, +}; + +module_i2c_driver(lp5562_driver); + +MODULE_DESCRIPTION("Texas Instruments LP5562 LED Driver"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index d9eb84157423..8a388a4afed7 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -1,5 +1,5 @@ /* - * LP5521/LP5523/LP55231 Common Driver + * LP5521/LP5523/LP55231/LP5562 Common Driver * * Copyright 2012 Texas Instruments * diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h index 1509570d5a3f..1f1041e8b4fc 100644 --- a/include/linux/platform_data/leds-lp55xx.h +++ b/include/linux/platform_data/leds-lp55xx.h @@ -32,6 +32,13 @@ #define LP5521_CLK_INT 1 /* Internal clock */ #define LP5521_CLK_AUTO 2 /* Automatic clock selection */ +/* Bits in LP5562 CONFIG register */ +#define LP5562_PWM_HF LP5521_PWM_HF +#define LP5562_PWRSAVE_EN LP5521_PWRSAVE_EN +#define LP5562_CLK_SRC_EXT LP5521_CLK_SRC_EXT +#define LP5562_CLK_INT LP5521_CLK_INT +#define LP5562_CLK_AUTO LP5521_CLK_AUTO + struct lp55xx_led_config { const char *name; u8 chan_nr; @@ -40,9 +47,9 @@ struct lp55xx_led_config { }; struct lp55xx_predef_pattern { - u8 *r; - u8 *g; - u8 *b; + const u8 *r; + const u8 *g; + const u8 *b; u8 size_r; u8 size_g; u8 size_b; -- GitLab From f07fb52107c881f35eaff09fe990a4dfd0f7e62a Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Wed, 20 Feb 2013 00:36:01 -0800 Subject: [PATCH 0268/3163] leds: move LED trigger drivers into new subdirectory For better driver management, new subdirectory, 'trigger' is created. All LED trigger drivers are moved into this directory. Internal header, 'leds.h' is included in each LED trigger drivers. Fix the location of header file, "leds.h" -> "../leds.h" in driver files. One exception is here, 'ledtrig-timer.c'. There is no need to include 'leds.h'. so '#include "leds.h"' line was removed. 
Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- drivers/leds/Kconfig | 101 +---------------- drivers/leds/Makefile | 10 +- drivers/leds/trigger/Kconfig | 103 ++++++++++++++++++ drivers/leds/trigger/Makefile | 9 ++ .../leds/{ => trigger}/ledtrig-backlight.c | 2 +- drivers/leds/{ => trigger}/ledtrig-cpu.c | 2 +- .../leds/{ => trigger}/ledtrig-default-on.c | 2 +- drivers/leds/{ => trigger}/ledtrig-gpio.c | 2 +- .../leds/{ => trigger}/ledtrig-heartbeat.c | 2 +- drivers/leds/{ => trigger}/ledtrig-ide-disk.c | 0 drivers/leds/{ => trigger}/ledtrig-oneshot.c | 2 +- drivers/leds/{ => trigger}/ledtrig-timer.c | 1 - .../leds/{ => trigger}/ledtrig-transient.c | 2 +- 13 files changed, 121 insertions(+), 117 deletions(-) create mode 100644 drivers/leds/trigger/Kconfig create mode 100644 drivers/leds/trigger/Makefile rename drivers/leds/{ => trigger}/ledtrig-backlight.c (99%) rename drivers/leds/{ => trigger}/ledtrig-cpu.c (99%) rename drivers/leds/{ => trigger}/ledtrig-default-on.c (98%) rename drivers/leds/{ => trigger}/ledtrig-gpio.c (99%) rename drivers/leds/{ => trigger}/ledtrig-heartbeat.c (99%) rename drivers/leds/{ => trigger}/ledtrig-ide-disk.c (100%) rename drivers/leds/{ => trigger}/ledtrig-oneshot.c (99%) rename drivers/leds/{ => trigger}/ledtrig-timer.c (99%) rename drivers/leds/{ => trigger}/ledtrig-transient.c (99%) diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index c7f755034375..d44806d41b44 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -479,106 +479,7 @@ config LEDS_BLINKM This option enables support for the BlinkM RGB LED connected through I2C. Say Y to enable support for the BlinkM LED. -config LEDS_TRIGGERS - bool "LED Trigger support" - depends on LEDS_CLASS - help - This option enables trigger support for the leds class. - These triggers allow kernel events to drive the LEDs and can - be configured via sysfs. If unsure, say Y. - comment "LED Triggers" - -config LEDS_TRIGGER_TIMER - tristate "LED Timer Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by a programmable timer - via sysfs. Some LED hardware can be programmed to start - blinking the LED without any further software interaction. - For more details read Documentation/leds/leds-class.txt. - - If unsure, say Y. - -config LEDS_TRIGGER_ONESHOT - tristate "LED One-shot Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to blink in one-shot pulses with parameters - controlled via sysfs. It's useful to notify the user on - sporadic events, when there are no clear begin and end trap points, - or on dense events, where this blinks the LED at constant rate if - rearmed continuously. - - It also shows how to use the led_blink_set_oneshot() function. - - If unsure, say Y. - -config LEDS_TRIGGER_IDE_DISK - bool "LED IDE Disk Trigger" - depends on IDE_GD_ATA - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by IDE disk activity. - If unsure, say Y. - -config LEDS_TRIGGER_HEARTBEAT - tristate "LED Heartbeat Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by a CPU load average. - The flash frequency is a hyperbolic function of the 1-minute - load average. - If unsure, say Y. - -config LEDS_TRIGGER_BACKLIGHT - tristate "LED backlight Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled as a backlight device: they - turn off and on when the display is blanked and unblanked. - - If unsure, say N. 
- -config LEDS_TRIGGER_CPU - bool "LED CPU Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by active CPUs. This shows - the active CPUs across an array of LEDs so you can see which - CPUs are active on the system at any given moment. - - If unsure, say N. - -config LEDS_TRIGGER_GPIO - tristate "LED GPIO Trigger" - depends on LEDS_TRIGGERS - depends on GPIOLIB - help - This allows LEDs to be controlled by gpio events. It's good - when using gpios as switches and triggering the needed LEDs - from there. One use case is n810's keypad LEDs that could - be triggered by this trigger when user slides up to show - keypad. - - If unsure, say N. - -config LEDS_TRIGGER_DEFAULT_ON - tristate "LED Default ON Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be initialised in the ON state. - If unsure, say Y. - -comment "iptables trigger is under Netfilter config (LED target)" - depends on LEDS_TRIGGERS - -config LEDS_TRIGGER_TRANSIENT - tristate "LED Transient Trigger" - depends on LEDS_TRIGGERS - help - This allows one time activation of a transient state on - GPIO/PWM based hardware. - If unsure, say Y. +source "drivers/leds/trigger/Kconfig" endif # NEW_LEDS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index ab8f5c549ad3..ac2897732b02 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -58,12 +58,4 @@ obj-$(CONFIG_LEDS_BLINKM) += leds-blinkm.o obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o # LED Triggers -obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o -obj-$(CONFIG_LEDS_TRIGGER_ONESHOT) += ledtrig-oneshot.o -obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK) += ledtrig-ide-disk.o -obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o -obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o -obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o -obj-$(CONFIG_LEDS_TRIGGER_CPU) += ledtrig-cpu.o -obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o -obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o +obj-$(CONFIG_LEDS_TRIGGERS) += trigger/ diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig new file mode 100644 index 000000000000..eaa286dc494e --- /dev/null +++ b/drivers/leds/trigger/Kconfig @@ -0,0 +1,103 @@ +menuconfig LEDS_TRIGGERS + bool "LED Trigger support" + depends on LEDS_CLASS + help + This option enables trigger support for the leds class. + These triggers allow kernel events to drive the LEDs and can + be configured via sysfs. If unsure, say Y. + +if LEDS_TRIGGERS + +config LEDS_TRIGGER_TIMER + tristate "LED Timer Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by a programmable timer + via sysfs. Some LED hardware can be programmed to start + blinking the LED without any further software interaction. + For more details read Documentation/leds/leds-class.txt. + + If unsure, say Y. + +config LEDS_TRIGGER_ONESHOT + tristate "LED One-shot Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to blink in one-shot pulses with parameters + controlled via sysfs. It's useful to notify the user on + sporadic events, when there are no clear begin and end trap points, + or on dense events, where this blinks the LED at constant rate if + rearmed continuously. + + It also shows how to use the led_blink_set_oneshot() function. + + If unsure, say Y. + +config LEDS_TRIGGER_IDE_DISK + bool "LED IDE Disk Trigger" + depends on IDE_GD_ATA + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by IDE disk activity. + If unsure, say Y. 
+ +config LEDS_TRIGGER_HEARTBEAT + tristate "LED Heartbeat Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by a CPU load average. + The flash frequency is a hyperbolic function of the 1-minute + load average. + If unsure, say Y. + +config LEDS_TRIGGER_BACKLIGHT + tristate "LED backlight Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled as a backlight device: they + turn off and on when the display is blanked and unblanked. + + If unsure, say N. + +config LEDS_TRIGGER_CPU + bool "LED CPU Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by active CPUs. This shows + the active CPUs across an array of LEDs so you can see which + CPUs are active on the system at any given moment. + + If unsure, say N. + +config LEDS_TRIGGER_GPIO + tristate "LED GPIO Trigger" + depends on LEDS_TRIGGERS + depends on GPIOLIB + help + This allows LEDs to be controlled by gpio events. It's good + when using gpios as switches and triggering the needed LEDs + from there. One use case is n810's keypad LEDs that could + be triggered by this trigger when user slides up to show + keypad. + + If unsure, say N. + +config LEDS_TRIGGER_DEFAULT_ON + tristate "LED Default ON Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be initialised in the ON state. + If unsure, say Y. + +comment "iptables trigger is under Netfilter config (LED target)" + depends on LEDS_TRIGGERS + +config LEDS_TRIGGER_TRANSIENT + tristate "LED Transient Trigger" + depends on LEDS_TRIGGERS + help + This allows one time activation of a transient state on + GPIO/PWM based hardware. + If unsure, say Y. + +endif # LEDS_TRIGGERS diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile new file mode 100644 index 000000000000..554e46ee4c24 --- /dev/null +++ b/drivers/leds/trigger/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o +obj-$(CONFIG_LEDS_TRIGGER_ONESHOT) += ledtrig-oneshot.o +obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK) += ledtrig-ide-disk.o +obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o +obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o +obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o +obj-$(CONFIG_LEDS_TRIGGER_CPU) += ledtrig-cpu.o +obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o +obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o diff --git a/drivers/leds/ledtrig-backlight.c b/drivers/leds/trigger/ledtrig-backlight.c similarity index 99% rename from drivers/leds/ledtrig-backlight.c rename to drivers/leds/trigger/ledtrig-backlight.c index 027a2b15d7d8..3c9c88a07eb8 100644 --- a/drivers/leds/ledtrig-backlight.c +++ b/drivers/leds/trigger/ledtrig-backlight.c @@ -16,7 +16,7 @@ #include #include #include -#include "leds.h" +#include "../leds.h" #define BLANK 1 #define UNBLANK 0 diff --git a/drivers/leds/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c similarity index 99% rename from drivers/leds/ledtrig-cpu.c rename to drivers/leds/trigger/ledtrig-cpu.c index 4239b3955ff0..118335eccc56 100644 --- a/drivers/leds/ledtrig-cpu.c +++ b/drivers/leds/trigger/ledtrig-cpu.c @@ -26,7 +26,7 @@ #include #include #include -#include "leds.h" +#include "../leds.h" #define MAX_NAME_LEN 8 diff --git a/drivers/leds/ledtrig-default-on.c b/drivers/leds/trigger/ledtrig-default-on.c similarity index 98% rename from drivers/leds/ledtrig-default-on.c rename to drivers/leds/trigger/ledtrig-default-on.c index eac1f1b1adac..81a91be8e18d 100644 --- a/drivers/leds/ledtrig-default-on.c +++ 
b/drivers/leds/trigger/ledtrig-default-on.c @@ -15,7 +15,7 @@ #include #include #include -#include "leds.h" +#include "../leds.h" static void defon_trig_activate(struct led_classdev *led_cdev) { diff --git a/drivers/leds/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c similarity index 99% rename from drivers/leds/ledtrig-gpio.c rename to drivers/leds/trigger/ledtrig-gpio.c index 72e3ebfc281f..35812e3a37f2 100644 --- a/drivers/leds/ledtrig-gpio.c +++ b/drivers/leds/trigger/ledtrig-gpio.c @@ -17,7 +17,7 @@ #include #include #include -#include "leds.h" +#include "../leds.h" struct gpio_trig_data { struct led_classdev *led; diff --git a/drivers/leds/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c similarity index 99% rename from drivers/leds/ledtrig-heartbeat.c rename to drivers/leds/trigger/ledtrig-heartbeat.c index 1edc7463ce83..5c8464a33172 100644 --- a/drivers/leds/ledtrig-heartbeat.c +++ b/drivers/leds/trigger/ledtrig-heartbeat.c @@ -19,7 +19,7 @@ #include #include #include -#include "leds.h" +#include "../leds.h" static int panic_heartbeats; diff --git a/drivers/leds/ledtrig-ide-disk.c b/drivers/leds/trigger/ledtrig-ide-disk.c similarity index 100% rename from drivers/leds/ledtrig-ide-disk.c rename to drivers/leds/trigger/ledtrig-ide-disk.c diff --git a/drivers/leds/ledtrig-oneshot.c b/drivers/leds/trigger/ledtrig-oneshot.c similarity index 99% rename from drivers/leds/ledtrig-oneshot.c rename to drivers/leds/trigger/ledtrig-oneshot.c index 2c029aa5c4f1..cb4c7466692a 100644 --- a/drivers/leds/ledtrig-oneshot.c +++ b/drivers/leds/trigger/ledtrig-oneshot.c @@ -18,7 +18,7 @@ #include #include #include -#include "leds.h" +#include "../leds.h" #define DEFAULT_DELAY 100 diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/trigger/ledtrig-timer.c similarity index 99% rename from drivers/leds/ledtrig-timer.c rename to drivers/leds/trigger/ledtrig-timer.c index f774d0592204..8d09327b5719 100644 --- a/drivers/leds/ledtrig-timer.c +++ b/drivers/leds/trigger/ledtrig-timer.c @@ -17,7 +17,6 @@ #include #include #include -#include "leds.h" static ssize_t led_delay_on_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/leds/ledtrig-transient.c b/drivers/leds/trigger/ledtrig-transient.c similarity index 99% rename from drivers/leds/ledtrig-transient.c rename to drivers/leds/trigger/ledtrig-transient.c index 398f1042c43e..e5abc00bb00c 100644 --- a/drivers/leds/ledtrig-transient.c +++ b/drivers/leds/trigger/ledtrig-transient.c @@ -25,7 +25,7 @@ #include #include #include -#include "leds.h" +#include "../leds.h" struct transient_trig_data { int activate; -- GitLab From 6fd796279179608a4047099930a674c7379650cd Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 27 Feb 2013 19:54:10 -0800 Subject: [PATCH 0269/3163] leds: atmel-pwm: remove erroneous __exit annotation CONFIG_HOTPLUG was removed, so __devexit or __exit of remove() should not be used. 
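For context, a minimal sketch of why the old annotation was harmful, paraphrasing the standard __exit_p() definition from include/linux/init.h (not part of this patch):

  /* include/linux/init.h (paraphrased): for built-in code,
   * __exit_p() silently turns the callback into NULL. */
  #ifdef MODULE
  #define __exit_p(x) x
  #else
  #define __exit_p(x) NULL
  #endif

With the driver built in, .remove = __exit_p(pwmled_remove) therefore became .remove = NULL while the __exit function itself was discarded at link time; dropping both annotations keeps the remove() callback alive and reachable.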
Signed-off-by: Jingoo Han Signed-off-by: Bryan Wu --- drivers/leds/leds-atmel-pwm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-atmel-pwm.c b/drivers/leds/leds-atmel-pwm.c index 386773532d95..8a39c5b20f76 100644 --- a/drivers/leds/leds-atmel-pwm.c +++ b/drivers/leds/leds-atmel-pwm.c @@ -113,7 +113,7 @@ static int pwmled_probe(struct platform_device *pdev) return status; } -static int __exit pwmled_remove(struct platform_device *pdev) +static int pwmled_remove(struct platform_device *pdev) { const struct gpio_led_platform_data *pdata; struct pwmled *leds; @@ -140,7 +140,7 @@ static struct platform_driver pwmled_driver = { }, /* REVISIT add suspend() and resume() methods */ .probe = pwmled_probe, - .remove = __exit_p(pwmled_remove), + .remove = pwmled_remove, }; module_platform_driver(pwmled_driver); -- GitLab From e9dd68cf148994bf2eb718c540a472170b1ad4d3 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 27 Feb 2013 19:55:14 -0800 Subject: [PATCH 0270/3163] leds: leds-bd2802: remove erroneous __exit annotation CONFIG_HOTPLUG was removed, so __devexit or __exit of remove() should not be used. Signed-off-by: Jingoo Han Signed-off-by: Bryan Wu --- drivers/leds/leds-bd2802.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c index 851517030cc1..9f9177d29ed7 100644 --- a/drivers/leds/leds-bd2802.c +++ b/drivers/leds/leds-bd2802.c @@ -732,7 +732,7 @@ static int bd2802_probe(struct i2c_client *client, return ret; } -static int __exit bd2802_remove(struct i2c_client *client) +static int bd2802_remove(struct i2c_client *client) { struct bd2802_led *led = i2c_get_clientdata(client); int i; @@ -804,7 +804,7 @@ static struct i2c_driver bd2802_i2c_driver = { .pm = BD2802_PM, }, .probe = bd2802_probe, - .remove = __exit_p(bd2802_remove), + .remove = bd2802_remove, .id_table = bd2802_id, }; -- GitLab From 84f6942cde279e812c59eed456d1f43a39ca6c40 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 7 Mar 2013 18:37:38 -0800 Subject: [PATCH 0271/3163] leds: leds-lt3593: set devm_gpio_request_one() flags param correctly The devm_gpio_request_one() flags parameter was set to: GPIOF_DIR_OUT | state GPIOF_DIR_OUT and GPIOF_DIR_IN are defined as below: GPIOF_DIR_OUT (0 << 0) GPIOF_DIR_IN (1 << 0) So, when 'state' is 1, the gpio pin can be set as input, instead of output. To prevent this problem, GPIOF_OUT_INIT flags should be used when using devm_gpio_request_one(). Signed-off-by: Jingoo Han Signed-off-by: Bryan Wu --- drivers/leds/leds-lt3593.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-lt3593.c b/drivers/leds/leds-lt3593.c index c9b9e1fec587..ca48a7d5502d 100644 --- a/drivers/leds/leds-lt3593.c +++ b/drivers/leds/leds-lt3593.c @@ -106,8 +106,9 @@ static int create_lt3593_led(const struct gpio_led *template, if (!template->retain_state_suspended) led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; - ret = devm_gpio_request_one(parent, template->gpio, - GPIOF_DIR_OUT | state, template->name); + ret = devm_gpio_request_one(parent, template->gpio, state ? 
+ GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, + template->name); if (ret < 0) return ret; -- GitLab From 9d04cbaadf563db3ba04426c317d3e84bc27bac1 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 7 Mar 2013 18:38:26 -0800 Subject: [PATCH 0272/3163] leds: leds-ns2: set devm_gpio_request_one() flags param correctly The devm_gpio_request_one() flags parameter was set to: GPIOF_DIR_OUT | gpio_get_value(template->cmd) GPIOF_DIR_OUT and GPIOF_DIR_IN are defined as below: GPIOF_DIR_OUT (0 << 0) GPIOF_DIR_IN (1 << 0) So, when 'gpio_get_value(template->cmd)' is 1, the gpio pin can be set as input, instead of output. To prevent this problem, GPIOF_OUT_INIT flags should be used when using devm_gpio_request_one(). Same goes for 'gpio_get_value(template->slow)' case. Signed-off-by: Jingoo Han Signed-off-by: Bryan Wu --- drivers/leds/leds-ns2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index d978171c25b4..e02b3136273f 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -193,7 +193,8 @@ create_ns2_led(struct platform_device *pdev, struct ns2_led_data *led_dat, enum ns2_led_modes mode; ret = devm_gpio_request_one(&pdev->dev, template->cmd, - GPIOF_DIR_OUT | gpio_get_value(template->cmd), + gpio_get_value(template->cmd) ? + GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, template->name); if (ret) { dev_err(&pdev->dev, "%s: failed to setup command GPIO\n", @@ -202,7 +203,8 @@ create_ns2_led(struct platform_device *pdev, struct ns2_led_data *led_dat, } ret = devm_gpio_request_one(&pdev->dev, template->slow, - GPIOF_DIR_OUT | gpio_get_value(template->slow), + gpio_get_value(template->slow) ? + GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, template->name); if (ret) { dev_err(&pdev->dev, "%s: failed to setup slow GPIO\n", -- GitLab From 901b74a3c4044a07c7149365aa9c4eaa6c71964b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 7 Mar 2013 18:39:04 -0800 Subject: [PATCH 0273/3163] leds: renesas: set gpio_request_one() flags param correctly The gpio_request_one() flags parameter was set to: GPIOF_DIR_OUT | !!brightness GPIOF_DIR_OUT and GPIOF_DIR_IN are defined as below: GPIOF_DIR_OUT (0 << 0) GPIOF_DIR_IN (1 << 0) So, when '!!brightness' is 1, the gpio pin can be set as input, instead of output. To prevent this problem, GPIOF_OUT_INIT flags should be used when using gpio_request_one(). Signed-off-by: Jingoo Han Signed-off-by: Bryan Wu --- drivers/leds/leds-renesas-tpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/leds/leds-renesas-tpu.c b/drivers/leds/leds-renesas-tpu.c index d3c2b7e68fbc..9483f1c1078d 100644 --- a/drivers/leds/leds-renesas-tpu.c +++ b/drivers/leds/leds-renesas-tpu.c @@ -205,7 +205,8 @@ static void r_tpu_set_pin(struct r_tpu_priv *p, enum r_tpu_pin new_state, gpio_free(cfg->pin_gpio_fn); if (new_state == R_TPU_PIN_GPIO) - gpio_request_one(cfg->pin_gpio, GPIOF_DIR_OUT | !!brightness, + gpio_request_one(cfg->pin_gpio, !!brightness ? + GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, cfg->name); if (new_state == R_TPU_PIN_GPIO_FN) -- GitLab From d67eb8e66c53dd2d26d294b49969f0c3101a5368 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Mar 2013 21:01:37 -0800 Subject: [PATCH 0274/3163] leds: wm8350: Complain if we fail to reenable DCDC Provide some trace, though the hardware is most likely non-functional if this happens. 
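A sketch of the resulting error path, assuming the usual refcounted regulator API (enable/disable calls must stay balanced):

  ret = regulator_disable(led->isink);
  if (ret != 0) {
          dev_err(led->cdev.dev, "Failed to disable ISINK: %d\n", ret);
          /* roll back the DCDC disable so the enable counts stay
           * consistent, and now trace if even that fails */
          ret = regulator_enable(led->dcdc);
          if (ret != 0)
                  dev_err(led->cdev.dev, "Failed to reenable DCDC: %d\n", ret);
          return;
  }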
Signed-off-by: Mark Brown Signed-off-by: Bryan Wu --- drivers/leds/leds-wm8350.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c index ed15157c8f6c..8a181d56602d 100644 --- a/drivers/leds/leds-wm8350.c +++ b/drivers/leds/leds-wm8350.c @@ -129,7 +129,10 @@ static void wm8350_led_disable(struct wm8350_led *led) ret = regulator_disable(led->isink); if (ret != 0) { dev_err(led->cdev.dev, "Failed to disable ISINK: %d\n", ret); - regulator_enable(led->dcdc); + ret = regulator_enable(led->dcdc); + if (ret != 0) + dev_err(led->cdev.dev, "Failed to reenable DCDC: %d\n", + ret); return; } -- GitLab From fbd9df28faeda17b1a9d3e9ab976e969be98d379 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Thu, 14 Mar 2013 03:09:49 -0700 Subject: [PATCH 0275/3163] leds: tca6507: Use of_match_ptr() macro This eliminates having an #ifdef returning NULL for the case when OF is disabled. Signed-off-by: Sachin Kamat Signed-off-by: Bryan Wu --- drivers/leds/leds-tca6507.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c index 070ba0741b21..98fe021ba276 100644 --- a/drivers/leds/leds-tca6507.c +++ b/drivers/leds/leds-tca6507.c @@ -85,6 +85,7 @@ #include #include #include +#include /* LED select registers determine the source that drives LED outputs */ #define TCA6507_LS_LED_OFF 0x0 /* Output HI-Z (off) */ @@ -724,7 +725,6 @@ tca6507_led_dt_init(struct i2c_client *client) return ERR_PTR(-ENODEV); } -#define of_tca6507_leds_match NULL #endif static int tca6507_probe(struct i2c_client *client, @@ -813,7 +813,7 @@ static struct i2c_driver tca6507_driver = { .driver = { .name = "leds-tca6507", .owner = THIS_MODULE, - .of_match_table = of_tca6507_leds_match, + .of_match_table = of_match_ptr(of_tca6507_leds_match), }, .probe = tca6507_probe, .remove = tca6507_remove, -- GitLab From 39f7e08af3fd9ca1cb94a8270354afb2ea5cfcd3 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 14 Mar 2013 04:29:19 -0700 Subject: [PATCH 0276/3163] leds: trigger: use inline functions instead of macros Macros should be used only where an inline function cannot work; otherwise, an empty inline function is preferable. (a) Case of !CONFIG_LEDS_TRIGGERS The following macros are replaced with inline functions: led_trigger_register_simple() led_trigger_unregister_simple() led_trigger_event() To give the inline versions typed arguments, the structure 'led_trigger' must be defined; it has no members at all. (b) Case of !CONFIG_LEDS_TRIGGER_IDE_DISK The ledtrig_ide_activity() macro is replaced with an inline function as well. (c) DEFINE_LED_TRIGGER() and DEFINE_LED_TRIGGER_GLOBAL() Struct 'led_trigger' is defined in both cases, with CONFIG_LEDS_TRIGGERS and without CONFIG_LEDS_TRIGGERS. These macros are moved out of the CONFIG_LEDS_TRIGGERS block because they have no dependency on CONFIG_LEDS_TRIGGERS. (d) Fix build errors in the mmc-core driver After replacing the macros with inline functions, the following build errors occur (condition: CONFIG_LEDS_TRIGGERS is not set): drivers/mmc/core/core.c: In function 'mmc_request_done': drivers/mmc/core/core.c:164:25: error: 'struct mmc_host' has no member named 'led' drivers/mmc/core/core.c: In function 'mmc_start_request': drivers/mmc/core/core.c:254:24: error: 'struct mmc_host' has no member named 'led' make[3]: *** [drivers/mmc/core/core.o] Error 1 The cause of these errors is the non-existent member variable 'led': it is only valid when CONFIG_LEDS_TRIGGERS is set, but now it can be used without this dependency.
To fix the build errors, the member 'led' is now declared unconditionally, without its config option, in 'include/linux/mmc/host.h'. Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- include/linux/leds.h | 25 ++++++++++++++----------- include/linux/mmc/host.h | 2 -- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/linux/leds.h b/include/linux/leds.h index 0d9b5eed714e..2d8c0b4f2f76 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -142,6 +142,10 @@ extern void led_set_brightness(struct led_classdev *led_cdev, /* * LED Triggers */ +/* Registration functions for simple triggers */ +#define DEFINE_LED_TRIGGER(x) static struct led_trigger *x; +#define DEFINE_LED_TRIGGER_GLOBAL(x) struct led_trigger *x; + #ifdef CONFIG_LEDS_TRIGGERS #define TRIG_NAME_MAX 50 @@ -164,9 +168,6 @@ struct led_trigger { extern int led_trigger_register(struct led_trigger *trigger); extern void led_trigger_unregister(struct led_trigger *trigger); -/* Registration functions for simple triggers */ -#define DEFINE_LED_TRIGGER(x) static struct led_trigger *x; -#define DEFINE_LED_TRIGGER_GLOBAL(x) struct led_trigger *x; extern void led_trigger_register_simple(const char *name, struct led_trigger **trigger); extern void led_trigger_unregister_simple(struct led_trigger *trigger); @@ -199,20 +200,22 @@ extern void led_trigger_rename_static(const char *name, #else -/* Triggers aren't active - null macros */ -#define DEFINE_LED_TRIGGER(x) -#define DEFINE_LED_TRIGGER_GLOBAL(x) -#define led_trigger_register_simple(x, y) do {} while(0) -#define led_trigger_unregister_simple(x) do {} while(0) -#define led_trigger_event(x, y) do {} while(0) +/* Trigger has no members */ +struct led_trigger {}; -#endif +/* Trigger inline empty functions */ +static inline void led_trigger_register_simple(const char *name, + struct led_trigger **trigger) {} +static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} +static inline void led_trigger_event(struct led_trigger *trigger, + enum led_brightness event) {} +#endif /* CONFIG_LEDS_TRIGGERS */ /* Trigger specific functions */ #ifdef CONFIG_LEDS_TRIGGER_IDE_DISK extern void ledtrig_ide_activity(void); #else -#define ledtrig_ide_activity() do {} while(0) +static inline void ledtrig_ide_activity(void) {} #endif /* diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index d6f20cc6415e..357e80efcde0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -341,9 +341,7 @@ struct mmc_host { mmc_pm_flag_t pm_flags; /* requested pm features */ -#ifdef CONFIG_LEDS_TRIGGERS struct led_trigger *led; /* activity led */ -#endif #ifdef CONFIG_REGULATOR bool regulator_enabled; /* regulator state */ -- GitLab From 48a1d032c954b9b06c3adbf35ef4735dd70ab757 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 14 Mar 2013 04:29:24 -0700 Subject: [PATCH 0277/3163] leds: add camera LED triggers Some LED devices support flash/torch functionality through the LED subsystem. This patch enables direct LED trigger control from driver code: flash on/off and torch on/off can be done simply from other drivers. Two trigger APIs are added, ledtrig_flash_ctrl() and ledtrig_torch_ctrl().
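A hypothetical caller, to illustrate the new API (the camera-driver helper names are invented for this sketch; only the two ledtrig_*_ctrl() calls come from this patch):

  #include <linux/leds.h>

  static void my_cam_set_flash(bool on)		/* hypothetical helper */
  {
          ledtrig_flash_ctrl(on);	/* drives every LED bound to "flash" */
  }

  static void my_cam_set_torch(bool on)		/* hypothetical helper */
  {
          ledtrig_torch_ctrl(on);	/* drives every LED bound to "torch" */
  }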
Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- drivers/leds/trigger/Kconfig | 8 ++++ drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-camera.c | 57 +++++++++++++++++++++++++++ include/linux/leds.h | 8 ++++ 4 files changed, 74 insertions(+) create mode 100644 drivers/leds/trigger/ledtrig-camera.c diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index eaa286dc494e..49794b47b51c 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -100,4 +100,12 @@ config LEDS_TRIGGER_TRANSIENT GPIO/PWM based hardware. If unsure, say Y. +config LEDS_TRIGGER_CAMERA + tristate "LED Camera Flash/Torch Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled as a camera flash/torch device. + This enables direct flash/torch on/off by the driver, kernel space. + If unsure, say Y. + endif # LEDS_TRIGGERS diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile index 554e46ee4c24..1abf48dacf7e 100644 --- a/drivers/leds/trigger/Makefile +++ b/drivers/leds/trigger/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o obj-$(CONFIG_LEDS_TRIGGER_CPU) += ledtrig-cpu.o obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o +obj-$(CONFIG_LEDS_TRIGGER_CAMERA) += ledtrig-camera.o diff --git a/drivers/leds/trigger/ledtrig-camera.c b/drivers/leds/trigger/ledtrig-camera.c new file mode 100644 index 000000000000..9bd73a8bad5c --- /dev/null +++ b/drivers/leds/trigger/ledtrig-camera.c @@ -0,0 +1,57 @@ +/* + * Camera Flash and Torch On/Off Trigger + * + * based on ledtrig-ide-disk.c + * + * Copyright 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include + +DEFINE_LED_TRIGGER(ledtrig_flash); +DEFINE_LED_TRIGGER(ledtrig_torch); + +void ledtrig_flash_ctrl(bool on) +{ + enum led_brightness brt = on ? LED_FULL : LED_OFF; + + led_trigger_event(ledtrig_flash, brt); +} +EXPORT_SYMBOL_GPL(ledtrig_flash_ctrl); + +void ledtrig_torch_ctrl(bool on) +{ + enum led_brightness brt = on ? 
LED_FULL : LED_OFF; + + led_trigger_event(ledtrig_torch, brt); +} +EXPORT_SYMBOL_GPL(ledtrig_torch_ctrl); + +static int __init ledtrig_camera_init(void) +{ + led_trigger_register_simple("flash", &ledtrig_flash); + led_trigger_register_simple("torch", &ledtrig_torch); + return 0; +} +module_init(ledtrig_camera_init); + +static void __exit ledtrig_camera_exit(void) +{ + led_trigger_unregister_simple(ledtrig_torch); + led_trigger_unregister_simple(ledtrig_flash); +} +module_exit(ledtrig_camera_exit); + +MODULE_DESCRIPTION("LED Trigger for Camera Flash/Torch Control"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/leds.h b/include/linux/leds.h index 2d8c0b4f2f76..0287ab296689 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -218,6 +218,14 @@ extern void ledtrig_ide_activity(void); static inline void ledtrig_ide_activity(void) {} #endif +#if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE) +extern void ledtrig_flash_ctrl(bool on); +extern void ledtrig_torch_ctrl(bool on); +#else +static inline void ledtrig_flash_ctrl(bool on) {} +static inline void ledtrig_torch_ctrl(bool on) {} +#endif + /* * Generic LED platform data for describing LED names and default triggers. */ -- GitLab From 313bf0b1a0eaeaac17ea8c4b748f16e28fce8b7a Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 14 Mar 2013 04:29:26 -0700 Subject: [PATCH 0278/3163] leds: lm355x, lm3642: support camera LED triggers for flash and torch LM355x and LM3642 support flash and torch functionality:

(Camera driver)        (LED trigger for camera)     (LED driver)
Turn on the flash ...> ledtrig_flash_ctrl(true) ...> LM355x or LM3642
                                                     brightness ctrl

Flash/torch LEDs are controlled by another driver through the LED camera trigger APIs, ledtrig_flash_ctrl()/ledtrig_torch_ctrl(). The actual device control is then performed by each LED driver, such as LM355x or LM3642.
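From the LED driver side, the binding is just a default trigger name; a sketch with field values matching the LM355x hunk below (the brightness callback name is hypothetical):

  static struct led_classdev cdev_flash = {
          .name            = "flash",
          .max_brightness  = 16,
          .brightness_set  = my_strobe_set,	/* hypothetical callback */
          .default_trigger = "flash",	/* attach to the camera trigger */
  };

  /* after led_classdev_register(), ledtrig_flash_ctrl(true) sets this
   * LED to LED_FULL via the "flash" trigger */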
Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- drivers/leds/leds-lm355x.c | 2 ++ drivers/leds/leds-lm3642.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/leds/leds-lm355x.c b/drivers/leds/leds-lm355x.c index 4117235ba618..d81a8e7afd6c 100644 --- a/drivers/leds/leds-lm355x.c +++ b/drivers/leds/leds-lm355x.c @@ -477,6 +477,7 @@ static int lm355x_probe(struct i2c_client *client, chip->cdev_flash.name = "flash"; chip->cdev_flash.max_brightness = 16; chip->cdev_flash.brightness_set = lm355x_strobe_brightness_set; + chip->cdev_flash.default_trigger = "flash"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_flash); if (err < 0) @@ -486,6 +487,7 @@ static int lm355x_probe(struct i2c_client *client, chip->cdev_torch.name = "torch"; chip->cdev_torch.max_brightness = 8; chip->cdev_torch.brightness_set = lm355x_torch_brightness_set; + chip->cdev_torch.default_trigger = "torch"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_torch); if (err < 0) diff --git a/drivers/leds/leds-lm3642.c b/drivers/leds/leds-lm3642.c index 9f428d9dfe91..f361bbef2dec 100644 --- a/drivers/leds/leds-lm3642.c +++ b/drivers/leds/leds-lm3642.c @@ -363,6 +363,7 @@ static int lm3642_probe(struct i2c_client *client, chip->cdev_flash.name = "flash"; chip->cdev_flash.max_brightness = 16; chip->cdev_flash.brightness_set = lm3642_strobe_brightness_set; + chip->cdev_flash.default_trigger = "flash"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_flash); if (err < 0) { @@ -380,6 +381,7 @@ static int lm3642_probe(struct i2c_client *client, chip->cdev_torch.name = "torch"; chip->cdev_torch.max_brightness = 8; chip->cdev_torch.brightness_set = lm3642_torch_brightness_set; + chip->cdev_torch.default_trigger = "torch"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_torch); if (err < 0) { -- GitLab From 24d321284745cbc593fba8115585329d48703704 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 14 Mar 2013 17:19:36 -0700 Subject: [PATCH 0279/3163] leds: lp55xx: fix the sysfs read operation According to the sysfs documentation (Documentation/filesystems/sysfs.txt), scnprintf() should be used in a read operation method. It guarantees a safe buffer size (PAGE_SIZE), which is allocated by sysfs. Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- drivers/leds/leds-lp5521.c | 3 ++- drivers/leds/leds-lp55xx-common.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 1001347ba70b..7f10304219ea 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -360,7 +360,8 @@ static ssize_t lp5521_selftest(struct device *dev, mutex_lock(&chip->lock); ret = lp5521_run_selftest(chip, buf); mutex_unlock(&chip->lock); - return sprintf(buf, "%s\n", ret ? "FAIL" : "OK"); + + return scnprintf(buf, PAGE_SIZE, "%s\n", ret ?
"FAIL" : "OK"); } /* device attributes */ diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index 8a388a4afed7..715a6027316f 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -80,7 +80,7 @@ static ssize_t lp55xx_show_current(struct device *dev, { struct lp55xx_led *led = dev_to_lp55xx_led(dev); - return sprintf(buf, "%d\n", led->led_current); + return scnprintf(buf, PAGE_SIZE, "%d\n", led->led_current); } static ssize_t lp55xx_store_current(struct device *dev, @@ -113,7 +113,7 @@ static ssize_t lp55xx_show_max_current(struct device *dev, { struct lp55xx_led *led = dev_to_lp55xx_led(dev); - return sprintf(buf, "%d\n", led->max_current); + return scnprintf(buf, PAGE_SIZE, "%d\n", led->max_current); } static DEVICE_ATTR(led_current, S_IRUGO | S_IWUSR, lp55xx_show_current, -- GitLab From c971ff185f6443e834686f140ba6d6e341ced600 Mon Sep 17 00:00:00 2001 From: Florian Vaussard Date: Mon, 28 Jan 2013 06:00:59 -0800 Subject: [PATCH 0280/3163] leds: leds-pwm: Defer led_pwm_set() if PWM can sleep Call to led_pwm_set() can happen inside atomic context, like triggers. If the PWM call can sleep, defer using a worker. Signed-off-by: Florian Vaussard Reviewed-by: Peter Ujfalusi Acked-by: Thierry Reding Signed-off-by: Bryan Wu --- drivers/leds/leds-pwm.c | 50 ++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c index a1ea5f6a8d39..faf52c005e8c 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -23,12 +23,16 @@ #include #include #include +#include struct led_pwm_data { struct led_classdev cdev; struct pwm_device *pwm; + struct work_struct work; unsigned int active_low; unsigned int period; + int duty; + bool can_sleep; }; struct led_pwm_priv { @@ -36,6 +40,26 @@ struct led_pwm_priv { struct led_pwm_data leds[0]; }; +static void __led_pwm_set(struct led_pwm_data *led_dat) +{ + int new_duty = led_dat->duty; + + pwm_config(led_dat->pwm, new_duty, led_dat->period); + + if (new_duty == 0) + pwm_disable(led_dat->pwm); + else + pwm_enable(led_dat->pwm); +} + +static void led_pwm_work(struct work_struct *work) +{ + struct led_pwm_data *led_dat = + container_of(work, struct led_pwm_data, work); + + __led_pwm_set(led_dat); +} + static void led_pwm_set(struct led_classdev *led_cdev, enum led_brightness brightness) { @@ -44,13 +68,12 @@ static void led_pwm_set(struct led_classdev *led_cdev, unsigned int max = led_dat->cdev.max_brightness; unsigned int period = led_dat->period; - if (brightness == 0) { - pwm_config(led_dat->pwm, 0, period); - pwm_disable(led_dat->pwm); - } else { - pwm_config(led_dat->pwm, brightness * period / max, period); - pwm_enable(led_dat->pwm); - } + led_dat->duty = brightness * period / max; + + if (led_dat->can_sleep) + schedule_work(&led_dat->work); + else + __led_pwm_set(led_dat); } static inline size_t sizeof_pwm_leds_priv(int num_leds) @@ -100,6 +123,10 @@ static struct led_pwm_priv *led_pwm_create_of(struct platform_device *pdev) led_dat->cdev.brightness = LED_OFF; led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; + led_dat->can_sleep = pwm_can_sleep(led_dat->pwm); + if (led_dat->can_sleep) + INIT_WORK(&led_dat->work, led_pwm_work); + ret = led_classdev_register(&pdev->dev, &led_dat->cdev); if (ret < 0) { dev_err(&pdev->dev, "failed to register for %s\n", @@ -153,6 +180,10 @@ static int led_pwm_probe(struct platform_device *pdev) led_dat->cdev.max_brightness = cur_led->max_brightness; 
led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; + led_dat->can_sleep = pwm_can_sleep(led_dat->pwm); + if (led_dat->can_sleep) + INIT_WORK(&led_dat->work, led_pwm_work); + ret = led_classdev_register(&pdev->dev, &led_dat->cdev); if (ret < 0) goto err; @@ -180,8 +211,11 @@ static int led_pwm_remove(struct platform_device *pdev) { struct led_pwm_priv *priv = platform_get_drvdata(pdev); int i; - for (i = 0; i < priv->num_leds; i++) + for (i = 0; i < priv->num_leds; i++) { led_classdev_unregister(&priv->leds[i].cdev); + if (priv->leds[i].can_sleep) + cancel_work_sync(&priv->leds[i].work); + } return 0; } -- GitLab From 3de1929ba61df87a0561c4f1fb7161c401fe3d04 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 19 Mar 2013 11:07:29 -0700 Subject: [PATCH 0281/3163] leds: leds-ns2: fix oops at module removal This patch fixes a regression introduced by commit 72052fcc10 ("leds: leds-ns2: add device tree binding"). When the driver is initialized with device tree data, the platform_data pointer is NULL. This causes a kernel oops at removal. To fix this bug, num_leds is moved into driver_data and platform_data is no longer used from ns2_led_remove(). Signed-off-by: Simon Guinot Signed-off-by: Bryan Wu --- drivers/leds/leds-ns2.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index e02b3136273f..70137b1eecf5 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -308,10 +308,21 @@ static const struct of_device_id of_ns2_leds_match[] = { }; #endif /* CONFIG_OF_GPIO */ +struct ns2_led_priv { + int num_leds; + struct ns2_led_data leds_data[]; +}; + +static inline int sizeof_ns2_led_priv(int num_leds) +{ + return sizeof(struct ns2_led_priv) + + (sizeof(struct ns2_led_data) * num_leds); +} + static int ns2_led_probe(struct platform_device *pdev) { struct ns2_led_platform_data *pdata = pdev->dev.platform_data; - struct ns2_led_data *leds_data; + struct ns2_led_priv *priv; int i; int ret; @@ -332,21 +343,23 @@ static int ns2_led_probe(struct platform_device *pdev) return -EINVAL; #endif /* CONFIG_OF_GPIO */ - leds_data = devm_kzalloc(&pdev->dev, sizeof(struct ns2_led_data) * - pdata->num_leds, GFP_KERNEL); - if (!leds_data) + priv = devm_kzalloc(&pdev->dev, + sizeof_ns2_led_priv(pdata->num_leds), GFP_KERNEL); + if (!priv) return -ENOMEM; + priv->num_leds = pdata->num_leds; - for (i = 0; i < pdata->num_leds; i++) { - ret = create_ns2_led(pdev, &leds_data[i], &pdata->leds[i]); + for (i = 0; i < priv->num_leds; i++) { + ret = create_ns2_led(pdev, &priv->leds_data[i], + &pdata->leds[i]); if (ret < 0) { for (i = i - 1; i >= 0; i--) - delete_ns2_led(&leds_data[i]); + delete_ns2_led(&priv->leds_data[i]); return ret; } } - platform_set_drvdata(pdev, leds_data); + platform_set_drvdata(pdev, priv); return 0; } @@ -354,13 +367,12 @@ static int ns2_led_remove(struct platform_device *pdev) { int i; - struct ns2_led_platform_data *pdata = pdev->dev.platform_data; - struct ns2_led_data *leds_data; + struct ns2_led_priv *priv; - leds_data = platform_get_drvdata(pdev); + priv = platform_get_drvdata(pdev); - for (i = 0; i < pdata->num_leds; i++) - delete_ns2_led(&leds_data[i]); + for (i = 0; i < priv->num_leds; i++) + delete_ns2_led(&priv->leds_data[i]); platform_set_drvdata(pdev, NULL); -- GitLab From 53b4192266436e75dea96c8ef495eadd6f3df981 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Wed, 20 Mar 2013 17:37:00 -0700 Subject: [PATCH 0282/3163]
leds: lp55xx: use common clock framework when external clock is used Program execution is timed with 32768Hz clock in the LP55xx family devices. To run LED functionalities, LP55xx devices provide two options. One is using internal clock. The other is using external clock. This patch enables external clock detection automatically. If external clock is not detected, then the internal clock will be used in the LP55xx driver. Valid clock rate is 32768Hz in LP55xx devices. This new API is used in each LP55xx driver like LP5521 and LP5562. Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- drivers/leds/leds-lp55xx-common.c | 36 +++++++++++++++++++++++++++++++ drivers/leds/leds-lp55xx-common.h | 4 ++++ 2 files changed, 40 insertions(+) diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index 715a6027316f..ba34199dc3d9 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -12,6 +12,7 @@ * Derived from leds-lp5521.c, leds-lp5523.c */ +#include #include #include #include @@ -21,6 +22,9 @@ #include "leds-lp55xx-common.h" +/* External clock rate */ +#define LP55XX_CLK_32K 32768 + static struct lp55xx_led *cdev_to_lp55xx_led(struct led_classdev *cdev) { return container_of(cdev, struct lp55xx_led, cdev); @@ -357,6 +361,35 @@ int lp55xx_update_bits(struct lp55xx_chip *chip, u8 reg, u8 mask, u8 val) } EXPORT_SYMBOL_GPL(lp55xx_update_bits); +bool lp55xx_is_extclk_used(struct lp55xx_chip *chip) +{ + struct clk *clk; + int err; + + clk = devm_clk_get(&chip->cl->dev, "32k_clk"); + if (IS_ERR(clk)) + goto use_internal_clk; + + err = clk_prepare_enable(clk); + if (err) + goto use_internal_clk; + + if (clk_get_rate(clk) != LP55XX_CLK_32K) { + clk_disable_unprepare(clk); + goto use_internal_clk; + } + + dev_info(&chip->cl->dev, "%dHz external clock used\n", LP55XX_CLK_32K); + + chip->clk = clk; + return true; + +use_internal_clk: + dev_info(&chip->cl->dev, "internal clock used\n"); + return false; +} +EXPORT_SYMBOL_GPL(lp55xx_is_extclk_used); + int lp55xx_init_device(struct lp55xx_chip *chip) { struct lp55xx_platform_data *pdata; @@ -421,6 +454,9 @@ void lp55xx_deinit_device(struct lp55xx_chip *chip) { struct lp55xx_platform_data *pdata = chip->pdata; + if (chip->clk) + clk_disable_unprepare(chip->clk); + if (pdata->enable) pdata->enable(0); diff --git a/drivers/leds/leds-lp55xx-common.h b/drivers/leds/leds-lp55xx-common.h index ece4761a1302..fa6a078bf547 100644 --- a/drivers/leds/leds-lp55xx-common.h +++ b/drivers/leds/leds-lp55xx-common.h @@ -83,6 +83,7 @@ struct lp55xx_device_config { */ struct lp55xx_chip { struct i2c_client *cl; + struct clk *clk; struct lp55xx_platform_data *pdata; struct mutex lock; /* lock for user-space interface */ int num_leds; @@ -117,6 +118,9 @@ extern int lp55xx_read(struct lp55xx_chip *chip, u8 reg, u8 *val); extern int lp55xx_update_bits(struct lp55xx_chip *chip, u8 reg, u8 mask, u8 val); +/* external clock detection */ +extern bool lp55xx_is_extclk_used(struct lp55xx_chip *chip); + /* common device init/deinit functions */ extern int lp55xx_init_device(struct lp55xx_chip *chip); extern void lp55xx_deinit_device(struct lp55xx_chip *chip); -- GitLab From 81f2a5b4a0570a662efd629c176fc1d67e56f7e3 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Wed, 20 Mar 2013 17:37:04 -0700 Subject: [PATCH 0283/3163] leds: lp55xx: configure the clock detection Now LP55xx provides automatic clock detection API, lp55xx_is_extclk_used(). The clock configuration can be done by the driver itself. 
(a) Concept The default value is set by each driver with its clock selection. The internal clock selection bit is updated in case the external clock is not detected or the clock rate is not 32KHz. (b) Change on the LP55xx platform data The clock configuration is done automatically, so there is no need to define 'update_config' on the platform side. The related information is removed from the documentation and the header. (c) Definitions moved from the header to the driver files CONFIG register values are moved into each driver, LP5521 and LP5562. Unnecessary definitions are also removed. Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Bryan Wu --- Documentation/leds/leds-lp5521.txt | 19 ------------------- Documentation/leds/leds-lp5562.txt | 15 --------------- drivers/leds/leds-lp5521.c | 19 +++++++++++++++++-- drivers/leds/leds-lp5562.c | 14 ++++++++++---- include/linux/platform_data/leds-lp55xx.h | 22 ---------------------- 5 files changed, 27 insertions(+), 62 deletions(-) diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index 270f57196339..79e4c2e6e5e8 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -81,22 +81,3 @@ static struct lp55xx_platform_data lp5521_platform_data = { If the current is set to 0 in the platform data, that channel is disabled and it is not visible in the sysfs. - -The 'update_config' : CONFIG register (ADDR 08h) -This value is platform-specific data. -If update_config is not defined, the CONFIG register is set with -'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'. -(Enable auto-powersave, set charge pump to auto, red to battery) - -example of update_config : - -#define LP5521_CONFIGS (LP5521_PWM_HF | LP5521_PWRSAVE_EN | \ - LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT | \ - LP5521_CLK_INT) - -static struct lp55xx_platform_data lp5521_pdata = { - .led_config = lp5521_led_config, - .num_channels = ARRAY_SIZE(lp5521_led_config), - .clock_mode = LP55XX_CLOCK_INT, - .update_config = LP5521_CONFIGS, -}; diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt index 96061000dd93..5a823ff6b393 100644 --- a/Documentation/leds/leds-lp5562.txt +++ b/Documentation/leds/leds-lp5562.txt @@ -118,18 +118,3 @@ static struct lp55xx_platform_data lp5562_platform_data = { If the current is set to 0 in the platform data, that channel is disabled and it is not visible in the sysfs. - -The 'update_config' : CONFIG register (ADDR 08h) -This value is platform-specific data. -If update_config is not defined, the CONFIG register is set with -'LP5562_PWRSAVE_EN | LP5562_CLK_AUTO'.
-(Enable auto-powersave, set automatic clock source selection) - -#define LP5562_CONFIGS (LP5562_PWM_HF | LP5562_PWRSAVE_EN | \ - LP5562_CLK_SRC_EXT) - -static struct lp55xx_platform_data lp5562_pdata = { - .led_config = lp5562_led_config, - .num_channels = ARRAY_SIZE(lp5562_led_config), - .update_config = LP5562_CONFIGS, -}; diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 7f10304219ea..19752c928aa2 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -68,6 +68,18 @@ #define LP5521_ENABLE_RUN_PROGRAM \ (LP5521_ENABLE_DEFAULT | LP5521_EXEC_RUN) +/* CONFIG register */ +#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ +#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ +#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ +#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ +#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ +#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ +#define LP5521_R_TO_BATT 0x04 /* R out: 0 = CP, 1 = Vbat */ +#define LP5521_CLK_INT 0x01 /* Internal clock */ +#define LP5521_DEFAULT_CFG \ + (LP5521_PWM_HF | LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO) + /* Status */ #define LP5521_EXT_CLK_USED 0x08 @@ -296,8 +308,11 @@ static int lp5521_post_init_device(struct lp55xx_chip *chip) /* Set all PWMs to direct control mode */ ret = lp55xx_write(chip, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT); - val = chip->pdata->update_config ? - : (LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT); + /* Update configuration for the clock setting */ + val = LP5521_DEFAULT_CFG; + if (!lp55xx_is_extclk_used(chip)) + val |= LP5521_CLK_INT; + ret = lp55xx_write(chip, LP5521_REG_CONFIG, val); if (ret) return ret; diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c index f8b927788c3a..513f2390ca2d 100644 --- a/drivers/leds/leds-lp5562.c +++ b/drivers/leds/leds-lp5562.c @@ -71,8 +71,10 @@ /* CONFIG Register 08h */ #define LP5562_REG_CONFIG 0x08 -#define LP5562_DEFAULT_CFG \ - (LP5562_PWM_HF | LP5562_PWRSAVE_EN | LP5562_CLK_INT) +#define LP5562_PWM_HF 0x40 +#define LP5562_PWRSAVE_EN 0x20 +#define LP5562_CLK_INT 0x01 /* Internal clock */ +#define LP5562_DEFAULT_CFG (LP5562_PWM_HF | LP5562_PWRSAVE_EN) /* RESET Register 0Dh */ #define LP5562_REG_RESET 0x0D @@ -280,7 +282,7 @@ static void lp5562_firmware_loaded(struct lp55xx_chip *chip) static int lp5562_post_init_device(struct lp55xx_chip *chip) { int ret; - u8 update_cfg = chip->pdata->update_config ? : LP5562_DEFAULT_CFG; + u8 cfg = LP5562_DEFAULT_CFG; /* Set all PWMs to direct control mode */ ret = lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); @@ -289,7 +291,11 @@ static int lp5562_post_init_device(struct lp55xx_chip *chip) lp5562_wait_opmode_done(); - ret = lp55xx_write(chip, LP5562_REG_CONFIG, update_cfg); + /* Update configuration for the clock setting */ + if (!lp55xx_is_extclk_used(chip)) + cfg |= LP5562_CLK_INT; + + ret = lp55xx_write(chip, LP5562_REG_CONFIG, cfg); if (ret) return ret; diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h index 1f1041e8b4fc..202e290faea8 100644 --- a/include/linux/platform_data/leds-lp55xx.h +++ b/include/linux/platform_data/leds-lp55xx.h @@ -20,25 +20,6 @@ #define LP55XX_CLOCK_INT 1 #define LP55XX_CLOCK_EXT 2 -/* Bits in LP5521 CONFIG register. 
'update_config' in lp55xx_platform_data */ -#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ -#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ -#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ -#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ -#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ -#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ -#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ -#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ -#define LP5521_CLK_INT 1 /* Internal clock */ -#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ - -/* Bits in LP5562 CONFIG register */ -#define LP5562_PWM_HF LP5521_PWM_HF -#define LP5562_PWRSAVE_EN LP5521_PWRSAVE_EN -#define LP5562_CLK_SRC_EXT LP5521_CLK_SRC_EXT -#define LP5562_CLK_INT LP5521_CLK_INT -#define LP5562_CLK_AUTO LP5521_CLK_AUTO - struct lp55xx_led_config { const char *name; u8 chan_nr; @@ -86,9 +67,6 @@ struct lp55xx_platform_data { /* Predefined pattern data */ struct lp55xx_predef_pattern *patterns; unsigned int num_patterns; - - /* _CONFIG register */ - u8 update_config; }; #endif /* _LEDS_LP55XX_H */ -- GitLab From cceba0e4b8f41a495dc7a5ab2795dcab0ddb64c1 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 21 Mar 2013 19:09:01 -0700 Subject: [PATCH 0284/3163] leds: leds-bd2802: add CONFIG_PM_SLEEP to suspend/resume functions Add CONFIG_PM_SLEEP to suspend/resume functions to fix the following build warning when CONFIG_PM_SLEEP is not selected. This is because sleep PM callbacks defined by SIMPLE_DEV_PM_OPS are only used when the CONFIG_PM_SLEEP is enabled. drivers/leds/leds-bd2802.c:766:12: warning: 'bd2802_suspend' defined but not used [-Wunused-function] drivers/leds/leds-bd2802.c:776:12: warning: 'bd2802_resume' defined but not used [-Wunused-function] Signed-off-by: Jingoo Han Signed-off-by: Bryan Wu --- drivers/leds/leds-bd2802.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c index 9f9177d29ed7..2db04231a792 100644 --- a/drivers/leds/leds-bd2802.c +++ b/drivers/leds/leds-bd2802.c @@ -747,8 +747,7 @@ static int bd2802_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM - +#ifdef CONFIG_PM_SLEEP static void bd2802_restore_state(struct bd2802_led *led) { int i; @@ -785,12 +784,9 @@ static int bd2802_resume(struct device *dev) return 0; } +#endif static SIMPLE_DEV_PM_OPS(bd2802_pm, bd2802_suspend, bd2802_resume); -#define BD2802_PM (&bd2802_pm) -#else /* CONFIG_PM */ -#define BD2802_PM NULL -#endif static const struct i2c_device_id bd2802_id[] = { { "BD2802", 0 }, @@ -801,7 +797,7 @@ MODULE_DEVICE_TABLE(i2c, bd2802_id); static struct i2c_driver bd2802_i2c_driver = { .driver = { .name = "BD2802", - .pm = BD2802_PM, + .pm = &bd2802_pm, }, .probe = bd2802_probe, .remove = bd2802_remove, -- GitLab From df92d5ff5e70999274f53884cc2c40ae620a109a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 25 Mar 2013 23:47:19 -0700 Subject: [PATCH 0285/3163] leds: leds-asic3: switch to using SIMPLE_DEV_PM_OPS Switch to using SIMPLE_DEV_PM_OPS macro to declare the driver's pm_ops. It reduces code size. Also, CONFIG_PM_SLEEP is added to suspend/ resume functions to prevent build warnings when CONFIG_PM_SLEEP is not selected. 
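The resulting pattern, sketched for a generic driver (assuming the standard SIMPLE_DEV_PM_OPS definition from include/linux/pm.h, which references the callbacks only when CONFIG_PM_SLEEP is set):

  #ifdef CONFIG_PM_SLEEP
  static int foo_suspend(struct device *dev) { /* ... */ return 0; }
  static int foo_resume(struct device *dev) { /* ... */ return 0; }
  #endif

  /* expands to a fully initialized struct dev_pm_ops; the #ifdef
   * above matches it, avoiding "defined but not used" warnings */
  static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);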
Signed-off-by: Jingoo Han Signed-off-by: Bryan Wu --- drivers/leds/leds-asic3.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/leds/leds-asic3.c b/drivers/leds/leds-asic3.c index b474745e001b..cf9efe421c2b 100644 --- a/drivers/leds/leds-asic3.c +++ b/drivers/leds/leds-asic3.c @@ -134,6 +134,7 @@ static int asic3_led_remove(struct platform_device *pdev) return mfd_cell_disable(pdev); } +#ifdef CONFIG_PM_SLEEP static int asic3_led_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -159,11 +160,9 @@ static int asic3_led_resume(struct device *dev) return ret; } +#endif -static const struct dev_pm_ops asic3_led_pm_ops = { - .suspend = asic3_led_suspend, - .resume = asic3_led_resume, -}; +static SIMPLE_DEV_PM_OPS(asic3_led_pm_ops, asic3_led_suspend, asic3_led_resume); static struct platform_driver asic3_led_driver = { .probe = asic3_led_probe, -- GitLab From eccf0607e450f5c6ca2af5d826d9308e8cdb6848 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 10:34:24 +0900 Subject: [PATCH 0286/3163] ARM: shmobile: Initial r8a73a4 SoC support V3 V3 of initial support for the r8a73a4 SoC including: - Single Cortex-A15 CPU Core - GIC - Architecture timer No static virtual mappings are used, all the components make use of ioremap(). DT_MACHINE_START is still wrapped in CONFIG_USE_OF to match other mach-shmobile code. Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a73a4.dtsi | 55 +++++++++++ arch/arm/mach-shmobile/Kconfig | 7 ++ arch/arm/mach-shmobile/Makefile | 1 + arch/arm/mach-shmobile/clock-r8a73a4.c | 91 +++++++++++++++++++ arch/arm/mach-shmobile/include/mach/r8a73a4.h | 7 ++ arch/arm/mach-shmobile/setup-r8a73a4.c | 50 ++++++++++ 6 files changed, 211 insertions(+) create mode 100644 arch/arm/boot/dts/r8a73a4.dtsi create mode 100644 arch/arm/mach-shmobile/clock-r8a73a4.c create mode 100644 arch/arm/mach-shmobile/include/mach/r8a73a4.h create mode 100644 arch/arm/mach-shmobile/setup-r8a73a4.c diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi new file mode 100644 index 000000000000..72c58c172e9d --- /dev/null +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -0,0 +1,55 @@ +/* + * Device Tree Source for the r8a73a4 SoC + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Magnus Damm + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +/include/ "skeleton.dtsi" + +/ { + compatible = "renesas,r8a73a4"; + interrupt-parent = <&gic>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + clock-frequency = <1500000000>; + }; + }; + + gic: interrupt-controller@f1001000 { + compatible = "arm,cortex-a15-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0xf1001000 0x1000>, + <0xf1002000 0x1000>, + <0xf1004000 0x2000>, + <0xf1006000 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu0>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; +}; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 75d413c004b6..663d27b39880 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -18,6 +18,13 @@ config ARCH_SH73A0 select SH_CLK_CPG select RENESAS_INTC_IRQPIN +config ARCH_R8A73A4 + bool "R-Mobile APE6 (R8A73A40)" + select ARM_GIC + select CPU_V7 + select ARM_ARCH_TIMER + select SH_CLK_CPG + config ARCH_R8A7740 bool "R-Mobile A1 (R8A77400)" select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index b646ff4d742a..c5a43ef7cebf 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -8,6 +8,7 @@ obj-y := timer.o console.o clock.o # CPU objects obj-$(CONFIG_ARCH_SH7372) += setup-sh7372.o clock-sh7372.o intc-sh7372.o obj-$(CONFIG_ARCH_SH73A0) += setup-sh73a0.o clock-sh73a0.o intc-sh73a0.o +obj-$(CONFIG_ARCH_R8A73A4) += setup-r8a73a4.o clock-r8a73a4.o obj-$(CONFIG_ARCH_R8A7740) += setup-r8a7740.o clock-r8a7740.o intc-r8a7740.o obj-$(CONFIG_ARCH_R8A7779) += setup-r8a7779.o clock-r8a7779.o intc-r8a7779.o obj-$(CONFIG_ARCH_EMEV2) += setup-emev2.o clock-emev2.o diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c new file mode 100644 index 000000000000..15d479dbb132 --- /dev/null +++ b/arch/arm/mach-shmobile/clock-r8a73a4.c @@ -0,0 +1,91 @@ +/* + * r8a73a4 clock framework support + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include + +#define CPG_BASE 0xe6150000 +#define CPG_LEN 0x270 + +#define MPCKCR 0xe6150080 + +static struct clk_mapping cpg_mapping = { + .phys = CPG_BASE, + .len = CPG_LEN, +}; + +static struct clk extalr_clk = { + .rate = 32768, + .mapping = &cpg_mapping, +}; + +static struct clk extal1_clk = { + .rate = 26000000, + .mapping = &cpg_mapping, +}; + +static struct clk extal2_clk = { + .rate = 48000000, + .mapping = &cpg_mapping, +}; + +static struct clk *main_clks[] = { + &extalr_clk, + &extal1_clk, + &extal2_clk, +}; + +enum { MSTP_NR }; +static struct clk mstp_clks[MSTP_NR] = { +}; + +static struct clk_lookup lookups[] = { +}; + +void __init r8a73a4_clock_init(void) +{ + void __iomem *cpg_base, *reg; + int k, ret = 0; + + /* fix MPCLK to EXTAL2 for now. + * this is needed until more detailed clock topology is supported + */ + cpg_base = ioremap_nocache(CPG_BASE, CPG_LEN); + BUG_ON(!cpg_base); + reg = cpg_base + (MPCKCR - CPG_BASE); + iowrite32(ioread32(reg) | 1 << 7 | 0x0c, reg); /* set CKSEL */ + iounmap(cpg_base); + + for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++) + ret = clk_register(main_clks[k]); + + if (!ret) + ret = sh_clk_mstp_register(mstp_clks, MSTP_NR); + + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); + + if (!ret) + shmobile_clk_init(); + else + panic("failed to setup r8a73a4 clocks\n"); +} diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h new file mode 100644 index 000000000000..6db3495479d8 --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h @@ -0,0 +1,7 @@ +#ifndef __ASM_R8A73A4_H__ +#define __ASM_R8A73A4_H__ + +void r8a73a4_add_standard_devices(void); +void r8a73a4_clock_init(void); + +#endif /* __ASM_R8A73A4_H__ */ diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c new file mode 100644 index 000000000000..69156bce76f7 --- /dev/null +++ b/arch/arm/mach-shmobile/setup-r8a73a4.c @@ -0,0 +1,50 @@ +/* + * r8a73a4 processor support + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include +#include +#include + +void __init r8a73a4_add_standard_devices(void) +{ +} + +#ifdef CONFIG_USE_OF +void __init r8a73a4_add_standard_devices_dt(void) +{ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); +} + +static const char *r8a73a4_boards_compat_dt[] __initdata = { + "renesas,r8a73a4", + NULL, +}; + +DT_MACHINE_START(R8A73A4_DT, "Generic R8A73A4 (Flattened Device Tree)") + .init_irq = irqchip_init, + .init_machine = r8a73a4_add_standard_devices_dt, + .init_time = shmobile_timer_init, + .dt_compat = r8a73a4_boards_compat_dt, +MACHINE_END +#endif /* CONFIG_USE_OF */ -- GitLab From e481a528901d0cd18b5b5fcbdc55207ea3b6ef68 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 10:34:33 +0900 Subject: [PATCH 0287/3163] ARM: shmobile: r8a73a4 SCIF support V3 V3 of SCIF serial port support for the r8a73a4 SoC. This is done by adding platform devices for SCIFA0 -> SCIFA1 as well as SCIFB0 -> SCIFB3 together with clock bindings. DT device description is excluded at this point since such bindings are still under development. Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a73a4.c | 15 ++++++++- arch/arm/mach-shmobile/setup-r8a73a4.c | 43 ++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c index 15d479dbb132..037713bdff3f 100644 --- a/arch/arm/mach-shmobile/clock-r8a73a4.c +++ b/arch/arm/mach-shmobile/clock-r8a73a4.c @@ -28,6 +28,7 @@ #define CPG_LEN 0x270 #define MPCKCR 0xe6150080 +#define SMSTPCR2 0xe6150138 static struct clk_mapping cpg_mapping = { .phys = CPG_BASE, @@ -55,11 +56,23 @@ static struct clk *main_clks[] = { &extal2_clk, }; -enum { MSTP_NR }; +enum { MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP_NR }; static struct clk mstp_clks[MSTP_NR] = { + [MSTP204] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 4, 0), /* SCIFA0 */ + [MSTP203] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 3, 0), /* SCIFA1 */ + [MSTP206] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 6, 0), /* SCIFB0 */ + [MSTP207] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 7, 0), /* SCIFB1 */ + [MSTP216] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 16, 0), /* SCIFB2 */ + [MSTP217] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 17, 0), /* SCIFB3 */ }; static struct clk_lookup lookups[] = { + CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), + CLKDEV_DEV_ID("sh-sci.1", &mstp_clks[MSTP203]), + CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP206]), + CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP207]), + CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP216]), + CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP217]), }; void __init r8a73a4_clock_init(void) diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c index 69156bce76f7..746a3dc4474d 100644 --- a/arch/arm/mach-shmobile/setup-r8a73a4.c +++ b/arch/arm/mach-shmobile/setup-r8a73a4.c @@ -21,13 +21,56 @@ #include #include #include +#include #include #include #include #include +#define SCIF_COMMON(scif_type, baseaddr, irq) \ + .type = scif_type, \ + .mapbase = baseaddr, \ + .flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP, \ + .scbrr_algo_id = SCBRR_ALGO_4, \ + .irqs = SCIx_IRQ_MUXED(irq) + +#define SCIFA_DATA(index, baseaddr, irq) \ +[index] = { 
\ + SCIF_COMMON(PORT_SCIFA, baseaddr, irq), \ + .scscr = SCSCR_RE | SCSCR_TE | SCSCR_CKE0, \ +} + +#define SCIFB_DATA(index, baseaddr, irq) \ +[index] = { \ + SCIF_COMMON(PORT_SCIFB, baseaddr, irq), \ + .scscr = SCSCR_RE | SCSCR_TE, \ +} + +enum { SCIFA0, SCIFA1, SCIFB0, SCIFB1, SCIFB2, SCIFB3 }; + +static const struct plat_sci_port scif[] = { + SCIFA_DATA(SCIFA0, 0xe6c40000, gic_spi(144)), /* SCIFA0 */ + SCIFA_DATA(SCIFA1, 0xe6c50000, gic_spi(145)), /* SCIFA1 */ + SCIFB_DATA(SCIFB0, 0xe6c50000, gic_spi(145)), /* SCIFB0 */ + SCIFB_DATA(SCIFB1, 0xe6c30000, gic_spi(149)), /* SCIFB1 */ + SCIFB_DATA(SCIFB2, 0xe6ce0000, gic_spi(150)), /* SCIFB2 */ + SCIFB_DATA(SCIFB3, 0xe6cf0000, gic_spi(151)), /* SCIFB3 */ +}; + +static inline void r8a73a4_register_scif(int idx) +{ + platform_device_register_data(&platform_bus, "sh-sci", idx, &scif[idx], + sizeof(struct plat_sci_port)); +} + void __init r8a73a4_add_standard_devices(void) { + r8a73a4_register_scif(SCIFA0); + r8a73a4_register_scif(SCIFA1); + r8a73a4_register_scif(SCIFB0); + r8a73a4_register_scif(SCIFB1); + r8a73a4_register_scif(SCIFB2); + r8a73a4_register_scif(SCIFB3); } #ifdef CONFIG_USE_OF -- GitLab From 984ca295010ad0113b986a404931566f9b1791d4 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 10:34:42 +0900 Subject: [PATCH 0288/3163] ARM: shmobile: r8a73a4 IRQC support V2 Add IRQC interrupt controller support to r8a73a4 by hooking up two IRQC instances to handle 58 external IRQ signals. These IRQC controllers are tied to SPIs of the GIC. On r8a73a4, the exact IRQ pin routing is handled by the PFC, which is excluded from this patch. Both platform devices and DT devices are added in this patch. The platform device versions are used to provide a static interrupt map configuration for board code written in C.
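A hypothetical board-code fragment showing what the static map buys (the device and its addresses are invented; irq_pin() and DEFINE_RES_IRQ() are used exactly as in the patch below):

  /* external IRQ9 is always irq_pin(9), no matter which GIC SPI
   * the IRQC instance routes it to */
  static struct resource my_eth_resources[] = {
          DEFINE_RES_MEM(0x08000000, 0x1000),	/* hypothetical */
          DEFINE_RES_IRQ(irq_pin(9)),
  };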
Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a73a4.dtsi | 32 ++++++++++ arch/arm/mach-shmobile/Kconfig | 1 + arch/arm/mach-shmobile/setup-r8a73a4.c | 84 ++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index 72c58c172e9d..4c68ba15727c 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -52,4 +52,36 @@ <1 11 0xf08>, <1 10 0xf08>; }; + + irqc0: interrupt-controller@e61c0000 { + compatible = "renesas,irqc"; + #interrupt-cells = <2>; + interrupt-controller; + reg = <0xe61c0000 0x200>; + interrupt-parent = <&gic>; + interrupts = <0 0 4>, <0 1 4>, <0 2 4>, <0 3 4>, + <0 4 4>, <0 5 4>, <0 6 4>, <0 7 4>, + <0 8 4>, <0 9 4>, <0 10 4>, <0 11 4>, + <0 12 4>, <0 13 4>, <0 14 4>, <0 15 4>, + <0 16 4>, <0 17 4>, <0 18 4>, <0 19 4>, + <0 20 4>, <0 21 4>, <0 22 4>, <0 23 4>, + <0 24 4>, <0 25 4>, <0 26 4>, <0 27 4>, + <0 28 4>, <0 29 4>, <0 30 4>, <0 31 4>; + }; + + irqc1: interrupt-controller@e61c0200 { + compatible = "renesas,irqc"; + #interrupt-cells = <2>; + interrupt-controller; + reg = <0xe61c0200 0x200>; + interrupt-parent = <&gic>; + interrupts = <0 32 4>, <0 33 4>, <0 34 4>, <0 35 4>, + <0 36 4>, <0 37 4>, <0 38 4>, <0 39 4>, + <0 40 4>, <0 41 4>, <0 42 4>, <0 43 4>, + <0 44 4>, <0 45 4>, <0 46 4>, <0 47 4>, + <0 48 4>, <0 49 4>, <0 50 4>, <0 51 4>, + <0 52 4>, <0 53 4>, <0 54 4>, <0 55 4>, + <0 56 4>, <0 57 4>; + }; + }; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 663d27b39880..17a59cde826e 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -24,6 +24,7 @@ config ARCH_R8A73A4 select CPU_V7 select ARM_ARCH_TIMER select SH_CLK_CPG + select RENESAS_IRQC config ARCH_R8A7740 bool "R-Mobile A1 (R8A77400)" diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c index 746a3dc4474d..da5ae1611518 100644 --- a/arch/arm/mach-shmobile/setup-r8a73a4.c +++ b/arch/arm/mach-shmobile/setup-r8a73a4.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +64,87 @@ static inline void r8a73a4_register_scif(int idx) sizeof(struct plat_sci_port)); } +static const struct renesas_irqc_config irqc0_data = { + .irq_base = irq_pin(0), /* IRQ0 -> IRQ31 */ +}; + +static const struct resource irqc0_resources[] = { + DEFINE_RES_MEM(0xe61c0000, 0x200), /* IRQC Event Detector Block_0 */ + DEFINE_RES_IRQ(gic_spi(0)), /* IRQ0 */ + DEFINE_RES_IRQ(gic_spi(1)), /* IRQ1 */ + DEFINE_RES_IRQ(gic_spi(2)), /* IRQ2 */ + DEFINE_RES_IRQ(gic_spi(3)), /* IRQ3 */ + DEFINE_RES_IRQ(gic_spi(4)), /* IRQ4 */ + DEFINE_RES_IRQ(gic_spi(5)), /* IRQ5 */ + DEFINE_RES_IRQ(gic_spi(6)), /* IRQ6 */ + DEFINE_RES_IRQ(gic_spi(7)), /* IRQ7 */ + DEFINE_RES_IRQ(gic_spi(8)), /* IRQ8 */ + DEFINE_RES_IRQ(gic_spi(9)), /* IRQ9 */ + DEFINE_RES_IRQ(gic_spi(10)), /* IRQ10 */ + DEFINE_RES_IRQ(gic_spi(11)), /* IRQ11 */ + DEFINE_RES_IRQ(gic_spi(12)), /* IRQ12 */ + DEFINE_RES_IRQ(gic_spi(13)), /* IRQ13 */ + DEFINE_RES_IRQ(gic_spi(14)), /* IRQ14 */ + DEFINE_RES_IRQ(gic_spi(15)), /* IRQ15 */ + DEFINE_RES_IRQ(gic_spi(16)), /* IRQ16 */ + DEFINE_RES_IRQ(gic_spi(17)), /* IRQ17 */ + DEFINE_RES_IRQ(gic_spi(18)), /* IRQ18 */ + DEFINE_RES_IRQ(gic_spi(19)), /* IRQ19 */ + DEFINE_RES_IRQ(gic_spi(20)), /* IRQ20 */ + DEFINE_RES_IRQ(gic_spi(21)), /* IRQ21 */ + DEFINE_RES_IRQ(gic_spi(22)), /* IRQ22 */ + DEFINE_RES_IRQ(gic_spi(23)), /* IRQ23 */ + DEFINE_RES_IRQ(gic_spi(24)), /* IRQ24 */ + 
DEFINE_RES_IRQ(gic_spi(25)), /* IRQ25 */ + DEFINE_RES_IRQ(gic_spi(26)), /* IRQ26 */ + DEFINE_RES_IRQ(gic_spi(27)), /* IRQ27 */ + DEFINE_RES_IRQ(gic_spi(28)), /* IRQ28 */ + DEFINE_RES_IRQ(gic_spi(29)), /* IRQ29 */ + DEFINE_RES_IRQ(gic_spi(30)), /* IRQ30 */ + DEFINE_RES_IRQ(gic_spi(31)), /* IRQ31 */ +}; + +static const struct renesas_irqc_config irqc1_data = { + .irq_base = irq_pin(32), /* IRQ32 -> IRQ57 */ +}; + +static const struct resource irqc1_resources[] = { + DEFINE_RES_MEM(0xe61c0200, 0x200), /* IRQC Event Detector Block_1 */ + DEFINE_RES_IRQ(gic_spi(32)), /* IRQ32 */ + DEFINE_RES_IRQ(gic_spi(33)), /* IRQ33 */ + DEFINE_RES_IRQ(gic_spi(34)), /* IRQ34 */ + DEFINE_RES_IRQ(gic_spi(35)), /* IRQ35 */ + DEFINE_RES_IRQ(gic_spi(36)), /* IRQ36 */ + DEFINE_RES_IRQ(gic_spi(37)), /* IRQ37 */ + DEFINE_RES_IRQ(gic_spi(38)), /* IRQ38 */ + DEFINE_RES_IRQ(gic_spi(39)), /* IRQ39 */ + DEFINE_RES_IRQ(gic_spi(40)), /* IRQ40 */ + DEFINE_RES_IRQ(gic_spi(41)), /* IRQ41 */ + DEFINE_RES_IRQ(gic_spi(42)), /* IRQ42 */ + DEFINE_RES_IRQ(gic_spi(43)), /* IRQ43 */ + DEFINE_RES_IRQ(gic_spi(44)), /* IRQ44 */ + DEFINE_RES_IRQ(gic_spi(45)), /* IRQ45 */ + DEFINE_RES_IRQ(gic_spi(46)), /* IRQ46 */ + DEFINE_RES_IRQ(gic_spi(47)), /* IRQ47 */ + DEFINE_RES_IRQ(gic_spi(48)), /* IRQ48 */ + DEFINE_RES_IRQ(gic_spi(49)), /* IRQ49 */ + DEFINE_RES_IRQ(gic_spi(50)), /* IRQ50 */ + DEFINE_RES_IRQ(gic_spi(51)), /* IRQ51 */ + DEFINE_RES_IRQ(gic_spi(52)), /* IRQ52 */ + DEFINE_RES_IRQ(gic_spi(53)), /* IRQ53 */ + DEFINE_RES_IRQ(gic_spi(54)), /* IRQ54 */ + DEFINE_RES_IRQ(gic_spi(55)), /* IRQ55 */ + DEFINE_RES_IRQ(gic_spi(56)), /* IRQ56 */ + DEFINE_RES_IRQ(gic_spi(57)), /* IRQ57 */ +}; + +#define r8a73a4_register_irqc(idx) \ + platform_device_register_resndata(&platform_bus, "renesas_irqc", \ + idx, irqc##idx##_resources, \ + ARRAY_SIZE(irqc##idx##_resources), \ + &irqc##idx##_data, \ + sizeof(struct renesas_irqc_config)) + void __init r8a73a4_add_standard_devices(void) { r8a73a4_register_scif(SCIFA0); @@ -71,6 +153,8 @@ void __init r8a73a4_add_standard_devices(void) r8a73a4_register_scif(SCIFB1); r8a73a4_register_scif(SCIFB2); r8a73a4_register_scif(SCIFB3); + r8a73a4_register_irqc(0); + r8a73a4_register_irqc(1); } #ifdef CONFIG_USE_OF -- GitLab From d313d068d4b5801ea9c0c66bed66f37c64ad6807 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 10:34:52 +0900 Subject: [PATCH 0289/3163] ARM: shmobile: r8a73a4 PFC support Add a platform device for the r8a73a4 PFC. 
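A usage sketch (the board file is hypothetical; both functions are provided by this series) of how a board would pull the PFC in ahead of its pin consumers:

	/* sketch: register the PFC first so "pfc-r8a73a4" can probe before
	 * any device that depends on pin functions is added
	 */
	static void __init example_board_init(void)
	{
		r8a73a4_pinmux_init();
		r8a73a4_add_standard_devices();
		/* board-specific devices would follow here */
	}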
Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/Kconfig | 1 + arch/arm/mach-shmobile/include/mach/r8a73a4.h | 1 + arch/arm/mach-shmobile/setup-r8a73a4.c | 10 ++++++++++ 3 files changed, 12 insertions(+) diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 17a59cde826e..0e4a820bcbe8 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -20,6 +20,7 @@ config ARCH_SH73A0 config ARCH_R8A73A4 bool "R-Mobile APE6 (R8A73A40)" + select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 select ARM_ARCH_TIMER diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h index 6db3495479d8..f043103e32c9 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a73a4.h +++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h @@ -3,5 +3,6 @@ void r8a73a4_add_standard_devices(void); void r8a73a4_clock_init(void); +void r8a73a4_pinmux_init(void); #endif /* __ASM_R8A73A4_H__ */ diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c index da5ae1611518..c2d86f30cde4 100644 --- a/arch/arm/mach-shmobile/setup-r8a73a4.c +++ b/arch/arm/mach-shmobile/setup-r8a73a4.c @@ -28,6 +28,16 @@ #include #include +static const struct resource pfc_resources[] = { + DEFINE_RES_MEM(0xe6050000, 0x9000), +}; + +void __init r8a73a4_pinmux_init(void) +{ + platform_device_register_simple("pfc-r8a73a4", -1, pfc_resources, + ARRAY_SIZE(pfc_resources)); +} + #define SCIF_COMMON(scif_type, baseaddr, irq) \ .type = scif_type, \ .mapbase = baseaddr, \ -- GitLab From 7653c318b73d8553d4c13bb7e371878ddc19f80d Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 28 Feb 2013 13:21:58 +0100 Subject: [PATCH 0290/3163] ARM: shmobile: sh73a0: wait for completion when kicking the clock To reconfigure the clocks controlled by FRQCRA and FRQCRB, a kick bit has to be set; to make sure the setting has taken effect, the bit has to be read back repeatedly until it is cleared by the hardware. This patch adds the waiting part that was missing until now. Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm --- arch/arm/mach-shmobile/clock-sh73a0.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 71843dd39e16..34b5c5ae4cbd 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #define FRQCRA IOMEM(0xe6150000) @@ -234,14 +235,24 @@ static struct clk *main_clks[] = { &sh73a0_extalr_clk, }; -static void div4_kick(struct clk *clk) +static int frqcr_kick(void) { - unsigned long value; + int i; + + /* set KICK bit in FRQCRB to update hardware setting, check success */ + __raw_writel(__raw_readl(FRQCRB) | (1 << 31), FRQCRB); + for (i = 1000; i; i--) + if (__raw_readl(FRQCRB) & (1 << 31)) + cpu_relax(); + else + return i; + + return -ETIMEDOUT; +} - /* set KICK bit in FRQCRB to update hardware setting */ - value = __raw_readl(FRQCRB); - value |= (1 << 31); - __raw_writel(value, FRQCRB); +static void div4_kick(struct clk *clk) +{ + frqcr_kick(); } static int divisors[] = { 2, 3, 4, 6, 8, 12, 16, 18, -- GitLab From ccb7cc749f78166178184f77dd95ea24db9d5bb0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 21 Mar 2013 03:01:36 -0700 Subject: [PATCH 0291/3163] ARM: shmobile: add R8A7778 basis support Add initial support for the R8A7778 R-Car M1A SoC.
No static virtual mappings are used; all the components make use of ioremap(). DT_MACHINE_START is still wrapped in CONFIG_USE_OF to match other mach-shmobile code. It is based on the v1.0 datasheet. Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7778.dtsi | 35 ++++ arch/arm/mach-shmobile/Kconfig | 6 + arch/arm/mach-shmobile/Makefile | 1 + arch/arm/mach-shmobile/clock-r8a7778.c | 91 ++++++++++ arch/arm/mach-shmobile/include/mach/r8a7778.h | 28 +++ arch/arm/mach-shmobile/setup-r8a7778.c | 167 ++++++++++++++++++ 6 files changed, 328 insertions(+) create mode 100644 arch/arm/boot/dts/r8a7778.dtsi create mode 100644 arch/arm/mach-shmobile/clock-r8a7778.c create mode 100644 arch/arm/mach-shmobile/include/mach/r8a7778.h create mode 100644 arch/arm/mach-shmobile/setup-r8a7778.c diff --git a/arch/arm/boot/dts/r8a7778.dtsi b/arch/arm/boot/dts/r8a7778.dtsi new file mode 100644 index 000000000000..474373559bdc --- /dev/null +++ b/arch/arm/boot/dts/r8a7778.dtsi @@ -0,0 +1,35 @@ +/* + * Device Tree Source for Renesas r8a7778 + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Kuninori Morimoto + * + * based on r8a7779 + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Simon Horman + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +/include/ "skeleton.dtsi" + +/ { + compatible = "renesas,r8a7778"; + + cpus { + cpu@0 { + compatible = "arm,cortex-a9"; + }; + }; + + gic: interrupt-controller@fe438000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + interrupt-controller; + reg = <0xfe438000 0x1000>, + <0xfe430000 0x100>; + }; +}; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 0e4a820bcbe8..49cba4a511df 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -33,6 +33,12 @@ config ARCH_R8A7740 select CPU_V7 select SH_CLK_CPG +config ARCH_R8A7778 + bool "R-Car M1A (R8A77781)" + select CPU_V7 + select SH_CLK_CPG + select ARM_GIC + config ARCH_R8A7779 bool "R-Car H1 (R8A77790)" select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index c5a43ef7cebf..2d42de46db8d 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_ARCH_SH7372) += setup-sh7372.o clock-sh7372.o intc-sh7372.o obj-$(CONFIG_ARCH_SH73A0) += setup-sh73a0.o clock-sh73a0.o intc-sh73a0.o obj-$(CONFIG_ARCH_R8A73A4) += setup-r8a73a4.o clock-r8a73a4.o obj-$(CONFIG_ARCH_R8A7740) += setup-r8a7740.o clock-r8a7740.o intc-r8a7740.o +obj-$(CONFIG_ARCH_R8A7778) += setup-r8a7778.o clock-r8a7778.o obj-$(CONFIG_ARCH_R8A7779) += setup-r8a7779.o clock-r8a7779.o intc-r8a7779.o obj-$(CONFIG_ARCH_EMEV2) += setup-emev2.o clock-emev2.o diff --git a/arch/arm/mach-shmobile/clock-r8a7778.c b/arch/arm/mach-shmobile/clock-r8a7778.c new file mode 100644 index 000000000000..387e3b74cc8c --- /dev/null +++ b/arch/arm/mach-shmobile/clock-r8a7778.c @@ -0,0 +1,91 @@ +/* + * r8a7778 clock framework support + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Kuninori Morimoto + * + * based on r8a7779 + * + * Copyright (C) 2011 Renesas Solutions Corp.
+ * Copyright (C) 2011 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include + +#define MSTPCR0 IOMEM(0xffc80030) +#define MSTPCR1 IOMEM(0xffc80034) +#define MSTPCR3 IOMEM(0xffc8003c) +#define MSTPSR1 IOMEM(0xffc80044) +#define MSTPSR4 IOMEM(0xffc80048) +#define MSTPSR6 IOMEM(0xffc8004c) +#define MSTPCR4 IOMEM(0xffc80050) +#define MSTPCR5 IOMEM(0xffc80054) +#define MSTPCR6 IOMEM(0xffc80058) + +/* ioremap() through clock mapping mandatory to avoid + * collision with ARM coherent DMA virtual memory range. + */ + +static struct clk_mapping cpg_mapping = { + .phys = 0xffc80000, + .len = 0x80, +}; + +static struct clk clkp = { + .rate = 62500000, /* FIXME: shortcut */ + .flags = CLK_ENABLE_ON_INIT, + .mapping = &cpg_mapping, +}; + +static struct clk *main_clks[] = { + &clkp, +}; + +enum { + MSTP016, MSTP015, + MSTP_NR }; + +static struct clk mstp_clks[MSTP_NR] = { + [MSTP016] = SH_CLK_MSTP32(&clkp, MSTPCR0, 16, 0), /* TMU0 */ + [MSTP015] = SH_CLK_MSTP32(&clkp, MSTPCR0, 15, 0), /* TMU1 */ +}; + +static struct clk_lookup lookups[] = { + /* MSTP32 clocks */ + CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP016]), /* TMU00 */ + CLKDEV_DEV_ID("sh_tmu.1", &mstp_clks[MSTP015]), /* TMU01 */ +}; + +void __init r8a7778_clock_init(void) +{ + int k, ret = 0; + + for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++) + ret = clk_register(main_clks[k]); + + if (!ret) + ret = sh_clk_mstp_register(mstp_clks, MSTP_NR); + + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); + + if (!ret) + shmobile_clk_init(); + else + panic("failed to setup r8a7778 clocks\n"); +} diff --git a/arch/arm/mach-shmobile/include/mach/r8a7778.h b/arch/arm/mach-shmobile/include/mach/r8a7778.h new file mode 100644 index 000000000000..a755dcafef4d --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/r8a7778.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Kuninori Morimoto + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __ASM_R8A7778_H__ +#define __ASM_R8A7778_H__ + +extern void r8a7778_add_standard_devices(void); +extern void r8a7778_add_standard_devices_dt(void); +extern void r8a7778_init_delay(void); +extern void r8a7778_init_irq(void); +extern void r8a7778_init_irq_dt(void); +extern void r8a7778_clock_init(void); + +#endif /* __ASM_R8A7778_H__ */ diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c new file mode 100644 index 000000000000..811ccf3c77a4 --- /dev/null +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -0,0 +1,167 @@ +/* + * r8a7778 processor support + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Kuninori Morimoto + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* TMU */ +static struct resource sh_tmu0_resources[] = { + DEFINE_RES_MEM(0xffd80008, 12), + DEFINE_RES_IRQ(gic_iid(0x40)), +}; + +static struct sh_timer_config sh_tmu0_platform_data = { + .name = "TMU00", + .channel_offset = 0x4, + .timer_bit = 0, + .clockevent_rating = 200, +}; + +static struct resource sh_tmu1_resources[] = { + DEFINE_RES_MEM(0xffd80014, 12), + DEFINE_RES_IRQ(gic_iid(0x41)), +}; + +static struct sh_timer_config sh_tmu1_platform_data = { + .name = "TMU01", + .channel_offset = 0x10, + .timer_bit = 1, + .clocksource_rating = 200, +}; + +#define PLATFORM_INFO(n, i) \ +{ \ + .parent = &platform_bus, \ + .name = #n, \ + .id = i, \ + .res = n ## i ## _resources, \ + .num_res = ARRAY_SIZE(n ## i ##_resources), \ + .data = &n ## i ##_platform_data, \ + .size_data = sizeof(n ## i ## _platform_data), \ +} + +struct platform_device_info platform_devinfo[] = { + PLATFORM_INFO(sh_tmu, 0), + PLATFORM_INFO(sh_tmu, 1), +}; + +void __init r8a7778_add_standard_devices(void) +{ + int i; + +#ifdef CONFIG_CACHE_L2X0 + void __iomem *base = ioremap_nocache(0xf0100000, 0x1000); + if (base) { + /* + * Early BRESP enable, Shared attribute override enable, 64K*16way + * don't call iounmap(base) + */ + l2x0_init(base, 0x40470000, 0x82000fff); + } +#endif + + for (i = 0; i < ARRAY_SIZE(platform_devinfo); i++) + platform_device_register_full(&platform_devinfo[i]); +} + +#define INT2SMSKCR0 0x82288 /* 0xfe782288 */ +#define INT2SMSKCR1 0x8228c /* 0xfe78228c */ + +#define INT2NTSR0 0x00018 /* 0xfe700018 */ +#define INT2NTSR1 0x0002c /* 0xfe70002c */ +static void __init r8a7778_init_irq_common(void) +{ + void __iomem *base = ioremap_nocache(0xfe700000, 0x00100000); + + BUG_ON(!base); + + /* route all interrupts to ARM */ + __raw_writel(0x73ffffff, base + INT2NTSR0); + __raw_writel(0xffffffff, base + 
INT2NTSR1); + + /* unmask all known interrupts in INTCS2 */ + __raw_writel(0x08330773, base + INT2SMSKCR0); + __raw_writel(0x00311110, base + INT2SMSKCR1); + + iounmap(base); +} + +void __init r8a7778_init_irq(void) +{ + void __iomem *gic_dist_base; + void __iomem *gic_cpu_base; + + gic_dist_base = ioremap_nocache(0xfe438000, PAGE_SIZE); + gic_cpu_base = ioremap_nocache(0xfe430000, PAGE_SIZE); + BUG_ON(!gic_dist_base || !gic_cpu_base); + + /* use GIC to handle interrupts */ + gic_init(0, 29, gic_dist_base, gic_cpu_base); + + r8a7778_init_irq_common(); +} + +void __init r8a7778_init_delay(void) +{ + shmobile_setup_delay(800, 1, 3); /* Cortex-A9 @ 800MHz */ +} + +#ifdef CONFIG_USE_OF +void __init r8a7778_init_irq_dt(void) +{ + irqchip_init(); + r8a7778_init_irq_common(); +} + +static const struct of_dev_auxdata r8a7778_auxdata_lookup[] __initconst = { + {}, +}; + +void __init r8a7778_add_standard_devices_dt(void) +{ + of_platform_populate(NULL, of_default_bus_match_table, + r8a7778_auxdata_lookup, NULL); +} + +static const char *r8a7778_compat_dt[] __initdata = { + "renesas,r8a7778", + NULL, +}; + +DT_MACHINE_START(R8A7778_DT, "Generic R8A7778 (Flattened Device Tree)") + .init_early = r8a7778_init_delay, + .init_irq = r8a7778_init_irq_dt, + .init_machine = r8a7778_add_standard_devices_dt, + .init_time = shmobile_timer_init, + .dt_compat = r8a7778_compat_dt, +MACHINE_END + +#endif /* CONFIG_USE_OF */ -- GitLab From db331fc8fc715fa6af05bf5e9d428be2ec306475 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 21 Mar 2013 03:02:38 -0700 Subject: [PATCH 0292/3163] ARM: shmobile: r8a7778 SCIF support Add SCIF serial port support to the r8a7778 SoC by adding platform devices together with clock bindings. DT device description is excluded at this point since such bindings are still under development. 
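For reference, a consumer-side sketch of how these clock bindings resolve (assuming standard clkdev behaviour; this is not code from the patch): CLKDEV_DEV_ID() matches on device name plus instance id with a NULL connection id, so the serial driver's probe path effectively reduces to:

	/* sketch: "sh-sci.0" resolves to the SCIF0 MSTP gate added below */
	struct clk *clk = clk_get(&pdev->dev, NULL);
	if (!IS_ERR(clk))
		clk_enable(clk);	/* clears the module-stop bit for the port */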
Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7778.c | 13 +++++++++++++ arch/arm/mach-shmobile/setup-r8a7778.c | 26 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/arch/arm/mach-shmobile/clock-r8a7778.c b/arch/arm/mach-shmobile/clock-r8a7778.c index 387e3b74cc8c..f1277f45381e 100644 --- a/arch/arm/mach-shmobile/clock-r8a7778.c +++ b/arch/arm/mach-shmobile/clock-r8a7778.c @@ -58,16 +58,29 @@ static struct clk *main_clks[] = { }; enum { + MSTP026, MSTP025, MSTP024, MSTP023, MSTP022, MSTP021, MSTP016, MSTP015, MSTP_NR }; static struct clk mstp_clks[MSTP_NR] = { + [MSTP026] = SH_CLK_MSTP32(&clkp, MSTPCR0, 26, 0), /* SCIF0 */ + [MSTP025] = SH_CLK_MSTP32(&clkp, MSTPCR0, 25, 0), /* SCIF1 */ + [MSTP024] = SH_CLK_MSTP32(&clkp, MSTPCR0, 24, 0), /* SCIF2 */ + [MSTP023] = SH_CLK_MSTP32(&clkp, MSTPCR0, 23, 0), /* SCIF3 */ + [MSTP022] = SH_CLK_MSTP32(&clkp, MSTPCR0, 22, 0), /* SCIF4 */ + [MSTP021] = SH_CLK_MSTP32(&clkp, MSTPCR0, 21, 0), /* SCIF5 */ [MSTP016] = SH_CLK_MSTP32(&clkp, MSTPCR0, 16, 0), /* TMU0 */ + [MSTP015] = SH_CLK_MSTP32(&clkp, MSTPCR0, 15, 0), /* TMU1 */ }; static struct clk_lookup lookups[] = { /* MSTP32 clocks */ + CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP026]), /* SCIF0 */ + CLKDEV_DEV_ID("sh-sci.1", &mstp_clks[MSTP025]), /* SCIF1 */ + CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP024]), /* SCIF2 */ + CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP023]), /* SCIF3 */ + CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP022]), /* SCIF4 */ + CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP021]), /* SCIF5 */ CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP016]), /* TMU00 */ CLKDEV_DEV_ID("sh_tmu.1", &mstp_clks[MSTP015]), /* TMU01 */ }; diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c index 811ccf3c77a4..01c62bedf9cf 100644 --- a/arch/arm/mach-shmobile/setup-r8a7778.c +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,26 @@ #include #include +/* SCIF */ +#define SCIF_INFO(baseaddr, irq) \ +{ \ + .mapbase = baseaddr, \ + .flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP, \ + .scscr = SCSCR_RE | SCSCR_TE | SCSCR_CKE1, \ + .scbrr_algo_id = SCBRR_ALGO_2, \ + .type = PORT_SCIF, \ + .irqs = SCIx_IRQ_MUXED(irq), \ +} + +static struct plat_sci_port scif_platform_data[] = { + SCIF_INFO(0xffe40000, gic_iid(0x66)), + SCIF_INFO(0xffe41000, gic_iid(0x67)), + SCIF_INFO(0xffe42000, gic_iid(0x68)), + SCIF_INFO(0xffe43000, gic_iid(0x69)), + SCIF_INFO(0xffe44000, gic_iid(0x6a)), + SCIF_INFO(0xffe45000, gic_iid(0x6b)), +}; + /* TMU */ static struct resource sh_tmu0_resources[] = { DEFINE_RES_MEM(0xffd80008, 12), @@ -88,6 +109,11 @@ void __init r8a7778_add_standard_devices(void) } #endif + for (i = 0; i < ARRAY_SIZE(scif_platform_data); i++) + platform_device_register_data(&platform_bus, "sh-sci", i, + &scif_platform_data[i], + sizeof(struct plat_sci_port)); + for (i = 0; i < ARRAY_SIZE(platform_devinfo); i++) platform_device_register_full(&platform_devinfo[i]); } -- GitLab From 558f874029c904ca694a69e96b4b48c4d54686a3 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 21 Mar 2013 17:05:40 +0100 Subject: [PATCH 0293/3163] ARM: shmobile: sh73a0: add irqpin DT nodes Add DT nodes for the 4 irqpin interrupt controllers on sh73a0. We add them to sh73a0.dtsi, which is also used by configurations doing all their device instantiation from board .c code.
We rely on the fact that such configurations don't instantiate devices from the device tree. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Simon Horman --- arch/arm/boot/dts/sh73a0.dtsi | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/arch/arm/boot/dts/sh73a0.dtsi b/arch/arm/boot/dts/sh73a0.dtsi index 8a59465d0231..7e71e3a85767 100644 --- a/arch/arm/boot/dts/sh73a0.dtsi +++ b/arch/arm/boot/dts/sh73a0.dtsi @@ -38,6 +38,87 @@ <0xf0000100 0x100>; }; + irqpin0: irqpin@e6900000 { + compatible = "renesas,intc-irqpin"; + #interrupt-cells = <2>; + interrupt-controller; + reg = <0xe6900000 4>, + <0xe6900010 4>, + <0xe6900020 1>, + <0xe6900040 1>, + <0xe6900060 1>; + interrupt-parent = <&gic>; + interrupts = <0 1 0x4 + 0 2 0x4 + 0 3 0x4 + 0 4 0x4 + 0 5 0x4 + 0 6 0x4 + 0 7 0x4 + 0 8 0x4>; + }; + + irqpin1: irqpin@e6900004 { + compatible = "renesas,intc-irqpin"; + #interrupt-cells = <2>; + interrupt-controller; + reg = <0xe6900004 4>, + <0xe6900014 4>, + <0xe6900024 1>, + <0xe6900044 1>, + <0xe6900064 1>; + interrupt-parent = <&gic>; + interrupts = <0 9 0x4 + 0 10 0x4 + 0 11 0x4 + 0 12 0x4 + 0 13 0x4 + 0 14 0x4 + 0 15 0x4 + 0 16 0x4>; + control-parent; + }; + + irqpin2: irqpin@e6900008 { + compatible = "renesas,intc-irqpin"; + #interrupt-cells = <2>; + interrupt-controller; + reg = <0xe6900008 4>, + <0xe6900018 4>, + <0xe6900028 1>, + <0xe6900048 1>, + <0xe6900068 1>; + interrupt-parent = <&gic>; + interrupts = <0 17 0x4 + 0 18 0x4 + 0 19 0x4 + 0 20 0x4 + 0 21 0x4 + 0 22 0x4 + 0 23 0x4 + 0 24 0x4>; + }; + + irqpin3: irqpin@e690000c { + compatible = "renesas,intc-irqpin"; + #interrupt-cells = <2>; + interrupt-controller; + reg = <0xe690000c 4>, + <0xe690001c 4>, + <0xe690002c 1>, + <0xe690004c 1>, + <0xe690006c 1>; + interrupt-parent = <&gic>; + interrupts = <0 25 0x4 + 0 26 0x4 + 0 27 0x4 + 0 28 0x4 + 0 29 0x4 + 0 30 0x4 + 0 31 0x4 + 0 32 0x4>; + }; + i2c0: i2c@0xe6820000 { #address-cells = <1>; #size-cells = <0>; -- GitLab From 6722f6cb763203cab775297b6e9d00834af0d6d7 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 18 Mar 2013 22:58:18 +0900 Subject: [PATCH 0294/3163] ARM: shmobile: Disallow PINCTRL without GPIOLIB Modify mach-shmobile to only select PINCTRL in case ARCH_WANT_OPTIONAL_GPIOLIB is set. This fixes a build error triggered when adding a new SoC lacking GPIO software support (ARCH_WANT_OPTIONAL_GPIOLIB=n): CC drivers/tty/vt/keyboard.o In file included from drivers/pinctrl/core.c:30:0: include/asm-generic/gpio.h: In function 'gpio_get_value_cansleep': include/asm-generic/gpio.h:270:2: error: implicit declaration of function '__gpio_get_value' include/asm-generic/gpio.h: In function 'gpio_set_value_cansleep': include/asm-generic/gpio.h:276:2: error: implicit declaration of function '__gpio_set_value' drivers/pinctrl/core.c: In function 'pinctrl_ready_for_gpio_range': drivers/pinctrl/core.c:297:9: error: implicit declaration of function 'gpio_to_chip' drivers/pinctrl/core.c:297:27: warning: initialization makes pointer from integer without a cast drivers/pinctrl/core.c:304:45: error: dereferencing pointer to incomplete type drivers/pinctrl/core.c:305:26: error: dereferencing pointer to incomplete type drivers/pinctrl/core.c:305:39: error: dereferencing pointer to incomplete type make[2]: *** [drivers/pinctrl/core.o] Error 1 make[1]: *** [drivers/pinctrl] Error 2 make[1]: *** Waiting for unfinished jobs....
LD drivers/sh/built-in.o Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5b714695b01b..b63902e7cacd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -725,7 +725,7 @@ config ARCH_SHMOBILE select MULTI_IRQ_HANDLER select NEED_MACH_MEMORY_H select NO_IOPORT - select PINCTRL + select PINCTRL if ARCH_WANT_OPTIONAL_GPIOLIB select PM_GENERIC_DOMAINS if PM select SPARSE_IRQ help -- GitLab From c91cf2fad00f24bfe268d30b75e4015aaa326c04 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 25 Mar 2013 23:18:15 -0700 Subject: [PATCH 0295/3163] ARM: shmobile: r8a73a4: add thermal driver support The current temperature can be read with > cat /sys/class/thermal/thermal_zone?/temp Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a73a4.dtsi | 7 +++++++ arch/arm/mach-shmobile/clock-r8a73a4.c | 13 ++++++++++++- arch/arm/mach-shmobile/setup-r8a73a4.c | 15 +++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index 4c68ba15727c..7db5b504e64c 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -84,4 +84,11 @@ <0 56 4>, <0 57 4>; }; + thermal@e61f0000 { + compatible = "renesas,rcar-thermal"; + reg = <0xe61f0000 0x14>, <0xe61f0100 0x38>, + <0xe61f0200 0x38>, <0xe61f0300 0x38>; + interrupt-parent = <&gic>; + interrupts = <0 69 4>; + }; }; diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c index 037713bdff3f..e710c00c3822 100644 --- a/arch/arm/mach-shmobile/clock-r8a73a4.c +++ b/arch/arm/mach-shmobile/clock-r8a73a4.c @@ -29,6 +29,7 @@ #define MPCKCR 0xe6150080 #define SMSTPCR2 0xe6150138 +#define SMSTPCR5 0xe6150144 static struct clk_mapping cpg_mapping = { .phys = CPG_BASE, @@ -56,7 +57,12 @@ static struct clk *main_clks[] = { &extal2_clk, }; -enum { MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP_NR }; +enum { + MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, + MSTP522, + MSTP_NR +}; + static struct clk mstp_clks[MSTP_NR] = { [MSTP204] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 4, 0), /* SCIFA0 */ [MSTP203] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 3, 0), /* SCIFA1 */ @@ -64,6 +70,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP207] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 7, 0), /* SCIFB1 */ [MSTP216] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 16, 0), /* SCIFB2 */ [MSTP217] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR2, 17, 0), /* SCIFB3 */ + [MSTP522] = SH_CLK_MSTP32(&extal2_clk, SMSTPCR5, 22, 0), /* Thermal */ }; static struct clk_lookup lookups[] = { @@ -73,6 +80,10 @@ CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP207]), CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP216]), CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP217]), + CLKDEV_DEV_ID("rcar_thermal", &mstp_clks[MSTP522]), + + /* for DT */ + CLKDEV_DEV_ID("e61f0000.thermal", &mstp_clks[MSTP522]), }; void __init r8a73a4_clock_init(void) diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c index c2d86f30cde4..c5a75a7a508f 100644 --- a/arch/arm/mach-shmobile/setup-r8a73a4.c +++ b/arch/arm/mach-shmobile/setup-r8a73a4.c @@ -155,6 +155,20 @@ static const struct resource irqc1_resources[] = { &irqc##idx##_data, \ sizeof(struct renesas_irqc_config)) + +/* Thermal0 -> Thermal2 */ static const struct resource thermal0_resources[] = { +
DEFINE_RES_MEM(0xe61f0000, 0x14), + DEFINE_RES_MEM(0xe61f0100, 0x38), + DEFINE_RES_MEM(0xe61f0200, 0x38), + DEFINE_RES_MEM(0xe61f0300, 0x38), + DEFINE_RES_IRQ(gic_spi(69)), +}; + +#define r8a73a4_register_thermal() \ + platform_device_register_simple("rcar_thermal", -1, \ + thermal0_resources, \ + ARRAY_SIZE(thermal0_resources)) + void __init r8a73a4_add_standard_devices(void) { r8a73a4_register_scif(SCIFA0); @@ -165,6 +179,7 @@ void __init r8a73a4_add_standard_devices(void) r8a73a4_register_scif(SCIFB1); r8a73a4_register_scif(SCIFB2); r8a73a4_register_scif(SCIFB3); r8a73a4_register_irqc(0); r8a73a4_register_irqc(1); + r8a73a4_register_thermal(); } #ifdef CONFIG_USE_OF -- GitLab From 0b7d78202260162057248875b1c9bac70d041e58 Mon Sep 17 00:00:00 2001 From: Bastian Hecht Date: Wed, 27 Mar 2013 14:54:04 +0100 Subject: [PATCH 0296/3163] ARM: shmobile: r8a7740: Migrate from INTC to GIC With the added capability of the intc_irqpin driver to handle shared external IRQs, all prerequisites are fulfilled and we are ready to migrate completely to GIC. This includes the following steps: - Kconfig: select ARM_GIC and RENESAS_INTC_IRQPIN - intc-r8a7740: Throw out all legacy INTC code and init the GIC. We need to mask out all shared IRQs as required by the shared intc_irqpin driver. - setup-r8a7740: Add 4 irqpin devices to handle external IRQs and update all IRQ numbers to point to the GIC SPI. - board-armadillo: Update all IRQ numbers to point to the GIC SPI. - pfc-r8a7740: Update all IRQ numbers of the GPIOs to point to the GIC SPI. Signed-off-by: Bastian Hecht Acked-by: Kuninori Morimoto Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/Kconfig | 2 + .../arm/mach-shmobile/board-armadillo800eva.c | 35 +- arch/arm/mach-shmobile/intc-r8a7740.c | 641 +----------------- arch/arm/mach-shmobile/setup-r8a7740.c | 192 +++++- drivers/pinctrl/sh-pfc/pfc-r8a7740.c | 64 +- 5 files changed, 239 insertions(+), 695 deletions(-) diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 49cba4a511df..d569c34b1c86 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -30,8 +30,10 @@ config ARCH_R8A73A4 config ARCH_R8A7740 bool "R-Mobile A1 (R8A77400)" select ARCH_WANT_OPTIONAL_GPIOLIB + select ARM_GIC select CPU_V7 select SH_CLK_CPG + select RENESAS_INTC_IRQPIN config ARCH_R8A7778 bool "R-Car M1 (R8A77780)" select CPU_V7 select SH_CLK_CPG select ARM_GIC diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index f2ec0777cfbe..e451327278af 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -145,7 +145,7 @@ * see * usbhsf_power_ctrl() */ -#define IRQ7 evt2irq(0x02e0) +#define IRQ7 irq_pin(7) #define USBCR1 IOMEM(0xe605810a) #define USBH 0xC6700000 #define USBH_USBCTR 0x10834 @@ -330,7 +330,7 @@ static struct resource usbhsf_resources[] = { .flags = IORESOURCE_MEM, }, { - .start = evt2irq(0x0A20), + .start = gic_spi(51), .flags = IORESOURCE_IRQ, }, }; @@ -363,7 +363,7 @@ static struct resource sh_eth_resources[] = { .end = 0xe9a02000 - 1, .flags = IORESOURCE_MEM, }, { - .start = evt2irq(0x0500), + .start = gic_spi(110), .flags = IORESOURCE_IRQ, }, }; @@ -417,7 +417,7 @@ static struct resource lcdc0_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = intcs_evt2irq(0x580), + .start = gic_spi(177), .flags = IORESOURCE_IRQ, }, }; @@ -452,7 +452,7 @@ static struct resource hdmi_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = evt2irq(0x1700), + .start = gic_spi(131), .flags = IORESOURCE_IRQ, }, [2] = { @@ -514,7 +514,7 @@ static
struct resource hdmi_lcdc_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = intcs_evt2irq(0x1780), + .start = gic_spi(178), .flags = IORESOURCE_IRQ, }, }; @@ -574,7 +574,7 @@ static struct regulator_consumer_supply fixed3v3_power_consumers[] = * We can use IRQ31 as card detect irq, * but it needs chattering removal operation */ -#define IRQ31 evt2irq(0x33E0) +#define IRQ31 irq_pin(31) static struct sh_mobile_sdhi_info sdhi0_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI0_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI0_RX, @@ -596,12 +596,12 @@ static struct resource sdhi0_resources[] = { */ { .name = SH_MOBILE_SDHI_IRQ_SDCARD, - .start = evt2irq(0x0E20), + .start = gic_spi(118), .flags = IORESOURCE_IRQ, }, { .name = SH_MOBILE_SDHI_IRQ_SDIO, - .start = evt2irq(0x0E40), + .start = gic_spi(119), .flags = IORESOURCE_IRQ, }, }; @@ -633,15 +633,15 @@ static struct resource sdhi1_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = evt2irq(0x0E80), + .start = gic_spi(121), .flags = IORESOURCE_IRQ, }, [2] = { - .start = evt2irq(0x0EA0), + .start = gic_spi(122), .flags = IORESOURCE_IRQ, }, [3] = { - .start = evt2irq(0x0EC0), + .start = gic_spi(123), .flags = IORESOURCE_IRQ, }, }; @@ -674,12 +674,12 @@ static struct resource sh_mmcif_resources[] = { }, [1] = { /* MMC ERR */ - .start = evt2irq(0x1AC0), + .start = gic_spi(56), .flags = IORESOURCE_IRQ, }, [2] = { /* MMC NOR */ - .start = evt2irq(0x1AE0), + .start = gic_spi(57), .flags = IORESOURCE_IRQ, }, }; @@ -756,7 +756,7 @@ static struct resource ceu0_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = intcs_evt2irq(0x0500), + .start = gic_spi(160), .flags = IORESOURCE_IRQ, }, [2] = { @@ -798,7 +798,7 @@ static struct resource fsi_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = evt2irq(0x1840), + .start = gic_spi(9), .flags = IORESOURCE_IRQ, }, }; @@ -881,7 +881,7 @@ static struct platform_device i2c_gpio_device = { static struct i2c_board_info i2c0_devices[] = { { I2C_BOARD_INFO("st1232-ts", 0x55), - .irq = evt2irq(0x0340), + .irq = irq_pin(10), }, { I2C_BOARD_INFO("wm8978", 0x1a), @@ -1207,7 +1207,6 @@ DT_MACHINE_START(ARMADILLO800EVA_DT, "armadillo800eva") .map_io = r8a7740_map_io, .init_early = eva_add_early_devices, .init_irq = r8a7740_init_irq, - .handle_irq = shmobile_handle_irq_intc, .init_machine = eva_init, .init_late = shmobile_init_late, .init_time = eva_earlytimer_init, diff --git a/arch/arm/mach-shmobile/intc-r8a7740.c b/arch/arm/mach-shmobile/intc-r8a7740.c index 9a69a31918ba..b741c8409a5a 100644 --- a/arch/arm/mach-shmobile/intc-r8a7740.c +++ b/arch/arm/mach-shmobile/intc-r8a7740.c @@ -18,620 +18,39 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include -#include -#include #include -#include -#include -#include -#include -#include - -/* - * INTCA - */ -enum { - UNUSED_INTCA = 0, - - /* interrupt sources INTCA */ - DIRC, - ATAPI, - IIC1_ALI, IIC1_TACKI, IIC1_WAITI, IIC1_DTEI, - AP_ARM_COMMTX, AP_ARM_COMMRX, - MFI, MFIS, - BBIF1, BBIF2, - USBHSDMAC, - USBF_OUL_SOF, USBF_IXL_INT, - SGX540, - CMT1_0, CMT1_1, CMT1_2, CMT1_3, - CMT2, - CMT3, - KEYSC, - SCIFA0, SCIFA1, SCIFA2, SCIFA3, - MSIOF2, MSIOF1, - SCIFA4, SCIFA5, SCIFB, - FLCTL_FLSTEI, FLCTL_FLTENDI, FLCTL_FLTREQ0I, FLCTL_FLTREQ1I, - SDHI0_0, SDHI0_1, SDHI0_2, SDHI0_3, - SDHI1_0, SDHI1_1, SDHI1_2, SDHI1_3, - AP_ARM_L2CINT, - IRDA, - TPU0, - SCIFA6, SCIFA7, - GbEther, - ICBS0, - DDM, - SDHI2_0, SDHI2_1, SDHI2_2, SDHI2_3, - RWDT0, - DMAC1_1_DEI0, DMAC1_1_DEI1, DMAC1_1_DEI2, DMAC1_1_DEI3, - DMAC1_2_DEI4, 
DMAC1_2_DEI5, DMAC1_2_DADERR, - DMAC2_1_DEI0, DMAC2_1_DEI1, DMAC2_1_DEI2, DMAC2_1_DEI3, - DMAC2_2_DEI4, DMAC2_2_DEI5, DMAC2_2_DADERR, - DMAC3_1_DEI0, DMAC3_1_DEI1, DMAC3_1_DEI2, DMAC3_1_DEI3, - DMAC3_2_DEI4, DMAC3_2_DEI5, DMAC3_2_DADERR, - SHWYSTAT_RT, SHWYSTAT_HS, SHWYSTAT_COM, - HDMI, - USBH_INT, USBH_OHCI, USBH_EHCI, USBH_PME, USBH_BIND, - RSPI_OVRF, RSPI_SPTEF, RSPI_SPRF, - SPU2_0, SPU2_1, - FSI, FMSI, - HDMI_SSS, HDMI_KEY, - IPMMU, - AP_ARM_CTIIRQ, AP_ARM_PMURQ, - MFIS2, - CPORTR2S, - CMT14, CMT15, - MMCIF_0, MMCIF_1, MMCIF_2, - SIM_ERI, SIM_RXI, SIM_TXI, SIM_TEI, - STPRO_0, STPRO_1, STPRO_2, STPRO_3, STPRO_4, - - /* interrupt groups INTCA */ - DMAC1_1, DMAC1_2, - DMAC2_1, DMAC2_2, - DMAC3_1, DMAC3_2, - AP_ARM1, AP_ARM2, - SDHI0, SDHI1, SDHI2, - SHWYSTAT, - USBF, USBH1, USBH2, - RSPI, SPU2, FLCTL, IIC1, -}; - -static struct intc_vect intca_vectors[] __initdata = { - INTC_VECT(DIRC, 0x0560), - INTC_VECT(ATAPI, 0x05E0), - INTC_VECT(IIC1_ALI, 0x0780), - INTC_VECT(IIC1_TACKI, 0x07A0), - INTC_VECT(IIC1_WAITI, 0x07C0), - INTC_VECT(IIC1_DTEI, 0x07E0), - INTC_VECT(AP_ARM_COMMTX, 0x0840), - INTC_VECT(AP_ARM_COMMRX, 0x0860), - INTC_VECT(MFI, 0x0900), - INTC_VECT(MFIS, 0x0920), - INTC_VECT(BBIF1, 0x0940), - INTC_VECT(BBIF2, 0x0960), - INTC_VECT(USBHSDMAC, 0x0A00), - INTC_VECT(USBF_OUL_SOF, 0x0A20), - INTC_VECT(USBF_IXL_INT, 0x0A40), - INTC_VECT(SGX540, 0x0A60), - INTC_VECT(CMT1_0, 0x0B00), - INTC_VECT(CMT1_1, 0x0B20), - INTC_VECT(CMT1_2, 0x0B40), - INTC_VECT(CMT1_3, 0x0B60), - INTC_VECT(CMT2, 0x0B80), - INTC_VECT(CMT3, 0x0BA0), - INTC_VECT(KEYSC, 0x0BE0), - INTC_VECT(SCIFA0, 0x0C00), - INTC_VECT(SCIFA1, 0x0C20), - INTC_VECT(SCIFA2, 0x0C40), - INTC_VECT(SCIFA3, 0x0C60), - INTC_VECT(MSIOF2, 0x0C80), - INTC_VECT(MSIOF1, 0x0D00), - INTC_VECT(SCIFA4, 0x0D20), - INTC_VECT(SCIFA5, 0x0D40), - INTC_VECT(SCIFB, 0x0D60), - INTC_VECT(FLCTL_FLSTEI, 0x0D80), - INTC_VECT(FLCTL_FLTENDI, 0x0DA0), - INTC_VECT(FLCTL_FLTREQ0I, 0x0DC0), - INTC_VECT(FLCTL_FLTREQ1I, 0x0DE0), - INTC_VECT(SDHI0_0, 0x0E00), - INTC_VECT(SDHI0_1, 0x0E20), - INTC_VECT(SDHI0_2, 0x0E40), - INTC_VECT(SDHI0_3, 0x0E60), - INTC_VECT(SDHI1_0, 0x0E80), - INTC_VECT(SDHI1_1, 0x0EA0), - INTC_VECT(SDHI1_2, 0x0EC0), - INTC_VECT(SDHI1_3, 0x0EE0), - INTC_VECT(AP_ARM_L2CINT, 0x0FA0), - INTC_VECT(IRDA, 0x0480), - INTC_VECT(TPU0, 0x04A0), - INTC_VECT(SCIFA6, 0x04C0), - INTC_VECT(SCIFA7, 0x04E0), - INTC_VECT(GbEther, 0x0500), - INTC_VECT(ICBS0, 0x0540), - INTC_VECT(DDM, 0x1140), - INTC_VECT(SDHI2_0, 0x1200), - INTC_VECT(SDHI2_1, 0x1220), - INTC_VECT(SDHI2_2, 0x1240), - INTC_VECT(SDHI2_3, 0x1260), - INTC_VECT(RWDT0, 0x1280), - INTC_VECT(DMAC1_1_DEI0, 0x2000), - INTC_VECT(DMAC1_1_DEI1, 0x2020), - INTC_VECT(DMAC1_1_DEI2, 0x2040), - INTC_VECT(DMAC1_1_DEI3, 0x2060), - INTC_VECT(DMAC1_2_DEI4, 0x2080), - INTC_VECT(DMAC1_2_DEI5, 0x20A0), - INTC_VECT(DMAC1_2_DADERR, 0x20C0), - INTC_VECT(DMAC2_1_DEI0, 0x2100), - INTC_VECT(DMAC2_1_DEI1, 0x2120), - INTC_VECT(DMAC2_1_DEI2, 0x2140), - INTC_VECT(DMAC2_1_DEI3, 0x2160), - INTC_VECT(DMAC2_2_DEI4, 0x2180), - INTC_VECT(DMAC2_2_DEI5, 0x21A0), - INTC_VECT(DMAC2_2_DADERR, 0x21C0), - INTC_VECT(DMAC3_1_DEI0, 0x2200), - INTC_VECT(DMAC3_1_DEI1, 0x2220), - INTC_VECT(DMAC3_1_DEI2, 0x2240), - INTC_VECT(DMAC3_1_DEI3, 0x2260), - INTC_VECT(DMAC3_2_DEI4, 0x2280), - INTC_VECT(DMAC3_2_DEI5, 0x22A0), - INTC_VECT(DMAC3_2_DADERR, 0x22C0), - INTC_VECT(SHWYSTAT_RT, 0x1300), - INTC_VECT(SHWYSTAT_HS, 0x1320), - INTC_VECT(SHWYSTAT_COM, 0x1340), - INTC_VECT(USBH_INT, 0x1540), - INTC_VECT(USBH_OHCI, 0x1560), - INTC_VECT(USBH_EHCI, 0x1580), - 
INTC_VECT(USBH_PME, 0x15A0), - INTC_VECT(USBH_BIND, 0x15C0), - INTC_VECT(HDMI, 0x1700), - INTC_VECT(RSPI_OVRF, 0x1780), - INTC_VECT(RSPI_SPTEF, 0x17A0), - INTC_VECT(RSPI_SPRF, 0x17C0), - INTC_VECT(SPU2_0, 0x1800), - INTC_VECT(SPU2_1, 0x1820), - INTC_VECT(FSI, 0x1840), - INTC_VECT(FMSI, 0x1860), - INTC_VECT(HDMI_SSS, 0x18A0), - INTC_VECT(HDMI_KEY, 0x18C0), - INTC_VECT(IPMMU, 0x1920), - INTC_VECT(AP_ARM_CTIIRQ, 0x1980), - INTC_VECT(AP_ARM_PMURQ, 0x19A0), - INTC_VECT(MFIS2, 0x1A00), - INTC_VECT(CPORTR2S, 0x1A20), - INTC_VECT(CMT14, 0x1A40), - INTC_VECT(CMT15, 0x1A60), - INTC_VECT(MMCIF_0, 0x1AA0), - INTC_VECT(MMCIF_1, 0x1AC0), - INTC_VECT(MMCIF_2, 0x1AE0), - INTC_VECT(SIM_ERI, 0x1C00), - INTC_VECT(SIM_RXI, 0x1C20), - INTC_VECT(SIM_TXI, 0x1C40), - INTC_VECT(SIM_TEI, 0x1C60), - INTC_VECT(STPRO_0, 0x1C80), - INTC_VECT(STPRO_1, 0x1CA0), - INTC_VECT(STPRO_2, 0x1CC0), - INTC_VECT(STPRO_3, 0x1CE0), - INTC_VECT(STPRO_4, 0x1D00), -}; - -static struct intc_group intca_groups[] __initdata = { - INTC_GROUP(DMAC1_1, - DMAC1_1_DEI0, DMAC1_1_DEI1, DMAC1_1_DEI2, DMAC1_1_DEI3), - INTC_GROUP(DMAC1_2, - DMAC1_2_DEI4, DMAC1_2_DEI5, DMAC1_2_DADERR), - INTC_GROUP(DMAC2_1, - DMAC2_1_DEI0, DMAC2_1_DEI1, DMAC2_1_DEI2, DMAC2_1_DEI3), - INTC_GROUP(DMAC2_2, - DMAC2_2_DEI4, DMAC2_2_DEI5, DMAC2_2_DADERR), - INTC_GROUP(DMAC3_1, - DMAC3_1_DEI0, DMAC3_1_DEI1, DMAC3_1_DEI2, DMAC3_1_DEI3), - INTC_GROUP(DMAC3_2, - DMAC3_2_DEI4, DMAC3_2_DEI5, DMAC3_2_DADERR), - INTC_GROUP(AP_ARM1, - AP_ARM_COMMTX, AP_ARM_COMMRX), - INTC_GROUP(AP_ARM2, - AP_ARM_CTIIRQ, AP_ARM_PMURQ), - INTC_GROUP(USBF, - USBF_OUL_SOF, USBF_IXL_INT), - INTC_GROUP(SDHI0, - SDHI0_0, SDHI0_1, SDHI0_2, SDHI0_3), - INTC_GROUP(SDHI1, - SDHI1_0, SDHI1_1, SDHI1_2, SDHI1_3), - INTC_GROUP(SDHI2, - SDHI2_0, SDHI2_1, SDHI2_2, SDHI2_3), - INTC_GROUP(SHWYSTAT, - SHWYSTAT_RT, SHWYSTAT_HS, SHWYSTAT_COM), - INTC_GROUP(USBH1, /* FIXME */ - USBH_INT, USBH_OHCI), - INTC_GROUP(USBH2, /* FIXME */ - USBH_EHCI, - USBH_PME, USBH_BIND), - INTC_GROUP(RSPI, - RSPI_OVRF, RSPI_SPTEF, RSPI_SPRF), - INTC_GROUP(SPU2, - SPU2_0, SPU2_1), - INTC_GROUP(FLCTL, - FLCTL_FLSTEI, FLCTL_FLTENDI, FLCTL_FLTREQ0I, FLCTL_FLTREQ1I), - INTC_GROUP(IIC1, - IIC1_ALI, IIC1_TACKI, IIC1_WAITI, IIC1_DTEI), -}; - -static struct intc_mask_reg intca_mask_registers[] __initdata = { - { /* IMR0A / IMCR0A */ 0xe6940080, 0xe69400c0, 8, - { DMAC2_1_DEI3, DMAC2_1_DEI2, DMAC2_1_DEI1, DMAC2_1_DEI0, - 0, 0, AP_ARM_COMMTX, AP_ARM_COMMRX } }, - { /* IMR1A / IMCR1A */ 0xe6940084, 0xe69400c4, 8, - { ATAPI, 0, DIRC, 0, - DMAC1_1_DEI3, DMAC1_1_DEI2, DMAC1_1_DEI1, DMAC1_1_DEI0 } }, - { /* IMR2A / IMCR2A */ 0xe6940088, 0xe69400c8, 8, - { 0, 0, 0, 0, - BBIF1, BBIF2, MFIS, MFI } }, - { /* IMR3A / IMCR3A */ 0xe694008c, 0xe69400cc, 8, - { DMAC3_1_DEI3, DMAC3_1_DEI2, DMAC3_1_DEI1, DMAC3_1_DEI0, - DMAC3_2_DADERR, DMAC3_2_DEI5, DMAC3_2_DEI4, IRDA } }, - { /* IMR4A / IMCR4A */ 0xe6940090, 0xe69400d0, 8, - { DDM, 0, 0, 0, - 0, 0, 0, 0 } }, - { /* IMR5A / IMCR5A */ 0xe6940094, 0xe69400d4, 8, - { KEYSC, DMAC1_2_DADERR, DMAC1_2_DEI5, DMAC1_2_DEI4, - SCIFA3, SCIFA2, SCIFA1, SCIFA0 } }, - { /* IMR6A / IMCR6A */ 0xe6940098, 0xe69400d8, 8, - { SCIFB, SCIFA5, SCIFA4, MSIOF1, - 0, 0, MSIOF2, 0 } }, - { /* IMR7A / IMCR7A */ 0xe694009c, 0xe69400dc, 8, - { SDHI0_3, SDHI0_2, SDHI0_1, SDHI0_0, - FLCTL_FLTREQ1I, FLCTL_FLTREQ0I, FLCTL_FLTENDI, FLCTL_FLSTEI } }, - { /* IMR8A / IMCR8A */ 0xe69400a0, 0xe69400e0, 8, - { SDHI1_3, SDHI1_2, SDHI1_1, SDHI1_0, - 0, USBHSDMAC, 0, AP_ARM_L2CINT } }, - { /* IMR9A / IMCR9A */ 0xe69400a4, 0xe69400e4, 8, - { CMT1_3, CMT1_2, 
CMT1_1, CMT1_0, - CMT2, USBF_IXL_INT, USBF_OUL_SOF, SGX540 } }, - { /* IMR10A / IMCR10A */ 0xe69400a8, 0xe69400e8, 8, - { 0, DMAC2_2_DADERR, DMAC2_2_DEI5, DMAC2_2_DEI4, - 0, 0, 0, 0 } }, - { /* IMR11A / IMCR11A */ 0xe69400ac, 0xe69400ec, 8, - { IIC1_DTEI, IIC1_WAITI, IIC1_TACKI, IIC1_ALI, - ICBS0, 0, 0, 0 } }, - { /* IMR12A / IMCR12A */ 0xe69400b0, 0xe69400f0, 8, - { 0, 0, TPU0, SCIFA6, - SCIFA7, GbEther, 0, 0 } }, - { /* IMR13A / IMCR13A */ 0xe69400b4, 0xe69400f4, 8, - { SDHI2_3, SDHI2_2, SDHI2_1, SDHI2_0, - 0, CMT3, 0, RWDT0 } }, - { /* IMR0A3 / IMCR0A3 */ 0xe6950080, 0xe69500c0, 8, - { SHWYSTAT_RT, SHWYSTAT_HS, SHWYSTAT_COM, 0, - 0, 0, 0, 0 } }, - /* IMR1A3 / IMCR1A3 */ - { /* IMR2A3 / IMCR2A3 */ 0xe6950088, 0xe69500c8, 8, - { 0, 0, USBH_INT, USBH_OHCI, - USBH_EHCI, USBH_PME, USBH_BIND, 0 } }, - /* IMR3A3 / IMCR3A3 */ - { /* IMR4A3 / IMCR4A3 */ 0xe6950090, 0xe69500d0, 8, - { HDMI, 0, 0, 0, - RSPI_OVRF, RSPI_SPTEF, RSPI_SPRF, 0 } }, - { /* IMR5A3 / IMCR5A3 */ 0xe6950094, 0xe69500d4, 8, - { SPU2_0, SPU2_1, FSI, FMSI, - 0, HDMI_SSS, HDMI_KEY, 0 } }, - { /* IMR6A3 / IMCR6A3 */ 0xe6950098, 0xe69500d8, 8, - { 0, IPMMU, 0, 0, - AP_ARM_CTIIRQ, AP_ARM_PMURQ, 0, 0 } }, - { /* IMR7A3 / IMCR7A3 */ 0xe695009c, 0xe69500dc, 8, - { MFIS2, CPORTR2S, CMT14, CMT15, - 0, MMCIF_0, MMCIF_1, MMCIF_2 } }, - /* IMR8A3 / IMCR8A3 */ - { /* IMR9A3 / IMCR9A3 */ 0xe69500a4, 0xe69500e4, 8, - { SIM_ERI, SIM_RXI, SIM_TXI, SIM_TEI, - STPRO_0, STPRO_1, STPRO_2, STPRO_3 } }, - { /* IMR10A3 / IMCR10A3 */ 0xe69500a8, 0xe69500e8, 8, - { STPRO_4, 0, 0, 0, - 0, 0, 0, 0 } }, -}; - -static struct intc_prio_reg intca_prio_registers[] __initdata = { - { 0xe6940000, 0, 16, 4, /* IPRAA */ { DMAC3_1, DMAC3_2, CMT2, ICBS0 } }, - { 0xe6940004, 0, 16, 4, /* IPRBA */ { IRDA, 0, BBIF1, BBIF2 } }, - { 0xe6940008, 0, 16, 4, /* IPRCA */ { ATAPI, 0, CMT1_1, AP_ARM1 } }, - { 0xe694000c, 0, 16, 4, /* IPRDA */ { 0, 0, CMT1_2, 0 } }, - { 0xe6940010, 0, 16, 4, /* IPREA */ { DMAC1_1, MFIS, MFI, USBF } }, - { 0xe6940014, 0, 16, 4, /* IPRFA */ { KEYSC, DMAC1_2, - SGX540, CMT1_0 } }, - { 0xe6940018, 0, 16, 4, /* IPRGA */ { SCIFA0, SCIFA1, - SCIFA2, SCIFA3 } }, - { 0xe694001c, 0, 16, 4, /* IPRGH */ { MSIOF2, USBHSDMAC, - FLCTL, SDHI0 } }, - { 0xe6940020, 0, 16, 4, /* IPRIA */ { MSIOF1, SCIFA4, 0, IIC1 } }, - { 0xe6940024, 0, 16, 4, /* IPRJA */ { DMAC2_1, DMAC2_2, - AP_ARM_L2CINT, 0 } }, - { 0xe6940028, 0, 16, 4, /* IPRKA */ { 0, CMT1_3, 0, SDHI1 } }, - { 0xe694002c, 0, 16, 4, /* IPRLA */ { TPU0, SCIFA6, - SCIFA7, GbEther } }, - { 0xe6940030, 0, 16, 4, /* IPRMA */ { 0, CMT3, 0, RWDT0 } }, - { 0xe6940034, 0, 16, 4, /* IPRNA */ { SCIFB, SCIFA5, 0, DDM } }, - { 0xe6940038, 0, 16, 4, /* IPROA */ { 0, 0, DIRC, SDHI2 } }, - { 0xe6950000, 0, 16, 4, /* IPRAA3 */ { SHWYSTAT, 0, 0, 0 } }, - /* IPRBA3 */ - /* IPRCA3 */ - /* IPRDA3 */ - { 0xe6950010, 0, 16, 4, /* IPREA3 */ { USBH1, 0, 0, 0 } }, - { 0xe6950014, 0, 16, 4, /* IPRFA3 */ { USBH2, 0, 0, 0 } }, - /* IPRGA3 */ - /* IPRHA3 */ - { 0xe6950020, 0, 16, 4, /* IPRIA3 */ { HDMI, 0, 0, 0 } }, - { 0xe6950024, 0, 16, 4, /* IPRJA3 */ { RSPI, 0, 0, 0 } }, - { 0xe6950028, 0, 16, 4, /* IPRKA3 */ { SPU2, 0, FSI, FMSI } }, - { 0xe695002c, 0, 16, 4, /* IPRLA3 */ { 0, HDMI_SSS, HDMI_KEY, 0 } }, - { 0xe6950030, 0, 16, 4, /* IPRMA3 */ { IPMMU, 0, 0, 0 } }, - { 0xe6950034, 0, 16, 4, /* IPRNA3 */ { AP_ARM2, 0, 0, 0 } }, - { 0xe6950038, 0, 16, 4, /* IPROA3 */ { MFIS2, CPORTR2S, - CMT14, CMT15 } }, - { 0xe695003c, 0, 16, 4, /* IPRPA3 */ { 0, MMCIF_0, MMCIF_1, MMCIF_2 } }, - /* IPRQA3 */ - /* IPRRA3 */ - { 0xe6950048, 0, 16, 4, /* 
IPRSA3 */ { SIM_ERI, SIM_RXI, - SIM_TXI, SIM_TEI } }, - { 0xe695004c, 0, 16, 4, /* IPRTA3 */ { STPRO_0, STPRO_1, - STPRO_2, STPRO_3 } }, - { 0xe6950050, 0, 16, 4, /* IPRUA3 */ { STPRO_4, 0, 0, 0 } }, -}; - -static DECLARE_INTC_DESC(intca_desc, "r8a7740-intca", - intca_vectors, intca_groups, - intca_mask_registers, intca_prio_registers, - NULL); - -INTC_IRQ_PINS_32(intca_irq_pins, 0xe6900000, - INTC_VECT, "r8a7740-intca-irq-pins"); - - -/* - * INTCS - */ -enum { - UNUSED_INTCS = 0, - - INTCS, - - /* interrupt sources INTCS */ - - /* HUDI */ - /* STPRO */ - /* RTDMAC(1) */ - VPU5HA2, - _2DG_TRAP, _2DG_GPM_INT, _2DG_CER_INT, - /* MFI */ - /* BBIF2 */ - VPU5F, - _2DG_BRK_INT, - /* SGX540 */ - /* 2DDMAC */ - /* IPMMU */ - /* RTDMAC 2 */ - /* KEYSC */ - /* MSIOF */ - IIC0_ALI, IIC0_TACKI, IIC0_WAITI, IIC0_DTEI, - TMU0_0, TMU0_1, TMU0_2, - CMT0, - /* CMT2 */ - LMB, - CTI, - VOU, - /* RWDT0 */ - ICB, - VIO6C, - CEU20, CEU21, - JPU, - LCDC0, - LCRC, - /* RTDMAC2(1) */ - /* RTDMAC2(2) */ - LCDC1, - /* SPU2 */ - /* FSI */ - /* FMSI */ - TMU1_0, TMU1_1, TMU1_2, - CMT4, - DISP, - DSRV, - /* MFIS2 */ - CPORTS2R, - - /* interrupt groups INTCS */ - _2DG1, - IIC0, TMU1, -}; - -static struct intc_vect intcs_vectors[] = { - /* HUDI */ - /* STPRO */ - /* RTDMAC(1) */ - INTCS_VECT(VPU5HA2, 0x0880), - INTCS_VECT(_2DG_TRAP, 0x08A0), - INTCS_VECT(_2DG_GPM_INT, 0x08C0), - INTCS_VECT(_2DG_CER_INT, 0x08E0), - /* MFI */ - /* BBIF2 */ - INTCS_VECT(VPU5F, 0x0980), - INTCS_VECT(_2DG_BRK_INT, 0x09A0), - /* SGX540 */ - /* 2DDMAC */ - /* IPMMU */ - /* RTDMAC(2) */ - /* KEYSC */ - /* MSIOF */ - INTCS_VECT(IIC0_ALI, 0x0E00), - INTCS_VECT(IIC0_TACKI, 0x0E20), - INTCS_VECT(IIC0_WAITI, 0x0E40), - INTCS_VECT(IIC0_DTEI, 0x0E60), - INTCS_VECT(TMU0_0, 0x0E80), - INTCS_VECT(TMU0_1, 0x0EA0), - INTCS_VECT(TMU0_2, 0x0EC0), - INTCS_VECT(CMT0, 0x0F00), - /* CMT2 */ - INTCS_VECT(LMB, 0x0F60), - INTCS_VECT(CTI, 0x0400), - INTCS_VECT(VOU, 0x0420), - /* RWDT0 */ - INTCS_VECT(ICB, 0x0480), - INTCS_VECT(VIO6C, 0x04E0), - INTCS_VECT(CEU20, 0x0500), - INTCS_VECT(CEU21, 0x0520), - INTCS_VECT(JPU, 0x0560), - INTCS_VECT(LCDC0, 0x0580), - INTCS_VECT(LCRC, 0x05A0), - /* RTDMAC2(1) */ - /* RTDMAC2(2) */ - INTCS_VECT(LCDC1, 0x1780), - /* SPU2 */ - /* FSI */ - /* FMSI */ - INTCS_VECT(TMU1_0, 0x1900), - INTCS_VECT(TMU1_1, 0x1920), - INTCS_VECT(TMU1_2, 0x1940), - INTCS_VECT(CMT4, 0x1980), - INTCS_VECT(DISP, 0x19A0), - INTCS_VECT(DSRV, 0x19C0), - /* MFIS2 */ - INTCS_VECT(CPORTS2R, 0x1A20), - - INTC_VECT(INTCS, 0xf80), -}; - -static struct intc_group intcs_groups[] __initdata = { - INTC_GROUP(_2DG1, /*FIXME*/ - _2DG_CER_INT, _2DG_GPM_INT, _2DG_TRAP), - INTC_GROUP(IIC0, - IIC0_DTEI, IIC0_WAITI, IIC0_TACKI, IIC0_ALI), - INTC_GROUP(TMU1, - TMU1_0, TMU1_1, TMU1_2), -}; - -static struct intc_mask_reg intcs_mask_registers[] = { - /* IMR0SA / IMCR0SA */ /* all 0 */ - { /* IMR1SA / IMCR1SA */ 0xffd20184, 0xffd201c4, 8, - { _2DG_CER_INT, _2DG_GPM_INT, _2DG_TRAP, VPU5HA2, - 0, 0, 0, 0 /*STPRO*/ } }, - { /* IMR2SA / IMCR2SA */ 0xffd20188, 0xffd201c8, 8, - { 0/*STPRO*/, 0, CEU21, VPU5F, - 0/*BBIF2*/, 0, 0, 0/*MFI*/ } }, - { /* IMR3SA / IMCR3SA */ 0xffd2018c, 0xffd201cc, 8, - { 0, 0, 0, 0, /*2DDMAC*/ - VIO6C, 0, 0, ICB } }, - { /* IMR4SA / IMCR4SA */ 0xffd20190, 0xffd201d0, 8, - { 0, 0, VOU, CTI, - JPU, 0, LCRC, LCDC0 } }, - /* IMR5SA / IMCR5SA */ /*KEYSC/RTDMAC2/RTDMAC1*/ - /* IMR6SA / IMCR6SA */ /*MSIOF/SGX540*/ - { /* IMR7SA / IMCR7SA */ 0xffd2019c, 0xffd201dc, 8, - { 0, TMU0_2, TMU0_1, TMU0_0, - 0, 0, 0, 0 } }, - { /* IMR8SA / IMCR8SA */ 0xffd201a0, 0xffd201e0, 8, - 
{ 0, 0, 0, 0, - CEU20, 0, 0, 0 } }, - { /* IMR9SA / IMCR9SA */ 0xffd201a4, 0xffd201e4, 8, - { 0, 0/*RWDT0*/, 0/*CMT2*/, CMT0, - 0, 0, 0, 0 } }, - /* IMR10SA / IMCR10SA */ /*IPMMU*/ - { /* IMR11SA / IMCR11SA */ 0xffd201ac, 0xffd201ec, 8, - { IIC0_DTEI, IIC0_WAITI, IIC0_TACKI, IIC0_ALI, - 0, _2DG_BRK_INT, LMB, 0 } }, - /* IMR12SA / IMCR12SA */ - /* IMR13SA / IMCR13SA */ - /* IMR0SA3 / IMCR0SA3 */ /*RTDMAC2(1)/RTDMAC2(2)*/ - /* IMR1SA3 / IMCR1SA3 */ - /* IMR2SA3 / IMCR2SA3 */ - /* IMR3SA3 / IMCR3SA3 */ - { /* IMR4SA3 / IMCR4SA3 */ 0xffd50190, 0xffd501d0, 8, - { 0, 0, 0, 0, - LCDC1, 0, 0, 0 } }, - /* IMR5SA3 / IMCR5SA3 */ /* SPU2/FSI/FMSI */ - { /* IMR6SA3 / IMCR6SA3 */ 0xffd50198, 0xffd501d8, 8, - { TMU1_0, TMU1_1, TMU1_2, 0, - CMT4, DISP, DSRV, 0 } }, - { /* IMR7SA3 / IMCR7SA3 */ 0xffd5019c, 0xffd501dc, 8, - { 0/*MFIS2*/, CPORTS2R, 0, 0, - 0, 0, 0, 0 } }, - { /* INTAMASK */ 0xffd20104, 0, 16, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, INTCS } }, -}; - -/* Priority is needed for INTCA to receive the INTCS interrupt */ -static struct intc_prio_reg intcs_prio_registers[] = { - { 0xffd20000, 0, 16, 4, /* IPRAS */ { CTI, VOU, 0/*2DDMAC*/, ICB } }, - { 0xffd20004, 0, 16, 4, /* IPRBS */ { JPU, LCDC0, 0, LCRC } }, - /* IPRCS */ /*BBIF2*/ - /* IPRDS */ - { 0xffd20010, 0, 16, 4, /* IPRES */ { 0/*RTDMAC(1)*/, VPU5HA2, - 0/*MFI*/, VPU5F } }, - { 0xffd20014, 0, 16, 4, /* IPRFS */ { 0/*KEYSC*/, 0/*RTDMAC(2)*/, - 0/*CMT2*/, CMT0 } }, - { 0xffd20018, 0, 16, 4, /* IPRGS */ { TMU0_0, TMU0_1, - TMU0_2, _2DG1 } }, - { 0xffd2001c, 0, 16, 4, /* IPRHS */ { 0, 0/*STPRO*/, 0/*STPRO*/, - _2DG_BRK_INT/*FIXME*/ } }, - { 0xffd20020, 0, 16, 4, /* IPRIS */ { 0, 0/*MSIOF*/, 0, IIC0 } }, - { 0xffd20024, 0, 16, 4, /* IPRJS */ { CEU20, 0/*SGX540*/, 0, 0 } }, - { 0xffd20028, 0, 16, 4, /* IPRKS */ { VIO6C, 0, LMB, 0 } }, - { 0xffd2002c, 0, 16, 4, /* IPRLS */ { 0/*IPMMU*/, 0, CEU21, 0 } }, - /* IPRMS */ /*RWDT0*/ - /* IPRAS3 */ /*RTDMAC2(1)*/ - /* IPRBS3 */ /*RTDMAC2(2)*/ - /* IPRCS3 */ - /* IPRDS3 */ - /* IPRES3 */ - /* IPRFS3 */ - /* IPRGS3 */ - /* IPRHS3 */ - /* IPRIS3 */ - { 0xffd50024, 0, 16, 4, /* IPRJS3 */ { LCDC1, 0, 0, 0 } }, - /* IPRKS3 */ /*SPU2/FSI/FMSi*/ - /* IPRLS3 */ - { 0xffd50030, 0, 16, 4, /* IPRMS3 */ { TMU1, 0, 0, 0 } }, - { 0xffd50034, 0, 16, 4, /* IPRNS3 */ { CMT4, DISP, DSRV, 0 } }, - { 0xffd50038, 0, 16, 4, /* IPROS3 */ { 0/*MFIS2*/, CPORTS2R, 0, 0 } }, - /* IPRPS3 */ -}; - -static struct resource intcs_resources[] __initdata = { - [0] = { - .start = 0xffd20000, - .end = 0xffd201ff, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = 0xffd50000, - .end = 0xffd501ff, - .flags = IORESOURCE_MEM, - } -}; - -static struct intc_desc intcs_desc __initdata = { - .name = "r8a7740-intcs", - .resource = intcs_resources, - .num_resources = ARRAY_SIZE(intcs_resources), - .hw = INTC_HW_DESC(intcs_vectors, intcs_groups, intcs_mask_registers, - intcs_prio_registers, NULL, NULL), -}; - -static void intcs_demux(unsigned int irq, struct irq_desc *desc) -{ - void __iomem *reg = (void *)irq_get_handler_data(irq); - unsigned int evtcodeas = ioread32(reg); - - generic_handle_irq(intcs_evt2irq(evtcodeas)); -} +#include void __init r8a7740_init_irq(void) { - void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE); - - register_intc_controller(&intca_desc); - register_intc_controller(&intca_irq_pins_desc); - register_intc_controller(&intcs_desc); - - /* demux using INTEVTSA */ - irq_set_handler_data(evt2irq(0xf80), (void *)intevtsa); - irq_set_chained_handler(evt2irq(0xf80), intcs_demux); + void __iomem 
*gic_dist_base = ioremap_nocache(0xc2800000, 0x1000); + void __iomem *gic_cpu_base = ioremap_nocache(0xc2000000, 0x1000); + void __iomem *intc_prio_base = ioremap_nocache(0xe6900010, 0x10); + void __iomem *intc_msk_base = ioremap_nocache(0xe6900040, 0x10); + void __iomem *pfc_inta_ctrl = ioremap_nocache(0xe605807c, 0x4); + + /* initialize the Generic Interrupt Controller PL390 r0p0 */ + gic_init(0, 29, gic_dist_base, gic_cpu_base); + + /* route signals to GIC */ + iowrite32(0x0, pfc_inta_ctrl); + + /* + * To mask the shared interrupt to SPI 149 we must ensure to set + * PRIO *and* MASK. Else we run into IRQ floods when registering + * the intc_irqpin devices + */ + iowrite32(0x0, intc_prio_base + 0x0); + iowrite32(0x0, intc_prio_base + 0x4); + iowrite32(0x0, intc_prio_base + 0x8); + iowrite32(0x0, intc_prio_base + 0xc); + iowrite8(0xff, intc_msk_base + 0x0); + iowrite8(0xff, intc_msk_base + 0x4); + iowrite8(0xff, intc_msk_base + 0x8); + iowrite8(0xff, intc_msk_base + 0xc); + + iounmap(intc_prio_base); + iounmap(intc_msk_base); + iounmap(pfc_inta_ctrl); } diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c index 8b85d4d8fab6..228d7aba4a7c 100644 --- a/arch/arm/mach-shmobile/setup-r8a7740.c +++ b/arch/arm/mach-shmobile/setup-r8a7740.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,126 @@ void __init r8a7740_pinmux_init(void) platform_device_register(&r8a7740_pfc_device); } +static struct renesas_intc_irqpin_config irqpin0_platform_data = { + .irq_base = irq_pin(0), /* IRQ0 -> IRQ7 */ +}; + +static struct resource irqpin0_resources[] = { + DEFINE_RES_MEM(0xe6900000, 4), /* ICR1A */ + DEFINE_RES_MEM(0xe6900010, 4), /* INTPRI00A */ + DEFINE_RES_MEM(0xe6900020, 1), /* INTREQ00A */ + DEFINE_RES_MEM(0xe6900040, 1), /* INTMSK00A */ + DEFINE_RES_MEM(0xe6900060, 1), /* INTMSKCLR00A */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ0 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ1 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ2 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ3 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ4 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ5 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ6 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ7 */ +}; + +static struct platform_device irqpin0_device = { + .name = "renesas_intc_irqpin", + .id = 0, + .resource = irqpin0_resources, + .num_resources = ARRAY_SIZE(irqpin0_resources), + .dev = { + .platform_data = &irqpin0_platform_data, + }, +}; + +static struct renesas_intc_irqpin_config irqpin1_platform_data = { + .irq_base = irq_pin(8), /* IRQ8 -> IRQ15 */ +}; + +static struct resource irqpin1_resources[] = { + DEFINE_RES_MEM(0xe6900004, 4), /* ICR2A */ + DEFINE_RES_MEM(0xe6900014, 4), /* INTPRI10A */ + DEFINE_RES_MEM(0xe6900024, 1), /* INTREQ10A */ + DEFINE_RES_MEM(0xe6900044, 1), /* INTMSK10A */ + DEFINE_RES_MEM(0xe6900064, 1), /* INTMSKCLR10A */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ8 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ9 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ10 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ11 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ12 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ13 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ14 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ15 */ +}; + +static struct platform_device irqpin1_device = { + .name = "renesas_intc_irqpin", + .id = 1, + .resource = irqpin1_resources, + .num_resources = ARRAY_SIZE(irqpin1_resources), + .dev = { + .platform_data = &irqpin1_platform_data, + }, +}; + +static struct renesas_intc_irqpin_config 
irqpin2_platform_data = { + .irq_base = irq_pin(16), /* IRQ16 -> IRQ23 */ +}; + +static struct resource irqpin2_resources[] = { + DEFINE_RES_MEM(0xe6900008, 4), /* ICR3A */ + DEFINE_RES_MEM(0xe6900018, 4), /* INTPRI30A */ + DEFINE_RES_MEM(0xe6900028, 1), /* INTREQ30A */ + DEFINE_RES_MEM(0xe6900048, 1), /* INTMSK30A */ + DEFINE_RES_MEM(0xe6900068, 1), /* INTMSKCLR30A */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ16 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ17 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ18 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ19 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ20 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ21 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ22 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ23 */ +}; + +static struct platform_device irqpin2_device = { + .name = "renesas_intc_irqpin", + .id = 2, + .resource = irqpin2_resources, + .num_resources = ARRAY_SIZE(irqpin2_resources), + .dev = { + .platform_data = &irqpin2_platform_data, + }, +}; + +static struct renesas_intc_irqpin_config irqpin3_platform_data = { + .irq_base = irq_pin(24), /* IRQ24 -> IRQ31 */ +}; + +static struct resource irqpin3_resources[] = { + DEFINE_RES_MEM(0xe690000c, 4), /* ICR4A */ + DEFINE_RES_MEM(0xe690001c, 4), /* INTPRI40A */ + DEFINE_RES_MEM(0xe690002c, 1), /* INTREQ40A */ + DEFINE_RES_MEM(0xe690004c, 1), /* INTMSK40A */ + DEFINE_RES_MEM(0xe690006c, 1), /* INTMSKCLR40A */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ24 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ25 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ26 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ27 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ28 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ29 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ30 */ + DEFINE_RES_IRQ(gic_spi(149)), /* IRQ31 */ +}; + +static struct platform_device irqpin3_device = { + .name = "renesas_intc_irqpin", + .id = 3, + .resource = irqpin3_resources, + .num_resources = ARRAY_SIZE(irqpin3_resources), + .dev = { + .platform_data = &irqpin3_platform_data, + }, +}; + /* SCIFA0 */ static struct plat_sci_port scif0_platform_data = { .mapbase = 0xe6c40000, @@ -101,7 +222,7 @@ static struct plat_sci_port scif0_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x0c00)), + .irqs = SCIx_IRQ_MUXED(gic_spi(100)), }; static struct platform_device scif0_device = { @@ -119,7 +240,7 @@ static struct plat_sci_port scif1_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x0c20)), + .irqs = SCIx_IRQ_MUXED(gic_spi(101)), }; static struct platform_device scif1_device = { @@ -137,7 +258,7 @@ static struct plat_sci_port scif2_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x0c40)), + .irqs = SCIx_IRQ_MUXED(gic_spi(102)), }; static struct platform_device scif2_device = { @@ -155,7 +276,7 @@ static struct plat_sci_port scif3_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x0c60)), + .irqs = SCIx_IRQ_MUXED(gic_spi(103)), }; static struct platform_device scif3_device = { @@ -173,7 +294,7 @@ static struct plat_sci_port scif4_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x0d20)), + .irqs = SCIx_IRQ_MUXED(gic_spi(104)), }; static struct platform_device scif4_device = { @@ -191,7 +312,7 @@ static struct plat_sci_port
scif5_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x0d40)), + .irqs = SCIx_IRQ_MUXED(gic_spi(105)), }; static struct platform_device scif5_device = { @@ -209,7 +330,7 @@ static struct plat_sci_port scif6_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x04c0)), + .irqs = SCIx_IRQ_MUXED(gic_spi(106)), }; static struct platform_device scif6_device = { @@ -227,7 +348,7 @@ static struct plat_sci_port scif7_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFA, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x04e0)), + .irqs = SCIx_IRQ_MUXED(gic_spi(107)), }; static struct platform_device scif7_device = { @@ -245,7 +366,7 @@ static struct plat_sci_port scifb_platform_data = { .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, .type = PORT_SCIFB, - .irqs = SCIx_IRQ_MUXED(evt2irq(0x0d60)), + .irqs = SCIx_IRQ_MUXED(gic_spi(108)), }; static struct platform_device scifb_device = { @@ -273,7 +394,7 @@ static struct resource cmt10_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = evt2irq(0x0b00), + .start = gic_spi(58), .flags = IORESOURCE_IRQ, }, }; @@ -304,7 +425,7 @@ static struct resource tmu00_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = intcs_evt2irq(0xe80), + .start = gic_spi(198), .flags = IORESOURCE_IRQ, }, }; @@ -334,7 +455,7 @@ static struct resource tmu01_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = intcs_evt2irq(0xea0), + .start = gic_spi(199), .flags = IORESOURCE_IRQ, }, }; @@ -364,7 +485,7 @@ static struct resource tmu02_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = intcs_evt2irq(0xec0), + .start = gic_spi(200), .flags = IORESOURCE_IRQ, }, }; @@ -411,6 +532,10 @@ static struct platform_device ipmmu_device = { }; static struct platform_device *r8a7740_early_devices[] __initdata = { + &irqpin0_device, + &irqpin1_device, + &irqpin2_device, + &irqpin3_device, &scif0_device, &scif1_device, &scif2_device, @@ -525,14 +650,14 @@ static struct resource r8a7740_dmae0_resources[] = { }, { .name = "error_irq", - .start = evt2irq(0x20c0), - .end = evt2irq(0x20c0), + .start = gic_spi(34), + .end = gic_spi(34), .flags = IORESOURCE_IRQ, }, { /* IRQ for channels 0-5 */ - .start = evt2irq(0x2000), - .end = evt2irq(0x20a0), + .start = gic_spi(28), + .end = gic_spi(33), .flags = IORESOURCE_IRQ, }, }; @@ -553,14 +678,14 @@ static struct resource r8a7740_dmae1_resources[] = { }, { .name = "error_irq", - .start = evt2irq(0x21c0), - .end = evt2irq(0x21c0), + .start = gic_spi(41), + .end = gic_spi(41), .flags = IORESOURCE_IRQ, }, { /* IRQ for channels 0-5 */ - .start = evt2irq(0x2100), - .end = evt2irq(0x21a0), + .start = gic_spi(35), + .end = gic_spi(40), .flags = IORESOURCE_IRQ, }, }; @@ -581,14 +706,14 @@ static struct resource r8a7740_dmae2_resources[] = { }, { .name = "error_irq", - .start = evt2irq(0x22c0), - .end = evt2irq(0x22c0), + .start = gic_spi(48), + .end = gic_spi(48), .flags = IORESOURCE_IRQ, }, { /* IRQ for channels 0-5 */ - .start = evt2irq(0x2200), - .end = evt2irq(0x22a0), + .start = gic_spi(42), + .end = gic_spi(47), .flags = IORESOURCE_IRQ, }, }; @@ -677,8 +802,8 @@ static struct resource r8a7740_usb_dma_resources[] = { }, { /* IRQ for channels */ - .start = evt2irq(0x0a00), - .end = evt2irq(0x0a00), + .start = gic_spi(49), + .end = gic_spi(49), .flags = IORESOURCE_IRQ, }, }; @@ -702,8 +827,8 @@ static struct 
resource i2c0_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = intcs_evt2irq(0xe00), - .end = intcs_evt2irq(0xe60), + .start = gic_spi(201), + .end = gic_spi(204), .flags = IORESOURCE_IRQ, }, }; @@ -716,8 +841,8 @@ static struct resource i2c1_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = evt2irq(0x780), /* IIC1_ALI1 */ - .end = evt2irq(0x7e0), /* IIC1_DTEI1 */ + .start = gic_spi(70), /* IIC1_ALI1 */ + .end = gic_spi(73), /* IIC1_DTEI1 */ .flags = IORESOURCE_IRQ, }, }; @@ -738,8 +863,8 @@ static struct platform_device i2c1_device = { static struct resource pmu_resources[] = { [0] = { - .start = evt2irq(0x19a0), - .end = evt2irq(0x19a0), + .start = gic_spi(83), + .end = gic_spi(83), .flags = IORESOURCE_IRQ, }, }; @@ -904,7 +1029,6 @@ DT_MACHINE_START(R8A7740_DT, "Generic R8A7740 (Flattened Device Tree)") .map_io = r8a7740_map_io, .init_early = r8a7740_add_early_devices_dt, .init_irq = r8a7740_init_irq, - .handle_irq = shmobile_handle_irq_intc, .init_machine = r8a7740_add_standard_devices_dt, .init_time = shmobile_timer_init, .dt_compat = r8a7740_boards_compat_dt, diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c index 214788c4a606..2b528280e3c1 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c @@ -2545,38 +2545,38 @@ static struct pinmux_data_reg pinmux_data_regs[] = { }; static struct pinmux_irq pinmux_irqs[] = { - PINMUX_IRQ(evt2irq(0x0200), PORT2_FN0, PORT13_FN0), /* IRQ0A */ - PINMUX_IRQ(evt2irq(0x0220), PORT20_FN0), /* IRQ1A */ - PINMUX_IRQ(evt2irq(0x0240), PORT11_FN0, PORT12_FN0), /* IRQ2A */ - PINMUX_IRQ(evt2irq(0x0260), PORT10_FN0, PORT14_FN0), /* IRQ3A */ - PINMUX_IRQ(evt2irq(0x0280), PORT15_FN0, PORT172_FN0), /* IRQ4A */ - PINMUX_IRQ(evt2irq(0x02A0), PORT0_FN0, PORT1_FN0), /* IRQ5A */ - PINMUX_IRQ(evt2irq(0x02C0), PORT121_FN0, PORT173_FN0), /* IRQ6A */ - PINMUX_IRQ(evt2irq(0x02E0), PORT120_FN0, PORT209_FN0), /* IRQ7A */ - PINMUX_IRQ(evt2irq(0x0300), PORT119_FN0), /* IRQ8A */ - PINMUX_IRQ(evt2irq(0x0320), PORT118_FN0, PORT210_FN0), /* IRQ9A */ - PINMUX_IRQ(evt2irq(0x0340), PORT19_FN0), /* IRQ10A */ - PINMUX_IRQ(evt2irq(0x0360), PORT104_FN0), /* IRQ11A */ - PINMUX_IRQ(evt2irq(0x0380), PORT42_FN0, PORT97_FN0), /* IRQ12A */ - PINMUX_IRQ(evt2irq(0x03A0), PORT64_FN0, PORT98_FN0), /* IRQ13A */ - PINMUX_IRQ(evt2irq(0x03C0), PORT63_FN0, PORT99_FN0), /* IRQ14A */ - PINMUX_IRQ(evt2irq(0x03E0), PORT62_FN0, PORT100_FN0), /* IRQ15A */ - PINMUX_IRQ(evt2irq(0x3200), PORT68_FN0, PORT211_FN0), /* IRQ16A */ - PINMUX_IRQ(evt2irq(0x3220), PORT69_FN0), /* IRQ17A */ - PINMUX_IRQ(evt2irq(0x3240), PORT70_FN0), /* IRQ18A */ - PINMUX_IRQ(evt2irq(0x3260), PORT71_FN0), /* IRQ19A */ - PINMUX_IRQ(evt2irq(0x3280), PORT67_FN0), /* IRQ20A */ - PINMUX_IRQ(evt2irq(0x32A0), PORT202_FN0), /* IRQ21A */ - PINMUX_IRQ(evt2irq(0x32C0), PORT95_FN0), /* IRQ22A */ - PINMUX_IRQ(evt2irq(0x32E0), PORT96_FN0), /* IRQ23A */ - PINMUX_IRQ(evt2irq(0x3300), PORT180_FN0), /* IRQ24A */ - PINMUX_IRQ(evt2irq(0x3320), PORT38_FN0), /* IRQ25A */ - PINMUX_IRQ(evt2irq(0x3340), PORT58_FN0, PORT81_FN0), /* IRQ26A */ - PINMUX_IRQ(evt2irq(0x3360), PORT57_FN0, PORT168_FN0), /* IRQ27A */ - PINMUX_IRQ(evt2irq(0x3380), PORT56_FN0, PORT169_FN0), /* IRQ28A */ - PINMUX_IRQ(evt2irq(0x33A0), PORT50_FN0, PORT170_FN0), /* IRQ29A */ - PINMUX_IRQ(evt2irq(0x33C0), PORT49_FN0, PORT171_FN0), /* IRQ30A */ - PINMUX_IRQ(evt2irq(0x33E0), PORT41_FN0, PORT167_FN0), /* IRQ31A */ + PINMUX_IRQ(irq_pin(0), GPIO_PORT2, GPIO_PORT13), /* IRQ0A */ + 
PINMUX_IRQ(irq_pin(1), GPIO_PORT20), /* IRQ1A */ + PINMUX_IRQ(irq_pin(2), GPIO_PORT11, GPIO_PORT12), /* IRQ2A */ + PINMUX_IRQ(irq_pin(3), GPIO_PORT10, GPIO_PORT14), /* IRQ3A */ + PINMUX_IRQ(irq_pin(4), GPIO_PORT15, GPIO_PORT172),/* IRQ4A */ + PINMUX_IRQ(irq_pin(5), GPIO_PORT0, GPIO_PORT1), /* IRQ5A */ + PINMUX_IRQ(irq_pin(6), GPIO_PORT121, GPIO_PORT173),/* IRQ6A */ + PINMUX_IRQ(irq_pin(7), GPIO_PORT120, GPIO_PORT209),/* IRQ7A */ + PINMUX_IRQ(irq_pin(8), GPIO_PORT119), /* IRQ8A */ + PINMUX_IRQ(irq_pin(9), GPIO_PORT118, GPIO_PORT210),/* IRQ9A */ + PINMUX_IRQ(irq_pin(10), GPIO_PORT19), /* IRQ10A */ + PINMUX_IRQ(irq_pin(11), GPIO_PORT104), /* IRQ11A */ + PINMUX_IRQ(irq_pin(12), GPIO_PORT42, GPIO_PORT97), /* IRQ12A */ + PINMUX_IRQ(irq_pin(13), GPIO_PORT64, GPIO_PORT98), /* IRQ13A */ + PINMUX_IRQ(irq_pin(14), GPIO_PORT63, GPIO_PORT99), /* IRQ14A */ + PINMUX_IRQ(irq_pin(15), GPIO_PORT62, GPIO_PORT100),/* IRQ15A */ + PINMUX_IRQ(irq_pin(16), GPIO_PORT68, GPIO_PORT211),/* IRQ16A */ + PINMUX_IRQ(irq_pin(17), GPIO_PORT69), /* IRQ17A */ + PINMUX_IRQ(irq_pin(18), GPIO_PORT70), /* IRQ18A */ + PINMUX_IRQ(irq_pin(19), GPIO_PORT71), /* IRQ19A */ + PINMUX_IRQ(irq_pin(20), GPIO_PORT67), /* IRQ20A */ + PINMUX_IRQ(irq_pin(21), GPIO_PORT202), /* IRQ21A */ + PINMUX_IRQ(irq_pin(22), GPIO_PORT95), /* IRQ22A */ + PINMUX_IRQ(irq_pin(23), GPIO_PORT96), /* IRQ23A */ + PINMUX_IRQ(irq_pin(24), GPIO_PORT180), /* IRQ24A */ + PINMUX_IRQ(irq_pin(25), GPIO_PORT38), /* IRQ25A */ + PINMUX_IRQ(irq_pin(26), GPIO_PORT58, GPIO_PORT81), /* IRQ26A */ + PINMUX_IRQ(irq_pin(27), GPIO_PORT57, GPIO_PORT168),/* IRQ27A */ + PINMUX_IRQ(irq_pin(28), GPIO_PORT56, GPIO_PORT169),/* IRQ28A */ + PINMUX_IRQ(irq_pin(29), GPIO_PORT50, GPIO_PORT170),/* IRQ29A */ + PINMUX_IRQ(irq_pin(30), GPIO_PORT49, GPIO_PORT171),/* IRQ30A */ + PINMUX_IRQ(irq_pin(31), GPIO_PORT41, GPIO_PORT167),/* IRQ31A */ }; struct sh_pfc_soc_info r8a7740_pinmux_info = { -- GitLab From fe7aa82d62d13d97c9a786707c467357cb8bddc3 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 7 Mar 2013 20:00:48 +0100 Subject: [PATCH 0297/3163] ARM: shmobile: sh73a0: add a TWD clock Add a TWD clock on sh73a0 for the smp_twd driver to properly update the clock's frequency upon cpufreq events. 
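For context, the consumer side of this change lives in the generic TWD driver: arch/arm/kernel/smp_twd.c fetches its clock by the "smp_twd" device name, which the clkdev entry added below now resolves to twd_clk. A rough sketch of that lookup (the helper name is made up):

#include <linux/clk.h>
#include <linux/err.h>

/* Hypothetical helper mirroring what the smp_twd driver does: the
 * CLKDEV_DEV_ID("smp_twd", &twd_clk) entry added by this patch makes
 * this lookup return twd_clk, whose rate is the DIV4_Z rate / 4. */
static unsigned long example_twd_rate(void)
{
	struct clk *clk = clk_get_sys("smp_twd", NULL);

	if (IS_ERR(clk))
		return 0;

	clk_prepare_enable(clk);
	return clk_get_rate(clk);
}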
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-sh73a0.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 34b5c5ae4cbd..a57ec151674e 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -288,6 +288,20 @@ static struct clk div4_clks[DIV4_NR] = { [DIV4_HP] = DIV4(FRQCRB, 4, 0xdff, 0), }; +static unsigned long twd_recalc(struct clk *clk) +{ + return clk_get_rate(clk->parent) / 4; +} + +static struct sh_clk_ops twd_clk_ops = { + .recalc = twd_recalc, +}; + +static struct clk twd_clk = { + .parent = &div4_clks[DIV4_Z], + .ops = &twd_clk_ops, +}; + enum { DIV6_VCK1, DIV6_VCK2, DIV6_VCK3, DIV6_ZB1, DIV6_FLCTL, DIV6_SDHI0, DIV6_SDHI1, DIV6_SDHI2, DIV6_FSIA, DIV6_FSIB, DIV6_SUB, @@ -482,6 +496,7 @@ static struct clk dsi1phy_clk = { static struct clk *late_main_clks[] = { &dsi0phy_clk, &dsi1phy_clk, + &twd_clk, }; enum { MSTP001, @@ -546,6 +561,7 @@ static struct clk mstp_clks[MSTP_NR] = { static struct clk_lookup lookups[] = { /* main clocks */ CLKDEV_CON_ID("r_clk", &r_clk), + CLKDEV_DEV_ID("smp_twd", &twd_clk), /* smp_twd */ /* DIV6 clocks */ CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]), -- GitLab From 1f7ccd88717d993c5189280034f1d3b6b5af9693 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:55:07 -0700 Subject: [PATCH 0298/3163] ARM: shmobile: sh73a0: remove DIV4_ZT* clocks The DIV4_ZT* clocks are debug and trace bus clocks. There is no need to control them from Linux/software. Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-sh73a0.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index a57ec151674e..26a580324105 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -269,7 +269,7 @@ static struct clk_div4_table div4_table = { }; enum { DIV4_I, DIV4_ZG, DIV4_M3, DIV4_B, DIV4_M1, DIV4_M2, - DIV4_Z, DIV4_ZTR, DIV4_ZT, DIV4_ZX, DIV4_HP, DIV4_NR }; + DIV4_Z, DIV4_ZX, DIV4_HP, DIV4_NR }; #define DIV4(_reg, _bit, _mask, _flags) \ SH_CLK_DIV4(&pll1_clk, _reg, _bit, _mask, _flags) @@ -282,8 +282,6 @@ static struct clk div4_clks[DIV4_NR] = { [DIV4_M1] = DIV4(FRQCRA, 4, 0x1dff, 0), [DIV4_M2] = DIV4(FRQCRA, 0, 0x1dff, 0), [DIV4_Z] = SH_CLK_DIV4(&pll0_clk, FRQCRB, 24, 0x97f, 0), - [DIV4_ZTR] = DIV4(FRQCRB, 20, 0xdff, 0), - [DIV4_ZT] = DIV4(FRQCRB, 16, 0xdff, 0), [DIV4_ZX] = DIV4(FRQCRB, 12, 0xdff, 0), [DIV4_HP] = DIV4(FRQCRB, 4, 0xdff, 0), }; -- GitLab From b3186c68805911599cbacceae23f60debb5e2210 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:55:24 -0700 Subject: [PATCH 0299/3163] ARM: shmobile: sh7372: remove DIV4_ZT* clocks The DIV4_ZT* clocks are debug and trace bus clocks. There is no need to control them from Linux/software.
Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-sh7372.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 45d21fe317f4..6c23e3f22d62 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -342,7 +342,7 @@ static struct clk_div4_table div4_table = { }; enum { DIV4_I, DIV4_ZG, DIV4_B, DIV4_M1, DIV4_CSIR, - DIV4_ZTR, DIV4_ZT, DIV4_ZX, DIV4_HP, + DIV4_ZX, DIV4_HP, DIV4_ISPB, DIV4_S, DIV4_ZB, DIV4_ZB3, DIV4_CP, DIV4_DDRP, DIV4_NR }; @@ -355,8 +355,6 @@ static struct clk div4_clks[DIV4_NR] = { [DIV4_B] = DIV4(FRQCRA, 8, 0x6fff, CLK_ENABLE_ON_INIT), [DIV4_M1] = DIV4(FRQCRA, 4, 0x6fff, CLK_ENABLE_ON_INIT), [DIV4_CSIR] = DIV4(FRQCRA, 0, 0x6fff, 0), - [DIV4_ZTR] = DIV4(FRQCRB, 20, 0x6fff, 0), - [DIV4_ZT] = DIV4(FRQCRB, 16, 0x6fff, 0), [DIV4_ZX] = DIV4(FRQCRB, 12, 0x6fff, 0), [DIV4_HP] = DIV4(FRQCRB, 4, 0x6fff, 0), [DIV4_ISPB] = DIV4(FRQCRC, 20, 0x6fff, 0), @@ -516,8 +514,6 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("b_clk", &div4_clks[DIV4_B]), CLKDEV_CON_ID("m1_clk", &div4_clks[DIV4_M1]), CLKDEV_CON_ID("csir_clk", &div4_clks[DIV4_CSIR]), - CLKDEV_CON_ID("ztr_clk", &div4_clks[DIV4_ZTR]), - CLKDEV_CON_ID("zt_clk", &div4_clks[DIV4_ZT]), CLKDEV_CON_ID("zx_clk", &div4_clks[DIV4_ZX]), CLKDEV_CON_ID("hp_clk", &div4_clks[DIV4_HP]), CLKDEV_CON_ID("ispb_clk", &div4_clks[DIV4_ISPB]), -- GitLab From f5942c76217e3f4c2a62a72c9d64997b8765f9e2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:55:41 -0700 Subject: [PATCH 0300/3163] ARM: shmobile: add struct clk_ratio and fixed ratio clock macro Renesas chips have many internal clocks, and some of them run at a fixed ratio of their parent clock. The current shmobile clock code implements its own divX_recalc function and divX_clk_ops for each of these.
This patch reduces that duplicated code. Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock.c | 13 +++++++ arch/arm/mach-shmobile/include/mach/clock.h | 39 +++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 arch/arm/mach-shmobile/include/mach/clock.h diff --git a/arch/arm/mach-shmobile/clock.c b/arch/arm/mach-shmobile/clock.c index e816ca9bd213..ad7df629d995 100644 --- a/arch/arm/mach-shmobile/clock.c +++ b/arch/arm/mach-shmobile/clock.c @@ -23,6 +23,19 @@ #include #include #include +#include +#include + +unsigned long shmobile_fixed_ratio_clk_recalc(struct clk *clk) +{ + struct clk_ratio *p = clk->priv; + + return clk->parent->rate / p->div * p->mul; +}; + +struct sh_clk_ops shmobile_fixed_ratio_clk_ops = { + .recalc = shmobile_fixed_ratio_clk_recalc, +}; int __init shmobile_clk_init(void) { diff --git a/arch/arm/mach-shmobile/include/mach/clock.h b/arch/arm/mach-shmobile/include/mach/clock.h new file mode 100644 index 000000000000..76ac61292e48 --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/clock.h @@ -0,0 +1,39 @@ +#ifndef CLOCK_H +#define CLOCK_H + +unsigned long shmobile_fixed_ratio_clk_recalc(struct clk *clk); +extern struct sh_clk_ops shmobile_fixed_ratio_clk_ops; + +/* clock ratio */ +struct clk_ratio { + int mul; + int div; +}; + +#define SH_CLK_RATIO(name, m, d) \ +static struct clk_ratio name ##_ratio = { \ + .mul = m, \ + .div = d, \ +} + +#define SH_FIXED_RATIO_CLKg(name, p, r) \ +struct clk name = { \ + .parent = &p, \ + .ops = &shmobile_fixed_ratio_clk_ops,\ + .priv = &r ## _ratio, \ +} + +#define SH_FIXED_RATIO_CLK(name, p, r) \ +static SH_FIXED_RATIO_CLKg(name, p, r); + +#define SH_FIXED_RATIO_CLK_SET(name, p, m, d) \ + SH_CLK_RATIO(name, m, d); \ + SH_FIXED_RATIO_CLK(name, p, name); + +#define SH_CLK_SET_RATIO(p, m, d) \ +{ \ + (p)->mul = m; \ + (p)->div = d; \ +} + +#endif -- GitLab From 99fb32b88be4e9b12c44f61b613a0936a62454b7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:55:54 -0700 Subject: [PATCH 0301/3163] ARM: shmobile: sh7372: use fixed ratio clock The current clock-sh7372 code has its own implementation for each divX clock. This patch switches it to the fixed ratio clock, and was tested on the mackerel board.
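To make the new helpers from the previous patch concrete, here is a minimal usage sketch; the foo_clk parent and all clock names are made up, and the real conversions follow in the next patches:

#include <mach/clock.h>

/* A made-up 48 MHz parent clock, for illustration only. */
static struct clk foo_clk = {
	.rate = 48000000,
};

/* One ratio object, shareable by any number of clocks. */
SH_CLK_RATIO(div3, 1, 3);	/* defines struct clk_ratio div3_ratio */

/* foo_div3_clk = foo_clk * 1 / 3 = 16 MHz, via the common recalc op. */
SH_FIXED_RATIO_CLK(foo_div3_clk, foo_clk, div3);

/* Ratio and clock declared together; the ratio can be changed later: */
SH_FIXED_RATIO_CLK_SET(bar_clk, foo_clk, 1, 1);

static void __init example_fixup(void)
{
	SH_CLK_SET_RATIO(&bar_clk_ratio, 2, 3);	/* bar_clk = foo_clk * 2 / 3 */
}

Note that shmobile_fixed_ratio_clk_recalc() divides by div before multiplying by mul, so parent rates that are not exact multiples of div lose the remainder to integer truncation.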
Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-sh7372.c | 44 +++++---------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 6c23e3f22d62..7e105932c09d 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* SH7372 registers */ @@ -83,39 +84,12 @@ struct clk sh7372_extal2_clk = { .rate = 48000000, }; -/* A fixed divide-by-2 block */ -static unsigned long div2_recalc(struct clk *clk) -{ - return clk->parent->rate / 2; -} - -static struct sh_clk_ops div2_clk_ops = { - .recalc = div2_recalc, -}; +SH_CLK_RATIO(div2, 1, 2); -/* Divide dv_clki by two */ -struct clk sh7372_dv_clki_div2_clk = { - .ops = &div2_clk_ops, - .parent = &sh7372_dv_clki_clk, -}; - -/* Divide extal1 by two */ -static struct clk extal1_div2_clk = { - .ops = &div2_clk_ops, - .parent = &sh7372_extal1_clk, -}; - -/* Divide extal2 by two */ -static struct clk extal2_div2_clk = { - .ops = &div2_clk_ops, - .parent = &sh7372_extal2_clk, -}; - -/* Divide extal2 by four */ -static struct clk extal2_div4_clk = { - .ops = &div2_clk_ops, - .parent = &extal2_div2_clk, -}; +SH_FIXED_RATIO_CLKg(sh7372_dv_clki_div2_clk, sh7372_dv_clki_clk, div2); +SH_FIXED_RATIO_CLK(extal1_div2_clk, sh7372_extal1_clk, div2); +SH_FIXED_RATIO_CLK(extal2_div2_clk, sh7372_extal2_clk, div2); +SH_FIXED_RATIO_CLK(extal2_div4_clk, extal2_div2_clk, div2); /* PLLC0 and PLLC1 */ static unsigned long pllc01_recalc(struct clk *clk) @@ -147,10 +121,7 @@ static struct clk pllc1_clk = { }; /* Divide PLLC1 by two */ -static struct clk pllc1_div2_clk = { - .ops = &div2_clk_ops, - .parent = &pllc1_clk, -}; +SH_FIXED_RATIO_CLK(pllc1_div2_clk, pllc1_clk, div2); /* PLLC2 */ @@ -650,5 +621,4 @@ void __init sh7372_clock_init(void) shmobile_clk_init(); else panic("failed to setup sh7372 clocks\n"); - } -- GitLab From 891cab3e7a71365eb8c79098e487b8f2056a1a73 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:56:14 -0700 Subject: [PATCH 0302/3163] ARM: shmobile: sh73a0: use fixed ratio clock The current clock-sh73a0 code has its own implementation for each divX clock. This patch switches it to the fixed ratio clock, and was tested on the kzm9g board.
Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-sh73a0.c | 72 +++++---------------------- 1 file changed, 12 insertions(+), 60 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 26a580324105..784fbaa4cc55 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #define FRQCRA IOMEM(0xe6150000) @@ -83,61 +84,16 @@ struct clk sh73a0_extal2_clk = { .rate = 48000000, }; -/* A fixed divide-by-2 block */ -static unsigned long div2_recalc(struct clk *clk) -{ - return clk->parent->rate / 2; -} - -static struct sh_clk_ops div2_clk_ops = { - .recalc = div2_recalc, -}; - -static unsigned long div7_recalc(struct clk *clk) -{ - return clk->parent->rate / 7; -} - -static struct sh_clk_ops div7_clk_ops = { - .recalc = div7_recalc, -}; - -static unsigned long div13_recalc(struct clk *clk) -{ - return clk->parent->rate / 13; -} - -static struct sh_clk_ops div13_clk_ops = { - .recalc = div13_recalc, -}; - -/* Divide extal1 by two */ -static struct clk extal1_div2_clk = { - .ops = &div2_clk_ops, - .parent = &sh73a0_extal1_clk, -}; - -/* Divide extal2 by two */ -static struct clk extal2_div2_clk = { - .ops = &div2_clk_ops, - .parent = &sh73a0_extal2_clk, -}; - static struct sh_clk_ops main_clk_ops = { .recalc = followparent_recalc, }; /* Main clock */ static struct clk main_clk = { + /* .parent will be set in sh73a0_clock_init() */ .ops = &main_clk_ops, }; -/* Divide Main clock by two */ -static struct clk main_div2_clk = { - .ops = &div2_clk_ops, - .parent = &main_clk, -}; - /* PLL0, PLL1, PLL2, PLL3 */ static unsigned long pll_recalc(struct clk *clk) { @@ -193,21 +149,17 @@ static struct clk pll3_clk = { .enable_bit = 3, }; -/* Divide PLL */ -static struct clk pll1_div2_clk = { - .ops = &div2_clk_ops, - .parent = &pll1_clk, -}; - -static struct clk pll1_div7_clk = { - .ops = &div7_clk_ops, - .parent = &pll1_clk, -}; +/* A fixed divide block */ +SH_CLK_RATIO(div2, 1, 2); +SH_CLK_RATIO(div7, 1, 7); +SH_CLK_RATIO(div13, 1, 13); -static struct clk pll1_div13_clk = { - .ops = &div13_clk_ops, - .parent = &pll1_clk, -}; +SH_FIXED_RATIO_CLK(extal1_div2_clk, sh73a0_extal1_clk, div2); +SH_FIXED_RATIO_CLK(extal2_div2_clk, sh73a0_extal2_clk, div2); +SH_FIXED_RATIO_CLK(main_div2_clk, main_clk, div2); +SH_FIXED_RATIO_CLK(pll1_div2_clk, pll1_clk, div2); +SH_FIXED_RATIO_CLK(pll1_div7_clk, pll1_clk, div7); +SH_FIXED_RATIO_CLK(pll1_div13_clk, pll1_clk, div13); /* External input clock */ struct clk sh73a0_extcki_clk = { -- GitLab From 5d14ff082badf94c5f5eaf9bc3f53075792c4f44 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:56:40 -0700 Subject: [PATCH 0303/3163] ARM: shmobile: r8a7740: tidyup comment/implementation mismatch The DIV4/DIV6/MSTP implementation areas in the current clock-r8a7740 do not match their comments. This patch tidies up the comment/implementation areas.
Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7740.c | 60 +++++++++++++------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 1feb9a2286a8..161e128e2157 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -323,6 +323,7 @@ struct clk *main_clks[] = { &fsibck_clk, }; +/* DIV4 clocks */ static void div4_kick(struct clk *clk) { unsigned long value; @@ -346,6 +347,26 @@ static struct clk_div4_table div4_table = { .kick = div4_kick, }; +enum { + DIV4_I, DIV4_ZG, DIV4_B, DIV4_M1, DIV4_HP, + DIV4_HPP, DIV4_USBP, DIV4_S, DIV4_ZB, DIV4_M3, DIV4_CP, + DIV4_NR +}; + +struct clk div4_clks[DIV4_NR] = { + [DIV4_I] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 20, 0x6fff, CLK_ENABLE_ON_INIT), + [DIV4_ZG] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 16, 0x6fff, CLK_ENABLE_ON_INIT), + [DIV4_B] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 8, 0x6fff, CLK_ENABLE_ON_INIT), + [DIV4_M1] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 4, 0x6fff, CLK_ENABLE_ON_INIT), + [DIV4_HP] = SH_CLK_DIV4(&pllc1_clk, FRQCRB, 4, 0x6fff, 0), + [DIV4_HPP] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 20, 0x6fff, 0), + [DIV4_USBP] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 16, 0x6fff, 0), + [DIV4_S] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 12, 0x6fff, 0), + [DIV4_ZB] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 8, 0x6fff, 0), + [DIV4_M3] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 4, 0x6fff, 0), + [DIV4_CP] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 0, 0x6fff, 0), +}; + /* DIV6 reparent */ enum { DIV6_HDMI, @@ -391,6 +412,16 @@ static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = { fsib_parents, ARRAY_SIZE(fsib_parents), 6, 2), }; +/* DIV6 clocks */ +enum { + DIV6_SUB, + DIV6_NR +}; + +static struct clk div6_clks[DIV6_NR] = { + [DIV6_SUB] = SH_CLK_DIV6(&pllc1_div2_clk, SUBCKCR, 0), +}; + /* HDMI1/2 clock */ static unsigned long hdmi12_recalc(struct clk *clk) { @@ -455,35 +486,6 @@ static struct clk fsidivs[] = { }; /* MSTP */ -enum { - DIV4_I, DIV4_ZG, DIV4_B, DIV4_M1, DIV4_HP, - DIV4_HPP, DIV4_USBP, DIV4_S, DIV4_ZB, DIV4_M3, DIV4_CP, - DIV4_NR -}; - -struct clk div4_clks[DIV4_NR] = { - [DIV4_I] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 20, 0x6fff, CLK_ENABLE_ON_INIT), - [DIV4_ZG] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 16, 0x6fff, CLK_ENABLE_ON_INIT), - [DIV4_B] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 8, 0x6fff, CLK_ENABLE_ON_INIT), - [DIV4_M1] = SH_CLK_DIV4(&pllc1_clk, FRQCRA, 4, 0x6fff, CLK_ENABLE_ON_INIT), - [DIV4_HP] = SH_CLK_DIV4(&pllc1_clk, FRQCRB, 4, 0x6fff, 0), - [DIV4_HPP] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 20, 0x6fff, 0), - [DIV4_USBP] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 16, 0x6fff, 0), - [DIV4_S] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 12, 0x6fff, 0), - [DIV4_ZB] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 8, 0x6fff, 0), - [DIV4_M3] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 4, 0x6fff, 0), - [DIV4_CP] = SH_CLK_DIV4(&pllc1_clk, FRQCRC, 0, 0x6fff, 0), -}; - -enum { - DIV6_SUB, - DIV6_NR -}; - -static struct clk div6_clks[DIV6_NR] = { - [DIV6_SUB] = SH_CLK_DIV6(&pllc1_div2_clk, SUBCKCR, 0), -}; - enum { MSTP128, MSTP127, MSTP125, MSTP116, MSTP111, MSTP100, MSTP117, -- GitLab From 10d6db2ba2a68fd7d5639ce4f422ec9dff2af0e7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:56:57 -0700 Subject: [PATCH 0304/3163] ARM: shmobile: r8a7740: use fixed ratio clock The current clock-r8a7740 code has its own implementation for each divX clock. This patch switches it to the fixed ratio clock, and was tested on the armadillo board.
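One detail worth noting in the conversion below: there is no dedicated 1/2048 ratio object. extal1_div2048_clk is instead built by stacking the div1k (1/1024) ratio on top of extal1_div2_clk, so only two ratio objects (div2 and div1k) are needed; with a hypothetical 24 MHz EXTAL1, that chain yields 24 MHz / 2 / 1024 ≈ 11.7 kHz.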
Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7740.c | 54 +++++--------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 161e128e2157..c0d39aa6de50 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -97,42 +98,13 @@ static struct clk dv_clk = { .rate = 27000000, }; -static unsigned long div_recalc(struct clk *clk) -{ - return clk->parent->rate / (int)(clk->priv); -} - -static struct sh_clk_ops div_clk_ops = { - .recalc = div_recalc, -}; +SH_CLK_RATIO(div2, 1, 2); +SH_CLK_RATIO(div1k, 1, 1024); -/* extal1 / 2 */ -static struct clk extal1_div2_clk = { - .ops = &div_clk_ops, - .priv = (void *)2, - .parent = &extal1_clk, -}; - -/* extal1 / 1024 */ -static struct clk extal1_div1024_clk = { - .ops = &div_clk_ops, - .priv = (void *)1024, - .parent = &extal1_clk, -}; - -/* extal1 / 2 / 1024 */ -static struct clk extal1_div2048_clk = { - .ops = &div_clk_ops, - .priv = (void *)1024, - .parent = &extal1_div2_clk, -}; - -/* extal2 / 2 */ -static struct clk extal2_div2_clk = { - .ops = &div_clk_ops, - .priv = (void *)2, - .parent = &extal2_clk, -}; +SH_FIXED_RATIO_CLK(extal1_div2_clk, extal1_clk, div2); +SH_FIXED_RATIO_CLK(extal1_div1024_clk, extal1_clk, div1k); +SH_FIXED_RATIO_CLK(extal1_div2048_clk, extal1_div2_clk, div1k); +SH_FIXED_RATIO_CLK(extal2_div2_clk, extal2_clk, div2); static struct sh_clk_ops followparent_clk_ops = { .recalc = followparent_recalc, @@ -143,11 +115,7 @@ static struct clk system_clk = { .ops = &followparent_clk_ops, }; -static struct clk system_div2_clk = { - .ops = &div_clk_ops, - .priv = (void *)2, - .parent = &system_clk, -}; +SH_FIXED_RATIO_CLK(system_div2_clk, system_clk, div2); /* r_clk */ static struct clk r_clk = { @@ -184,11 +152,7 @@ static struct clk pllc1_clk = { }; /* PLLC1 / 2 */ -static struct clk pllc1_div2_clk = { - .ops = &div_clk_ops, - .priv = (void *)2, - .parent = &pllc1_clk, -}; +SH_FIXED_RATIO_CLK(pllc1_div2_clk, pllc1_clk, div2); /* USB clock */ /* -- GitLab From ec0728d67985690f329592e68f0f1fe1f2388e70 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 27 Mar 2013 00:57:38 -0700 Subject: [PATCH 0305/3163] ARM: shmobile: r8a7779: remove DIV4 clocks and use fixed ratio clock R-Car H1 has many clocks, and the ratios of these clocks can be read from FRQMRx and used as DIV4 clocks. But these ratios are fixed values decided by the MD pin status. This means we can use fixed ratio clocks selected via the MD pin status instead of DIV4 clocks. This patch reads the MD pin status, sets the PLLA clock (= root clock) accordingly, and uses fixed ratio clocks for the other clocks. It was tested on the marzen board.
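To see what the ratio tables in this patch amount to: with MD1 set, PLLA runs at 1500 MHz, so the peripheral clock clkp comes out at 1500 MHz * 1/24 = 62.5 MHz; with MD1 clear, PLLA is 1600 MHz and clkp is 1600 MHz * 1/32 = 50 MHz. The MD2 bit only selects between the two possible clkb/clkout ratios.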
Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7779.c | 196 ++++++++++++++----------- 1 file changed, 109 insertions(+), 87 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7779.c b/arch/arm/mach-shmobile/clock-r8a7779.c index d9edeaf66007..7d86bfbb5b06 100644 --- a/arch/arm/mach-shmobile/clock-r8a7779.c +++ b/arch/arm/mach-shmobile/clock-r8a7779.c @@ -17,13 +17,17 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include #include #include +#include #include +#define MD(nr) BIT(nr) + #define FRQMR IOMEM(0xffc80014) #define MSTPCR0 IOMEM(0xffc80030) #define MSTPCR1 IOMEM(0xffc80034) @@ -36,6 +40,9 @@ #define MSTPCR6 IOMEM(0xffc80058) #define MSTPCR7 IOMEM(0xffc80040) +#define MODEMR 0xffcc0020 + + /* ioremap() through clock mapping mandatory to avoid * collision with ARM coherent DMA virtual memory range. */ @@ -50,40 +57,39 @@ static struct clk_mapping cpg_mapping = { * from the platform code. */ static struct clk plla_clk = { - .rate = 1500000000, + /* .rate will be updated on r8a7779_clock_init() */ .mapping = &cpg_mapping, }; +/* + * clock ratio of these clock will be updated + * on r8a7779_clock_init() + */ +SH_FIXED_RATIO_CLK_SET(clkz_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clkzs_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clki_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clks_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clks1_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clks3_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clks4_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clkb_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clkout_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clkp_clk, plla_clk, 1, 1); +SH_FIXED_RATIO_CLK_SET(clkg_clk, plla_clk, 1, 1); + static struct clk *main_clks[] = { &plla_clk, -}; - -static int divisors[] = { 0, 0, 0, 6, 8, 12, 16, 0, 24, 32, 36, 0, 0, 0, 0, 0 }; - -static struct clk_div_mult_table div4_div_mult_table = { - .divisors = divisors, - .nr_divisors = ARRAY_SIZE(divisors), -}; - -static struct clk_div4_table div4_table = { - .div_mult_table = &div4_div_mult_table, -}; - -enum { DIV4_S, DIV4_OUT, DIV4_S4, DIV4_S3, DIV4_S1, DIV4_P, DIV4_NR }; - -static struct clk div4_clks[DIV4_NR] = { - [DIV4_S] = SH_CLK_DIV4(&plla_clk, FRQMR, 20, - 0x0018, CLK_ENABLE_ON_INIT), - [DIV4_OUT] = SH_CLK_DIV4(&plla_clk, FRQMR, 16, - 0x0700, CLK_ENABLE_ON_INIT), - [DIV4_S4] = SH_CLK_DIV4(&plla_clk, FRQMR, 12, - 0x0040, CLK_ENABLE_ON_INIT), - [DIV4_S3] = SH_CLK_DIV4(&plla_clk, FRQMR, 8, - 0x0010, CLK_ENABLE_ON_INIT), - [DIV4_S1] = SH_CLK_DIV4(&plla_clk, FRQMR, 4, - 0x0060, CLK_ENABLE_ON_INIT), - [DIV4_P] = SH_CLK_DIV4(&plla_clk, FRQMR, 0, - 0x0300, CLK_ENABLE_ON_INIT), + &clkz_clk, + &clkzs_clk, + &clki_clk, + &clks_clk, + &clks1_clk, + &clks3_clk, + &clks4_clk, + &clkb_clk, + &clkout_clk, + &clkp_clk, + &clkg_clk, }; enum { MSTP323, MSTP322, MSTP321, MSTP320, @@ -96,52 +102,28 @@ enum { MSTP323, MSTP322, MSTP321, MSTP320, MSTP_NR }; static struct clk mstp_clks[MSTP_NR] = { - [MSTP323] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR3, 23, 0), /* SDHI0 */ - [MSTP322] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR3, 22, 0), /* SDHI1 */ - [MSTP321] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR3, 21, 0), /* SDHI2 */ - [MSTP320] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR3, 20, 0), /* SDHI3 */ - [MSTP115] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR1, 15, 0), /* SATA */ - [MSTP103] = 
SH_CLK_MSTP32(&div4_clks[DIV4_S], MSTPCR1, 3, 0), /* DU */ - [MSTP101] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR1, 1, 0), /* USB2 */ - [MSTP100] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR1, 0, 0), /* USB0/1 */ - [MSTP030] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 30, 0), /* I2C0 */ - [MSTP029] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 29, 0), /* I2C1 */ - [MSTP028] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 28, 0), /* I2C2 */ - [MSTP027] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 27, 0), /* I2C3 */ - [MSTP026] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 26, 0), /* SCIF0 */ - [MSTP025] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 25, 0), /* SCIF1 */ - [MSTP024] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 24, 0), /* SCIF2 */ - [MSTP023] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 23, 0), /* SCIF3 */ - [MSTP022] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 22, 0), /* SCIF4 */ - [MSTP021] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 21, 0), /* SCIF5 */ - [MSTP016] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 16, 0), /* TMU0 */ - [MSTP015] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 15, 0), /* TMU1 */ - [MSTP014] = SH_CLK_MSTP32(&div4_clks[DIV4_P], MSTPCR0, 14, 0), /* TMU2 */ - [MSTP007] = SH_CLK_MSTP32(&div4_clks[DIV4_S], MSTPCR0, 7, 0), /* HSPI */ -}; - -static unsigned long mul4_recalc(struct clk *clk) -{ - return clk->parent->rate * 4; -} - -static struct sh_clk_ops mul4_clk_ops = { - .recalc = mul4_recalc, -}; - -struct clk clkz_clk = { - .ops = &mul4_clk_ops, - .parent = &div4_clks[DIV4_S], -}; - -struct clk clkzs_clk = { - /* clks x 4 / 4 = clks */ - .parent = &div4_clks[DIV4_S], -}; - -static struct clk *late_main_clks[] = { - &clkz_clk, - &clkzs_clk, + [MSTP323] = SH_CLK_MSTP32(&clkp_clk, MSTPCR3, 23, 0), /* SDHI0 */ + [MSTP322] = SH_CLK_MSTP32(&clkp_clk, MSTPCR3, 22, 0), /* SDHI1 */ + [MSTP321] = SH_CLK_MSTP32(&clkp_clk, MSTPCR3, 21, 0), /* SDHI2 */ + [MSTP320] = SH_CLK_MSTP32(&clkp_clk, MSTPCR3, 20, 0), /* SDHI3 */ + [MSTP115] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 15, 0), /* SATA */ + [MSTP103] = SH_CLK_MSTP32(&clks_clk, MSTPCR1, 3, 0), /* DU */ + [MSTP101] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 1, 0), /* USB2 */ + [MSTP100] = SH_CLK_MSTP32(&clkp_clk, MSTPCR1, 0, 0), /* USB0/1 */ + [MSTP030] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 30, 0), /* I2C0 */ + [MSTP029] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 29, 0), /* I2C1 */ + [MSTP028] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 28, 0), /* I2C2 */ + [MSTP027] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 27, 0), /* I2C3 */ + [MSTP026] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 26, 0), /* SCIF0 */ + [MSTP025] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 25, 0), /* SCIF1 */ + [MSTP024] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 24, 0), /* SCIF2 */ + [MSTP023] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 23, 0), /* SCIF3 */ + [MSTP022] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 22, 0), /* SCIF4 */ + [MSTP021] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 21, 0), /* SCIF5 */ + [MSTP016] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 16, 0), /* TMU0 */ + [MSTP015] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 15, 0), /* TMU1 */ + [MSTP014] = SH_CLK_MSTP32(&clkp_clk, MSTPCR0, 14, 0), /* TMU2 */ + [MSTP007] = SH_CLK_MSTP32(&clks_clk, MSTPCR0, 7, 0), /* HSPI */ }; static struct clk_lookup lookups[] = { @@ -151,12 +133,12 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("clkzs_clk", &clkzs_clk), /* DIV4 clocks */ - CLKDEV_CON_ID("shyway_clk", &div4_clks[DIV4_S]), - CLKDEV_CON_ID("bus_clk", &div4_clks[DIV4_OUT]), - CLKDEV_CON_ID("shyway4_clk", &div4_clks[DIV4_S4]), - CLKDEV_CON_ID("shyway3_clk", &div4_clks[DIV4_S3]), - 
CLKDEV_CON_ID("shyway1_clk", &div4_clks[DIV4_S1]), - CLKDEV_CON_ID("peripheral_clk", &div4_clks[DIV4_P]), + CLKDEV_CON_ID("shyway_clk", &clks_clk), + CLKDEV_CON_ID("bus_clk", &clkout_clk), + CLKDEV_CON_ID("shyway4_clk", &clks4_clk), + CLKDEV_CON_ID("shyway3_clk", &clks3_clk), + CLKDEV_CON_ID("shyway1_clk", &clks1_clk), + CLKDEV_CON_ID("peripheral_clk", &clkp_clk), /* MSTP32 clocks */ CLKDEV_DEV_ID("sata_rcar", &mstp_clks[MSTP115]), /* SATA */ @@ -190,20 +172,60 @@ static struct clk_lookup lookups[] = { void __init r8a7779_clock_init(void) { + void __iomem *modemr = ioremap_nocache(MODEMR, PAGE_SIZE); + u32 mode; int k, ret = 0; + BUG_ON(!modemr); + mode = ioread32(modemr); + iounmap(modemr); + + if (mode & MD(1)) { + plla_clk.rate = 1500000000; + + SH_CLK_SET_RATIO(&clkz_clk_ratio, 2, 3); + SH_CLK_SET_RATIO(&clkzs_clk_ratio, 1, 6); + SH_CLK_SET_RATIO(&clki_clk_ratio, 1, 2); + SH_CLK_SET_RATIO(&clks_clk_ratio, 1, 6); + SH_CLK_SET_RATIO(&clks1_clk_ratio, 1, 12); + SH_CLK_SET_RATIO(&clks3_clk_ratio, 1, 8); + SH_CLK_SET_RATIO(&clks4_clk_ratio, 1, 16); + SH_CLK_SET_RATIO(&clkp_clk_ratio, 1, 24); + SH_CLK_SET_RATIO(&clkg_clk_ratio, 1, 24); + if (mode & MD(2)) { + SH_CLK_SET_RATIO(&clkb_clk_ratio, 1, 36); + SH_CLK_SET_RATIO(&clkout_clk_ratio, 1, 36); + } else { + SH_CLK_SET_RATIO(&clkb_clk_ratio, 1, 24); + SH_CLK_SET_RATIO(&clkout_clk_ratio, 1, 24); + } + } else { + plla_clk.rate = 1600000000; + + SH_CLK_SET_RATIO(&clkz_clk_ratio, 1, 2); + SH_CLK_SET_RATIO(&clkzs_clk_ratio, 1, 8); + SH_CLK_SET_RATIO(&clki_clk_ratio, 1, 2); + SH_CLK_SET_RATIO(&clks_clk_ratio, 1, 8); + SH_CLK_SET_RATIO(&clks1_clk_ratio, 1, 16); + SH_CLK_SET_RATIO(&clks3_clk_ratio, 1, 8); + SH_CLK_SET_RATIO(&clks4_clk_ratio, 1, 16); + SH_CLK_SET_RATIO(&clkp_clk_ratio, 1, 32); + SH_CLK_SET_RATIO(&clkg_clk_ratio, 1, 24); + if (mode & MD(2)) { + SH_CLK_SET_RATIO(&clkb_clk_ratio, 1, 32); + SH_CLK_SET_RATIO(&clkout_clk_ratio, 1, 32); + } else { + SH_CLK_SET_RATIO(&clkb_clk_ratio, 1, 24); + SH_CLK_SET_RATIO(&clkout_clk_ratio, 1, 24); + } + } + for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++) ret = clk_register(main_clks[k]); - if (!ret) - ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table); - if (!ret) ret = sh_clk_mstp_register(mstp_clks, MSTP_NR); - for (k = 0; !ret && (k < ARRAY_SIZE(late_main_clks)); k++) - ret = clk_register(late_main_clks[k]); - clkdev_add_table(lookups, ARRAY_SIZE(lookups)); if (!ret) -- GitLab From daf9aa98293528abcf24b015ae8aa6e075d37298 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 28 Mar 2013 01:48:19 -0700 Subject: [PATCH 0306/3163] ARM: shmobile: sh7372: move global functions to sh7372.h There is no reason each CPU's own function has to exist in common.h. 
sh7372_xxx() go to sh7372.h Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/common.h | 15 --------------- arch/arm/mach-shmobile/include/mach/sh7372.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 03f73def2fc6..d01a5511a5ac 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -19,21 +19,6 @@ extern int shmobile_enter_wfi(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); extern void shmobile_cpuidle_set_driver(struct cpuidle_driver *drv); -extern void sh7372_init_irq(void); -extern void sh7372_map_io(void); -extern void sh7372_earlytimer_init(void); -extern void sh7372_add_early_devices(void); -extern void sh7372_add_standard_devices(void); -extern void sh7372_add_early_devices_dt(void); -extern void sh7372_add_standard_devices_dt(void); -extern void sh7372_clock_init(void); -extern void sh7372_pinmux_init(void); -extern void sh7372_pm_init(void); -extern void sh7372_resume_core_standby_sysc(void); -extern int sh7372_do_idle_sysc(unsigned long sleep_mode); -extern struct clk sh7372_extal1_clk; -extern struct clk sh7372_extal2_clk; - extern void sh73a0_init_delay(void); extern void sh73a0_init_irq(void); extern void sh73a0_init_irq_dt(void); diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index b582facc1cf6..f0ea60d6648a 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -478,6 +478,18 @@ extern struct clk sh7372_dv_clki_clk; extern struct clk sh7372_dv_clki_div2_clk; extern struct clk sh7372_pllc2_clk; +extern void sh7372_init_irq(void); +extern void sh7372_map_io(void); +extern void sh7372_earlytimer_init(void); +extern void sh7372_add_early_devices(void); +extern void sh7372_add_standard_devices(void); +extern void sh7372_add_early_devices_dt(void); +extern void sh7372_add_standard_devices_dt(void); +extern void sh7372_clock_init(void); +extern void sh7372_pinmux_init(void); +extern void sh7372_pm_init(void); +extern void sh7372_resume_core_standby_sysc(void); +extern int sh7372_do_idle_sysc(unsigned long sleep_mode); extern void sh7372_intcs_suspend(void); extern void sh7372_intcs_resume(void); extern void sh7372_intca_suspend(void); -- GitLab From 014f93a08361282a0af0dd155c3b434431ea36df Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 28 Mar 2013 01:48:30 -0700 Subject: [PATCH 0307/3163] ARM: shmobile: sh73a0: move global functions to sh73a0.h There is no reason each CPU's own function has to exist in common.h. 
sh73a0_xxx() go to sh73a0.h Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/common.h | 16 ---------------- arch/arm/mach-shmobile/include/mach/sh73a0.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index d01a5511a5ac..48eeca9d25a3 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -19,22 +19,6 @@ extern int shmobile_enter_wfi(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); extern void shmobile_cpuidle_set_driver(struct cpuidle_driver *drv); -extern void sh73a0_init_delay(void); -extern void sh73a0_init_irq(void); -extern void sh73a0_init_irq_dt(void); -extern void sh73a0_map_io(void); -extern void sh73a0_earlytimer_init(void); -extern void sh73a0_add_early_devices(void); -extern void sh73a0_add_standard_devices(void); -extern void sh73a0_add_standard_devices_dt(void); -extern void sh73a0_clock_init(void); -extern void sh73a0_pinmux_init(void); -extern void sh73a0_pm_init(void); -extern struct clk sh73a0_extal1_clk; -extern struct clk sh73a0_extal2_clk; -extern struct clk sh73a0_extcki_clk; -extern struct clk sh73a0_extalr_clk; - extern void r8a7740_meram_workaround(void); extern void r8a7740_init_irq(void); extern void r8a7740_map_io(void); diff --git a/arch/arm/mach-shmobile/include/mach/sh73a0.h b/arch/arm/mach-shmobile/include/mach/sh73a0.h index 606d31d02a4e..936da1b4a9c5 100644 --- a/arch/arm/mach-shmobile/include/mach/sh73a0.h +++ b/arch/arm/mach-shmobile/include/mach/sh73a0.h @@ -557,6 +557,21 @@ enum { #define SH73A0_PINT0_IRQ(irq) ((irq) + 700) #define SH73A0_PINT1_IRQ(irq) ((irq) + 732) +extern void sh73a0_init_delay(void); +extern void sh73a0_init_irq(void); +extern void sh73a0_init_irq_dt(void); +extern void sh73a0_map_io(void); +extern void sh73a0_earlytimer_init(void); +extern void sh73a0_add_early_devices(void); +extern void sh73a0_add_standard_devices(void); +extern void sh73a0_add_standard_devices_dt(void); +extern void sh73a0_clock_init(void); +extern void sh73a0_pinmux_init(void); +extern void sh73a0_pm_init(void); +extern struct clk sh73a0_extal1_clk; +extern struct clk sh73a0_extal2_clk; +extern struct clk sh73a0_extcki_clk; +extern struct clk sh73a0_extalr_clk; extern struct smp_operations sh73a0_smp_ops; #endif /* __ASM_SH73A0_H__ */ -- GitLab From f96c764dac2c2761fc05164255c0ed689b8ac496 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 28 Mar 2013 01:49:15 -0700 Subject: [PATCH 0308/3163] ARM: shmobile: r8a7740: move global functions to r8a7740.h There is no reason each CPU's own function has to exist in common.h. 
r8a7740_xxx() go to r8a7740.h Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/common.h | 9 --------- arch/arm/mach-shmobile/include/mach/r8a7740.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 48eeca9d25a3..4d5410de00d6 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -19,15 +19,6 @@ extern int shmobile_enter_wfi(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); extern void shmobile_cpuidle_set_driver(struct cpuidle_driver *drv); -extern void r8a7740_meram_workaround(void); -extern void r8a7740_init_irq(void); -extern void r8a7740_map_io(void); -extern void r8a7740_add_early_devices(void); -extern void r8a7740_add_standard_devices(void); -extern void r8a7740_clock_init(u8 md_ck); -extern void r8a7740_pinmux_init(void); -extern void r8a7740_pm_init(void); - extern void r8a7779_init_delay(void); extern void r8a7779_init_irq(void); extern void r8a7779_init_irq_extpin(int irlm); diff --git a/arch/arm/mach-shmobile/include/mach/r8a7740.h b/arch/arm/mach-shmobile/include/mach/r8a7740.h index 59d252f4cf97..5a879bbe145f 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7740.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7740.h @@ -606,6 +606,15 @@ enum { SHDMA_SLAVE_USBHS_RX, }; +extern void r8a7740_meram_workaround(void); +extern void r8a7740_init_irq(void); +extern void r8a7740_map_io(void); +extern void r8a7740_add_early_devices(void); +extern void r8a7740_add_standard_devices(void); +extern void r8a7740_clock_init(u8 md_ck); +extern void r8a7740_pinmux_init(void); +extern void r8a7740_pm_init(void); + #ifdef CONFIG_PM extern void __init r8a7740_init_pm_domains(void); #else -- GitLab From 60e3a566897dcdd8621464ff46f4537903c2255a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 28 Mar 2013 01:49:27 -0700 Subject: [PATCH 0309/3163] ARM: shmobile: r8a7779: move global functions to r8a7779.h There is no reason each CPU's own function has to exist in common.h. 
r8a7779_xxx() go to r8a7779.h Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/common.h | 14 -------------- arch/arm/mach-shmobile/include/mach/r8a7779.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 4d5410de00d6..e002cfd9d2df 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -19,20 +19,6 @@ extern int shmobile_enter_wfi(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); extern void shmobile_cpuidle_set_driver(struct cpuidle_driver *drv); -extern void r8a7779_init_delay(void); -extern void r8a7779_init_irq(void); -extern void r8a7779_init_irq_extpin(int irlm); -extern void r8a7779_init_irq_dt(void); -extern void r8a7779_map_io(void); -extern void r8a7779_earlytimer_init(void); -extern void r8a7779_add_early_devices(void); -extern void r8a7779_add_standard_devices(void); -extern void r8a7779_add_standard_devices_dt(void); -extern void r8a7779_clock_init(void); -extern void r8a7779_pinmux_init(void); -extern void r8a7779_pm_init(void); -extern void r8a7779_register_twd(void); - #ifdef CONFIG_SUSPEND int shmobile_suspend_init(void); #else diff --git a/arch/arm/mach-shmobile/include/mach/r8a7779.h b/arch/arm/mach-shmobile/include/mach/r8a7779.h index 8ab0cd6ad6b0..af38750f38f7 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7779.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7779.h @@ -343,6 +343,19 @@ static inline struct r8a7779_pm_ch *to_r8a7779_ch(struct generic_pm_domain *d) return &container_of(d, struct r8a7779_pm_domain, genpd)->ch; } +extern void r8a7779_init_delay(void); +extern void r8a7779_init_irq(void); +extern void r8a7779_init_irq_extpin(int irlm); +extern void r8a7779_init_irq_dt(void); +extern void r8a7779_map_io(void); +extern void r8a7779_earlytimer_init(void); +extern void r8a7779_add_early_devices(void); +extern void r8a7779_add_standard_devices(void); +extern void r8a7779_add_standard_devices_dt(void); +extern void r8a7779_clock_init(void); +extern void r8a7779_pinmux_init(void); +extern void r8a7779_pm_init(void); +extern void r8a7779_register_twd(void); extern int r8a7779_sysc_power_down(struct r8a7779_pm_ch *r8a7779_ch); extern int r8a7779_sysc_power_up(struct r8a7779_pm_ch *r8a7779_ch); -- GitLab From 0468b2d6b6ae71699c22e67701e23d6ca8ff3046 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 28 Mar 2013 00:49:34 +0900 Subject: [PATCH 0310/3163] ARM: shmobile: Initial r8a7790 SoC support Add initial support for the r8a7790 SoC including: - Single Cortex-A15 CPU Core - GIC - Architecture timer No static virtual mappings are used, all the components make use of ioremap(). DT_MACHINE_START is still wrapped in CONFIG_USE_OF to match other mach-shmobile code. 
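As a usage sketch, a classic C board file would hook the new SoC support roughly like this; the board name and the exact init ordering are hypothetical, and actual board support arrives in later patches:

#include <mach/r8a7790.h>

/* Hypothetical board init, e.g. referenced from .init_machine
 * in a MACHINE_START/DT_MACHINE_START block. */
static void __init example_board_init(void)
{
	/* clocks first, then the SoC platform devices */
	r8a7790_clock_init();
	r8a7790_add_standard_devices();
	/* board-specific devices would be registered here */
}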
Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7790.dtsi | 54 ++++++++++++++++ arch/arm/mach-shmobile/Kconfig | 7 +++ arch/arm/mach-shmobile/Makefile | 1 + arch/arm/mach-shmobile/clock-r8a7790.c | 61 +++++++++++++++++++ arch/arm/mach-shmobile/include/mach/r8a7790.h | 7 +++ arch/arm/mach-shmobile/setup-r8a7790.c | 51 ++++++++++++++++ 6 files changed, 181 insertions(+) create mode 100644 arch/arm/boot/dts/r8a7790.dtsi create mode 100644 arch/arm/mach-shmobile/clock-r8a7790.c create mode 100644 arch/arm/mach-shmobile/include/mach/r8a7790.h create mode 100644 arch/arm/mach-shmobile/setup-r8a7790.c diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi new file mode 100644 index 000000000000..1c58ffb6cccf --- /dev/null +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -0,0 +1,54 @@ +/* + * Device Tree Source for the r8a7790 SoC + * + * Copyright (C) 2013 Renesas Solutions Corp. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +/include/ "skeleton.dtsi" + +/ { + compatible = "renesas,r8a7790"; + interrupt-parent = <&gic>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + clock-frequency = <1300000000>; + }; + }; + + gic: interrupt-controller@f1001000 { + compatible = "arm,cortex-a15-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0xf1001000 0x1000>, + <0xf1002000 0x1000>, + <0xf1004000 0x2000>, + <0xf1006000 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu0>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; +}; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index d569c34b1c86..749dfb4c63c0 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -51,6 +51,13 @@ config ARCH_R8A7779 select USB_ARCH_HAS_OHCI select RENESAS_INTC_IRQPIN +config ARCH_R8A7790 + bool "R-Car H2 (R8A77900)" + select ARM_GIC + select CPU_V7 + select ARM_ARCH_TIMER + select SH_CLK_CPG + config ARCH_EMEV2 bool "Emma Mobile EV2" select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index 2d42de46db8d..709b9b421f93 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_ARCH_R8A73A4) += setup-r8a73a4.o clock-r8a73a4.o obj-$(CONFIG_ARCH_R8A7740) += setup-r8a7740.o clock-r8a7740.o intc-r8a7740.o obj-$(CONFIG_ARCH_R8A7778) += setup-r8a7778.o clock-r8a7778.o obj-$(CONFIG_ARCH_R8A7779) += setup-r8a7779.o clock-r8a7779.o intc-r8a7779.o +obj-$(CONFIG_ARCH_R8A7790) += setup-r8a7790.o clock-r8a7790.o obj-$(CONFIG_ARCH_EMEV2) += setup-emev2.o clock-emev2.o # SMP objects diff --git a/arch/arm/mach-shmobile/clock-r8a7790.c b/arch/arm/mach-shmobile/clock-r8a7790.c new file mode 100644 index 000000000000..6869798effa3 --- /dev/null +++ b/arch/arm/mach-shmobile/clock-r8a7790.c @@ -0,0 +1,61 @@ +/* + * r8a7790 clock framework support + * + * Copyright (C) 2013 Renesas Solutions Corp. 
+ * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include + +#define CPG_BASE 0xe6150000 +#define CPG_LEN 0x1000 + +static struct clk_mapping cpg_mapping = { + .phys = CPG_BASE, + .len = CPG_LEN, +}; + +static struct clk *main_clks[] = { +}; + +enum { MSTP_NR }; +static struct clk mstp_clks[MSTP_NR] = { +}; + +static struct clk_lookup lookups[] = { +}; + +void __init r8a7790_clock_init(void) +{ + int k, ret = 0; + + for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++) + ret = clk_register(main_clks[k]); + + if (!ret) + ret = sh_clk_mstp_register(mstp_clks, MSTP_NR); + + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); + + if (!ret) + shmobile_clk_init(); + else + panic("failed to setup r8a7790 clocks\n"); +} diff --git a/arch/arm/mach-shmobile/include/mach/r8a7790.h b/arch/arm/mach-shmobile/include/mach/r8a7790.h new file mode 100644 index 000000000000..f38ded61285f --- /dev/null +++ b/arch/arm/mach-shmobile/include/mach/r8a7790.h @@ -0,0 +1,7 @@ +#ifndef __ASM_R8A7790_H__ +#define __ASM_R8A7790_H__ + +void r8a7790_add_standard_devices(void); +void r8a7790_clock_init(void); + +#endif /* __ASM_R8A7790_H__ */ diff --git a/arch/arm/mach-shmobile/setup-r8a7790.c b/arch/arm/mach-shmobile/setup-r8a7790.c new file mode 100644 index 000000000000..af432ba11020 --- /dev/null +++ b/arch/arm/mach-shmobile/setup-r8a7790.c @@ -0,0 +1,51 @@ +/* + * r8a7790 processor support + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +void __init r8a7790_add_standard_devices(void) +{ +} + +#ifdef CONFIG_USE_OF +void __init r8a7790_add_standard_devices_dt(void) +{ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); +} + +static const char *r8a7790_boards_compat_dt[] __initdata = { + "renesas,r8a7790", + NULL, +}; + +DT_MACHINE_START(R8A7790_DT, "Generic R8A7790 (Flattened Device Tree)") + .init_irq = irqchip_init, + .init_machine = r8a7790_add_standard_devices_dt, + .init_time = shmobile_timer_init, + .dt_compat = r8a7790_boards_compat_dt, +MACHINE_END +#endif /* CONFIG_USE_OF */ -- GitLab From 55d9fab280e6e587d634d2ec2effe94eabe90e9c Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 28 Mar 2013 00:49:44 +0900 Subject: [PATCH 0311/3163] ARM: shmobile: r8a7790 SCIF support Add SCIF serial port support to the r8a7790 SoC by adding platform devices for SCIFA0 -> SCIFA2 as well as SCIFB0 -> SCIFB2 and SCIF0 -> SCIF1 together with clock bindings. DT device description is excluded at this point since such bindings are still under development. Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7790.c | 34 +++++++++++++++- arch/arm/mach-shmobile/setup-r8a7790.c | 55 ++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7790.c b/arch/arm/mach-shmobile/clock-r8a7790.c index 6869798effa3..bad9bf2e34d6 100644 --- a/arch/arm/mach-shmobile/clock-r8a7790.c +++ b/arch/arm/mach-shmobile/clock-r8a7790.c @@ -27,19 +27,51 @@ #define CPG_BASE 0xe6150000 #define CPG_LEN 0x1000 +#define SMSTPCR2 0xe6150138 +#define SMSTPCR7 0xe615014c + static struct clk_mapping cpg_mapping = { .phys = CPG_BASE, .len = CPG_LEN, }; +static struct clk p_clk = { + .rate = 65000000, /* shortcut for now */ + .mapping = &cpg_mapping, +}; + +static struct clk mp_clk = { + .rate = 52000000, /* shortcut for now */ + .mapping = &cpg_mapping, +}; + static struct clk *main_clks[] = { + &p_clk, + &mp_clk, }; -enum { MSTP_NR }; +enum { MSTP721, MSTP720, + MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP_NR }; static struct clk mstp_clks[MSTP_NR] = { + [MSTP721] = SH_CLK_MSTP32(&p_clk, SMSTPCR7, 21, 0), /* SCIF0 */ + [MSTP720] = SH_CLK_MSTP32(&p_clk, SMSTPCR7, 20, 0), /* SCIF1 */ + [MSTP216] = SH_CLK_MSTP32(&mp_clk, SMSTPCR2, 16, 0), /* SCIFB2 */ + [MSTP207] = SH_CLK_MSTP32(&mp_clk, SMSTPCR2, 7, 0), /* SCIFB1 */ + [MSTP206] = SH_CLK_MSTP32(&mp_clk, SMSTPCR2, 6, 0), /* SCIFB0 */ + [MSTP204] = SH_CLK_MSTP32(&mp_clk, SMSTPCR2, 4, 0), /* SCIFA0 */ + [MSTP203] = SH_CLK_MSTP32(&mp_clk, SMSTPCR2, 3, 0), /* SCIFA1 */ + [MSTP202] = SH_CLK_MSTP32(&mp_clk, SMSTPCR2, 2, 0), /* SCIFA2 */ }; static struct clk_lookup lookups[] = { + CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), + CLKDEV_DEV_ID("sh-sci.1", &mstp_clks[MSTP203]), + CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP206]), + CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP207]), + CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP216]), + CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP202]), + CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP721]), + CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP720]), }; void __init r8a7790_clock_init(void) diff --git a/arch/arm/mach-shmobile/setup-r8a7790.c b/arch/arm/mach-shmobile/setup-r8a7790.c 
index af432ba11020..3bb5bf16000c 100644 --- a/arch/arm/mach-shmobile/setup-r8a7790.c +++ b/arch/arm/mach-shmobile/setup-r8a7790.c @@ -22,13 +22,68 @@ #include #include #include +#include #include #include #include #include +#define SCIF_COMMON(scif_type, baseaddr, irq) \ + .type = scif_type, \ + .mapbase = baseaddr, \ + .flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP, \ + .irqs = SCIx_IRQ_MUXED(irq) + +#define SCIFA_DATA(index, baseaddr, irq) \ +[index] = { \ + SCIF_COMMON(PORT_SCIFA, baseaddr, irq), \ + .scbrr_algo_id = SCBRR_ALGO_4, \ + .scscr = SCSCR_RE | SCSCR_TE | SCSCR_CKE0, \ +} + +#define SCIFB_DATA(index, baseaddr, irq) \ +[index] = { \ + SCIF_COMMON(PORT_SCIFB, baseaddr, irq), \ + .scbrr_algo_id = SCBRR_ALGO_4, \ + .scscr = SCSCR_RE | SCSCR_TE, \ +} + +#define SCIF_DATA(index, baseaddr, irq) \ +[index] = { \ + SCIF_COMMON(PORT_SCIF, baseaddr, irq), \ + .scbrr_algo_id = SCBRR_ALGO_2, \ + .scscr = SCSCR_RE | SCSCR_TE | SCSCR_CKE1, \ +} + +enum { SCIFA0, SCIFA1, SCIFB0, SCIFB1, SCIFB2, SCIFA2, SCIF0, SCIF1 }; + +static const struct plat_sci_port scif[] = { + SCIFA_DATA(SCIFA0, 0xe6c40000, gic_spi(144)), /* SCIFA0 */ + SCIFA_DATA(SCIFA1, 0xe6c50000, gic_spi(145)), /* SCIFA1 */ + SCIFB_DATA(SCIFB0, 0xe6c20000, gic_spi(148)), /* SCIFB0 */ + SCIFB_DATA(SCIFB1, 0xe6c30000, gic_spi(149)), /* SCIFB1 */ + SCIFB_DATA(SCIFB2, 0xe6ce0000, gic_spi(150)), /* SCIFB2 */ + SCIFA_DATA(SCIFA2, 0xe6c60000, gic_spi(151)), /* SCIFA2 */ + SCIF_DATA(SCIF0, 0xe6e60000, gic_spi(152)), /* SCIF0 */ + SCIF_DATA(SCIF1, 0xe6e68000, gic_spi(153)), /* SCIF1 */ +}; + +static inline void r8a7790_register_scif(int idx) +{ + platform_device_register_data(&platform_bus, "sh-sci", idx, &scif[idx], + sizeof(struct plat_sci_port)); +} + void __init r8a7790_add_standard_devices(void) { + r8a7790_register_scif(SCIFA0); + r8a7790_register_scif(SCIFA1); + r8a7790_register_scif(SCIFB0); + r8a7790_register_scif(SCIFB1); + r8a7790_register_scif(SCIFB2); + r8a7790_register_scif(SCIFA2); + r8a7790_register_scif(SCIF0); + r8a7790_register_scif(SCIF1); } #ifdef CONFIG_USE_OF -- GitLab From 8f5ec0a57ef503e7609d763cadba55f12b9486ce Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 28 Mar 2013 00:49:54 +0900 Subject: [PATCH 0312/3163] ARM: shmobile: r8a7790 IRQC support Add IRQC interrupt controller support to r8a7790 by hooking up a single IRQC instance to handle 4 external IRQ signals. The IRQC controller is tied to SPIs of the GIC. On r8a7790 the external IRQ pin routing is handled by the PFC, which is excluded from this patch. Both platform devices and DT devices are added in this patch. The platform device versions are used to provide a static interrupt map configuration for board code written in C.
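As a rough illustration of what that static map buys board code, a consumer might look like the sketch below. The device name, handler and trigger type are hypothetical; irq_pin() is assumed to be the existing mach-shmobile helper that offsets an external IRQ pin number by the irq_base set in renesas_irqc_config in this patch.

/*
 * Hypothetical board-code consumer of the statically mapped external
 * IRQs: irq_pin(2) is assumed to resolve to irqc0_data.irq_base + 2,
 * which the IRQC driver ties back to gic_spi(2).
 */
#include <linux/interrupt.h>

static irqreturn_t board_dev_isr(int irq, void *dev_id)
{
	/* handle the (hypothetical) device interrupt */
	return IRQ_HANDLED;
}

static int __init board_request_ext_irq(void)
{
	return request_irq(irq_pin(2), board_dev_isr, IRQF_TRIGGER_LOW,
			   "board-dev", NULL);
}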
Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7790.dtsi | 9 +++++++++ arch/arm/mach-shmobile/Kconfig | 1 + arch/arm/mach-shmobile/setup-r8a7790.c | 21 +++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 1c58ffb6cccf..a1e0e0c64c3c 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -51,4 +51,13 @@ <1 11 0xf08>, <1 10 0xf08>; }; + + irqc0: interrupt-controller@e61c0000 { + compatible = "renesas,irqc"; + #interrupt-cells = <2>; + interrupt-controller; + reg = <0xe61c0000 0x200>; + interrupt-parent = <&gic>; + interrupts = <0 0 4>, <0 1 4>, <0 2 4>, <0 3 4>; + }; }; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 749dfb4c63c0..ccaea6aecea0 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -57,6 +57,7 @@ config ARCH_R8A7790 select CPU_V7 select ARM_ARCH_TIMER select SH_CLK_CPG + select RENESAS_IRQC config ARCH_EMEV2 bool "Emma Mobile EV2" diff --git a/arch/arm/mach-shmobile/setup-r8a7790.c b/arch/arm/mach-shmobile/setup-r8a7790.c index 3bb5bf16000c..9b4ccd7b5031 100644 --- a/arch/arm/mach-shmobile/setup-r8a7790.c +++ b/arch/arm/mach-shmobile/setup-r8a7790.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,25 @@ static inline void r8a7790_register_scif(int idx) sizeof(struct plat_sci_port)); } +static struct renesas_irqc_config irqc0_data = { + .irq_base = irq_pin(0), /* IRQ0 -> IRQ3 */ +}; + +static struct resource irqc0_resources[] = { + DEFINE_RES_MEM(0xe61c0000, 0x200), /* IRQC Event Detector Block_0 */ + DEFINE_RES_IRQ(gic_spi(0)), /* IRQ0 */ + DEFINE_RES_IRQ(gic_spi(1)), /* IRQ1 */ + DEFINE_RES_IRQ(gic_spi(2)), /* IRQ2 */ + DEFINE_RES_IRQ(gic_spi(3)), /* IRQ3 */ +}; + +#define r8a7790_register_irqc(idx) \ + platform_device_register_resndata(&platform_bus, "renesas_irqc", \ + idx, irqc##idx##_resources, \ + ARRAY_SIZE(irqc##idx##_resources), \ + &irqc##idx##_data, \ + sizeof(struct renesas_irqc_config)) + void __init r8a7790_add_standard_devices(void) { r8a7790_register_scif(SCIFA0); @@ -84,6 +104,7 @@ void __init r8a7790_add_standard_devices(void) r8a7790_register_scif(SCIFA2); r8a7790_register_scif(SCIF0); r8a7790_register_scif(SCIF1); + r8a7790_register_irqc(0); } #ifdef CONFIG_USE_OF -- GitLab From 69e351d029985a31abd41b2b8729788a01e8588d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 28 Mar 2013 00:50:03 +0900 Subject: [PATCH 0313/3163] ARM: shmobile: r8a7790 PFC support Add a platform device for the r8a7790 PFC. 
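Since the patch only registers the PFC device, calling r8a7790_pinmux_init() is left to board code. A minimal sketch of the expected init ordering, assuming the usual mach-shmobile pattern (the board_init name is hypothetical, the r8a7790_* helpers are the ones declared in mach/r8a7790.h):

/*
 * Hypothetical board init: make pin functions available before the
 * devices that depend on them are probed.
 */
static void __init board_init(void)
{
	r8a7790_clock_init();		/* clocks, see clock-r8a7790.c */
	r8a7790_pinmux_init();		/* registers the "pfc-r8a7790" device */
	r8a7790_add_standard_devices();	/* SCIF, IRQC, ... */
}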
Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/Kconfig | 1 + arch/arm/mach-shmobile/include/mach/r8a7790.h | 1 + arch/arm/mach-shmobile/setup-r8a7790.c | 10 ++++++++++ 3 files changed, 12 insertions(+) diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index ccaea6aecea0..ff674c5f2d03 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -53,6 +53,7 @@ config ARCH_R8A7779 config ARCH_R8A7790 bool "R-Car H2 (R8A77900)" + select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 select ARM_ARCH_TIMER diff --git a/arch/arm/mach-shmobile/include/mach/r8a7790.h b/arch/arm/mach-shmobile/include/mach/r8a7790.h index f38ded61285f..9bd6f5c894bb 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7790.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7790.h @@ -3,5 +3,6 @@ void r8a7790_add_standard_devices(void); void r8a7790_clock_init(void); +void r8a7790_pinmux_init(void); #endif /* __ASM_R8A7790_H__ */ diff --git a/arch/arm/mach-shmobile/setup-r8a7790.c b/arch/arm/mach-shmobile/setup-r8a7790.c index 9b4ccd7b5031..481201a4f3f5 100644 --- a/arch/arm/mach-shmobile/setup-r8a7790.c +++ b/arch/arm/mach-shmobile/setup-r8a7790.c @@ -29,6 +29,16 @@ #include #include +static const struct resource pfc_resources[] = { + DEFINE_RES_MEM(0xe6060000, 0x250), +}; + +void __init r8a7790_pinmux_init(void) +{ + platform_device_register_simple("pfc-r8a7790", -1, pfc_resources, + ARRAY_SIZE(pfc_resources)); +} + #define SCIF_COMMON(scif_type, baseaddr, irq) \ .type = scif_type, \ .mapbase = baseaddr, \ -- GitLab From 26a0d2d47f5bfb75cd14d961f9d825338d471317 Mon Sep 17 00:00:00 2001 From: Takashi Yoshii Date: Fri, 29 Mar 2013 16:45:56 +0900 Subject: [PATCH 0314/3163] ARM: shmobile: r8a73a4 SoC 64-bit DT support The r8a73a4 SoC supports LPAE and has a memory window up to 0x2ffffffff. Convert to 64-bit addresses by enlarging #address-cells and #size-cells to 2. Signed-off-by: Takashi Yoshii Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a73a4.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index 7db5b504e64c..fde2a337d1ff 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -9,11 +9,11 @@ * kind, whether express or implied.
*/ -/include/ "skeleton.dtsi" - / { compatible = "renesas,r8a73a4"; interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; cpus { #address-cells = <1>; @@ -32,10 +32,10 @@ #interrupt-cells = <3>; #address-cells = <0>; interrupt-controller; - reg = <0xf1001000 0x1000>, - <0xf1002000 0x1000>, - <0xf1004000 0x2000>, - <0xf1006000 0x2000>; + reg = <0 0xf1001000 0 0x1000>, + <0 0xf1002000 0 0x1000>, + <0 0xf1004000 0 0x2000>, + <0 0xf1006000 0 0x2000>; interrupts = <1 9 0xf04>; gic-cpuif@4 { @@ -57,7 +57,7 @@ compatible = "renesas,irqc"; #interrupt-cells = <2>; interrupt-controller; - reg = <0xe61c0000 0x200>; + reg = <0 0xe61c0000 0 0x200>; interrupt-parent = <&gic>; interrupts = <0 0 4>, <0 1 4>, <0 2 4>, <0 3 4>, <0 4 4>, <0 5 4>, <0 6 4>, <0 7 4>, @@ -73,7 +73,7 @@ compatible = "renesas,irqc"; #interrupt-cells = <2>; interrupt-controller; - reg = <0xe61c0200 0x200>; + reg = <0 0xe61c0200 0 0x200>; interrupt-parent = <&gic>; interrupts = <0 32 4>, <0 33 4>, <0 34 4>, <0 35 4>, <0 36 4>, <0 37 4>, <0 38 4>, <0 39 4>, @@ -86,8 +86,8 @@ thermal@e61f0000 { compatible = "renesas,rcar-thermal"; - reg = <0xe61f0000 0x14>, <0xe61f0100 0x38>, - <0xe61f0200 0x38>, <0xe61f0300 0x38>; + reg = <0 0xe61f0000 0 0x14>, <0 0xe61f0100 0 0x38>, + <0 0xe61f0200 0 0x38>, <0 0xe61f0300 0 0x38>; interrupt-parent = <&gic>; interrupts = <0 69 4>; }; -- GitLab From 8585deb18580d04209a2986430aa0959ef38fce2 Mon Sep 17 00:00:00 2001 From: Takashi Yoshii Date: Fri, 29 Mar 2013 16:49:17 +0900 Subject: [PATCH 0315/3163] ARM: shmobile: r8a7790 SoC 64-bit DT support The r8a7790 SoC supports LPAE and has a memory window up to 0x2ffffffff. Convert to 64-bit addresses by enlarging #address-cells and #size-cells to 2. Signed-off-by: Takashi Yoshii Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7790.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index a1e0e0c64c3c..7a1711027e41 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -8,11 +8,11 @@ * kind, whether express or implied. */ -/include/ "skeleton.dtsi" - / { compatible = "renesas,r8a7790"; interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; cpus { #address-cells = <1>; @@ -31,10 +31,10 @@ #interrupt-cells = <3>; #address-cells = <0>; interrupt-controller; - reg = <0xf1001000 0x1000>, - <0xf1002000 0x1000>, - <0xf1004000 0x2000>, - <0xf1006000 0x2000>; + reg = <0 0xf1001000 0 0x1000>, + <0 0xf1002000 0 0x1000>, + <0 0xf1004000 0 0x2000>, + <0 0xf1006000 0 0x2000>; interrupts = <1 9 0xf04>; gic-cpuif@4 { @@ -56,7 +56,7 @@ compatible = "renesas,irqc"; #interrupt-cells = <2>; interrupt-controller; - reg = <0xe61c0000 0x200>; + reg = <0 0xe61c0000 0 0x200>; interrupt-parent = <&gic>; interrupts = <0 0 4>, <0 1 4>, <0 2 4>, <0 3 4>; }; -- GitLab From 181387da2d64c3129e5b5186c4dd388bc5041d53 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: [PATCH 0316/3163] writeback: remove unused bdi_pending_list There's no user left. Remove it.
Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Fengguang Wu --- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 350459910fe1..a5ef27f5411a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -130,7 +130,6 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; -extern struct list_head bdi_pending_list; static inline int wb_has_dirty_io(struct bdi_writeback *wb) { diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 41733c5dc820..657569b3fcf6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -31,13 +31,11 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; /* - * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as - * reader side protection for bdi_pending_list. bdi_list has RCU reader side + * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side * locking. */ DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); -LIST_HEAD(bdi_pending_list); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { -- GitLab From 839a8e8660b6777e7fe4e80af1a048aebe2b5977 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: [PATCH 0317/3163] writeback: replace custom worker pool implementation with unbound workqueue Writeback implements its own worker pool - each bdi can be associated with a worker thread which is created and destroyed dynamically. The worker thread for the default bdi is always present and serves as the "forker" thread which forks off worker threads for other bdis. There's no reason for writeback to implement its own worker pool when an unbound workqueue is much simpler and more efficient. This patch replaces the custom worker pool implementation in writeback with an unbound workqueue. The conversion isn't too complicated but the following points are worth mentioning.

* bdi_writeback->last_active, task and wakeup_timer are removed. delayed_work ->dwork is added instead. Explicit timer handling is no longer necessary. Everything works by either queueing / modding / flushing / canceling the delayed_work item.

* bdi_writeback_thread() becomes bdi_writeback_workfn() which runs off bdi_writeback->dwork. On each execution, it processes bdi->work_list and reschedules itself if there are more things to do. The function also handles the low-mem condition, which used to be handled by the forker thread. If the function is running off a rescuer thread, it only writes out a limited number of pages so that the rescuer can serve other bdis too. This preserves the flusher creation failure behavior of the forker thread.

* INIT_LIST_HEAD(&bdi->bdi_list) is used to tell bdi_writeback_workfn() about ongoing bdi unregistration so that it always drains work_list even if it's running off the rescuer. Note that the original code was broken in this regard. Under memory pressure, a bdi could finish unregistration with a non-empty work_list.

* The default bdi is no longer special. It is now treated the same as any other bdi and bdi_cap_flush_forker() is removed.

* BDI_pending is no longer used. Removed.

* Some tracepoints become non-applicable. The following TPs are removed - writeback_nothread, writeback_wake_thread, writeback_wake_forker_thread, writeback_thread_start, writeback_thread_stop.
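To make the queueing / modding / flushing / canceling life cycle above concrete, here is a minimal sketch of the same delayed_work pattern outside the bdi code; the example_* names and the re-arm condition are illustrative only, not the actual writeback implementation.

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *example_wq;
static struct delayed_work example_dwork;

static bool more_work_pending(void)
{
	return false;	/* placeholder for a real "is there work left?" test */
}

static void example_workfn(struct work_struct *work)
{
	/* do one round of work, then re-arm instead of sleeping in a loop */
	if (more_work_pending())
		queue_delayed_work(example_wq, &example_dwork, HZ);
}

static int example_setup(void)
{
	example_wq = alloc_workqueue("example",
				     WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	if (!example_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&example_dwork, example_workfn);

	/* "run now": replaces an explicit wake_up_process() on a thread */
	mod_delayed_work(example_wq, &example_dwork, 0);
	return 0;
}

static void example_teardown(void)
{
	/* drain pending work, then make sure nothing re-arms */
	mod_delayed_work(example_wq, &example_dwork, 0);
	flush_delayed_work(&example_dwork);
	cancel_delayed_work_sync(&example_dwork);
	destroy_workqueue(example_wq);
}

The teardown ordering mirrors bdi_wb_shutdown() in the hunks below: an immediate mod plus flush drains the list, and the final cancel guarantees the work item is idle.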
Everything, including devices coming and going away and rescuer operation under simulated memory pressure, seems to work fine in my test setup. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Fengguang Wu Cc: Jeff Moyer --- fs/fs-writeback.c | 102 ++++--------- include/linux/backing-dev.h | 15 +- include/trace/events/writeback.h | 5 - mm/backing-dev.c | 255 ++++--------------------------- 4 files changed, 65 insertions(+), 312 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 21f46fb3a101..8067d3719e94 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head) #define CREATE_TRACE_POINTS #include -/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ -static void bdi_wakeup_flusher(struct backing_dev_info *bdi) -{ - if (bdi->wb.task) { - wake_up_process(bdi->wb.task); - } else { - /* - * The bdi thread isn't there, wake up the forker thread which - * will create and run it. - */ - wake_up_process(default_backing_dev_info.wb.task); - } -} - static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { @@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, spin_lock_bh(&bdi->wb_lock); list_add_tail(&work->list, &bdi->work_list); - if (!bdi->wb.task) - trace_writeback_nothread(bdi, work); - bdi_wakeup_flusher(bdi); spin_unlock_bh(&bdi->wb_lock); + + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); } static void @@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, */ work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { - if (bdi->wb.task) { - trace_writeback_nowork(bdi); - wake_up_process(bdi->wb.task); - } + trace_writeback_nowork(bdi); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); return; } @@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) * writeback as soon as there is no other work to do. */ trace_writeback_wake_background(bdi); - spin_lock_bh(&bdi->wb_lock); - bdi_wakeup_flusher(bdi); - spin_unlock_bh(&bdi->wb_lock); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); } /* @@ -1020,66 +1000,48 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) /* * Handle writeback of dirty data for the device backed by this bdi. Also - * wakes up periodically and does kupdated style flushing. + * reschedules periodically and does kupdated style flushing. */ -int bdi_writeback_thread(void *data) +void bdi_writeback_workfn(struct work_struct *work) { - struct bdi_writeback *wb = data; + struct bdi_writeback *wb = container_of(to_delayed_work(work), + struct bdi_writeback, dwork); struct backing_dev_info *bdi = wb->bdi; long pages_written; current->flags |= PF_SWAPWRITE; - set_freezable(); - wb->last_active = jiffies; - - /* - * Our parent may run at a different priority, just set us to normal - */ - set_user_nice(current, 0); - - trace_writeback_thread_start(bdi); - while (!kthread_freezable_should_stop(NULL)) { + if (likely(!current_is_workqueue_rescuer() || + list_empty(&bdi->bdi_list))) { /* - * Remove own delayed wake-up timer, since we are already awake - * and we'll take care of the periodic write-back. + * The normal path. Keep writing back @bdi until its + * work_list is empty. Note that this path is also taken + * if @bdi is shutting down even when we're running off the + * rescuer as work_list needs to be drained. 
*/ - del_timer(&wb->wakeup_timer); - - pages_written = wb_do_writeback(wb, 0); - + do { + pages_written = wb_do_writeback(wb, 0); + trace_writeback_pages_written(pages_written); + } while (!list_empty(&bdi->work_list)); + } else { + /* + * bdi_wq can't get enough workers and we're running off + * the emergency worker. Don't hog it. Hopefully, 1024 is + * enough for efficient IO. + */ + pages_written = writeback_inodes_wb(&bdi->wb, 1024, + WB_REASON_FORKER_THREAD); trace_writeback_pages_written(pages_written); - - if (pages_written) - wb->last_active = jiffies; - - set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&bdi->work_list) || kthread_should_stop()) { - __set_current_state(TASK_RUNNING); - continue; - } - - if (wb_has_dirty_io(wb) && dirty_writeback_interval) - schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); - else { - /* - * We have nothing to do, so can go sleep without any - * timeout and save power. When a work is queued or - * something is made dirty - we will be woken up. - */ - schedule(); - } } - /* Flush any work that raced with us exiting */ - if (!list_empty(&bdi->work_list)) - wb_do_writeback(wb, 1); + if (!list_empty(&bdi->work_list) || + (wb_has_dirty_io(wb) && dirty_writeback_interval)) + queue_delayed_work(bdi_wq, &wb->dwork, + msecs_to_jiffies(dirty_writeback_interval * 10)); - trace_writeback_thread_stop(bdi); - return 0; + current->flags &= ~PF_SWAPWRITE; } - /* * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back * the whole world. diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index a5ef27f5411a..c3881553f7d1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,6 +18,7 @@ #include #include #include +#include struct page; struct device; @@ -27,7 +28,6 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_pending, /* On its way to being activated */ BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ @@ -53,10 +53,8 @@ struct bdi_writeback { unsigned int nr; unsigned long last_old_flush; /* last old data flush */ - unsigned long last_active; /* last time bdi thread was active */ - struct task_struct *task; /* writeback thread */ - struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ + struct delayed_work dwork; /* work item used for writeback */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ @@ -123,7 +121,7 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); -int bdi_writeback_thread(void *data); +void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); @@ -131,6 +129,8 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; +extern struct workqueue_struct *bdi_wq; + static inline int wb_has_dirty_io(struct bdi_writeback *wb) { return !list_empty(&wb->b_dirty) || @@ -335,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) 
return bdi->capabilities & BDI_CAP_SWAP_BACKED; } -static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi) -{ - return bdi == &default_backing_dev_info; -} - static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(mapping->backing_dev_info); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 6a16fd2e70ed..464ea82e10db 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class, DEFINE_EVENT(writeback_work_class, name, \ TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ TP_ARGS(bdi, work)) -DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread); DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); @@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \ DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); -DEFINE_WRITEBACK_EVENT(writeback_wake_thread); -DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); -DEFINE_WRITEBACK_EVENT(writeback_thread_start); -DEFINE_WRITEBACK_EVENT(writeback_thread_stop); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 657569b3fcf6..2857d4f6bca4 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -37,6 +37,9 @@ static struct class *bdi_class; DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); +/* bdi_wq serves all asynchronous writeback tasks */ +struct workqueue_struct *bdi_wq; + void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { @@ -255,6 +258,11 @@ static int __init default_bdi_init(void) { int err; + bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | + WQ_UNBOUND, 0); + if (!bdi_wq) + return -ENOMEM; + err = bdi_init(&default_backing_dev_info); if (!err) bdi_register(&default_backing_dev_info, NULL, "default"); @@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) return wb_has_dirty_io(&bdi->wb); } -static void wakeup_timer_fn(unsigned long data) -{ - struct backing_dev_info *bdi = (struct backing_dev_info *)data; - - spin_lock_bh(&bdi->wb_lock); - if (bdi->wb.task) { - trace_writeback_wake_thread(bdi); - wake_up_process(bdi->wb.task); - } else if (bdi->dev) { - /* - * When bdi tasks are inactive for long time, they are killed. - * In this case we have to wake-up the forker thread which - * should create and run the bdi thread. - */ - trace_writeback_wake_forker_thread(bdi); - wake_up_process(default_backing_dev_info.wb.task); - } - spin_unlock_bh(&bdi->wb_lock); -} - /* * This function is used when the first inode for this bdi is marked dirty. It * wakes-up the corresponding bdi thread which should then take care of the @@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout); -} - -/* - * Calculate the longest interval (jiffies) bdi threads are allowed to be - * inactive. 
- */ -static unsigned long bdi_longest_inactive(void) -{ - unsigned long interval; - - interval = msecs_to_jiffies(dirty_writeback_interval * 10); - return max(5UL * 60 * HZ, interval); -} - -/* - * Clear pending bit and wakeup anybody waiting for flusher thread creation or - * shutdown - */ -static void bdi_clear_pending(struct backing_dev_info *bdi) -{ - clear_bit(BDI_pending, &bdi->state); - smp_mb__after_clear_bit(); - wake_up_bit(&bdi->state, BDI_pending); -} - -static int bdi_forker_thread(void *ptr) -{ - struct bdi_writeback *me = ptr; - - current->flags |= PF_SWAPWRITE; - set_freezable(); - - /* - * Our parent may run at a different priority, just set us to normal - */ - set_user_nice(current, 0); - - for (;;) { - struct task_struct *task = NULL; - struct backing_dev_info *bdi; - enum { - NO_ACTION, /* Nothing to do */ - FORK_THREAD, /* Fork bdi thread */ - KILL_THREAD, /* Kill inactive bdi thread */ - } action = NO_ACTION; - - /* - * Temporary measure, we want to make sure we don't see - * dirty data on the default backing_dev_info - */ - if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) { - del_timer(&me->wakeup_timer); - wb_do_writeback(me, 0); - } - - spin_lock_bh(&bdi_lock); - /* - * In the following loop we are going to check whether we have - * some work to do without any synchronization with tasks - * waking us up to do work for them. Set the task state here - * so that we don't miss wakeups after verifying conditions. - */ - set_current_state(TASK_INTERRUPTIBLE); - - list_for_each_entry(bdi, &bdi_list, bdi_list) { - bool have_dirty_io; - - if (!bdi_cap_writeback_dirty(bdi) || - bdi_cap_flush_forker(bdi)) - continue; - - WARN(!test_bit(BDI_registered, &bdi->state), - "bdi %p/%s is not registered!\n", bdi, bdi->name); - - have_dirty_io = !list_empty(&bdi->work_list) || - wb_has_dirty_io(&bdi->wb); - - /* - * If the bdi has work to do, but the thread does not - * exist - create it. - */ - if (!bdi->wb.task && have_dirty_io) { - /* - * Set the pending bit - if someone will try to - * unregister this bdi - it'll wait on this bit. - */ - set_bit(BDI_pending, &bdi->state); - action = FORK_THREAD; - break; - } - - spin_lock(&bdi->wb_lock); - - /* - * If there is no work to do and the bdi thread was - * inactive long enough - kill it. The wb_lock is taken - * to make sure no-one adds more work to this bdi and - * wakes the bdi thread up. - */ - if (bdi->wb.task && !have_dirty_io && - time_after(jiffies, bdi->wb.last_active + - bdi_longest_inactive())) { - task = bdi->wb.task; - bdi->wb.task = NULL; - spin_unlock(&bdi->wb_lock); - set_bit(BDI_pending, &bdi->state); - action = KILL_THREAD; - break; - } - spin_unlock(&bdi->wb_lock); - } - spin_unlock_bh(&bdi_lock); - - /* Keep working if default bdi still has things to do */ - if (!list_empty(&me->bdi->work_list)) - __set_current_state(TASK_RUNNING); - - switch (action) { - case FORK_THREAD: - __set_current_state(TASK_RUNNING); - task = kthread_create(bdi_writeback_thread, &bdi->wb, - "flush-%s", dev_name(bdi->dev)); - if (IS_ERR(task)) { - /* - * If thread creation fails, force writeout of - * the bdi from the thread. Hopefully 1024 is - * large enough for efficient IO. - */ - writeback_inodes_wb(&bdi->wb, 1024, - WB_REASON_FORKER_THREAD); - } else { - /* - * The spinlock makes sure we do not lose - * wake-ups when racing with 'bdi_queue_work()'. - * And as soon as the bdi thread is visible, we - * can start it. 
- */ - spin_lock_bh(&bdi->wb_lock); - bdi->wb.task = task; - spin_unlock_bh(&bdi->wb_lock); - wake_up_process(task); - } - bdi_clear_pending(bdi); - break; - - case KILL_THREAD: - __set_current_state(TASK_RUNNING); - kthread_stop(task); - bdi_clear_pending(bdi); - break; - - case NO_ACTION: - if (!wb_has_dirty_io(me) || !dirty_writeback_interval) - /* - * There are no dirty data. The only thing we - * should now care about is checking for - * inactive bdi threads and killing them. Thus, - * let's sleep for longer time, save energy and - * be friendly for battery-driven devices. - */ - schedule_timeout(bdi_longest_inactive()); - else - schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); - try_to_freeze(); - break; - } - } - - return 0; + mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); } /* @@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) spin_unlock_bh(&bdi_lock); synchronize_rcu_expedited(); + + /* bdi_list is now unused, clear it to mark @bdi dying */ + INIT_LIST_HEAD(&bdi->bdi_list); } int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, bdi->dev = dev; - /* - * Just start the forker thread for our default backing_dev_info, - * and add other bdi's to the list. They will get a thread created - * on-demand when they need it. - */ - if (bdi_cap_flush_forker(bdi)) { - struct bdi_writeback *wb = &bdi->wb; - - wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s", - dev_name(dev)); - if (IS_ERR(wb->task)) - return PTR_ERR(wb->task); - } - bdi_debug_register(bdi, dev_name(dev)); set_bit(BDI_registered, &bdi->state); @@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev); */ static void bdi_wb_shutdown(struct backing_dev_info *bdi) { - struct task_struct *task; - if (!bdi_cap_writeback_dirty(bdi)) return; @@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) bdi_remove_from_list(bdi); /* - * If setup is pending, wait for that to complete first + * Drain work list and shutdown the delayed_work. At this point, + * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi + * is dying and its work_list needs to be drained no matter what. */ - wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait, - TASK_UNINTERRUPTIBLE); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + flush_delayed_work(&bdi->wb.dwork); + WARN_ON(!list_empty(&bdi->work_list)); /* - * Finally, kill the kernel thread. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. + * This shouldn't be necessary unless @bdi for some reason has + * unflushed dirty IO after work_list is drained. Do it anyway + * just in case. 
*/ - spin_lock_bh(&bdi->wb_lock); - task = bdi->wb.task; - bdi->wb.task = NULL; - spin_unlock_bh(&bdi->wb_lock); - - if (task) - kthread_stop(task); + cancel_delayed_work_sync(&bdi->wb.dwork); } /* @@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi) bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); - del_timer_sync(&bdi->wb.wakeup_timer); - if (!bdi_cap_flush_forker(bdi)) - bdi_wb_shutdown(bdi); + bdi_wb_shutdown(bdi); bdi_debug_unregister(bdi); spin_lock_bh(&bdi->wb_lock); @@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) INIT_LIST_HEAD(&wb->b_io); INIT_LIST_HEAD(&wb->b_more_io); spin_lock_init(&wb->list_lock); - setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); + INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); } /* @@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); /* - * If bdi_unregister() had already been called earlier, the - * wakeup_timer could still be armed because bdi_prune_sb() - * can race with the bdi_wakeup_thread_delayed() calls from - * __mark_inode_dirty(). + * If bdi_unregister() had already been called earlier, the dwork + * could still be pending because bdi_prune_sb() can race with the + * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty(). */ - del_timer_sync(&bdi->wb.wakeup_timer); + cancel_delayed_work_sync(&bdi->wb.dwork); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); -- GitLab From b5c872ddb7083c7909fb76a170c3807e04564bb3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: [PATCH 0318/3163] writeback: expose the bdi_wq workqueue There are cases where userland wants to tweak the priority and affinity of writeback flushers. Expose bdi_wq to userland by setting WQ_SYSFS. It appears under /sys/bus/workqueue/devices/writeback/ and allows adjusting maximum concurrency level, cpumask and nice level. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Fengguang Wu Cc: Jeff Moyer Cc: Kay Sievers Cc: Greg Kroah-Hartman --- mm/backing-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2857d4f6bca4..502517492258 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -259,7 +259,7 @@ static int __init default_bdi_init(void) int err; bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | - WQ_UNBOUND, 0); + WQ_UNBOUND | WQ_SYSFS, 0); if (!bdi_wq) return -ENOMEM; -- GitLab From 1960d58003b735555bb2dc56699530d606698574 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 26 Mar 2013 01:44:52 +0100 Subject: [PATCH 0319/3163] sh-pfc: Fix compiler warning when BUG() The sh_pfc_phys_to_virt() function ends with a BUG() statement without a return. When CONFIG_BUG isn't set the function will thus have no return value. Fix it. 
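The warning is easiest to see in reduced form. With CONFIG_BUG=n, BUG() can expand to an empty statement that the compiler cannot treat as noreturn, so control may fall off the end of a value-returning function; the sketch below is illustrative, not the sh-pfc code itself.

#include <linux/bug.h>

static int pick(int key)
{
	switch (key) {
	case 0:
		return 100;
	case 1:
		return 200;
	}

	/*
	 * With CONFIG_BUG=n this can compile to nothing, so without an
	 * explicit return the compiler warns that control reaches the
	 * end of a non-void function.
	 */
	BUG();
	return 0;
}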
Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c index feef89792568..97e6ea3147e0 100644 --- a/drivers/pinctrl/sh-pfc/core.c +++ b/drivers/pinctrl/sh-pfc/core.c @@ -72,6 +72,7 @@ static void __iomem *sh_pfc_phys_to_virt(struct sh_pfc *pfc, } BUG(); + return NULL; } int sh_pfc_get_pin_index(struct sh_pfc *pfc, unsigned int pin) -- GitLab From 6299e571b2d90dffcee7b58a9252f3ef29241922 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 26 Mar 2013 02:07:51 +0100 Subject: [PATCH 0320/3163] sh: sh7269: Fix compilation by adding missing includes struct resource is defined in linux/ioport.h. Include it. Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- arch/sh/kernel/cpu/sh2a/pinmux-sh7269.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sh/kernel/cpu/sh2a/pinmux-sh7269.c b/arch/sh/kernel/cpu/sh2a/pinmux-sh7269.c index 1825b0bd523d..4c17fb6970b1 100644 --- a/arch/sh/kernel/cpu/sh2a/pinmux-sh7269.c +++ b/arch/sh/kernel/cpu/sh2a/pinmux-sh7269.c @@ -9,7 +9,9 @@ * for more details. */ +#include #include +#include #include #include -- GitLab From c09b51d5de108acbf9ae176cc81325afd79648dd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 7 Mar 2013 14:33:32 +0100 Subject: [PATCH 0321/3163] sh-pfc: r8a7779: Remove function GPIOs All r8a7779 platforms use the pinctrl API to control functions. Function GPIOs are unused and unneeded, remove them. Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a7779.c | 271 --------------------------- 1 file changed, 271 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c index 1d7b0dfbbb21..4db99445acc2 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c @@ -2670,274 +2670,6 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(usb2), }; -#define PINMUX_FN_BASE ARRAY_SIZE(pinmux_pins) - -static const struct pinmux_func pinmux_func_gpios[] = { - GPIO_FN(AVS1), GPIO_FN(AVS2), GPIO_FN(A17), GPIO_FN(A18), - GPIO_FN(A19), - - /* IPSR0 */ - GPIO_FN(PWM1), GPIO_FN(PWMFSW0), - GPIO_FN(SCIF_CLK), GPIO_FN(TCLK0_C), GPIO_FN(BS), - GPIO_FN(FD2), GPIO_FN(ATADIR0), GPIO_FN(SDSELF), - GPIO_FN(HCTS1), GPIO_FN(A0), - GPIO_FN(FD3), GPIO_FN(A20), - GPIO_FN(A21), - GPIO_FN(A22), - GPIO_FN(VI1_R0), GPIO_FN(A23), GPIO_FN(FCLE), - GPIO_FN(VI1_R1), GPIO_FN(A24), - GPIO_FN(FD4), GPIO_FN(VI1_R2), - GPIO_FN(SSI_WS78_B), GPIO_FN(A25), - GPIO_FN(FD5), GPIO_FN(VI1_R3), - GPIO_FN(SSI_SDATA7_B), GPIO_FN(CLKOUT), - GPIO_FN(PWM0_B), - GPIO_FN(SDSELF_B), GPIO_FN(RD_WR), GPIO_FN(FWE), GPIO_FN(ATAG0), - GPIO_FN(VI1_R7), GPIO_FN(HRTS1), - - /* IPSR1 */ - GPIO_FN(FD6), GPIO_FN(FD7), - GPIO_FN(FALE), - GPIO_FN(ATACS00), - GPIO_FN(FRE), GPIO_FN(ATACS10), GPIO_FN(VI1_R4), - GPIO_FN(HSCK1), GPIO_FN(SSI_SDATA8_B), - GPIO_FN(SSI_SDATA9), - GPIO_FN(FD0), GPIO_FN(ATARD0), GPIO_FN(VI1_R5), - GPIO_FN(HTX1), - GPIO_FN(SSI_SCK9), - GPIO_FN(FD1), GPIO_FN(ATAWR0), GPIO_FN(VI1_R6), - GPIO_FN(HRX1), GPIO_FN(SSI_WS9), - GPIO_FN(MLB_CLK), GPIO_FN(PWM2), GPIO_FN(MLB_SIG), - GPIO_FN(PWM3), GPIO_FN(MLB_DAT), GPIO_FN(PWM4), - GPIO_FN(HTX0), GPIO_FN(SDATA), - GPIO_FN(SUB_TCK), GPIO_FN(CC5_STATE2), - GPIO_FN(CC5_STATE10), GPIO_FN(CC5_STATE18), GPIO_FN(CC5_STATE26), - GPIO_FN(CC5_STATE34), - - /* IPSR2 */ - GPIO_FN(HRX0), GPIO_FN(SCKZ), - GPIO_FN(SUB_TDI), 
GPIO_FN(CC5_STATE3), GPIO_FN(CC5_STATE11), - GPIO_FN(CC5_STATE19), GPIO_FN(CC5_STATE27), GPIO_FN(CC5_STATE35), - GPIO_FN(HSCK0), GPIO_FN(MTS), GPIO_FN(PWM5), - GPIO_FN(SSI_SDATA9_B), GPIO_FN(SUB_TDO), - GPIO_FN(CC5_STATE0), GPIO_FN(CC5_STATE8), GPIO_FN(CC5_STATE16), - GPIO_FN(CC5_STATE24), GPIO_FN(CC5_STATE32), GPIO_FN(HCTS0), - GPIO_FN(STM), GPIO_FN(PWM0_D), - GPIO_FN(SCIF_CLK_C), GPIO_FN(SUB_TRST), GPIO_FN(TCLK1_B), - GPIO_FN(CC5_OSCOUT), GPIO_FN(HRTS0), - GPIO_FN(MDATA), GPIO_FN(SUB_TMS), GPIO_FN(CC5_STATE1), - GPIO_FN(CC5_STATE9), GPIO_FN(CC5_STATE17), GPIO_FN(CC5_STATE25), - GPIO_FN(CC5_STATE33), GPIO_FN(LCDOUT0), - GPIO_FN(DREQ0), GPIO_FN(GPS_CLK_B), GPIO_FN(AUDATA0), - GPIO_FN(LCDOUT1), GPIO_FN(DACK0), - GPIO_FN(DRACK0), GPIO_FN(GPS_SIGN_B), GPIO_FN(AUDATA1), - GPIO_FN(LCDOUT2), GPIO_FN(LCDOUT3), - GPIO_FN(LCDOUT4), GPIO_FN(LCDOUT5), - GPIO_FN(LCDOUT6), GPIO_FN(LCDOUT7), - GPIO_FN(LCDOUT8), GPIO_FN(DREQ1), GPIO_FN(SCL2), - GPIO_FN(AUDATA2), - - /* IPSR3 */ - GPIO_FN(LCDOUT9), GPIO_FN(DACK1), GPIO_FN(SDA2), - GPIO_FN(AUDATA3), GPIO_FN(LCDOUT10), - GPIO_FN(LCDOUT11), - GPIO_FN(LCDOUT12), GPIO_FN(LCDOUT13), - GPIO_FN(LCDOUT14), - GPIO_FN(LCDOUT15), GPIO_FN(LCDOUT16), - GPIO_FN(EX_WAIT1), GPIO_FN(SCL1), GPIO_FN(TCLK1), GPIO_FN(AUDATA4), - GPIO_FN(LCDOUT17), GPIO_FN(EX_WAIT2), GPIO_FN(SDA1), - GPIO_FN(GPS_MAG_B), GPIO_FN(AUDATA5), - GPIO_FN(LCDOUT18), - GPIO_FN(LCDOUT19), GPIO_FN(LCDOUT20), - GPIO_FN(LCDOUT21), - GPIO_FN(LCDOUT22), GPIO_FN(LCDOUT23), - GPIO_FN(QSTVA_QVS), - GPIO_FN(SCL3_B), GPIO_FN(QCLK), - GPIO_FN(QSTVB_QVE), - GPIO_FN(SDA3_B), GPIO_FN(SDA2_C), GPIO_FN(DACK0_B), GPIO_FN(DRACK0_B), - GPIO_FN(QSTH_QHS), - GPIO_FN(QSTB_QHE), - GPIO_FN(QCPV_QDE), - GPIO_FN(CAN1_TX), GPIO_FN(SCL2_C), GPIO_FN(REMOCON), - - /* IPSR4 */ - GPIO_FN(QPOLA), GPIO_FN(CAN_CLK_C), - GPIO_FN(QPOLB), GPIO_FN(CAN1_RX), - GPIO_FN(DREQ0_B), GPIO_FN(SSI_SCK78_B), - GPIO_FN(VI2_DATA0_VI2_B0), GPIO_FN(PWM6), - GPIO_FN(AUDCK), - GPIO_FN(PWMFSW0_B), GPIO_FN(VI2_DATA1_VI2_B1), - GPIO_FN(PWM0), - GPIO_FN(AUDSYNC), GPIO_FN(VI2_G0), - GPIO_FN(VI2_G1), GPIO_FN(VI2_G2), - GPIO_FN(VI2_G3), GPIO_FN(VI2_G4), - GPIO_FN(VI2_G5), - GPIO_FN(VI2_DATA2_VI2_B2), GPIO_FN(SCL1_B), - GPIO_FN(AUDATA6), - GPIO_FN(VI2_DATA3_VI2_B3), GPIO_FN(SDA1_B), - GPIO_FN(AUDATA7), - GPIO_FN(VI2_G6), GPIO_FN(VI2_G7), - GPIO_FN(VI2_R0), GPIO_FN(VI2_R1), - GPIO_FN(VI2_R2), GPIO_FN(VI2_R3), - GPIO_FN(VI2_DATA4_VI2_B4), GPIO_FN(SCL2_B), - - /* IPSR5 */ - GPIO_FN(VI2_DATA5_VI2_B5), GPIO_FN(SDA2_B), - GPIO_FN(VI2_R4), GPIO_FN(VI2_R5), - GPIO_FN(VI2_R6), GPIO_FN(VI2_R7), - GPIO_FN(SCL2_D), GPIO_FN(SDA2_D), - GPIO_FN(VI2_CLKENB), - GPIO_FN(SCL1_D), GPIO_FN(VI2_FIELD), - GPIO_FN(SDA1_D), GPIO_FN(VI2_HSYNC), - GPIO_FN(VI3_HSYNC), GPIO_FN(VI2_VSYNC), - GPIO_FN(VI3_VSYNC), - GPIO_FN(VI2_CLK), - GPIO_FN(VI1_CLKENB), GPIO_FN(VI3_CLKENB), - GPIO_FN(AUDIO_CLKC), GPIO_FN(SPEEDIN), - GPIO_FN(GPS_SIGN_D), GPIO_FN(VI2_DATA6_VI2_B6), - GPIO_FN(TCLK0), GPIO_FN(QSTVA_B_QVS_B), - GPIO_FN(AUDIO_CLKOUT_B), GPIO_FN(GPS_MAG_D), - GPIO_FN(VI2_DATA7_VI2_B7), - GPIO_FN(VI1_FIELD), - GPIO_FN(VI3_FIELD), GPIO_FN(AUDIO_CLKOUT), - GPIO_FN(GPS_CLK_C), GPIO_FN(GPS_CLK_D), GPIO_FN(AUDIO_CLKA), - GPIO_FN(CAN_TXCLK), GPIO_FN(AUDIO_CLKB), - GPIO_FN(CAN_DEBUGOUT0), GPIO_FN(MOUT0), - - /* IPSR6 */ - GPIO_FN(SSI_SCK0129), GPIO_FN(CAN_DEBUGOUT1), GPIO_FN(MOUT1), - GPIO_FN(SSI_WS0129), GPIO_FN(CAN_DEBUGOUT2), GPIO_FN(MOUT2), - GPIO_FN(SSI_SDATA0), GPIO_FN(CAN_DEBUGOUT3), GPIO_FN(MOUT5), - GPIO_FN(SSI_SDATA1), GPIO_FN(CAN_DEBUGOUT4), GPIO_FN(MOUT6), - GPIO_FN(SSI_SDATA2), 
GPIO_FN(CAN_DEBUGOUT5), GPIO_FN(SSI_SCK34), - GPIO_FN(CAN_DEBUGOUT6), GPIO_FN(CAN0_TX_B), GPIO_FN(IERX), - GPIO_FN(SSI_SCK9_C), GPIO_FN(SSI_WS34), GPIO_FN(CAN_DEBUGOUT7), - GPIO_FN(CAN0_RX_B), GPIO_FN(IETX), GPIO_FN(SSI_WS9_C), - GPIO_FN(SSI_SDATA3), GPIO_FN(PWM0_C), GPIO_FN(CAN_DEBUGOUT8), - GPIO_FN(CAN_CLK_B), GPIO_FN(IECLK), GPIO_FN(SCIF_CLK_B), - GPIO_FN(TCLK0_B), GPIO_FN(SSI_SDATA4), GPIO_FN(CAN_DEBUGOUT9), - GPIO_FN(SSI_SDATA9_C), GPIO_FN(SSI_SCK5), GPIO_FN(ADICLK), - GPIO_FN(CAN_DEBUGOUT10), GPIO_FN(TCLK0_D), - GPIO_FN(SSI_WS5), GPIO_FN(ADICS_SAMP), GPIO_FN(CAN_DEBUGOUT11), - GPIO_FN(SSI_SDATA5), GPIO_FN(ADIDATA), - GPIO_FN(CAN_DEBUGOUT12), GPIO_FN(SSI_SCK6), - GPIO_FN(ADICHS0), GPIO_FN(CAN0_TX), GPIO_FN(IERX_B), - - /* IPSR7 */ - GPIO_FN(SSI_WS6), GPIO_FN(ADICHS1), GPIO_FN(CAN0_RX), GPIO_FN(IETX_B), - GPIO_FN(SSI_SDATA6), GPIO_FN(ADICHS2), GPIO_FN(CAN_CLK), - GPIO_FN(IECLK_B), GPIO_FN(SSI_SCK78), GPIO_FN(CAN_DEBUGOUT13), - GPIO_FN(SSI_SCK9_B), - GPIO_FN(SSI_WS78), GPIO_FN(CAN_DEBUGOUT14), - GPIO_FN(SSI_WS9_B), GPIO_FN(SSI_SDATA7), - GPIO_FN(CAN_DEBUGOUT15), GPIO_FN(TCLK1_C), - GPIO_FN(SSI_SDATA8), GPIO_FN(VSP), - GPIO_FN(ATACS01), GPIO_FN(ATACS11), - GPIO_FN(CC5_TDO), GPIO_FN(ATADIR1), - GPIO_FN(CC5_TRST), GPIO_FN(ATAG1), - GPIO_FN(CC5_TMS), GPIO_FN(ATARD1), - GPIO_FN(CC5_TCK), GPIO_FN(ATAWR1), - GPIO_FN(CC5_TDI), GPIO_FN(DREQ2), - GPIO_FN(DACK2), - - /* IPSR8 */ - GPIO_FN(AD_CLK), - GPIO_FN(CC5_STATE4), GPIO_FN(CC5_STATE12), GPIO_FN(CC5_STATE20), - GPIO_FN(CC5_STATE28), GPIO_FN(CC5_STATE36), - GPIO_FN(AD_DI), - GPIO_FN(CC5_STATE5), GPIO_FN(CC5_STATE13), GPIO_FN(CC5_STATE21), - GPIO_FN(CC5_STATE29), GPIO_FN(CC5_STATE37), - GPIO_FN(CAN_DEBUG_HW_TRIGGER), GPIO_FN(AD_DO), - GPIO_FN(CC5_STATE6), GPIO_FN(CC5_STATE14), GPIO_FN(CC5_STATE22), - GPIO_FN(CC5_STATE30), GPIO_FN(CC5_STATE38), - GPIO_FN(CAN_STEP0), GPIO_FN(AD_NCS), GPIO_FN(CC5_STATE7), - GPIO_FN(CC5_STATE15), GPIO_FN(CC5_STATE23), GPIO_FN(CC5_STATE31), - GPIO_FN(CC5_STATE39), GPIO_FN(FMCLK), GPIO_FN(RDS_CLK), GPIO_FN(PCMOE), - GPIO_FN(BPFCLK), GPIO_FN(PCMWE), GPIO_FN(FMIN), GPIO_FN(RDS_DATA), - GPIO_FN(VI0_CLK), GPIO_FN(VI0_CLKENB), - GPIO_FN(HTX1_B), GPIO_FN(MT1_SYNC), - GPIO_FN(VI0_FIELD), GPIO_FN(HRX1_B), - GPIO_FN(VI0_HSYNC), GPIO_FN(VI0_DATA0_B_VI0_B0_B), - GPIO_FN(HSCK1_B), - GPIO_FN(VI0_VSYNC), GPIO_FN(VI0_DATA1_B_VI0_B1_B), - GPIO_FN(PWMFSW0_C), - - /* IPSR9 */ - GPIO_FN(VI0_DATA0_VI0_B0), GPIO_FN(HRTS1_B), GPIO_FN(MT1_VCXO), - GPIO_FN(VI0_DATA1_VI0_B1), GPIO_FN(HCTS1_B), GPIO_FN(MT1_PWM), - GPIO_FN(VI0_DATA2_VI0_B2), GPIO_FN(VI0_DATA3_VI0_B3), - GPIO_FN(VI0_DATA4_VI0_B4), - GPIO_FN(VI0_DATA5_VI0_B5), GPIO_FN(VI0_DATA6_VI0_B6), - GPIO_FN(ARM_TRACEDATA_0), GPIO_FN(VI0_DATA7_VI0_B7), - GPIO_FN(ARM_TRACEDATA_1), GPIO_FN(VI0_G0), - GPIO_FN(SSI_SCK78_C), GPIO_FN(ARM_TRACEDATA_2), - GPIO_FN(VI0_G1), GPIO_FN(SSI_WS78_C), - GPIO_FN(ARM_TRACEDATA_3), GPIO_FN(VI0_G2), GPIO_FN(ETH_TXD1), - GPIO_FN(ARM_TRACEDATA_4), GPIO_FN(TS_SPSYNC0), - GPIO_FN(VI0_G3), GPIO_FN(ETH_CRS_DV), - GPIO_FN(ARM_TRACEDATA_5), GPIO_FN(TS_SDAT0), GPIO_FN(VI0_G4), - GPIO_FN(ETH_TX_EN), GPIO_FN(ARM_TRACEDATA_6), - GPIO_FN(VI0_G5), GPIO_FN(ETH_RX_ER), - GPIO_FN(ARM_TRACEDATA_7), GPIO_FN(VI0_G6), GPIO_FN(ETH_RXD0), - GPIO_FN(ARM_TRACEDATA_8), GPIO_FN(VI0_G7), - GPIO_FN(ETH_RXD1), GPIO_FN(ARM_TRACEDATA_9), - - /* IPSR10 */ - GPIO_FN(VI0_R0), GPIO_FN(SSI_SDATA7_C), - GPIO_FN(DREQ1_B), GPIO_FN(ARM_TRACEDATA_10), GPIO_FN(DREQ0_C), - GPIO_FN(VI0_R1), GPIO_FN(SSI_SDATA8_C), GPIO_FN(DACK1_B), - GPIO_FN(ARM_TRACEDATA_11), GPIO_FN(DACK0_C), GPIO_FN(DRACK0_C), - 
GPIO_FN(VI0_R2), GPIO_FN(ETH_LINK), - GPIO_FN(ARM_TRACEDATA_12), GPIO_FN(VI0_R3), GPIO_FN(ETH_MAGIC), - GPIO_FN(ARM_TRACEDATA_13), - GPIO_FN(VI0_R4), GPIO_FN(ETH_REFCLK), - GPIO_FN(ARM_TRACEDATA_14), GPIO_FN(MT1_CLK), - GPIO_FN(TS_SCK0), GPIO_FN(VI0_R5), GPIO_FN(ETH_TXD0), - GPIO_FN(ARM_TRACEDATA_15), - GPIO_FN(MT1_D), GPIO_FN(TS_SDEN0), GPIO_FN(VI0_R6), GPIO_FN(ETH_MDC), - GPIO_FN(DREQ2_C), GPIO_FN(TRACECLK), - GPIO_FN(MT1_BEN), GPIO_FN(PWMFSW0_D), GPIO_FN(VI0_R7), - GPIO_FN(ETH_MDIO), GPIO_FN(DACK2_C), - GPIO_FN(SCIF_CLK_D), GPIO_FN(TRACECTL), GPIO_FN(MT1_PEN), - GPIO_FN(VI1_CLK), GPIO_FN(SIM_D), GPIO_FN(SDA3), GPIO_FN(VI1_HSYNC), - GPIO_FN(VI3_CLK), GPIO_FN(SSI_SCK4), GPIO_FN(GPS_SIGN_C), - GPIO_FN(PWMFSW0_E), GPIO_FN(VI1_VSYNC), GPIO_FN(AUDIO_CLKOUT_C), - GPIO_FN(SSI_WS4), GPIO_FN(SIM_CLK), GPIO_FN(GPS_MAG_C), - GPIO_FN(SPV_TRST), GPIO_FN(SCL3), - - /* IPSR11 */ - GPIO_FN(VI1_DATA0_VI1_B0), GPIO_FN(SIM_RST), - GPIO_FN(SPV_TCK), GPIO_FN(ADICLK_B), GPIO_FN(VI1_DATA1_VI1_B1), - GPIO_FN(MT0_CLK), GPIO_FN(SPV_TMS), - GPIO_FN(ADICS_B_SAMP_B), GPIO_FN(VI1_DATA2_VI1_B2), - GPIO_FN(MT0_D), GPIO_FN(SPVTDI), GPIO_FN(ADIDATA_B), - GPIO_FN(VI1_DATA3_VI1_B3), GPIO_FN(MT0_BEN), - GPIO_FN(SPV_TDO), GPIO_FN(ADICHS0_B), GPIO_FN(VI1_DATA4_VI1_B4), - GPIO_FN(MT0_PEN), GPIO_FN(SPA_TRST), - GPIO_FN(ADICHS1_B), GPIO_FN(VI1_DATA5_VI1_B5), - GPIO_FN(MT0_SYNC), GPIO_FN(SPA_TCK), - GPIO_FN(ADICHS2_B), GPIO_FN(VI1_DATA6_VI1_B6), - GPIO_FN(MT0_VCXO), GPIO_FN(SPA_TMS), - GPIO_FN(VI1_DATA7_VI1_B7), - GPIO_FN(MT0_PWM), GPIO_FN(SPA_TDI), - GPIO_FN(VI1_G0), GPIO_FN(VI3_DATA0), - GPIO_FN(TS_SCK1), GPIO_FN(DREQ2_B), GPIO_FN(SPA_TDO), - GPIO_FN(HCTS0_B), GPIO_FN(VI1_G1), GPIO_FN(VI3_DATA1), - GPIO_FN(SSI_SCK1), GPIO_FN(TS_SDEN1), GPIO_FN(DACK2_B), - GPIO_FN(HRTS0_B), - - /* IPSR12 */ - GPIO_FN(VI1_G2), GPIO_FN(VI3_DATA2), GPIO_FN(SSI_WS1), - GPIO_FN(TS_SPSYNC1), GPIO_FN(HSCK0_B), GPIO_FN(VI1_G3), - GPIO_FN(VI3_DATA3), GPIO_FN(SSI_SCK2), GPIO_FN(TS_SDAT1), - GPIO_FN(SCL1_C), GPIO_FN(HTX0_B), GPIO_FN(VI1_G4), GPIO_FN(VI3_DATA4), - GPIO_FN(SSI_WS2), GPIO_FN(SDA1_C), GPIO_FN(SIM_RST_B), - GPIO_FN(HRX0_B), GPIO_FN(VI1_G5), GPIO_FN(VI3_DATA5), - GPIO_FN(GPS_CLK), GPIO_FN(FSE), GPIO_FN(SIM_D_B), - GPIO_FN(VI1_G6), GPIO_FN(VI3_DATA6), GPIO_FN(GPS_SIGN), GPIO_FN(FRB), - GPIO_FN(SIM_CLK_B), GPIO_FN(VI1_G7), - GPIO_FN(VI3_DATA7), GPIO_FN(GPS_MAG), GPIO_FN(FCE), -}; - static const struct pinmux_cfg_reg pinmux_config_regs[] = { { PINMUX_CFG_REG("GPSR0", 0xfffc0004, 32, 1) { GP_0_31_FN, FN_IP3_31_29, @@ -3831,9 +3563,6 @@ const struct sh_pfc_soc_info r8a7779_pinmux_info = { .functions = pinmux_functions, .nr_functions = ARRAY_SIZE(pinmux_functions), - .func_gpios = pinmux_func_gpios, - .nr_func_gpios = ARRAY_SIZE(pinmux_func_gpios), - .cfg_regs = pinmux_config_regs, .data_regs = pinmux_data_regs, -- GitLab From 7417dae5214a19885220597562ea16f238d2c6fc Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 7 Mar 2013 23:47:18 +0100 Subject: [PATCH 0322/3163] sh-pfc: r8a7779: Don't use GPIO enum entries Refactor the GPIO macro magic to use GPIO numbers directly instead of the GPIO_GP_x_y enum entries. This will allow removing the GPIO enum entries from the mach/r8a7779.h header. 
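The macro machinery is easier to follow in isolation. The stand-alone sketch below (plain C with a simplified __stringify and a three-pin table, all illustrative) shows the core trick: one PORT_GP_1() invocation pastes the bank and pin numbers into both the entry's name string and its enum identifier.

#include <stdio.h>

#define __stringify(x)	#x

struct pin_desc {
	const char *name;
	int enum_id;
};

enum { GP_0_0_DATA, GP_0_1_DATA, GP_0_2_DATA };

/* same shape as the driver's macros, reduced to the GPIO table case */
#define PORT_GP_1(bank, pin, fn, sfx) fn(bank, pin, GP_##bank##_##pin, sfx)

#define _GP_GPIO(bank, pin, _name, sfx)		\
	[(bank * 32) + pin] = {			\
		.name = __stringify(_name),	\
		.enum_id = _name##_DATA,	\
	}

static const struct pin_desc pins[] = {
	PORT_GP_1(0, 0, _GP_GPIO, unused),  /* -> [0] = { "GP_0_0", GP_0_0_DATA } */
	PORT_GP_1(0, 1, _GP_GPIO, unused),
	PORT_GP_1(0, 2, _GP_GPIO, unused),
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(pins) / sizeof(pins[0]); i++)
		printf("%s -> enum id %d\n", pins[i].name, pins[i].enum_id);
	return 0;
}

In the driver the same expansion is driven through CPU_ALL_PORT(), which emits all 201 pins (six banks of 32 plus nine pins in bank 6) in one pass, so the GPIO_GP_x_y enum entries no longer need to exist anywhere.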
Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a7779.c | 104 +++++++++++++++++++-------- 1 file changed, 74 insertions(+), 30 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c index 4db99445acc2..41d8bda45163 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c @@ -19,39 +19,83 @@ */ #include -#include #include "sh_pfc.h" -#define CPU_32_PORT6(fn, pfx, sfx) \ - PORT_1(fn, pfx##0, sfx), PORT_1(fn, pfx##1, sfx), \ - PORT_1(fn, pfx##2, sfx), PORT_1(fn, pfx##3, sfx), \ - PORT_1(fn, pfx##4, sfx), PORT_1(fn, pfx##5, sfx), \ - PORT_1(fn, pfx##6, sfx), PORT_1(fn, pfx##7, sfx), \ - PORT_1(fn, pfx##8, sfx) - -#define CPU_ALL_PORT(fn, pfx, sfx) \ - PORT_32(fn, pfx##_0_, sfx), \ - PORT_32(fn, pfx##_1_, sfx), \ - PORT_32(fn, pfx##_2_, sfx), \ - PORT_32(fn, pfx##_3_, sfx), \ - PORT_32(fn, pfx##_4_, sfx), \ - PORT_32(fn, pfx##_5_, sfx), \ - CPU_32_PORT6(fn, pfx##_6_, sfx) - -#define _GP_GPIO(pfx, sfx) PINMUX_GPIO(GPIO_GP##pfx, GP##pfx##_DATA) -#define _GP_DATA(pfx, sfx) PINMUX_DATA(GP##pfx##_DATA, GP##pfx##_FN, \ - GP##pfx##_IN, GP##pfx##_OUT) - -#define _GP_INOUTSEL(pfx, sfx) GP##pfx##_IN, GP##pfx##_OUT -#define _GP_INDT(pfx, sfx) GP##pfx##_DATA - -#define GP_ALL(str) CPU_ALL_PORT(_PORT_ALL, GP, str) -#define PINMUX_GPIO_GP_ALL() CPU_ALL_PORT(_GP_GPIO, , unused) -#define PINMUX_DATA_GP_ALL() CPU_ALL_PORT(_GP_DATA, , unused) - -#define GP_INOUTSEL(bank) PORT_32_REV(_GP_INOUTSEL, _##bank##_, unused) -#define GP_INDT(bank) PORT_32_REV(_GP_INDT, _##bank##_, unused) +#define PORT_GP_1(bank, pin, fn, sfx) fn(bank, pin, GP_##bank##_##pin, sfx) + +#define PORT_GP_32(bank, fn, sfx) \ + PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \ + PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx), \ + PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \ + PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \ + PORT_GP_1(bank, 8, fn, sfx), PORT_GP_1(bank, 9, fn, sfx), \ + PORT_GP_1(bank, 10, fn, sfx), PORT_GP_1(bank, 11, fn, sfx), \ + PORT_GP_1(bank, 12, fn, sfx), PORT_GP_1(bank, 13, fn, sfx), \ + PORT_GP_1(bank, 14, fn, sfx), PORT_GP_1(bank, 15, fn, sfx), \ + PORT_GP_1(bank, 16, fn, sfx), PORT_GP_1(bank, 17, fn, sfx), \ + PORT_GP_1(bank, 18, fn, sfx), PORT_GP_1(bank, 19, fn, sfx), \ + PORT_GP_1(bank, 20, fn, sfx), PORT_GP_1(bank, 21, fn, sfx), \ + PORT_GP_1(bank, 22, fn, sfx), PORT_GP_1(bank, 23, fn, sfx), \ + PORT_GP_1(bank, 24, fn, sfx), PORT_GP_1(bank, 25, fn, sfx), \ + PORT_GP_1(bank, 26, fn, sfx), PORT_GP_1(bank, 27, fn, sfx), \ + PORT_GP_1(bank, 28, fn, sfx), PORT_GP_1(bank, 29, fn, sfx), \ + PORT_GP_1(bank, 30, fn, sfx), PORT_GP_1(bank, 31, fn, sfx) + +#define PORT_GP_32_9(bank, fn, sfx) \ + PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \ + PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx), \ + PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \ + PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \ + PORT_GP_1(bank, 8, fn, sfx) + +#define PORT_GP_32_REV(bank, fn, sfx) \ + PORT_GP_1(bank, 31, fn, sfx), PORT_GP_1(bank, 30, fn, sfx), \ + PORT_GP_1(bank, 29, fn, sfx), PORT_GP_1(bank, 28, fn, sfx), \ + PORT_GP_1(bank, 27, fn, sfx), PORT_GP_1(bank, 26, fn, sfx), \ + PORT_GP_1(bank, 25, fn, sfx), PORT_GP_1(bank, 24, fn, sfx), \ + PORT_GP_1(bank, 23, fn, sfx), PORT_GP_1(bank, 22, fn, sfx), \ + PORT_GP_1(bank, 21, fn, sfx), PORT_GP_1(bank, 20, fn, sfx), \ + PORT_GP_1(bank, 19, fn, sfx), PORT_GP_1(bank, 
18, fn, sfx), \ + PORT_GP_1(bank, 17, fn, sfx), PORT_GP_1(bank, 16, fn, sfx), \ + PORT_GP_1(bank, 15, fn, sfx), PORT_GP_1(bank, 14, fn, sfx), \ + PORT_GP_1(bank, 13, fn, sfx), PORT_GP_1(bank, 12, fn, sfx), \ + PORT_GP_1(bank, 11, fn, sfx), PORT_GP_1(bank, 10, fn, sfx), \ + PORT_GP_1(bank, 9, fn, sfx), PORT_GP_1(bank, 8, fn, sfx), \ + PORT_GP_1(bank, 7, fn, sfx), PORT_GP_1(bank, 6, fn, sfx), \ + PORT_GP_1(bank, 5, fn, sfx), PORT_GP_1(bank, 4, fn, sfx), \ + PORT_GP_1(bank, 3, fn, sfx), PORT_GP_1(bank, 2, fn, sfx), \ + PORT_GP_1(bank, 1, fn, sfx), PORT_GP_1(bank, 0, fn, sfx) + +#define CPU_ALL_PORT(fn, sfx) \ + PORT_GP_32(0, fn, sfx), \ + PORT_GP_32(1, fn, sfx), \ + PORT_GP_32(2, fn, sfx), \ + PORT_GP_32(3, fn, sfx), \ + PORT_GP_32(4, fn, sfx), \ + PORT_GP_32(5, fn, sfx), \ + PORT_GP_32_9(6, fn, sfx) + +#define _GP_PORT_ALL(bank, pin, name, sfx) name##_##sfx + +#define _GP_GPIO(bank, pin, _name, sfx) \ + [(bank * 32) + pin] = { \ + .name = __stringify(_name), \ + .enum_id = _name##_DATA, \ + } + +#define _GP_DATA(bank, pin, name, sfx) \ + PINMUX_DATA(name##_DATA, name##_FN, name##_IN, name##_OUT) + +#define _GP_INOUTSEL(bank, pin, name, sfx) name##_IN, name##_OUT +#define _GP_INDT(bank, pin, name, sfx) name##_DATA + +#define GP_ALL(str) CPU_ALL_PORT(_GP_PORT_ALL, str) +#define PINMUX_GPIO_GP_ALL() CPU_ALL_PORT(_GP_GPIO, unused) +#define PINMUX_DATA_GP_ALL() CPU_ALL_PORT(_GP_DATA, unused) + +#define GP_INOUTSEL(bank) PORT_GP_32_REV(bank, _GP_INOUTSEL, unused) +#define GP_INDT(bank) PORT_GP_32_REV(bank, _GP_INDT, unused) #define PINMUX_IPSR_DATA(ipsr, fn) PINMUX_DATA(fn##_MARK, FN_##ipsr, FN_##fn) #define PINMUX_IPSR_MODSEL_DATA(ipsr, fn, ms) PINMUX_DATA(fn##_MARK, FN_##ms, \ -- GitLab From 87bd63bfcf177daa272432482c17195f3c0ebb21 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 7 Mar 2013 14:44:27 +0100 Subject: [PATCH 0323/3163] ARM: shmobile: r8a7779: Remove all GPIOs Function GPIOs are not used anymore, and all code uses the GPIO numbers directly. Remove the GPIO enumeration.
Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/r8a7779.h | 317 ------------------ 1 file changed, 317 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/r8a7779.h b/arch/arm/mach-shmobile/include/mach/r8a7779.h index 68c3b2dfb018..945299ed1638 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7779.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7779.h @@ -4,323 +4,6 @@ #include #include -/* Pin Function Controller: - * GPIO_FN_xx - GPIO used to select pin function - * GPIO_GP_x_x - GPIO mapped to real I/O pin on CPU - */ -enum { - GPIO_GP_0_0, GPIO_GP_0_1, GPIO_GP_0_2, GPIO_GP_0_3, - GPIO_GP_0_4, GPIO_GP_0_5, GPIO_GP_0_6, GPIO_GP_0_7, - GPIO_GP_0_8, GPIO_GP_0_9, GPIO_GP_0_10, GPIO_GP_0_11, - GPIO_GP_0_12, GPIO_GP_0_13, GPIO_GP_0_14, GPIO_GP_0_15, - GPIO_GP_0_16, GPIO_GP_0_17, GPIO_GP_0_18, GPIO_GP_0_19, - GPIO_GP_0_20, GPIO_GP_0_21, GPIO_GP_0_22, GPIO_GP_0_23, - GPIO_GP_0_24, GPIO_GP_0_25, GPIO_GP_0_26, GPIO_GP_0_27, - GPIO_GP_0_28, GPIO_GP_0_29, GPIO_GP_0_30, GPIO_GP_0_31, - - GPIO_GP_1_0, GPIO_GP_1_1, GPIO_GP_1_2, GPIO_GP_1_3, - GPIO_GP_1_4, GPIO_GP_1_5, GPIO_GP_1_6, GPIO_GP_1_7, - GPIO_GP_1_8, GPIO_GP_1_9, GPIO_GP_1_10, GPIO_GP_1_11, - GPIO_GP_1_12, GPIO_GP_1_13, GPIO_GP_1_14, GPIO_GP_1_15, - GPIO_GP_1_16, GPIO_GP_1_17, GPIO_GP_1_18, GPIO_GP_1_19, - GPIO_GP_1_20, GPIO_GP_1_21, GPIO_GP_1_22, GPIO_GP_1_23, - GPIO_GP_1_24, GPIO_GP_1_25, GPIO_GP_1_26, GPIO_GP_1_27, - GPIO_GP_1_28, GPIO_GP_1_29, GPIO_GP_1_30, GPIO_GP_1_31, - - GPIO_GP_2_0, GPIO_GP_2_1, GPIO_GP_2_2, GPIO_GP_2_3, - GPIO_GP_2_4, GPIO_GP_2_5, GPIO_GP_2_6, GPIO_GP_2_7, - GPIO_GP_2_8, GPIO_GP_2_9, GPIO_GP_2_10, GPIO_GP_2_11, - GPIO_GP_2_12, GPIO_GP_2_13, GPIO_GP_2_14, GPIO_GP_2_15, - GPIO_GP_2_16, GPIO_GP_2_17, GPIO_GP_2_18, GPIO_GP_2_19, - GPIO_GP_2_20, GPIO_GP_2_21, GPIO_GP_2_22, GPIO_GP_2_23, - GPIO_GP_2_24, GPIO_GP_2_25, GPIO_GP_2_26, GPIO_GP_2_27, - GPIO_GP_2_28, GPIO_GP_2_29, GPIO_GP_2_30, GPIO_GP_2_31, - - GPIO_GP_3_0, GPIO_GP_3_1, GPIO_GP_3_2, GPIO_GP_3_3, - GPIO_GP_3_4, GPIO_GP_3_5, GPIO_GP_3_6, GPIO_GP_3_7, - GPIO_GP_3_8, GPIO_GP_3_9, GPIO_GP_3_10, GPIO_GP_3_11, - GPIO_GP_3_12, GPIO_GP_3_13, GPIO_GP_3_14, GPIO_GP_3_15, - GPIO_GP_3_16, GPIO_GP_3_17, GPIO_GP_3_18, GPIO_GP_3_19, - GPIO_GP_3_20, GPIO_GP_3_21, GPIO_GP_3_22, GPIO_GP_3_23, - GPIO_GP_3_24, GPIO_GP_3_25, GPIO_GP_3_26, GPIO_GP_3_27, - GPIO_GP_3_28, GPIO_GP_3_29, GPIO_GP_3_30, GPIO_GP_3_31, - - GPIO_GP_4_0, GPIO_GP_4_1, GPIO_GP_4_2, GPIO_GP_4_3, - GPIO_GP_4_4, GPIO_GP_4_5, GPIO_GP_4_6, GPIO_GP_4_7, - GPIO_GP_4_8, GPIO_GP_4_9, GPIO_GP_4_10, GPIO_GP_4_11, - GPIO_GP_4_12, GPIO_GP_4_13, GPIO_GP_4_14, GPIO_GP_4_15, - GPIO_GP_4_16, GPIO_GP_4_17, GPIO_GP_4_18, GPIO_GP_4_19, - GPIO_GP_4_20, GPIO_GP_4_21, GPIO_GP_4_22, GPIO_GP_4_23, - GPIO_GP_4_24, GPIO_GP_4_25, GPIO_GP_4_26, GPIO_GP_4_27, - GPIO_GP_4_28, GPIO_GP_4_29, GPIO_GP_4_30, GPIO_GP_4_31, - - GPIO_GP_5_0, GPIO_GP_5_1, GPIO_GP_5_2, GPIO_GP_5_3, - GPIO_GP_5_4, GPIO_GP_5_5, GPIO_GP_5_6, GPIO_GP_5_7, - GPIO_GP_5_8, GPIO_GP_5_9, GPIO_GP_5_10, GPIO_GP_5_11, - GPIO_GP_5_12, GPIO_GP_5_13, GPIO_GP_5_14, GPIO_GP_5_15, - GPIO_GP_5_16, GPIO_GP_5_17, GPIO_GP_5_18, GPIO_GP_5_19, - GPIO_GP_5_20, GPIO_GP_5_21, GPIO_GP_5_22, GPIO_GP_5_23, - GPIO_GP_5_24, GPIO_GP_5_25, GPIO_GP_5_26, GPIO_GP_5_27, - GPIO_GP_5_28, GPIO_GP_5_29, GPIO_GP_5_30, GPIO_GP_5_31, - - GPIO_GP_6_0, GPIO_GP_6_1, GPIO_GP_6_2, GPIO_GP_6_3, - GPIO_GP_6_4, GPIO_GP_6_5, GPIO_GP_6_6, GPIO_GP_6_7, - GPIO_GP_6_8, - - GPIO_FN_AVS1, GPIO_FN_AVS2, GPIO_FN_A17, GPIO_FN_A18, - 
GPIO_FN_A19, - - /* IPSR0 */ - GPIO_FN_PWM1, GPIO_FN_PWMFSW0, - GPIO_FN_SCIF_CLK, GPIO_FN_TCLK0_C, GPIO_FN_BS, - GPIO_FN_FD2, GPIO_FN_ATADIR0, GPIO_FN_SDSELF, - GPIO_FN_HCTS1, GPIO_FN_A0, - GPIO_FN_FD3, GPIO_FN_A20, - GPIO_FN_A21, - GPIO_FN_A22, GPIO_FN_VI1_R0, - GPIO_FN_A23, GPIO_FN_FCLE, GPIO_FN_VI1_R1, - GPIO_FN_A24, GPIO_FN_FD4, - GPIO_FN_VI1_R2, GPIO_FN_SSI_WS78_B, GPIO_FN_A25, - GPIO_FN_FD5, - GPIO_FN_VI1_R3, GPIO_FN_SSI_SDATA7_B, - GPIO_FN_CLKOUT, GPIO_FN_PWM0_B, - GPIO_FN_SDSELF_B, GPIO_FN_RD_WR, GPIO_FN_FWE, GPIO_FN_ATAG0, - GPIO_FN_VI1_R7, GPIO_FN_HRTS1, - - /* IPSR1 */ - GPIO_FN_FD6, GPIO_FN_FD7, - GPIO_FN_FALE, - GPIO_FN_ATACS00, - GPIO_FN_FRE, GPIO_FN_ATACS10, GPIO_FN_VI1_R4, - GPIO_FN_HSCK1, GPIO_FN_SSI_SDATA8_B, - GPIO_FN_SSI_SDATA9, - GPIO_FN_FD0, GPIO_FN_ATARD0, GPIO_FN_VI1_R5, - GPIO_FN_HTX1, GPIO_FN_SSI_SCK9, - GPIO_FN_FD1, - GPIO_FN_ATAWR0, GPIO_FN_VI1_R6, GPIO_FN_HRX1, - GPIO_FN_SSI_WS9, GPIO_FN_MLB_CLK, GPIO_FN_PWM2, - GPIO_FN_MLB_SIG, GPIO_FN_PWM3, - GPIO_FN_MLB_DAT, GPIO_FN_PWM4, GPIO_FN_HTX0, - GPIO_FN_SDATA, GPIO_FN_SUB_TCK, - GPIO_FN_CC5_STATE2, GPIO_FN_CC5_STATE10, GPIO_FN_CC5_STATE18, - GPIO_FN_CC5_STATE26, GPIO_FN_CC5_STATE34, - - /* IPSR2 */ - GPIO_FN_HRX0, GPIO_FN_SCKZ, - GPIO_FN_SUB_TDI, GPIO_FN_CC5_STATE3, GPIO_FN_CC5_STATE11, - GPIO_FN_CC5_STATE19, GPIO_FN_CC5_STATE27, GPIO_FN_CC5_STATE35, - GPIO_FN_HSCK0, GPIO_FN_MTS, GPIO_FN_PWM5, - GPIO_FN_SSI_SDATA9_B, GPIO_FN_SUB_TDO, - GPIO_FN_CC5_STATE0, GPIO_FN_CC5_STATE8, GPIO_FN_CC5_STATE16, - GPIO_FN_CC5_STATE24, GPIO_FN_CC5_STATE32, GPIO_FN_HCTS0, - GPIO_FN_STM, GPIO_FN_PWM0_D, GPIO_FN_SCIF_CLK_C, - GPIO_FN_SUB_TRST, GPIO_FN_TCLK1_B, GPIO_FN_CC5_OSCOUT, GPIO_FN_HRTS0, - GPIO_FN_MDATA, GPIO_FN_SUB_TMS, - GPIO_FN_CC5_STATE1, GPIO_FN_CC5_STATE9, GPIO_FN_CC5_STATE17, - GPIO_FN_CC5_STATE25, GPIO_FN_CC5_STATE33, - GPIO_FN_LCDOUT0, GPIO_FN_DREQ0, GPIO_FN_GPS_CLK_B, GPIO_FN_AUDATA0, - GPIO_FN_LCDOUT1, GPIO_FN_DACK0, - GPIO_FN_DRACK0, GPIO_FN_GPS_SIGN_B, GPIO_FN_AUDATA1, - GPIO_FN_LCDOUT2, GPIO_FN_LCDOUT3, - GPIO_FN_LCDOUT4, GPIO_FN_LCDOUT5, - GPIO_FN_LCDOUT6, GPIO_FN_LCDOUT7, - GPIO_FN_LCDOUT8, GPIO_FN_DREQ1, GPIO_FN_SCL2, - GPIO_FN_AUDATA2, - - /* IPSR3 */ - GPIO_FN_LCDOUT9, GPIO_FN_DACK1, GPIO_FN_SDA2, - GPIO_FN_AUDATA3, GPIO_FN_LCDOUT10, - GPIO_FN_LCDOUT11, GPIO_FN_LCDOUT12, - GPIO_FN_LCDOUT13, GPIO_FN_LCDOUT14, - GPIO_FN_LCDOUT15, GPIO_FN_LCDOUT16, GPIO_FN_EX_WAIT1, - GPIO_FN_SCL1, GPIO_FN_TCLK1, GPIO_FN_AUDATA4, - GPIO_FN_LCDOUT17, GPIO_FN_EX_WAIT2, GPIO_FN_SDA1, GPIO_FN_GPS_MAG_B, - GPIO_FN_AUDATA5, GPIO_FN_LCDOUT18, - GPIO_FN_LCDOUT19, GPIO_FN_LCDOUT20, - GPIO_FN_LCDOUT21, GPIO_FN_LCDOUT22, - GPIO_FN_LCDOUT23, - GPIO_FN_QSTVA_QVS, GPIO_FN_SCL3_B, - GPIO_FN_QCLK, - GPIO_FN_QSTVB_QVE, GPIO_FN_SDA3_B, - GPIO_FN_SDA2_C, GPIO_FN_DACK0_B, GPIO_FN_DRACK0_B, - GPIO_FN_QSTH_QHS, - GPIO_FN_QSTB_QHE, - GPIO_FN_QCPV_QDE, - GPIO_FN_CAN1_TX, GPIO_FN_SCL2_C, GPIO_FN_REMOCON, - - /* IPSR4 */ - GPIO_FN_QPOLA, GPIO_FN_CAN_CLK_C, - GPIO_FN_QPOLB, GPIO_FN_CAN1_RX, - GPIO_FN_DREQ0_B, GPIO_FN_SSI_SCK78_B, - GPIO_FN_VI2_DATA0_VI2_B0, GPIO_FN_PWM6, - GPIO_FN_AUDCK, GPIO_FN_PWMFSW0_B, - GPIO_FN_VI2_DATA1_VI2_B1, GPIO_FN_PWM0, - GPIO_FN_AUDSYNC, - GPIO_FN_VI2_G0, - GPIO_FN_VI2_G1, GPIO_FN_VI2_G2, - GPIO_FN_VI2_G3, GPIO_FN_VI2_G4, - GPIO_FN_VI2_G5, GPIO_FN_VI2_DATA2_VI2_B2, - GPIO_FN_SCL1_B, GPIO_FN_AUDATA6, - GPIO_FN_VI2_DATA3_VI2_B3, - GPIO_FN_SDA1_B, GPIO_FN_AUDATA7, - GPIO_FN_VI2_G6, - GPIO_FN_VI2_G7, GPIO_FN_VI2_R0, - GPIO_FN_VI2_R1, GPIO_FN_VI2_R2, - GPIO_FN_VI2_R3, GPIO_FN_VI2_DATA4_VI2_B4, - GPIO_FN_SCL2_B, - - /* IPSR5 */ - 
GPIO_FN_VI2_DATA5_VI2_B5, GPIO_FN_SDA2_B, - GPIO_FN_VI2_R4, GPIO_FN_VI2_R5, - GPIO_FN_VI2_R6, GPIO_FN_VI2_R7, - GPIO_FN_SCL2_D, GPIO_FN_SDA2_D, - GPIO_FN_VI2_CLKENB, - GPIO_FN_SCL1_D, GPIO_FN_VI2_FIELD, - GPIO_FN_SDA1_D, GPIO_FN_VI2_HSYNC, - GPIO_FN_VI3_HSYNC, GPIO_FN_VI2_VSYNC, - GPIO_FN_VI3_VSYNC, - GPIO_FN_VI2_CLK, - GPIO_FN_VI1_CLKENB, GPIO_FN_VI3_CLKENB, - GPIO_FN_AUDIO_CLKC, GPIO_FN_SPEEDIN, - GPIO_FN_GPS_SIGN_D, GPIO_FN_VI2_DATA6_VI2_B6, - GPIO_FN_TCLK0, GPIO_FN_QSTVA_B_QVS_B, - GPIO_FN_AUDIO_CLKOUT_B, GPIO_FN_GPS_MAG_D, - GPIO_FN_VI2_DATA7_VI2_B7, - GPIO_FN_VI1_FIELD, GPIO_FN_VI3_FIELD, - GPIO_FN_AUDIO_CLKOUT, GPIO_FN_GPS_CLK_C, - GPIO_FN_GPS_CLK_D, GPIO_FN_AUDIO_CLKA, GPIO_FN_CAN_TXCLK, - GPIO_FN_AUDIO_CLKB, GPIO_FN_CAN_DEBUGOUT0, - GPIO_FN_MOUT0, - - /* IPSR6 */ - GPIO_FN_SSI_SCK0129, GPIO_FN_CAN_DEBUGOUT1, GPIO_FN_MOUT1, - GPIO_FN_SSI_WS0129, GPIO_FN_CAN_DEBUGOUT2, GPIO_FN_MOUT2, - GPIO_FN_SSI_SDATA0, GPIO_FN_CAN_DEBUGOUT3, GPIO_FN_MOUT5, - GPIO_FN_SSI_SDATA1, GPIO_FN_CAN_DEBUGOUT4, GPIO_FN_MOUT6, - GPIO_FN_SSI_SDATA2, GPIO_FN_CAN_DEBUGOUT5, GPIO_FN_SSI_SCK34, - GPIO_FN_CAN_DEBUGOUT6, GPIO_FN_CAN0_TX_B, GPIO_FN_IERX, - GPIO_FN_SSI_SCK9_C, GPIO_FN_SSI_WS34, GPIO_FN_CAN_DEBUGOUT7, - GPIO_FN_CAN0_RX_B, GPIO_FN_IETX, GPIO_FN_SSI_WS9_C, - GPIO_FN_SSI_SDATA3, GPIO_FN_PWM0_C, GPIO_FN_CAN_DEBUGOUT8, - GPIO_FN_CAN_CLK_B, GPIO_FN_IECLK, GPIO_FN_SCIF_CLK_B, GPIO_FN_TCLK0_B, - GPIO_FN_SSI_SDATA4, GPIO_FN_CAN_DEBUGOUT9, GPIO_FN_SSI_SDATA9_C, - GPIO_FN_SSI_SCK5, GPIO_FN_ADICLK, GPIO_FN_CAN_DEBUGOUT10, - GPIO_FN_TCLK0_D, GPIO_FN_SSI_WS5, GPIO_FN_ADICS_SAMP, - GPIO_FN_CAN_DEBUGOUT11, GPIO_FN_SSI_SDATA5, - GPIO_FN_ADIDATA, GPIO_FN_CAN_DEBUGOUT12, - GPIO_FN_SSI_SCK6, GPIO_FN_ADICHS0, GPIO_FN_CAN0_TX, GPIO_FN_IERX_B, - - /* IPSR7 */ - GPIO_FN_SSI_WS6, GPIO_FN_ADICHS1, GPIO_FN_CAN0_RX, GPIO_FN_IETX_B, - GPIO_FN_SSI_SDATA6, GPIO_FN_ADICHS2, GPIO_FN_CAN_CLK, GPIO_FN_IECLK_B, - GPIO_FN_SSI_SCK78, GPIO_FN_CAN_DEBUGOUT13, - GPIO_FN_SSI_SCK9_B, GPIO_FN_SSI_WS78, - GPIO_FN_CAN_DEBUGOUT14, GPIO_FN_SSI_WS9_B, - GPIO_FN_SSI_SDATA7, GPIO_FN_CAN_DEBUGOUT15, - GPIO_FN_TCLK1_C, - GPIO_FN_SSI_SDATA8, GPIO_FN_VSP, - GPIO_FN_ATACS01, - GPIO_FN_ATACS11, GPIO_FN_CC5_TDO, - GPIO_FN_ATADIR1, GPIO_FN_CC5_TRST, - GPIO_FN_ATAG1, GPIO_FN_CC5_TMS, - GPIO_FN_ATARD1, GPIO_FN_CC5_TCK, - GPIO_FN_ATAWR1, GPIO_FN_CC5_TDI, - GPIO_FN_DREQ2, GPIO_FN_DACK2, - - /* IPSR8 */ - GPIO_FN_AD_CLK, - GPIO_FN_CC5_STATE4, GPIO_FN_CC5_STATE12, GPIO_FN_CC5_STATE20, - GPIO_FN_CC5_STATE28, GPIO_FN_CC5_STATE36, - GPIO_FN_AD_DI, - GPIO_FN_CC5_STATE5, GPIO_FN_CC5_STATE13, GPIO_FN_CC5_STATE21, - GPIO_FN_CC5_STATE29, GPIO_FN_CC5_STATE37, - GPIO_FN_CAN_DEBUG_HW_TRIGGER, GPIO_FN_AD_DO, - GPIO_FN_CC5_STATE6, GPIO_FN_CC5_STATE14, GPIO_FN_CC5_STATE22, - GPIO_FN_CC5_STATE30, GPIO_FN_CC5_STATE38, - GPIO_FN_CAN_STEP0, GPIO_FN_AD_NCS, GPIO_FN_CC5_STATE7, - GPIO_FN_CC5_STATE15, GPIO_FN_CC5_STATE23, GPIO_FN_CC5_STATE31, - GPIO_FN_CC5_STATE39, GPIO_FN_FMCLK, GPIO_FN_RDS_CLK, GPIO_FN_PCMOE, - GPIO_FN_BPFCLK, GPIO_FN_PCMWE, GPIO_FN_FMIN, GPIO_FN_RDS_DATA, - GPIO_FN_VI0_CLK, GPIO_FN_VI0_CLKENB, - GPIO_FN_HTX1_B, GPIO_FN_MT1_SYNC, GPIO_FN_VI0_FIELD, - GPIO_FN_HRX1_B, GPIO_FN_VI0_HSYNC, GPIO_FN_VI0_DATA0_B_VI0_B0_B, - GPIO_FN_HSCK1_B, - GPIO_FN_VI0_VSYNC, GPIO_FN_VI0_DATA1_B_VI0_B1_B, - GPIO_FN_PWMFSW0_C, - - /* IPSR9 */ - GPIO_FN_VI0_DATA0_VI0_B0, GPIO_FN_HRTS1_B, GPIO_FN_MT1_VCXO, - GPIO_FN_VI0_DATA1_VI0_B1, GPIO_FN_HCTS1_B, GPIO_FN_MT1_PWM, - GPIO_FN_VI0_DATA2_VI0_B2, GPIO_FN_VI0_DATA3_VI0_B3, - GPIO_FN_VI0_DATA4_VI0_B4, - GPIO_FN_VI0_DATA5_VI0_B5, 
GPIO_FN_VI0_DATA6_VI0_B6, - GPIO_FN_ARM_TRACEDATA_0, GPIO_FN_VI0_DATA7_VI0_B7, - GPIO_FN_ARM_TRACEDATA_1, GPIO_FN_VI0_G0, - GPIO_FN_SSI_SCK78_C, GPIO_FN_ARM_TRACEDATA_2, - GPIO_FN_VI0_G1, GPIO_FN_SSI_WS78_C, - GPIO_FN_ARM_TRACEDATA_3, GPIO_FN_VI0_G2, GPIO_FN_ETH_TXD1, - GPIO_FN_ARM_TRACEDATA_4, GPIO_FN_TS_SPSYNC0, - GPIO_FN_VI0_G3, GPIO_FN_ETH_CRS_DV, - GPIO_FN_ARM_TRACEDATA_5, GPIO_FN_TS_SDAT0, GPIO_FN_VI0_G4, - GPIO_FN_ETH_TX_EN, GPIO_FN_ARM_TRACEDATA_6, - GPIO_FN_VI0_G5, GPIO_FN_ETH_RX_ER, - GPIO_FN_ARM_TRACEDATA_7, GPIO_FN_VI0_G6, GPIO_FN_ETH_RXD0, - GPIO_FN_ARM_TRACEDATA_8, GPIO_FN_VI0_G7, - GPIO_FN_ETH_RXD1, GPIO_FN_ARM_TRACEDATA_9, - - /* IPSR10 */ - GPIO_FN_VI0_R0, GPIO_FN_SSI_SDATA7_C, GPIO_FN_DREQ1_B, - GPIO_FN_ARM_TRACEDATA_10, GPIO_FN_DREQ0_C, GPIO_FN_VI0_R1, - GPIO_FN_SSI_SDATA8_C, GPIO_FN_DACK1_B, GPIO_FN_ARM_TRACEDATA_11, - GPIO_FN_DACK0_C, GPIO_FN_DRACK0_C, GPIO_FN_VI0_R2, GPIO_FN_ETH_LINK, - GPIO_FN_ARM_TRACEDATA_12, - GPIO_FN_VI0_R3, GPIO_FN_ETH_MAGIC, - GPIO_FN_ARM_TRACEDATA_13, GPIO_FN_VI0_R4, GPIO_FN_ETH_REFCLK, - GPIO_FN_ARM_TRACEDATA_14, - GPIO_FN_MT1_CLK, GPIO_FN_TS_SCK0, GPIO_FN_VI0_R5, GPIO_FN_ETH_TXD0, - GPIO_FN_ARM_TRACEDATA_15, - GPIO_FN_MT1_D, GPIO_FN_TS_SDEN0, GPIO_FN_VI0_R6, GPIO_FN_ETH_MDC, - GPIO_FN_DREQ2_C, GPIO_FN_TRACECLK, - GPIO_FN_MT1_BEN, GPIO_FN_PWMFSW0_D, GPIO_FN_VI0_R7, GPIO_FN_ETH_MDIO, - GPIO_FN_DACK2_C, GPIO_FN_SCIF_CLK_D, - GPIO_FN_TRACECTL, GPIO_FN_MT1_PEN, GPIO_FN_VI1_CLK, GPIO_FN_SIM_D, - GPIO_FN_SDA3, GPIO_FN_VI1_HSYNC, GPIO_FN_VI3_CLK, GPIO_FN_SSI_SCK4, - GPIO_FN_GPS_SIGN_C, GPIO_FN_PWMFSW0_E, GPIO_FN_VI1_VSYNC, - GPIO_FN_AUDIO_CLKOUT_C, GPIO_FN_SSI_WS4, GPIO_FN_SIM_CLK, - GPIO_FN_GPS_MAG_C, GPIO_FN_SPV_TRST, GPIO_FN_SCL3, - - /* IPSR11 */ - GPIO_FN_VI1_DATA0_VI1_B0, GPIO_FN_SIM_RST, - GPIO_FN_SPV_TCK, GPIO_FN_ADICLK_B, GPIO_FN_VI1_DATA1_VI1_B1, - GPIO_FN_MT0_CLK, GPIO_FN_SPV_TMS, - GPIO_FN_ADICS_B_SAMP_B, GPIO_FN_VI1_DATA2_VI1_B2, - GPIO_FN_MT0_D, GPIO_FN_SPVTDI, GPIO_FN_ADIDATA_B, - GPIO_FN_VI1_DATA3_VI1_B3, GPIO_FN_MT0_BEN, - GPIO_FN_SPV_TDO, GPIO_FN_ADICHS0_B, GPIO_FN_VI1_DATA4_VI1_B4, - GPIO_FN_MT0_PEN, GPIO_FN_SPA_TRST, - GPIO_FN_ADICHS1_B, GPIO_FN_VI1_DATA5_VI1_B5, - GPIO_FN_MT0_SYNC, GPIO_FN_SPA_TCK, - GPIO_FN_ADICHS2_B, GPIO_FN_VI1_DATA6_VI1_B6, - GPIO_FN_MT0_VCXO, GPIO_FN_SPA_TMS, - GPIO_FN_VI1_DATA7_VI1_B7, GPIO_FN_MT0_PWM, - GPIO_FN_SPA_TDI, GPIO_FN_VI1_G0, GPIO_FN_VI3_DATA0, - GPIO_FN_TS_SCK1, GPIO_FN_DREQ2_B, - GPIO_FN_SPA_TDO, GPIO_FN_HCTS0_B, GPIO_FN_VI1_G1, GPIO_FN_VI3_DATA1, - GPIO_FN_SSI_SCK1, GPIO_FN_TS_SDEN1, GPIO_FN_DACK2_B, - GPIO_FN_HRTS0_B, - - /* IPSR12 */ - GPIO_FN_VI1_G2, GPIO_FN_VI3_DATA2, GPIO_FN_SSI_WS1, GPIO_FN_TS_SPSYNC1, - GPIO_FN_HSCK0_B, GPIO_FN_VI1_G3, GPIO_FN_VI3_DATA3, - GPIO_FN_SSI_SCK2, GPIO_FN_TS_SDAT1, GPIO_FN_SCL1_C, GPIO_FN_HTX0_B, - GPIO_FN_VI1_G4, GPIO_FN_VI3_DATA4, GPIO_FN_SSI_WS2, GPIO_FN_SDA1_C, - GPIO_FN_SIM_RST_B, GPIO_FN_HRX0_B, GPIO_FN_VI1_G5, GPIO_FN_VI3_DATA5, - GPIO_FN_GPS_CLK, GPIO_FN_FSE, GPIO_FN_SIM_D_B, - GPIO_FN_VI1_G6, GPIO_FN_VI3_DATA6, GPIO_FN_GPS_SIGN, GPIO_FN_FRB, - GPIO_FN_SIM_CLK_B, GPIO_FN_VI1_G7, GPIO_FN_VI3_DATA7, - GPIO_FN_GPS_MAG, GPIO_FN_FCE, -}; - struct platform_device; struct r8a7779_pm_ch { -- GitLab From 633b076464da52b3c7bf0f62932fbfc0ea23d8b3 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 21 Jan 2013 17:01:25 +0900 Subject: [PATCH 0324/3163] slub: correct to calculate num of acquired objects in get_partial_node() There is a subtle bug when calculating a number of acquired objects. 
Currently, we calculate "available = page->objects - page->inuse" after acquire_slab() is called in get_partial_node(). In acquire_slab() with mode = 1, we always set new.inuse = page->objects. So,

  acquire_slab(s, n, page, object == NULL);

  if (!object) {
	c->page = page;
	stat(s, ALLOC_FROM_PARTIAL);
	object = t;
	available = page->objects - page->inuse;

	!!! available is always 0 !!!
  ...

Therefore, "available > s->cpu_partial / 2" is always false and we always go to the second iteration. This patch corrects the problem. With the fix in place, the return value of put_cpu_partial() is no longer needed, so remove it.

Reviewed-by: Wanpeng Li
Acked-by: Christoph Lameter
Signed-off-by: Joonsoo Kim
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index aa0728daf8bb..8f73593d4f21 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1493,7 +1493,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
  */
 static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page,
-		int mode)
+		int mode, int *objects)
 {
	void *freelist;
	unsigned long counters;
@@ -1507,6 +1507,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
	freelist = page->freelist;
	counters = page->counters;
	new.counters = counters;
+	*objects = new.objects - new.inuse;
	if (mode) {
		new.inuse = page->objects;
		new.freelist = NULL;
@@ -1528,7 +1529,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
	return freelist;
 }

-static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
 static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);

 /*
@@ -1539,6 +1540,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 {
	struct page *page, *page2;
	void *object = NULL;
+	int available = 0;
+	int objects;

	/*
	 * Racy check. If we mistakenly see no partial slabs then we
	spin_lock(&n->list_lock);
	list_for_each_entry_safe(page, page2, &n->partial, lru) {
		void *t;
-		int available;

		if (!pfmemalloc_match(page, flags))
			continue;

-		t = acquire_slab(s, n, page, object == NULL);
+		t = acquire_slab(s, n, page, object == NULL, &objects);
		if (!t)
			break;

+		available += objects;
		if (!object) {
			c->page = page;
			stat(s, ALLOC_FROM_PARTIAL);
			object = t;
-			available = page->objects - page->inuse;
		} else {
-			available = put_cpu_partial(s, page, 0);
+			put_cpu_partial(s, page, 0);
			stat(s, CPU_PARTIAL_NODE);
		}
		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
@@ -1946,7 +1948,7 @@ static void unfreeze_partials(struct kmem_cache *s,
  * If we did not find a slot then simply move all the partials to the
  * per node partial list.
  */
-static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 {
	struct page *oldpage;
	int pages;
@@ -1984,7 +1986,6 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
		page->next = oldpage;
	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
								!= oldpage);
-	return pobjects;
 }

 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
-- GitLab


From 338b2642290ef3193229ece8cfc776ac4fe8869d Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Mon, 21 Jan 2013 17:01:27 +0900
Subject: [PATCH 0325/3163] slub: add 'likely' macro to inc_slabs_node()

After the boot phase, 'n' always exists, so add the 'likely' macro to help the compiler.

Acked-by: Christoph Lameter
Signed-off-by: Joonsoo Kim
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 8f73593d4f21..21b3f004f614 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1005,7 +1005,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
	 * dilemma by deferring the increment of the count during
	 * bootstrap (see early_kmem_cache_node_alloc).
	 */
-	if (n) {
+	if (likely(n)) {
		atomic_long_inc(&n->nr_slabs);
		atomic_long_add(objects, &n->total_objects);
	}
-- GitLab


From 563861cd633ae52932843477bb6ca3f1c9e2f78b Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Sat, 30 Mar 2013 20:43:22 +0800
Subject: [PATCH 0326/3163] pwm: spear: Fix checking return value of clk_enable() and clk_prepare()

The logic that checks the return values of clk_enable() and clk_prepare() is reversed; fix it.

Signed-off-by: Axel Lin
Cc: stable@vger.kernel.org
Acked-by: Viresh Kumar
Signed-off-by: Thierry Reding
---
 drivers/pwm/pwm-spear.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c
index 69a2d9eb34db..3223b57e8f9c 100644
--- a/drivers/pwm/pwm-spear.c
+++ b/drivers/pwm/pwm-spear.c
@@ -143,7 +143,7 @@ static int spear_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
	u32 val;

	rc = clk_enable(pc->clk);
-	if (!rc)
+	if (rc)
		return rc;

	val = spear_pwm_readl(pc, pwm->hwpwm, PWMCR);
@@ -209,12 +209,12 @@ static int spear_pwm_probe(struct platform_device *pdev)
	pc->chip.npwm = NUM_PWM;

	ret = clk_prepare(pc->clk);
-	if (!ret)
+	if (ret)
		return ret;

	if (of_device_is_compatible(np, "st,spear1340-pwm")) {
		ret = clk_enable(pc->clk);
-		if (!ret) {
+		if (ret) {
			clk_unprepare(pc->clk);
			return ret;
		}
-- GitLab


From fa0abee9b89e3a0bab207823bd16372de53c3896 Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Sun, 31 Mar 2013 11:14:02 +0800
Subject: [PATCH 0327/3163] pwm: ab8500: Add .owner to struct pwm_ops

Add missing .owner of struct pwm_ops. This prevents the module from being removed from underneath its users.
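As an aside (not part of the patch): the PWM core pins the provider module while a channel is in use by taking a reference on the ops' owner. A minimal sketch of the idea, assuming the PWM core of this era; the function name and body here are illustrative only:

    /* Illustrative sketch: how a core can use pwm_ops.owner. */
    static int example_pwm_request(struct pwm_device *pwm)
    {
            /* Fails if the provider module is being unloaded. */
            if (!try_module_get(pwm->chip->ops->owner))
                    return -ENODEV;
            return 0;
    }

Without .owner set, try_module_get() has nothing to pin, so the module can be unloaded while a consumer still holds one of its PWM devices.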
Signed-off-by: Axel Lin Signed-off-by: Thierry Reding --- drivers/pwm/pwm-ab8500.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index 3beb2b52bd2e..1d07a6f99375 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -88,6 +88,7 @@ static const struct pwm_ops ab8500_pwm_ops = { .config = ab8500_pwm_config, .enable = ab8500_pwm_enable, .disable = ab8500_pwm_disable, + .owner = THIS_MODULE, }; static int ab8500_pwm_probe(struct platform_device *pdev) -- GitLab From 83c80dc5358270a665666ea5b9ddd24351d86354 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 31 Mar 2013 11:15:15 +0800 Subject: [PATCH 0328/3163] pwm: atmel-tcb: Add .owner to struct pwm_ops Add missing .owner of struct pwm_ops. This prevents the module from being removed from underneath its users. Signed-off-by: Axel Lin Signed-off-by: Thierry Reding --- drivers/pwm/pwm-atmel-tcb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index 16cb53092857..0a7b6582edb1 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -358,6 +358,7 @@ static const struct pwm_ops atmel_tcb_pwm_ops = { .set_polarity = atmel_tcb_pwm_set_polarity, .enable = atmel_tcb_pwm_enable, .disable = atmel_tcb_pwm_disable, + .owner = THIS_MODULE, }; static int atmel_tcb_pwm_probe(struct platform_device *pdev) -- GitLab From 7fa25314d534b9449d1705722eac412cad5c9042 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 31 Mar 2013 11:16:14 +0800 Subject: [PATCH 0329/3163] pwm: twl-led: Add .owner to struct pwm_ops Add missing .owner of struct pwm_ops. This prevents the module from being removed from underneath its users. Signed-off-by: Axel Lin Acked-by: Peter Ujfalusi Signed-off-by: Thierry Reding --- drivers/pwm/pwm-twl-led.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index 83e25d45d640..f912e87aed88 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -271,6 +271,7 @@ static const struct pwm_ops twl4030_pwmled_ops = { .enable = twl4030_pwmled_enable, .disable = twl4030_pwmled_disable, .config = twl4030_pwmled_config, + .owner = THIS_MODULE, }; static const struct pwm_ops twl6030_pwmled_ops = { @@ -279,6 +280,7 @@ static const struct pwm_ops twl6030_pwmled_ops = { .config = twl6030_pwmled_config, .request = twl6030_pwmled_request, .free = twl6030_pwmled_free, + .owner = THIS_MODULE, }; static int twl_pwmled_probe(struct platform_device *pdev) -- GitLab From d5714e8b9de5d3a82347fe37a9038373c44afce0 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 31 Mar 2013 11:17:05 +0800 Subject: [PATCH 0330/3163] pwm: twl: Add .owner to struct pwm_ops Add missing .owner of struct pwm_ops. This prevents the module from being removed from underneath its users. 
Signed-off-by: Axel Lin
Acked-by: Peter Ujfalusi
Signed-off-by: Thierry Reding
---
 drivers/pwm/pwm-twl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
index bf3fda294223..ee7fa5ddba5d 100644
--- a/drivers/pwm/pwm-twl.c
+++ b/drivers/pwm/pwm-twl.c
@@ -287,12 +287,14 @@ static const struct pwm_ops twl4030_pwm_ops = {
	.disable = twl4030_pwm_disable,
	.request = twl4030_pwm_request,
	.free = twl4030_pwm_free,
+	.owner = THIS_MODULE,
 };

 static const struct pwm_ops twl6030_pwm_ops = {
	.config = twl_pwm_config,
	.enable = twl6030_pwm_enable,
	.disable = twl6030_pwm_disable,
+	.owner = THIS_MODULE,
 };

 static int twl_pwm_probe(struct platform_device *pdev)
-- GitLab


From 72da70e77f885ec9a79af9c13ff1c79b6136b75f Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Sun, 31 Mar 2013 22:59:47 +0800
Subject: [PATCH 0331/3163] pwm: imx: Remove enabled field from struct imx_chip

We can test the PWMF_ENABLED bit to know whether the PWM is enabled, so remove the enabled field from struct imx_chip.

Signed-off-by: Axel Lin
Acked-by: Sascha Hauer
Signed-off-by: Thierry Reding
---
 drivers/pwm/pwm-imx.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c
index 3f5677b7690e..ec287989eafc 100644
--- a/drivers/pwm/pwm-imx.c
+++ b/drivers/pwm/pwm-imx.c
@@ -43,7 +43,6 @@ struct imx_chip {
	struct clk *clk_per;
	struct clk *clk_ipg;

-	int enabled;
	void __iomem *mmio_base;

	struct pwm_chip chip;
@@ -135,7 +134,7 @@ static int imx_pwm_config_v2(struct pwm_chip *chip,
		MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
		MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH;

-	if (imx->enabled)
+	if (test_bit(PWMF_ENABLED, &pwm->flags))
		cr |= MX3_PWMCR_EN;

	writel(cr, imx->mmio_base + MX3_PWMCR);
@@ -186,8 +185,6 @@ static int imx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)

	imx->set_enable(chip, true);

-	imx->enabled = 1;
-
	return 0;
 }

@@ -198,7 +195,6 @@ static void imx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
	imx->set_enable(chip, false);

	clk_disable_unprepare(imx->clk_per);
-	imx->enabled = 0;
 }

 static struct pwm_ops imx_pwm_ops = {
-- GitLab


From b014a30c5991e67aa90b6ff9bd4ec16435bbcefd Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Sun, 31 Mar 2013 23:04:31 +0800
Subject: [PATCH 0332/3163] pwm: pxa: Remove clk_enabled field from struct pxa_pwm_chip

clk_enable()/clk_disable() maintain an enable_count, and clk_prepare()/clk_unprepare() maintain a prepare_count. These APIs prepare/enable the hardware when the first user calls them, and disable/unprepare it when the corresponding counter reaches 0. Thus we don't need to maintain a clk_enabled counter here.
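To illustrate the counting behaviour described above, a short sketch (assuming an already-acquired struct clk *clk; not part of the patch):

    int ret;

    ret = clk_prepare_enable(clk);      /* count 0 -> 1: clock switched on */
    if (ret)
            return ret;
    ret = clk_prepare_enable(clk);      /* count 1 -> 2: no hardware change */
    if (ret) {
            clk_disable_unprepare(clk); /* balance the first enable */
            return ret;
    }
    clk_disable_unprepare(clk);         /* count 2 -> 1: clock keeps running */
    clk_disable_unprepare(clk);         /* count 1 -> 0: clock gated again */

Balanced calls are all a driver needs; a private counter only duplicates the clk core's bookkeeping.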
Signed-off-by: Axel Lin Acked-by: Eric Miao Signed-off-by: Thierry Reding --- drivers/pwm/pwm-pxa.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c index 20370e61de5a..b78988255aee 100644 --- a/drivers/pwm/pwm-pxa.c +++ b/drivers/pwm/pwm-pxa.c @@ -48,7 +48,6 @@ struct pxa_pwm_chip { struct device *dev; struct clk *clk; - int clk_enabled; void __iomem *mmio_base; }; @@ -108,24 +107,15 @@ static int pxa_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, static int pxa_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct pxa_pwm_chip *pc = to_pxa_pwm_chip(chip); - int rc = 0; - if (!pc->clk_enabled) { - rc = clk_prepare_enable(pc->clk); - if (!rc) - pc->clk_enabled++; - } - return rc; + return clk_prepare_enable(pc->clk); } static void pxa_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct pxa_pwm_chip *pc = to_pxa_pwm_chip(chip); - if (pc->clk_enabled) { - clk_disable_unprepare(pc->clk); - pc->clk_enabled--; - } + clk_disable_unprepare(pc->clk); } static struct pwm_ops pxa_pwm_ops = { @@ -152,8 +142,6 @@ static int pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->clk)) return PTR_ERR(pwm->clk); - pwm->clk_enabled = 0; - pwm->chip.dev = &pdev->dev; pwm->chip.ops = &pxa_pwm_ops; pwm->chip.base = -1; -- GitLab From c8e4df3109646762f21c6e288187ec395fdf67be Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 31 Mar 2013 23:07:30 +0800 Subject: [PATCH 0333/3163] pwm: twl: Return proper error if twl6030_pwm_enable() fails Return proper error instead of 0 if twl6030_pwm_enable() fails. Signed-off-by: Axel Lin Acked-by: Peter Ujfalusi Signed-off-by: Thierry Reding --- drivers/pwm/pwm-twl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index ee7fa5ddba5d..2782001ba183 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -248,7 +248,7 @@ static int twl6030_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) twl->twl6030_toggle3 = val; out: mutex_unlock(&twl->mutex); - return 0; + return ret; } static void twl6030_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) -- GitLab From 4e61573df893abcb7b647fc4f3f4b60819704883 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 1 Apr 2013 10:48:13 +0800 Subject: [PATCH 0334/3163] pwm: mxs: Remove unused *dev from struct mxs_pwm_chip Signed-off-by: Axel Lin Acked-by: Shawn Guo Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mxs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index a53d3094b75a..23d51806aeaa 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -38,7 +38,6 @@ struct mxs_pwm_chip { struct pwm_chip chip; - struct device *dev; struct clk *clk; void __iomem *base; }; @@ -166,7 +165,6 @@ static int mxs_pwm_probe(struct platform_device *pdev) return ret; } - mxs->dev = &pdev->dev; platform_set_drvdata(pdev, mxs); stmp_reset_block(mxs->base); -- GitLab From b3fef7f10010e7946b3db1a34d8e0208e4f10001 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 1 Apr 2013 12:29:48 +0800 Subject: [PATCH 0335/3163] pwm: spear: Remove unused *dev from struct spear_pwm_chip Signed-off-by: Axel Lin Acked-by: Shiraz Hashim Acked-by: Viresh Kumar Signed-off-by: Thierry Reding --- drivers/pwm/pwm-spear.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c index 3223b57e8f9c..9563599a117e 100644 --- a/drivers/pwm/pwm-spear.c +++ 
b/drivers/pwm/pwm-spear.c
@@ -49,13 +49,11 @@
  * @mmio_base: base address of pwm chip
  * @clk: pointer to clk structure of pwm chip
  * @chip: linux pwm chip representation
- * @dev: pointer to device structure of pwm chip
  */
 struct spear_pwm_chip {
	void __iomem *mmio_base;
	struct clk *clk;
	struct pwm_chip chip;
-	struct device *dev;
 };

 static inline struct spear_pwm_chip *to_spear_pwm_chip(struct pwm_chip *chip)
@@ -200,7 +198,6 @@ static int spear_pwm_probe(struct platform_device *pdev)
	if (IS_ERR(pc->clk))
		return PTR_ERR(pc->clk);

-	pc->dev = &pdev->dev;
	platform_set_drvdata(pdev, pc);

	pc->chip.dev = &pdev->dev;
-- GitLab


From 22976a5dada65a0f44918e91c5f1fb5041dc1d5e Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Mon, 1 Apr 2013 15:41:22 +0800
Subject: [PATCH 0336/3163] pwm: pxa: Remove PWM_ID_BASE macro

PWM_ID_BASE() is no longer used after the conversion to the PWM framework, so remove it. Also update the driver_data field of struct platform_device_id accordingly.

Signed-off-by: Axel Lin
Acked-by: Eric Miao
Signed-off-by: Thierry Reding
---
 drivers/pwm/pwm-pxa.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index b78988255aee..dee6ab552a0a 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -23,14 +23,13 @@
 #include

 #define HAS_SECONDARY_PWM 0x10
-#define PWM_ID_BASE(d) ((d) & 0xf)

 static const struct platform_device_id pwm_id_table[] = {
	/* PWM has_secondary_pwm? */
	{ "pxa25x-pwm", 0 },
-	{ "pxa27x-pwm", 0 | HAS_SECONDARY_PWM },
-	{ "pxa168-pwm", 1 },
-	{ "pxa910-pwm", 1 },
+	{ "pxa27x-pwm", HAS_SECONDARY_PWM },
+	{ "pxa168-pwm", 0 },
+	{ "pxa910-pwm", 0 },
	{ },
 };
 MODULE_DEVICE_TABLE(platform, pwm_id_table);
-- GitLab


From a46ef4d5723aca785e6b03b7972dd83e43e73977 Mon Sep 17 00:00:00 2001
From: Axel Lin
Date: Sun, 31 Mar 2013 23:01:11 +0800
Subject: [PATCH 0337/3163] pwm: puv3: Remove unused enabled field from struct puv3_pwm_chip

Signed-off-by: Axel Lin
Signed-off-by: Thierry Reding
---
 drivers/pwm/pwm-puv3.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pwm/pwm-puv3.c b/drivers/pwm/pwm-puv3.c
index db964e6ecf5c..d1eb499fb15d 100644
--- a/drivers/pwm/pwm-puv3.c
+++ b/drivers/pwm/pwm-puv3.c
@@ -27,7 +27,6 @@ struct puv3_pwm_chip {
	struct pwm_chip chip;
	void __iomem *base;
	struct clk *clk;
-	bool enabled;
 };

 static inline struct puv3_pwm_chip *to_puv3(struct pwm_chip *chip)
-- GitLab


From fa0d654c84c7705d90a2492b4611e1da7ccdf69c Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Tue, 2 Apr 2013 01:37:41 +0000
Subject: [PATCH 0338/3163] thermal: Add driver for Armada 370/XP SoC thermal management

This driver supports both Armada 370 and Armada XP SoC thermal management controllers.

Armada 370 has a register to check a valid temperature, whereas Armada XP does not. Each has a different initialization (i.e. calibration) function. The temperature conversion formula is the same for both.

The controllers present in the two SoCs have very similar feature sets, so it makes sense to have one driver supporting both of them.

Although this driver may present similarities to the Dove and Kirkwood thermal drivers, the exact differences and commonalities are not fully known. For this reason, support is given through a separate driver.
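For reference, the conversion implemented below in armada_get_temp() is linear in the raw sensor field; a worked example of the arithmetic (constants taken from the driver below; the reading of 200 is a made-up sample value):

    /* temp in milli-degrees Celsius; reg is the 9-bit raw sensor value */
    unsigned long reg = 200;        /* hypothetical raw reading */
    unsigned long temp = (3153000000UL - (10000000UL * reg)) / 13825;
    /* = (3153000000 - 2000000000) / 13825 = 83399, about 83.4 degrees C */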
Signed-off-by: Ezequiel Garcia Signed-off-by: Zhang Rui --- .../bindings/thermal/armada-thermal.txt | 22 ++ drivers/thermal/Kconfig | 8 + drivers/thermal/Makefile | 1 + drivers/thermal/armada_thermal.c | 232 ++++++++++++++++++ 4 files changed, 263 insertions(+) create mode 100644 Documentation/devicetree/bindings/thermal/armada-thermal.txt create mode 100644 drivers/thermal/armada_thermal.c diff --git a/Documentation/devicetree/bindings/thermal/armada-thermal.txt b/Documentation/devicetree/bindings/thermal/armada-thermal.txt new file mode 100644 index 000000000000..fff93d5f92de --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/armada-thermal.txt @@ -0,0 +1,22 @@ +* Marvell Armada 370/XP thermal management + +Required properties: + +- compatible: Should be set to one of the following: + marvell,armada370-thermal + marvell,armadaxp-thermal + +- reg: Device's register space. + Two entries are expected, see the examples below. + The first one is required for the sensor register; + the second one is required for the control register + to be used for sensor initialization (a.k.a. calibration). + +Example: + + thermal@d0018300 { + compatible = "marvell,armada370-thermal"; + reg = <0xd0018300 0x4 + 0xd0018304 0x4>; + status = "okay"; + }; diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index a764f165b589..9eddf744c94f 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -144,6 +144,14 @@ config DB8500_THERMAL created. Cooling devices can be bound to the trip points to cool this thermal zone if trip points reached. +config ARMADA_THERMAL + tristate "Armada 370/XP thermal management" + depends on ARCH_MVEBU + depends on OF + help + Enable this option if you want to have support for thermal management + controller present in Armada 370 and Armada XP SoC. + config DB8500_CPUFREQ_COOLING tristate "DB8500 cpufreq cooling" depends on ARCH_U8500 diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index d3a2b38c31e8..7f6509a97c14 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o +obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c new file mode 100644 index 000000000000..5b4d75fd7b49 --- /dev/null +++ b/drivers/thermal/armada_thermal.c @@ -0,0 +1,232 @@ +/* + * Marvell Armada 370/XP thermal sensor driver + * + * Copyright (C) 2013 Marvell + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define THERMAL_VALID_OFFSET 9 +#define THERMAL_VALID_MASK 0x1 +#define THERMAL_TEMP_OFFSET 10 +#define THERMAL_TEMP_MASK 0x1ff + +/* Thermal Manager Control and Status Register */ +#define PMU_TDC0_SW_RST_MASK (0x1 << 1) +#define PMU_TM_DISABLE_OFFS 0 +#define PMU_TM_DISABLE_MASK (0x1 << PMU_TM_DISABLE_OFFS) +#define PMU_TDC0_REF_CAL_CNT_OFFS 11 +#define PMU_TDC0_REF_CAL_CNT_MASK (0x1ff << PMU_TDC0_REF_CAL_CNT_OFFS) +#define PMU_TDC0_OTF_CAL_MASK (0x1 << 30) +#define PMU_TDC0_START_CAL_MASK (0x1 << 25) + +struct armada_thermal_ops; + +/* Marvell EBU Thermal Sensor Dev Structure */ +struct armada_thermal_priv { + void __iomem *sensor; + void __iomem *control; + struct armada_thermal_ops *ops; +}; + +struct armada_thermal_ops { + /* Initialize the sensor */ + void (*init_sensor)(struct armada_thermal_priv *); + + /* Test for a valid sensor value (optional) */ + bool (*is_valid)(struct armada_thermal_priv *); +}; + +static void armadaxp_init_sensor(struct armada_thermal_priv *priv) +{ + unsigned long reg; + + reg = readl_relaxed(priv->control); + reg |= PMU_TDC0_OTF_CAL_MASK; + writel(reg, priv->control); + + /* Reference calibration value */ + reg &= ~PMU_TDC0_REF_CAL_CNT_MASK; + reg |= (0xf1 << PMU_TDC0_REF_CAL_CNT_OFFS); + writel(reg, priv->control); + + /* Reset the sensor */ + reg = readl_relaxed(priv->control); + writel((reg | PMU_TDC0_SW_RST_MASK), priv->control); + + writel(reg, priv->control); + + /* Enable the sensor */ + reg = readl_relaxed(priv->sensor); + reg &= ~PMU_TM_DISABLE_MASK; + writel(reg, priv->sensor); +} + +static void armada370_init_sensor(struct armada_thermal_priv *priv) +{ + unsigned long reg; + + reg = readl_relaxed(priv->control); + reg |= PMU_TDC0_OTF_CAL_MASK; + writel(reg, priv->control); + + /* Reference calibration value */ + reg &= ~PMU_TDC0_REF_CAL_CNT_MASK; + reg |= (0xf1 << PMU_TDC0_REF_CAL_CNT_OFFS); + writel(reg, priv->control); + + reg &= ~PMU_TDC0_START_CAL_MASK; + writel(reg, priv->control); + + mdelay(10); +} + +static bool armada_is_valid(struct armada_thermal_priv *priv) +{ + unsigned long reg = readl_relaxed(priv->sensor); + + return (reg >> THERMAL_VALID_OFFSET) & THERMAL_VALID_MASK; +} + +static int armada_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + struct armada_thermal_priv *priv = thermal->devdata; + unsigned long reg; + + /* Valid check */ + if (priv->ops->is_valid && !priv->ops->is_valid(priv)) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + reg = readl_relaxed(priv->sensor); + reg = (reg >> THERMAL_TEMP_OFFSET) & THERMAL_TEMP_MASK; + *temp = (3153000000UL - (10000000UL*reg)) / 13825; + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = armada_get_temp, +}; + +static const struct armada_thermal_ops armadaxp_ops = { + .init_sensor = armadaxp_init_sensor, +}; + +static const struct armada_thermal_ops armada370_ops = { + .is_valid = armada_is_valid, + .init_sensor = armada370_init_sensor, +}; + +static const struct of_device_id armada_thermal_id_table[] = { + { + .compatible = "marvell,armadaxp-thermal", + .data = &armadaxp_ops, + }, + { + .compatible = "marvell,armada370-thermal", + .data = &armada370_ops, + }, + { + /* sentinel */ + }, +}; +MODULE_DEVICE_TABLE(of, armada_thermal_id_table); + +static int armada_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal; + const struct of_device_id 
*match;
+	struct armada_thermal_priv *priv;
+	struct resource *res;
+
+	match = of_match_device(armada_thermal_id_table, &pdev->dev);
+	if (!match)
+		return -ENODEV;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get platform resource\n");
+		return -ENODEV;
+	}
+
+	priv->sensor = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->sensor))
+		return PTR_ERR(priv->sensor);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get platform resource\n");
+		return -ENODEV;
+	}
+
+	priv->control = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->control))
+		return PTR_ERR(priv->control);
+
+	priv->ops = (struct armada_thermal_ops *)match->data;
+	priv->ops->init_sensor(priv);
+
+	thermal = thermal_zone_device_register("armada_thermal", 0, 0,
+					       priv, &ops, NULL, 0, 0);
+	if (IS_ERR(thermal)) {
+		dev_err(&pdev->dev,
+			"Failed to register thermal zone device\n");
+		return PTR_ERR(thermal);
+	}
+
+	platform_set_drvdata(pdev, thermal);
+
+	return 0;
+}
+
+static int armada_thermal_exit(struct platform_device *pdev)
+{
+	struct thermal_zone_device *armada_thermal =
+		platform_get_drvdata(pdev);
+
+	thermal_zone_device_unregister(armada_thermal);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver armada_thermal_driver = {
+	.probe = armada_thermal_probe,
+	.remove = armada_thermal_exit,
+	.driver = {
+		.name = "armada_thermal",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(armada_thermal_id_table),
+	},
+};
+
+module_platform_driver(armada_thermal_driver);
+
+MODULE_AUTHOR("Ezequiel Garcia ");
+MODULE_DESCRIPTION("Armada 370/XP thermal driver");
+MODULE_LICENSE("GPL v2");
-- GitLab


From 2cef4deb4018c02fb3cd08f76c8a988f7ddee480 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Mon, 25 Mar 2013 17:22:48 +0100
Subject: [PATCH 0339/3163] KVM: s390: Don't do a gmap update on minor memslot changes

Some memslot updates don't affect the gmap implementation, e.g. setting/unsetting dirty tracking. Since a gmap update will cause tlb flushes and segment table invalidations we want to avoid that.

Signed-off-by: Christian Borntraeger
Signed-off-by: Cornelia Huck
Signed-off-by: Gleb Natapov
---
 arch/s390/kvm/kvm-s390.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 33161b4a8280..f241e3315ebb 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1007,6 +1007,16 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 {
	int rc;

+	/* If the basics of the memslot do not change, we do not want
+	 * to update the gmap. Every update causes several unnecessary
+	 * segment translation exceptions. This is usually handled just
+	 * fine by the normal fault handler + gmap, but it will also
+	 * cause faults on the prefix page of running guest CPUs.
+	 */
+	if (old->userspace_addr == mem->userspace_addr &&
+	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
+	    old->npages * PAGE_SIZE == mem->memory_size)
+		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
-- GitLab


From d21683ea1f1b03823928a98b6380332b9385e3a7 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 25 Mar 2013 17:22:49 +0100
Subject: [PATCH 0340/3163] KVM: s390: fix 24 bit psw handling in lpsw/lpswe handler

When checking for validity, the lpsw/lpswe handlers allowed only the lower 20 bits instead of 24 bits to have a non-zero value, thereby handling valid psws as invalid ones. Fix the 24 bit psw mask.

Signed-off-by: Heiko Carstens
Acked-by: Cornelia Huck
Signed-off-by: Cornelia Huck
Signed-off-by: Gleb Natapov
---
 arch/s390/kvm/priv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 7db2ad076f31..7b397b37d11a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -255,7 +255,7 @@ static void handle_new_psw(struct kvm_vcpu *vcpu)
 #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
 #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
-#define PSW_ADDR_24 0x00000000000fffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
 #define PSW_ADDR_31 0x000000007fffffffUL

 int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
-- GitLab


From ace5058763b72d128efcbe27969e89226c9c593a Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 25 Mar 2013 17:22:50 +0100
Subject: [PATCH 0341/3163] KVM: s390: fix psw conversion in lpsw handler

When converting a 64 bit psw to a 128 bit psw the addressing mode bit of the "addr" part of the 64 bit psw must be moved to the basic addressing mode bit of the "mask" part of the 128 bit psw. In addition the addressing mode bit must be cleared when moved to the "addr" part of the 128 bit psw. Otherwise an invalid psw would be generated if the original psw was in the 31 bit addressing mode.

Signed-off-by: Heiko Carstens
Acked-by: Cornelia Huck
Signed-off-by: Cornelia Huck
Signed-off-by: Gleb Natapov
---
 arch/s390/kvm/priv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 7b397b37d11a..844a2b986112 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -286,7 +286,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)

	vcpu->arch.sie_block->gpsw.mask =
		(new_psw.mask & ~PSW32_MASK_BASE) << 32;
-	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
+	vcpu->arch.sie_block->gpsw.mask |= new_psw.addr & PSW32_ADDR_AMODE;
+	vcpu->arch.sie_block->gpsw.addr = new_psw.addr & ~PSW32_ADDR_AMODE;

	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
-- GitLab


From 6fd0fcc93b1eaf82911782de5c7aa35c174bf620 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 25 Mar 2013 17:22:51 +0100
Subject: [PATCH 0342/3163] KVM: s390: fix return code handling in lpsw/lpswe handlers

kvm_s390_inject_program_int() may return with a non-zero return value, in case of an error (out of memory). Report that to the calling functions instead of ignoring the error case.
Signed-off-by: Heiko Carstens Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- arch/s390/kvm/priv.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 844a2b986112..9d32c56fb02c 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -269,20 +269,14 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) addr = kvm_s390_get_base_disp_s(vcpu); - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - if (!(new_psw.mask & PSW32_MASK_BASE)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (!(new_psw.mask & PSW32_MASK_BASE)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); vcpu->arch.sie_block->gpsw.mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; @@ -293,13 +287,10 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + PSW_MASK_EA)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } @@ -310,15 +301,11 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) addr = kvm_s390_get_base_disp_s(vcpu); - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); vcpu->arch.sie_block->gpsw.mask = new_psw.mask; vcpu->arch.sie_block->gpsw.addr = new_psw.addr; @@ -330,13 +317,10 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + PSW_MASK_EA)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } -- GitLab From 3736b874a39a1df2a94186c357aabeb6a7d7d4f6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Mar 2013 17:22:52 +0100 Subject: [PATCH 0343/3163] KVM: s390: make if statements in lpsw/lpswe handlers readable Being unable to parse the 5- and 8-line if statements I had to split them to be able to make any sense of them and verify that they match the architecture. So change the code since I guess that other people will also have a hard time parsing such long conditional statements with line breaks. Introduce a common is_valid_psw() function which does all the checks needed. In case of lpsw (64 bit psw -> 128 bit psw conversion) it will do some not needed additional checks, since a couple of bits can't be set anyway, but that doesn't hurt. 
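For readers checking the logic against the architecture, the addressing-mode encoding that is_valid_psw() tests can be summarized as follows (EA/BA are the extended/basic addressing-mode bits of the PSW mask; this table is added for illustration and is not part of the patch):

    /*
     * EA BA   mode      valid instruction address
     *  0  0   24-bit    addr <= 0x00ffffff (PSW_ADDR_24)
     *  0  1   31-bit    addr <= 0x7fffffff (PSW_ADDR_31)
     *  1  1   64-bit    any address
     *  1  0   invalid   always a specification exception
     */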
Signed-off-by: Heiko Carstens Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- arch/s390/kvm/priv.c | 58 ++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9d32c56fb02c..05d186c21eca 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -258,68 +258,58 @@ static void handle_new_psw(struct kvm_vcpu *vcpu) #define PSW_ADDR_24 0x0000000000ffffffUL #define PSW_ADDR_31 0x000000007fffffffUL +static int is_valid_psw(psw_t *psw) { + if (psw->mask & PSW_MASK_UNASSIGNED) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { + if (psw->addr & ~PSW_ADDR_31) + return 0; + } + if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) + return 0; + return 1; +} + int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) { - u64 addr; + psw_t *gpsw = &vcpu->arch.sie_block->gpsw; psw_compat_t new_psw; + u64 addr; - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OPERATION); - addr = kvm_s390_get_base_disp_s(vcpu); - if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - if (!(new_psw.mask & PSW32_MASK_BASE)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - vcpu->arch.sie_block->gpsw.mask = - (new_psw.mask & ~PSW32_MASK_BASE) << 32; - vcpu->arch.sie_block->gpsw.mask |= new_psw.addr & PSW32_ADDR_AMODE; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr & ~PSW32_ADDR_AMODE; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) + gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; + gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; + gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; + if (!is_valid_psw(gpsw)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - handle_new_psw(vcpu); return 0; } static int handle_lpswe(struct kvm_vcpu *vcpu) { - u64 addr; psw_t new_psw; + u64 addr; addr = kvm_s390_get_base_disp_s(vcpu); - if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - - vcpu->arch.sie_block->gpsw.mask = new_psw.mask; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_BA) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - handle_new_psw(vcpu); return 0; } -- GitLab From db4a29cb6ac7b2fda505923bdbc58fc35a719f62 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Mar 2013 17:22:53 +0100 Subject: [PATCH 0344/3163] KVM: s390: fix and enforce return code handling for irq injections 
kvm_s390_inject_program_int() and friends may fail if no memory is available. This must be reported to the calling functions, so that this gets passed down to user space which should fix the situation. Alternatively we end up with guest state corruption. So fix this and enforce return value checking by adding a __must_check annotation to all of these function prototypes. Signed-off-by: Heiko Carstens Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- arch/s390/kvm/intercept.c | 12 ++---- arch/s390/kvm/kvm-s390.c | 3 +- arch/s390/kvm/kvm-s390.h | 12 +++--- arch/s390/kvm/priv.c | 83 ++++++++++++--------------------------- 4 files changed, 37 insertions(+), 73 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index c6ba4dfd7f1e..b7d1b2edeeb3 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -45,10 +45,8 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) do { rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], (u64 __user *) useraddr); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); useraddr += 8; if (reg == reg3) break; @@ -79,10 +77,8 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { rc = get_guest(vcpu, val, (u32 __user *) useraddr); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; vcpu->arch.sie_block->gcr[reg] |= val; useraddr += 4; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f241e3315ebb..d05a59c1eea7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -633,8 +633,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } else { VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); trace_kvm_s390_sie_fault(vcpu); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - rc = 0; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4d89d64a8161..efc14f687265 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); +int __must_check kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 05d186c21eca..23a8370b1045 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -36,31 +36,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); 
/* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - if (get_guest(vcpu, address, (u32 __user *) operand2)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (get_guest(vcpu, address, (u32 __user *) operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); address = address & 0x7fffe000u; /* make sure that the new value is valid memory */ if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || - (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); kvm_s390_set_prefix(vcpu, address); VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 1, address); -out: return 0; } @@ -74,49 +67,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); address = vcpu->arch.sie_block->prefix; address = address & 0x7fffe000u; /* get the value */ - if (put_guest(vcpu, address, (u32 __user *)operand2)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, address, (u32 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 0, address); -out: return 0; } static int handle_store_cpu_address(struct kvm_vcpu *vcpu) { u64 useraddr; - int rc; vcpu->stat.instruction_stap++; useraddr = kvm_s390_get_base_disp_s(vcpu); - if (useraddr & 1) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (useraddr & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); trace_kvm_s390_handle_stap(vcpu, useraddr); -out: return 0; } @@ -135,10 +116,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu) int cc; addr = kvm_s390_get_base_disp_s(vcpu); - if (addr & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); cc = 0; inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); if (!inti) @@ -167,7 +146,6 @@ static int handle_tpi(struct kvm_vcpu *vcpu) /* Set condition code and we're done. 
*/ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; -out: return 0; } @@ -237,12 +215,9 @@ static int handle_stfl(struct kvm_vcpu *vcpu) rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); if (rc) - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - else { - VCPU_EVENT(vcpu, 5, "store facility list value %x", - facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); - } + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); + trace_kvm_s390_handle_stfl(vcpu, facility_list); return 0; } @@ -317,25 +292,18 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) static int handle_stidp(struct kvm_vcpu *vcpu) { u64 operand2; - int rc; vcpu->stat.instruction_stidp++; operand2 = kvm_s390_get_base_disp_s(vcpu); - if (operand2 & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2); - if (rc) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); -out: return 0; } @@ -377,6 +345,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; u64 operand2; unsigned long mem; + int rc = 0; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); @@ -412,7 +381,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); goto out_mem; } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); @@ -425,7 +394,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) out_fail: /* condition code 3 */ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; - return 0; + return rc; } static const intercept_handler_t b2_handlers[256] = { -- GitLab From c51f068c23c76a86d427260b8219430ee6f99516 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Mar 2013 17:22:54 +0100 Subject: [PATCH 0345/3163] KVM: s390: fix stsi exception handling In case of an exception the guest psw condition code should be left alone. 
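The "3ul << 44" manipulations in the diff below operate on the two condition-code bits of the guest PSW mask; the recurring pattern (mirroring the handle_tpi() hunk just above) is:

    /* Set the guest condition code to cc (0..3). */
    vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);      /* clear old cc */
    vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;  /* insert new cc */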
Signed-off-by: Heiko Carstens Acked-By: Cornelia Huck Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- arch/s390/kvm/priv.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 23a8370b1045..de1b1b6128e1 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -343,8 +343,8 @@ static int handle_stsi(struct kvm_vcpu *vcpu) int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + unsigned long mem = 0; u64 operand2; - unsigned long mem; int rc = 0; vcpu->stat.instruction_stsi++; @@ -364,36 +364,36 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 2: mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) - goto out_mem; + goto out_no_data; break; case 3: if (sel1 != 2 || sel2 != 2) - goto out_fail; + goto out_no_data; mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; default: - goto out_fail; + goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out_mem; + goto out_exception; } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->run->s.regs.gprs[0] = 0; return 0; -out_mem: - free_page(mem); -out_fail: +out_no_data: /* condition code 3 */ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; +out_exception: + free_page(mem); return rc; } -- GitLab From b13b5dc7c96d40ebdadbdb752a92ecde5a9f2914 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Mar 2013 17:22:55 +0100 Subject: [PATCH 0346/3163] KVM: s390: fix compile with !CONFIG_COMPAT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/s390/kvm/priv.c should include both linux/compat.h and asm/compat.h. Fixes this one: In file included from arch/s390/kvm/priv.c:23:0: arch/s390/include/asm/compat.h: In function ‘arch_compat_alloc_user_space’: arch/s390/include/asm/compat.h:258:2: error: implicit declaration of function ‘is_compat_task’ Signed-off-by: Heiko Carstens Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- arch/s390/kvm/priv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index de1b1b6128e1..6bbd7b5a0bbe 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- GitLab From 3188bf6b809ba5e7df7b9f000634f08e8abbb76a Mon Sep 17 00:00:00 2001 From: Nick Wang Date: Mon, 25 Mar 2013 17:22:56 +0100 Subject: [PATCH 0347/3163] KVM: s390: Change the virtual memory mapping location for virtio devices The current location for mapping virtio devices does not take into consideration the standby memory. This causes the failure of mapping standby memory since the location for the mapping is already taken by the virtio devices. To fix the problem, we move the location to beyond the end of standby memory. 
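The new mapping location in the diff below is derived from SCLP data; a sketch of the computation (sclp_get_rzm() and sclp_get_rnmax() are existing s390 SCLP helpers returning the storage-increment size and the maximum number of increments):

    /* Highest possible guest memory address, including standby memory. */
    unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax();
    /* The virtio device page is then mapped just above this address. */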
Signed-off-by: Nick Wang Reviewed-by: Christian Borntraeger Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- drivers/s390/kvm/kvm_virtio.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 6711e65764b5..2ea6165366b6 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -443,29 +443,30 @@ static int __init test_devices_support(unsigned long addr) } /* * Init function for virtio - * devices are in a single page above top of "normal" mem + * devices are in a single page above top of "normal" + standby mem */ static int __init kvm_devices_init(void) { int rc; + unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax(); if (!MACHINE_IS_KVM) return -ENODEV; - if (test_devices_support(real_memory_size) < 0) + if (test_devices_support(total_memory_size) < 0) return -ENODEV; - rc = vmem_add_mapping(real_memory_size, PAGE_SIZE); + rc = vmem_add_mapping(total_memory_size, PAGE_SIZE); if (rc) return rc; - kvm_devices = (void *) real_memory_size; + kvm_devices = (void *) total_memory_size; kvm_root = root_device_register("kvm_s390"); if (IS_ERR(kvm_root)) { rc = PTR_ERR(kvm_root); printk(KERN_ERR "Could not register kvm_s390 root device"); - vmem_remove_mapping(real_memory_size, PAGE_SIZE); + vmem_remove_mapping(total_memory_size, PAGE_SIZE); return rc; } -- GitLab From dd2887e7c36d0be986ef17a9dbec904e3e334566 Mon Sep 17 00:00:00 2001 From: Nick Wang Date: Mon, 25 Mar 2013 17:22:57 +0100 Subject: [PATCH 0348/3163] KVM: s390: Remove the sanity checks for kvm memory slot To model the standby memory with memory_region_add_subregion and friends, the guest would have one or more regions of ram. Remove the check allowing only one memory slot and the check requiring the real address of memory slot starts at zero. Signed-off-by: Nick Wang Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- arch/s390/kvm/kvm-s390.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d05a59c1eea7..b322ff15751d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -977,18 +977,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { - /* A few sanity checks. We can have exactly one memory slot which has - to start at guest virtual zero and which has to be located at a - page boundary in userland and which has to end at a page boundary. - The memory in userland is ok to be fragmented into various different - vmas. It is okay to mmap() and munmap() stuff in this slot after - doing this call at any time */ - - if (mem->slot) - return -EINVAL; - - if (mem->guest_phys_addr) - return -EINVAL; + /* A few sanity checks. We can have memory slots which have to be + located/ended at a segment boundary (1MB). The memory in userland is + ok to be fragmented into various different vmas. It is okay to mmap() + and munmap() stuff in this slot after doing this call at any time */ if (mem->userspace_addr & 0xffffful) return -EINVAL; -- GitLab From e1e2e605c2ad6791ce6346b22443ce611709fa65 Mon Sep 17 00:00:00 2001 From: Nick Wang Date: Mon, 25 Mar 2013 17:22:58 +0100 Subject: [PATCH 0349/3163] KVM: s390: Enable KVM_CAP_NR_MEMSLOTS on s390 Return KVM_USER_MEM_SLOTS in kvm_dev_ioctl_check_extension(). 
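Userspace discovers the new capability through KVM_CHECK_EXTENSION on the /dev/kvm system fd; a self-contained sketch (illustrative only, error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int slots;

	if (kvm < 0)
		return 1;
	/* returns 0 when the capability is absent, else the slot count */
	slots = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);
	printf("memory slots supported: %d\n", slots);
	return 0;
}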
Signed-off-by: Nick Wang Reviewed-by: Christian Borntraeger Signed-off-by: Cornelia Huck Signed-off-by: Gleb Natapov --- arch/s390/kvm/kvm-s390.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b322ff15751d..c1c7c683fa26 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -149,6 +149,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; -- GitLab From 1dc20828e674a781635286072bae909dc4e5c377 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 26 Mar 2013 06:08:10 +0000 Subject: [PATCH 0350/3163] thermal: rcar: tidyup registration failure case The rcar_thermal driver did not call rcar_thermal_irq_disable() in the registration failure path of _probe() or in _remove(), and _probe() returned without unregistering the thermal zones when registration failed. This patch fixes these issues. Signed-off-by: Kuninori Morimoto Signed-off-by: Zhang Rui --- drivers/thermal/rcar_thermal.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 2cc5b6115e3e..4d6095b9f9df 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -419,12 +419,15 @@ static int rcar_thermal_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) { dev_err(dev, "Could not allocate priv\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error_unregister; } priv->base = devm_ioremap_resource(dev, res); - if (IS_ERR(priv->base)) - return PTR_ERR(priv->base); + if (IS_ERR(priv->base)) { + ret = PTR_ERR(priv->base); + goto error_unregister; + } priv->common = common; priv->id = i; @@ -443,10 +446,10 @@ static int rcar_thermal_probe(struct platform_device *pdev) goto error_unregister; } - list_move_tail(&priv->list, &common->head); - if (rcar_has_irq_support(priv)) rcar_thermal_irq_enable(priv); + + list_move_tail(&priv->list, &common->head); } platform_set_drvdata(pdev, common); @@ -456,8 +459,11 @@ static int rcar_thermal_probe(struct platform_device *pdev) return 0; error_unregister: - rcar_thermal_for_each_priv(priv, common) + rcar_thermal_for_each_priv(priv, common) { thermal_zone_device_unregister(priv->zone); + if (rcar_has_irq_support(priv)) + rcar_thermal_irq_disable(priv); + } return ret; } @@ -467,8 +473,11 @@ static int rcar_thermal_remove(struct platform_device *pdev) { struct rcar_thermal_common *common = platform_get_drvdata(pdev); struct rcar_thermal_priv *priv; - rcar_thermal_for_each_priv(priv, common) + rcar_thermal_for_each_priv(priv, common) { thermal_zone_device_unregister(priv->zone); + if (rcar_has_irq_support(priv)) + rcar_thermal_irq_disable(priv); + } platform_set_drvdata(pdev, NULL); -- GitLab From 51d45d25948bdf7422958b92a2d91dc703b1a4cc Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 26 Mar 2013 06:08:52 +0000 Subject: [PATCH 0351/3163] thermal: rcar: add pm_runtime_xxx() support The current rcar_thermal driver did not manage its own power.
Without this patch, rcar_thermal doesn't work on APE6 board Signed-off-by: Kuninori Morimoto Signed-off-by: Zhang Rui --- drivers/thermal/rcar_thermal.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 4d6095b9f9df..8d7edd4c8228 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,9 @@ static int rcar_thermal_probe(struct platform_device *pdev) spin_lock_init(&common->lock); common->dev = dev; + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (irq) { int ret; @@ -465,12 +469,16 @@ static int rcar_thermal_probe(struct platform_device *pdev) rcar_thermal_irq_disable(priv); } + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + return ret; } static int rcar_thermal_remove(struct platform_device *pdev) { struct rcar_thermal_common *common = platform_get_drvdata(pdev); + struct device *dev = &pdev->dev; struct rcar_thermal_priv *priv; rcar_thermal_for_each_priv(priv, common) { @@ -481,6 +489,9 @@ static int rcar_thermal_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + return 0; } -- GitLab From bffd1f8ac87a798515a8aed5f64047b182e049f5 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 11 Feb 2013 03:54:23 +0000 Subject: [PATCH 0352/3163] thermal: exynos: Adapt to temperature emulation core thermal framework This removes the driver specific sysfs support of the temperature emulation and uses the newly added core thermal framework for thermal emulation. An exynos platform specific handler is added to support this. In this patch, the exynos senor(tmu) related code and exynos framework related (thermal zone, cooling devices) code are intentionally kept separate. So an emulated function pointer is passed from sensor to framework. This is beneficial in adding more sensor support using the same framework code which is an ongoing work. The goal is to finally split them totally. Even the existing read_temperature also follows the same execution method. Acked-by: Kukjin Kim Signed-off-by: Amit Daniel Kachhap Signed-off-by: Zhang Rui --- .../thermal/exynos_thermal_emulation | 8 +- drivers/thermal/Kconfig | 9 - drivers/thermal/exynos_thermal.c | 158 +++++++----------- 3 files changed, 67 insertions(+), 108 deletions(-) diff --git a/Documentation/thermal/exynos_thermal_emulation b/Documentation/thermal/exynos_thermal_emulation index b73bbfb697bb..36a3e79c1203 100644 --- a/Documentation/thermal/exynos_thermal_emulation +++ b/Documentation/thermal/exynos_thermal_emulation @@ -13,11 +13,11 @@ Thermal emulation mode supports software debug for TMU's operation. User can set manually with software code and TMU will read current temperature from user value not from sensor's value. -Enabling CONFIG_EXYNOS_THERMAL_EMUL option will make this support in available. -When it's enabled, sysfs node will be created under -/sys/bus/platform/devices/'exynos device name'/ with name of 'emulation'. +Enabling CONFIG_THERMAL_EMULATION option will make this support available. +When it's enabled, sysfs node will be created as +/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp. -The sysfs node, 'emulation', will contain value 0 for the initial state. When you input any +The sysfs node, 'emul_node', will contain value 0 for the initial state. 
When you input any temperature you want to update to sysfs node, it automatically enable emulation mode and current temperature will be changed into it. (Exynos also supports user changable delay time which would be used to delay of diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 9eddf744c94f..2a19120c32bd 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -117,15 +117,6 @@ config EXYNOS_THERMAL If you say yes here you get support for TMU (Thermal Management Unit) on SAMSUNG EXYNOS series of SoC. -config EXYNOS_THERMAL_EMUL - bool "EXYNOS TMU emulation mode support" - depends on EXYNOS_THERMAL - help - Exynos 4412 and 4414 and 5 series has emulation mode on TMU. - Enable this option will be make sysfs node in exynos thermal platform - device directory to support emulation mode. With emulation mode sysfs - node, you can manually input temperature to TMU for simulation purpose. - config DOVE_THERMAL tristate "Temperature sensor on Marvell Dove SoCs" depends on ARCH_DOVE diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index 541257888c3e..75bca0d6daf0 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -100,13 +100,13 @@ #define IDLE_INTERVAL 10000 #define MCELSIUS 1000 -#ifdef CONFIG_EXYNOS_THERMAL_EMUL +#ifdef CONFIG_THERMAL_EMULATION #define EXYNOS_EMUL_TIME 0x57F0 #define EXYNOS_EMUL_TIME_SHIFT 16 #define EXYNOS_EMUL_DATA_SHIFT 8 #define EXYNOS_EMUL_DATA_MASK 0xFF #define EXYNOS_EMUL_ENABLE 0x1 -#endif /* CONFIG_EXYNOS_THERMAL_EMUL */ +#endif /* CONFIG_THERMAL_EMULATION */ /* CPU Zone information */ #define PANIC_ZONE 4 @@ -145,6 +145,7 @@ struct thermal_cooling_conf { struct thermal_sensor_conf { char name[SENSOR_NAME_LEN]; int (*read_temperature)(void *data); + int (*write_emul_temp)(void *drv_data, unsigned long temp); struct thermal_trip_point_conf trip_data; struct thermal_cooling_conf cooling_data; void *private_data; @@ -349,6 +350,23 @@ static int exynos_get_temp(struct thermal_zone_device *thermal, return 0; } +/* Get temperature callback functions for thermal zone */ +static int exynos_set_emul_temp(struct thermal_zone_device *thermal, + unsigned long temp) +{ + void *data; + int ret = -EINVAL; + + if (!th_zone->sensor_conf) { + pr_info("Temperature sensor not initialised\n"); + return -EINVAL; + } + data = th_zone->sensor_conf->private_data; + if (th_zone->sensor_conf->write_emul_temp) + ret = th_zone->sensor_conf->write_emul_temp(data, temp); + return ret; +} + /* Get the temperature trend */ static int exynos_get_trend(struct thermal_zone_device *thermal, int trip, enum thermal_trend *trend) @@ -372,6 +390,7 @@ static struct thermal_zone_device_ops const exynos_dev_ops = { .bind = exynos_bind, .unbind = exynos_unbind, .get_temp = exynos_get_temp, + .set_emul_temp = exynos_set_emul_temp, .get_trend = exynos_get_trend, .get_mode = exynos_get_mode, .set_mode = exynos_set_mode, @@ -694,6 +713,47 @@ static int exynos_tmu_read(struct exynos_tmu_data *data) return temp; } +#ifdef CONFIG_THERMAL_EMULATION +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +{ + struct exynos_tmu_data *data = drv_data; + unsigned int reg; + int ret = -EINVAL; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + if (temp && temp < MCELSIUS) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + reg = readl(data->base + EXYNOS_EMUL_CON); + + if (temp) { + temp /= MCELSIUS; + + reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | + (temp_to_code(data, temp) 
+ << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; + } else { + reg &= ~EXYNOS_EMUL_ENABLE; + } + + writel(reg, data->base + EXYNOS_EMUL_CON); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + return 0; +out: + return ret; +} +#else +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) + { return -EINVAL; } +#endif/*CONFIG_THERMAL_EMULATION*/ + static void exynos_tmu_work(struct work_struct *work) { struct exynos_tmu_data *data = container_of(work, @@ -727,6 +787,7 @@ static irqreturn_t exynos_tmu_irq(int irq, void *id) static struct thermal_sensor_conf exynos_sensor_conf = { .name = "exynos-therm", .read_temperature = (int (*)(void *))exynos_tmu_read, + .write_emul_temp = exynos_tmu_set_emulation, }; #if defined(CONFIG_CPU_EXYNOS4210) @@ -833,93 +894,6 @@ static inline struct exynos_tmu_platform_data *exynos_get_driver_data( platform_get_device_id(pdev)->driver_data; } -#ifdef CONFIG_EXYNOS_THERMAL_EMUL -static ssize_t exynos_tmu_emulation_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct platform_device *pdev = container_of(dev, - struct platform_device, dev); - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - unsigned int reg; - u8 temp_code; - int temp = 0; - - if (data->soc == SOC_ARCH_EXYNOS4210) - goto out; - - mutex_lock(&data->lock); - clk_enable(data->clk); - reg = readl(data->base + EXYNOS_EMUL_CON); - clk_disable(data->clk); - mutex_unlock(&data->lock); - - if (reg & EXYNOS_EMUL_ENABLE) { - reg >>= EXYNOS_EMUL_DATA_SHIFT; - temp_code = reg & EXYNOS_EMUL_DATA_MASK; - temp = code_to_temp(data, temp_code); - } -out: - return sprintf(buf, "%d\n", temp * MCELSIUS); -} - -static ssize_t exynos_tmu_emulation_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct platform_device *pdev = container_of(dev, - struct platform_device, dev); - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - unsigned int reg; - int temp; - - if (data->soc == SOC_ARCH_EXYNOS4210) - goto out; - - if (!sscanf(buf, "%d\n", &temp) || temp < 0) - return -EINVAL; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - reg = readl(data->base + EXYNOS_EMUL_CON); - - if (temp) { - /* Both CELSIUS and MCELSIUS type are available for input */ - if (temp > MCELSIUS) - temp /= MCELSIUS; - - reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | - (temp_to_code(data, (temp / MCELSIUS)) - << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; - } else { - reg &= ~EXYNOS_EMUL_ENABLE; - } - - writel(reg, data->base + EXYNOS_EMUL_CON); - - clk_disable(data->clk); - mutex_unlock(&data->lock); - -out: - return count; -} - -static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show, - exynos_tmu_emulation_store); -static int create_emulation_sysfs(struct device *dev) -{ - return device_create_file(dev, &dev_attr_emulation); -} -static void remove_emulation_sysfs(struct device *dev) -{ - device_remove_file(dev, &dev_attr_emulation); -} -#else -static inline int create_emulation_sysfs(struct device *dev) { return 0; } -static inline void remove_emulation_sysfs(struct device *dev) {} -#endif - static int exynos_tmu_probe(struct platform_device *pdev) { struct exynos_tmu_data *data; @@ -1019,10 +993,6 @@ static int exynos_tmu_probe(struct platform_device *pdev) goto err_clk; } - ret = create_emulation_sysfs(&pdev->dev); - if (ret) - dev_err(&pdev->dev, "Failed to create emulation mode sysfs node\n"); - return 0; err_clk: platform_set_drvdata(pdev, NULL); @@ -1034,8 +1004,6 @@ static int 
exynos_tmu_remove(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); - remove_emulation_sysfs(&pdev->dev); - exynos_tmu_control(pdev, false); exynos_unregister_thermal(); -- GitLab From 8837295a73f3500b32e18f9862c7bdde0b958648 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Tue, 26 Mar 2013 21:38:34 +0000 Subject: [PATCH 0353/3163] thermal: add a warning for temperature emulation feature Because this feature is for debugging purposes, it is highly recommended not to enable it on production systems. This patch adds warnings for system integrators, so that people are aware of this potential security issue. Signed-off-by: Eduardo Valentin Signed-off-by: Zhang Rui --- Documentation/thermal/sysfs-api.txt | 4 ++++ drivers/thermal/Kconfig | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index 6859661c9d31..277530a5786c 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -265,6 +265,10 @@ emul_temp Unit: millidegree Celsius WO, Optional + WARNING: Be careful while enabling this option on production systems, + because userland can easily disable the thermal policy by simply + flooding this sysfs node with low temperature values. + ***************************** * Cooling device attributes * ***************************** diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 2a19120c32bd..fb0672baff40 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -86,6 +86,10 @@ config THERMAL_EMULATION user can manually input temperature and test the different trip threshold behaviour for simulation purpose. + WARNING: Be careful while enabling this option on production systems, + because userland can easily disable the thermal policy by simply + flooding this sysfs node with low temperature values. + config SPEAR_THERMAL bool "SPEAr thermal sensor driver" depends on PLAT_SPEAR -- GitLab From afd80d85aefac27e6e2f9dc10f60515357c504d2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Mar 2013 17:18:35 +0100 Subject: [PATCH 0354/3163] pmu: prepare for migration support In order to migrate the PMU state correctly, we need to restore the values of MSR_CORE_PERF_GLOBAL_STATUS (a read-only register) and MSR_CORE_PERF_GLOBAL_OVF_CTRL (which has side effects when written). We also need to write the full 40-bit value of the performance counter, which would only be possible with a v3 architectural PMU's full-width counter MSRs. To distinguish host-initiated writes from the guest's, pass the full struct msr_data to kvm_pmu_set_msr.
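For reference, struct msr_data (as defined in arch/x86/include/asm/kvm_host.h at the time of this series) already carries the flag that lets a handler tell a migration-restore write from a guest WRMSR:

struct msr_data {
	bool host_initiated;	/* write came from the VMM via ioctl, not the guest */
	u32 index;		/* MSR number */
	u64 data;		/* value to write */
};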
Signed-off-by: Paolo Bonzini Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/pmu.c | 14 +++++++++++--- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b5a64621d5af..3dd84c996d56 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1030,7 +1030,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cfc258a6bf97..c53e797e7369 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) return 1; } -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + u32 index = msr_info->index; + u64 data = msr_info->data; switch (index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } break; case MSR_CORE_PERF_GLOBAL_STATUS: + if (msr_info->host_initiated) { + pmu->global_status = data; + return 0; + } break; /* RO MSR */ case MSR_CORE_PERF_GLOBAL_CTRL: if (pmu->global_ctrl == data) @@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { - pmu->global_status &= ~data; + if (!msr_info->host_initiated) + pmu->global_status &= ~data; pmu->global_ovf_ctrl = data; return 0; } @@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) default: if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || (pmc = get_fixed_pmc(pmu, index))) { - data = (s64)(s32)data; + if (!msr_info->host_initiated) + data = (s64)(s32)data; pmc->counter += data - read_pmc(pmc); return 0; } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2aaba814f1c8..78c6f90a60cc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2040,7 +2040,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (pr || data != 0) vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " @@ -2086,7 +2086,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (!ignore_msrs) { vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); -- GitLab From 925fe08bce38d1ff052fe2209b9e2b8d5fbb7f98 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 27 Mar 2013 18:51:52 -0500 Subject: [PATCH 0355/3163] iommu/amd: Re-enable IOMMU event log interrupt after 
handling. Current driver does not clear the IOMMU event log interrupt bit in the IOMMU status register after processing an interrupt. This causes the IOMMU hardware to generate event log interrupt only once. This has been observed in both IOMMU v1 and V2 hardware. This patch clears the bit by writing 1 to bit 1 of the IOMMU status register (MMIO Offset 2020h) Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 +++ drivers/iommu/amd_iommu_types.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b287ca33833d..d6433e2a3bb4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -703,6 +703,9 @@ static void iommu_poll_events(struct amd_iommu *iommu) u32 head, tail; unsigned long flags; + /* enable event interrupts again */ + writel(MMIO_STATUS_EVT_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); + spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index e38ab438bb34..083f98c0488b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -99,6 +99,7 @@ #define PASID_MASK 0x000fffff /* MMIO status bits */ +#define MMIO_STATUS_EVT_INT_MASK (1 << 1) #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) #define MMIO_STATUS_PPR_INT_MASK (1 << 6) -- GitLab From bb5547acfcd842950b8a22aa83f84af93388b9f2 Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Fri, 29 Mar 2013 01:23:58 +0530 Subject: [PATCH 0356/3163] iommu/fsl: Make iova dma_addr_t in the iommu_iova_to_phys API. This is required in case of PAMU, as it can support a window size of up to 64G (even on 32bit). Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/exynos-iommu.c | 2 +- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/iommu.c | 3 +-- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/omap-iommu.c | 2 +- drivers/iommu/shmobile-iommu.c | 2 +- drivers/iommu/tegra-gart.c | 2 +- drivers/iommu/tegra-smmu.c | 2 +- include/linux/iommu.h | 9 +++------ 10 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b287ca33833d..a7f6b04eaa5e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3410,7 +3410,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, - unsigned long iova) + dma_addr_t iova) { struct protection_domain *domain = dom->priv; unsigned long offset_mask; diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 238a3caa949a..3f32d64ab87a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1027,7 +1027,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain, } static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct exynos_iommu_domain *priv = domain->priv; unsigned long *entry; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0099667a397e..6e0b9ffc79b5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4111,7 +4111,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct dmar_domain *dmar_domain = domain->priv; struct 
dma_pte *pte; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b972d430d92b..f730ed9d8af9 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -706,8 +706,7 @@ void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_detach_group); -phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) +phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { if (unlikely(domain->ops->iova_to_phys == NULL)) return 0; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 6a8870a31668..8ab4f41090af 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -554,7 +554,7 @@ static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long va) + dma_addr_t va) { struct msm_priv *priv; struct msm_iommu_drvdata *iommu_drvdata; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 6ac02fa5910f..e02e5d71745b 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1219,7 +1219,7 @@ static void omap_iommu_domain_destroy(struct iommu_domain *domain) } static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long da) + dma_addr_t da) { struct omap_iommu_domain *omap_domain = domain->priv; struct omap_iommu *oiommu = omap_domain->iommu_dev; diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index b6e8b57cf0a8..d572863dfccd 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -296,7 +296,7 @@ static size_t shmobile_iommu_unmap(struct iommu_domain *domain, } static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct shmobile_iommu_domain *sh_domain = domain->priv; uint32_t l1entry = 0, l2entry = 0; diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 86437575f94d..4aec8be38054 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -279,7 +279,7 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, } static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct gart_device *gart = domain->priv; unsigned long pte; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index b34e5fd7fd9e..bc9b59949d09 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -757,7 +757,7 @@ static size_t smmu_iommu_unmap(struct iommu_domain *domain, unsigned long iova, } static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct smmu_as *as = domain->priv; unsigned long *pte; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ba3b8a98a049..bb0a0fc26729 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -91,8 +91,7 @@ struct iommu_ops { phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); - phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, - unsigned long iova); + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*domain_has_cap)(struct iommu_domain *domain, unsigned long cap); int (*add_device)(struct device *dev); @@ -134,8 +133,7 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int 
prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); -extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova); +extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern int iommu_domain_has_cap(struct iommu_domain *domain, unsigned long cap); extern void iommu_set_fault_handler(struct iommu_domain *domain, @@ -267,8 +265,7 @@ static inline void iommu_domain_window_disable(struct iommu_domain *domain, { } -static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) +static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; } -- GitLab From 80f97f0f73b82444f714651ea053838d27779dca Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Fri, 29 Mar 2013 01:24:00 +0530 Subject: [PATCH 0357/3163] iommu/fsl: Add the window permission flag as a parameter to iommu_window_enable API. Each iommu window can have access permissions associated with it. Extended the window_enable API to incorporate window access permissions. In case of PAMU each window can have its specific set of permissions. Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 5 +++-- include/linux/iommu.h | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f730ed9d8af9..1d72b4f5b006 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -853,12 +853,13 @@ EXPORT_SYMBOL_GPL(iommu_unmap); int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size) + phys_addr_t paddr, u64 size, int prot) { if (unlikely(domain->ops->domain_window_enable == NULL)) return -ENODEV; - return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size); + return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size, + prot); } EXPORT_SYMBOL_GPL(iommu_domain_window_enable); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bb0a0fc26729..272781073110 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -104,7 +104,7 @@ struct iommu_ops { /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size); + phys_addr_t paddr, u64 size, int prot); void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr); /* Set the numer of window per domain */ int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count); @@ -169,7 +169,8 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, /* Window handling function prototypes */ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t offset, u64 size); + phys_addr_t offset, u64 size, + int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework @@ -255,7 +256,7 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, static inline int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, - u64 size) + u64 size, int prot) { return -ENODEV; } -- GitLab From 119f5e448d32c11faf22fe81f6f2d78467a47149 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Mar 2013 20:32:13 +0900 Subject: [PATCH 0358/3163] gpio: Renesas R-Car GPIO driver V3 This patch is V3 of a GPIO driver for the R-Car series of SoCs from Renesas. 
This driver is designed to be reusable between multiple SoCs that share the same basic building block, but so far it has only been used on R-Car H1 (r8a7779). Each driver instance handles 32 GPIOs with individually maskable IRQs. The driver operates on a single I/O memory range and the 32 GPIOs are hooked up to a single interrupt. On R-Car H1 either external IRQ pins or GPIOs with interrupts can be used for on-board interrupts. Four pins are supported as external IRQs, while in the GPIO case 202 GPIOs are hooked up as 202 interrupts to the GIC on the Cortex-A9 quad via six driver instances. At this point the driver interfaces as a regular platform device driver. In the future DT support will be submitted as an incremental feature patch. Signed-off-by: Magnus Damm Reviewed-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/gpio/Kconfig | 6 + drivers/gpio/Makefile | 1 + drivers/gpio/gpio-rcar.c | 373 ++++++++++++++++++++++++ include/linux/platform_data/gpio-rcar.h | 25 ++ 4 files changed, 405 insertions(+) create mode 100644 drivers/gpio/gpio-rcar.c create mode 100644 include/linux/platform_data/gpio-rcar.h diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 93aaadf99f28..d766e3cbef18 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -204,6 +204,12 @@ config GPIO_PXA help Say yes here to support the PXA GPIO device +config GPIO_RCAR + tristate "Renesas R-Car GPIO" + depends on ARM + help + Say yes here to support GPIO on Renesas R-Car SoCs. + config GPIO_SPEAR_SPICS bool "ST SPEAr13xx SPI Chip Select as GPIO support" depends on PLAT_SPEAR diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 22e07bc9fcb5..b41c74d45287 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_GPIO_PL061) += gpio-pl061.o obj-$(CONFIG_GPIO_PXA) += gpio-pxa.o obj-$(CONFIG_GPIO_RC5T583) += gpio-rc5t583.o obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o +obj-$(CONFIG_GPIO_RCAR) += gpio-rcar.o obj-$(CONFIG_PLAT_SAMSUNG) += gpio-samsung.o obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o obj-$(CONFIG_GPIO_SCH) += gpio-sch.o diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c new file mode 100644 index 000000000000..581ba56131a7 --- /dev/null +++ b/drivers/gpio/gpio-rcar.c @@ -0,0 +1,373 @@ +/* + * Renesas R-Car GPIO Support + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct gpio_rcar_priv { + void __iomem *base; + spinlock_t lock; + struct gpio_rcar_config config; + struct platform_device *pdev; + struct gpio_chip gpio_chip; + struct irq_chip irq_chip; + struct irq_domain *irq_domain; +}; + +#define IOINTSEL 0x00 +#define INOUTSEL 0x04 +#define OUTDT 0x08 +#define INDT 0x0c +#define INTDT 0x10 +#define INTCLR 0x14 +#define INTMSK 0x18 +#define MSKCLR 0x1c +#define POSNEG 0x20 +#define EDGLEVEL 0x24 +#define FILONOFF 0x28 + +static inline u32 gpio_rcar_read(struct gpio_rcar_priv *p, int offs) +{ + return ioread32(p->base + offs); +} + +static inline void gpio_rcar_write(struct gpio_rcar_priv *p, int offs, + u32 value) +{ + iowrite32(value, p->base + offs); +} + +static void gpio_rcar_modify_bit(struct gpio_rcar_priv *p, int offs, + int bit, bool value) +{ + u32 tmp = gpio_rcar_read(p, offs); + + if (value) + tmp |= BIT(bit); + else + tmp &= ~BIT(bit); + + gpio_rcar_write(p, offs, tmp); +} + +static void gpio_rcar_irq_disable(struct irq_data *d) +{ + struct gpio_rcar_priv *p = irq_data_get_irq_chip_data(d); + + gpio_rcar_write(p, INTMSK, ~BIT(irqd_to_hwirq(d))); +} + +static void gpio_rcar_irq_enable(struct irq_data *d) +{ + struct gpio_rcar_priv *p = irq_data_get_irq_chip_data(d); + + gpio_rcar_write(p, MSKCLR, BIT(irqd_to_hwirq(d))); +} + +static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p, + unsigned int hwirq, + bool active_high_rising_edge, + bool level_trigger) +{ + unsigned long flags; + + /* follow steps in the GPIO documentation for + * "Setting Edge-Sensitive Interrupt Input Mode" and + * "Setting Level-Sensitive Interrupt Input Mode" + */ + + spin_lock_irqsave(&p->lock, flags); + + /* Configure postive or negative logic in POSNEG */ + gpio_rcar_modify_bit(p, POSNEG, hwirq, !active_high_rising_edge); + + /* Configure edge or level trigger in EDGLEVEL */ + gpio_rcar_modify_bit(p, EDGLEVEL, hwirq, !level_trigger); + + /* Select "Interrupt Input Mode" in IOINTSEL */ + gpio_rcar_modify_bit(p, IOINTSEL, hwirq, true); + + /* Write INTCLR in case of edge trigger */ + if (!level_trigger) + gpio_rcar_write(p, INTCLR, BIT(hwirq)); + + spin_unlock_irqrestore(&p->lock, flags); +} + +static int gpio_rcar_irq_set_type(struct irq_data *d, unsigned int type) +{ + struct gpio_rcar_priv *p = irq_data_get_irq_chip_data(d); + unsigned int hwirq = irqd_to_hwirq(d); + + dev_dbg(&p->pdev->dev, "sense irq = %d, type = %d\n", hwirq, type); + + switch (type & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_LEVEL_HIGH: + gpio_rcar_config_interrupt_input_mode(p, hwirq, true, true); + break; + case IRQ_TYPE_LEVEL_LOW: + gpio_rcar_config_interrupt_input_mode(p, hwirq, false, true); + break; + case IRQ_TYPE_EDGE_RISING: + gpio_rcar_config_interrupt_input_mode(p, hwirq, true, false); + break; + case IRQ_TYPE_EDGE_FALLING: + gpio_rcar_config_interrupt_input_mode(p, hwirq, false, false); + break; + default: + return -EINVAL; + } + return 0; +} + +static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id) +{ + struct gpio_rcar_priv *p = dev_id; + u32 pending; + unsigned int offset, irqs_handled = 0; + + while ((pending = gpio_rcar_read(p, INTDT))) { + offset = __ffs(pending); + gpio_rcar_write(p, INTCLR, BIT(offset)); + generic_handle_irq(irq_find_mapping(p->irq_domain, offset)); + irqs_handled++; + } + + return irqs_handled ? 
IRQ_HANDLED : IRQ_NONE; +} + +static inline struct gpio_rcar_priv *gpio_to_priv(struct gpio_chip *chip) +{ + return container_of(chip, struct gpio_rcar_priv, gpio_chip); +} + +static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, + unsigned int gpio, + bool output) +{ + struct gpio_rcar_priv *p = gpio_to_priv(chip); + unsigned long flags; + + /* follow steps in the GPIO documentation for + * "Setting General Output Mode" and + * "Setting General Input Mode" + */ + + spin_lock_irqsave(&p->lock, flags); + + /* Configure postive logic in POSNEG */ + gpio_rcar_modify_bit(p, POSNEG, gpio, false); + + /* Select "General Input/Output Mode" in IOINTSEL */ + gpio_rcar_modify_bit(p, IOINTSEL, gpio, false); + + /* Select Input Mode or Output Mode in INOUTSEL */ + gpio_rcar_modify_bit(p, INOUTSEL, gpio, output); + + spin_unlock_irqrestore(&p->lock, flags); +} + +static int gpio_rcar_direction_input(struct gpio_chip *chip, unsigned offset) +{ + gpio_rcar_config_general_input_output_mode(chip, offset, false); + return 0; +} + +static int gpio_rcar_get(struct gpio_chip *chip, unsigned offset) +{ + return (int)(gpio_rcar_read(gpio_to_priv(chip), INDT) & BIT(offset)); +} + +static void gpio_rcar_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct gpio_rcar_priv *p = gpio_to_priv(chip); + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + gpio_rcar_modify_bit(p, OUTDT, offset, value); + spin_unlock_irqrestore(&p->lock, flags); +} + +static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset, + int value) +{ + /* write GPIO value to output before selecting output mode of pin */ + gpio_rcar_set(chip, offset, value); + gpio_rcar_config_general_input_output_mode(chip, offset, true); + return 0; +} + +static int gpio_rcar_to_irq(struct gpio_chip *chip, unsigned offset) +{ + return irq_create_mapping(gpio_to_priv(chip)->irq_domain, offset); +} + +static int gpio_rcar_irq_domain_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct gpio_rcar_priv *p = h->host_data; + + dev_dbg(&p->pdev->dev, "map hw irq = %d, virq = %d\n", (int)hw, virq); + + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); + set_irq_flags(virq, IRQF_VALID); /* kill me now */ + return 0; +} + +static struct irq_domain_ops gpio_rcar_irq_domain_ops = { + .map = gpio_rcar_irq_domain_map, +}; + +static int gpio_rcar_probe(struct platform_device *pdev) +{ + struct gpio_rcar_config *pdata = pdev->dev.platform_data; + struct gpio_rcar_priv *p; + struct resource *io, *irq; + struct gpio_chip *gpio_chip; + struct irq_chip *irq_chip; + const char *name = dev_name(&pdev->dev); + int ret; + + p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); + if (!p) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + ret = -ENOMEM; + goto err0; + } + + /* deal with driver instance configuration */ + if (pdata) + p->config = *pdata; + + p->pdev = pdev; + platform_set_drvdata(pdev, p); + spin_lock_init(&p->lock); + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + + if (!io || !irq) { + dev_err(&pdev->dev, "missing IRQ or IOMEM\n"); + ret = -EINVAL; + goto err0; + } + + p->base = devm_ioremap_nocache(&pdev->dev, io->start, + resource_size(io)); + if (!p->base) { + dev_err(&pdev->dev, "failed to remap I/O memory\n"); + ret = -ENXIO; + goto err0; + } + + gpio_chip = &p->gpio_chip; + gpio_chip->direction_input = gpio_rcar_direction_input; + 
gpio_chip->get = gpio_rcar_get; + gpio_chip->direction_output = gpio_rcar_direction_output; + gpio_chip->set = gpio_rcar_set; + gpio_chip->to_irq = gpio_rcar_to_irq; + gpio_chip->label = name; + gpio_chip->owner = THIS_MODULE; + gpio_chip->base = p->config.gpio_base; + gpio_chip->ngpio = p->config.number_of_pins; + + irq_chip = &p->irq_chip; + irq_chip->name = name; + irq_chip->irq_mask = gpio_rcar_irq_disable; + irq_chip->irq_unmask = gpio_rcar_irq_enable; + irq_chip->irq_enable = gpio_rcar_irq_enable; + irq_chip->irq_disable = gpio_rcar_irq_disable; + irq_chip->irq_set_type = gpio_rcar_irq_set_type; + irq_chip->flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_SET_TYPE_MASKED; + + p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, + p->config.number_of_pins, + p->config.irq_base, + &gpio_rcar_irq_domain_ops, p); + if (!p->irq_domain) { + ret = -ENXIO; + dev_err(&pdev->dev, "cannot initialize irq domain\n"); + goto err1; + } + + if (devm_request_irq(&pdev->dev, irq->start, + gpio_rcar_irq_handler, 0, name, p)) { + dev_err(&pdev->dev, "failed to request IRQ\n"); + ret = -ENOENT; + goto err1; + } + + ret = gpiochip_add(gpio_chip); + if (ret) { + dev_err(&pdev->dev, "failed to add GPIO controller\n"); + goto err1; + } + + dev_info(&pdev->dev, "driving %d GPIOs\n", p->config.number_of_pins); + + /* warn in case of mismatch if irq base is specified */ + if (p->config.irq_base) { + ret = irq_find_mapping(p->irq_domain, 0); + if (p->config.irq_base != ret) + dev_warn(&pdev->dev, "irq base mismatch (%u/%u)\n", + p->config.irq_base, ret); + } + + return 0; + +err1: + irq_domain_remove(p->irq_domain); +err0: + return ret; +} + +static int gpio_rcar_remove(struct platform_device *pdev) +{ + struct gpio_rcar_priv *p = platform_get_drvdata(pdev); + int ret; + + ret = gpiochip_remove(&p->gpio_chip); + if (ret) + return ret; + + irq_domain_remove(p->irq_domain); + return 0; +} + +static struct platform_driver gpio_rcar_device_driver = { + .probe = gpio_rcar_probe, + .remove = gpio_rcar_remove, + .driver = { + .name = "gpio_rcar", + } +}; + +module_platform_driver(gpio_rcar_device_driver); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("Renesas R-Car GPIO Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/gpio-rcar.h b/include/linux/platform_data/gpio-rcar.h new file mode 100644 index 000000000000..bebfcd86fb80 --- /dev/null +++ b/include/linux/platform_data/gpio-rcar.h @@ -0,0 +1,25 @@ +/* + * Renesas R-Car GPIO Support + * + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __GPIO_RCAR_H__ +#define __GPIO_RCAR_H__ + +struct gpio_rcar_config { + unsigned int gpio_base; + unsigned int irq_base; + unsigned int number_of_pins; +}; + +#endif /* __GPIO_RCAR_H__ */ -- GitLab From dc3465a943ed2dd5de37d3d60df5c4e11c49efcb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Mar 2013 03:27:00 +0100 Subject: [PATCH 0359/3163] gpio-rcar: Add pinctrl support Register the GPIO pin range, and request and free GPIO pins using the pinctrl API. 
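The pin-range registration added by the diff below takes, in order, the GPIO chip, the pinctrl device name, the first GPIO offset within the chip, the matching pin offset within the pin controller, and the pin count; annotated here for illustration:

ret = gpiochip_add_pin_range(gpio_chip,		/* this GPIO chip */
			     p->config.pctl_name,	/* e.g. "pfc-r8a7779" */
			     0,				/* first GPIO of the range */
			     gpio_chip->base,		/* first matching pin */
			     gpio_chip->ngpio);		/* number of pins */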
Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/gpio/gpio-rcar.c | 23 +++++++++++++++++++++++ include/linux/platform_data/gpio-rcar.h | 1 + 2 files changed, 24 insertions(+) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 581ba56131a7..b4ca450947b8 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -190,6 +191,21 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip, spin_unlock_irqrestore(&p->lock, flags); } +static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) +{ + return pinctrl_request_gpio(chip->base + offset); +} + +static void gpio_rcar_free(struct gpio_chip *chip, unsigned offset) +{ + pinctrl_free_gpio(chip->base + offset); + + /* Set the GPIO as an input to ensure that the next GPIO request won't + * drive the GPIO pin as an output. + */ + gpio_rcar_config_general_input_output_mode(chip, offset, false); +} + static int gpio_rcar_direction_input(struct gpio_chip *chip, unsigned offset) { gpio_rcar_config_general_input_output_mode(chip, offset, false); @@ -285,6 +301,8 @@ static int gpio_rcar_probe(struct platform_device *pdev) } gpio_chip = &p->gpio_chip; + gpio_chip->request = gpio_rcar_request; + gpio_chip->free = gpio_rcar_free; gpio_chip->direction_input = gpio_rcar_direction_input; gpio_chip->get = gpio_rcar_get; gpio_chip->direction_output = gpio_rcar_direction_output; @@ -337,6 +355,11 @@ static int gpio_rcar_probe(struct platform_device *pdev) p->config.irq_base, ret); } + ret = gpiochip_add_pin_range(gpio_chip, p->config.pctl_name, 0, + gpio_chip->base, gpio_chip->ngpio); + if (ret < 0) + dev_warn(&pdev->dev, "failed to add pin range\n"); + return 0; err1: diff --git a/include/linux/platform_data/gpio-rcar.h b/include/linux/platform_data/gpio-rcar.h index bebfcd86fb80..b253f77a7ddf 100644 --- a/include/linux/platform_data/gpio-rcar.h +++ b/include/linux/platform_data/gpio-rcar.h @@ -20,6 +20,7 @@ struct gpio_rcar_config { unsigned int gpio_base; unsigned int irq_base; unsigned int number_of_pins; + const char *pctl_name; }; #endif /* __GPIO_RCAR_H__ */ -- GitLab From 48b1e3e80f742bff0f469245f2d05007af9af92e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Mar 2013 03:43:32 +0100 Subject: [PATCH 0360/3163] ARM: shmobile: marzen: Add GPIO LEDs The board has 3 LEDs connected to GPIOs. Add a led-gpio device to support them. 
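Once leds-gpio binds, each LED becomes controllable from userspace under /sys/class/leds/<name>/; a minimal sketch (the led2 path is derived from the .name field registered below):

#include <stdio.h>

int main(void)
{
	/* turn "led2" off; any non-zero brightness turns it back on */
	FILE *f = fopen("/sys/class/leds/led2/brightness", "w");

	if (!f)
		return 1;
	fputs("0\n", f);
	return fclose(f) ? 1 : 0;
}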
Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-marzen.c | 32 +++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index 5852331743e7..a88f7f3594c7 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -168,12 +169,43 @@ static struct platform_device usb_phy_device = { .num_resources = ARRAY_SIZE(usb_phy_resources), }; +/* LEDS */ +static struct gpio_led marzen_leds[] = { + { + .name = "led2", + .gpio = 157, + .default_state = LEDS_GPIO_DEFSTATE_ON, + }, { + .name = "led3", + .gpio = 158, + .default_state = LEDS_GPIO_DEFSTATE_ON, + }, { + .name = "led4", + .gpio = 159, + .default_state = LEDS_GPIO_DEFSTATE_ON, + }, +}; + +static struct gpio_led_platform_data marzen_leds_pdata = { + .leds = marzen_leds, + .num_leds = ARRAY_SIZE(marzen_leds), +}; + +static struct platform_device leds_device = { + .name = "leds-gpio", + .id = 0, + .dev = { + .platform_data = &marzen_leds_pdata, + }, +}; + static struct platform_device *marzen_devices[] __initdata = { ð_device, &sdhi0_device, &thermal_device, &hspi_device, &usb_phy_device, + &leds_device, }; /* USB */ -- GitLab From 542a564d2ddbd2c37536b4dff8e45fa0fc239bcc Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 7 Mar 2013 14:31:57 +0100 Subject: [PATCH 0361/3163] sh-pfc: Make function GPIOs support optional The target is to get rid of function GPIOs completely. To reach this, make function GPIOs support optional by skipping the function GPIO chip registration if no function GPIOS are defined in SoC data. Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/gpio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/gpio.c b/drivers/pinctrl/sh-pfc/gpio.c index d7acb06d888c..7a54ec79518b 100644 --- a/drivers/pinctrl/sh-pfc/gpio.c +++ b/drivers/pinctrl/sh-pfc/gpio.c @@ -384,6 +384,9 @@ int sh_pfc_register_gpiochip(struct sh_pfc *pfc) } /* Register the function GPIOs chip. */ + if (pfc->info->nr_func_gpios == 0) + return 0; + chip = sh_pfc_add_gpiochip(pfc, gpio_function_setup); if (IS_ERR(chip)) return PTR_ERR(chip); -- GitLab From 1a4fd58f76cf331c93daaa1667daa25db297d0d4 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Mar 2013 03:19:44 +0100 Subject: [PATCH 0362/3163] sh-pfc: Make GPIO support optional When implemented as a separate IP block, GPIOs should be handled by a separate driver. To make this possible GPIO support needs to be optional in the sh-pfc driver. If no GPIO data registers are supplied in the SoC information structure skip registration of the gpiochip. Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/gpio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/gpio.c b/drivers/pinctrl/sh-pfc/gpio.c index 7a54ec79518b..317cebb0ee4d 100644 --- a/drivers/pinctrl/sh-pfc/gpio.c +++ b/drivers/pinctrl/sh-pfc/gpio.c @@ -354,6 +354,9 @@ int sh_pfc_register_gpiochip(struct sh_pfc *pfc) unsigned int i; int ret; + if (pfc->info->data_regs == NULL) + return 0; + /* Register the real GPIOs chip. 
*/ chip = sh_pfc_add_gpiochip(pfc, gpio_pin_setup); if (IS_ERR(chip)) -- GitLab From ceef91dcc0bca0a39c54d2f0071848b6d5c66b88 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Mar 2013 03:19:44 +0100 Subject: [PATCH 0363/3163] sh-pfc: Skip gpiochip registration when no GPIO resource is found Boards/platforms that register dedicated GPIO devices will not supply a memory resource for GPIOs. Try to locate the GPIO memory resource at initialization time, and skip registration of the gpiochip if the resource can't be found. This is a temporary modification to ease the transition to separate GPIO drivers. It should be reverted when all boards and platforms will have been moved. Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/gpio.c | 39 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/gpio.c b/drivers/pinctrl/sh-pfc/gpio.c index 317cebb0ee4d..d37efa7dcf90 100644 --- a/drivers/pinctrl/sh-pfc/gpio.c +++ b/drivers/pinctrl/sh-pfc/gpio.c @@ -101,24 +101,9 @@ static void gpio_setup_data_reg(struct sh_pfc_chip *chip, unsigned gpio) static int gpio_setup_data_regs(struct sh_pfc_chip *chip) { struct sh_pfc *pfc = chip->pfc; - unsigned long addr = pfc->info->data_regs[0].reg; const struct pinmux_data_reg *dreg; unsigned int i; - /* Find the window that contain the GPIO registers. */ - for (i = 0; i < pfc->num_windows; ++i) { - struct sh_pfc_window *window = &pfc->window[i]; - - if (addr >= window->phys && addr < window->phys + window->size) - break; - } - - if (i == pfc->num_windows) - return -EINVAL; - - /* GPIO data registers must be in the first memory resource. */ - chip->mem = &pfc->window[i]; - /* Count the number of data registers, allocate memory and initialize * them. */ @@ -319,7 +304,8 @@ static int gpio_function_setup(struct sh_pfc_chip *chip) */ static struct sh_pfc_chip * -sh_pfc_add_gpiochip(struct sh_pfc *pfc, int(*setup)(struct sh_pfc_chip *)) +sh_pfc_add_gpiochip(struct sh_pfc *pfc, int(*setup)(struct sh_pfc_chip *), + struct sh_pfc_window *mem) { struct sh_pfc_chip *chip; int ret; @@ -328,6 +314,7 @@ sh_pfc_add_gpiochip(struct sh_pfc *pfc, int(*setup)(struct sh_pfc_chip *)) if (unlikely(!chip)) return ERR_PTR(-ENOMEM); + chip->mem = mem; chip->pfc = pfc; ret = setup(chip); @@ -357,8 +344,24 @@ int sh_pfc_register_gpiochip(struct sh_pfc *pfc) if (pfc->info->data_regs == NULL) return 0; + /* Find the memory window that contain the GPIO registers. Boards that + * register a separate GPIO device will not supply a memory resource + * that covers the data registers. In that case don't try to handle + * GPIOs. + */ + for (i = 0; i < pfc->num_windows; ++i) { + struct sh_pfc_window *window = &pfc->window[i]; + + if (pfc->info->data_regs[0].reg >= window->phys && + pfc->info->data_regs[0].reg < window->phys + window->size) + break; + } + + if (i == pfc->num_windows) + return 0; + /* Register the real GPIOs chip. 
*/ - chip = sh_pfc_add_gpiochip(pfc, gpio_pin_setup); + chip = sh_pfc_add_gpiochip(pfc, gpio_pin_setup, &pfc->window[i]); if (IS_ERR(chip)) return PTR_ERR(chip); @@ -390,7 +393,7 @@ int sh_pfc_register_gpiochip(struct sh_pfc *pfc) if (pfc->info->nr_func_gpios == 0) return 0; - chip = sh_pfc_add_gpiochip(pfc, gpio_function_setup); + chip = sh_pfc_add_gpiochip(pfc, gpio_function_setup, NULL); if (IS_ERR(chip)) return PTR_ERR(chip); -- GitLab From e3c470510babd8ed385f1e09ec616787022b77b1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Mar 2013 17:30:25 +0100 Subject: [PATCH 0364/3163] sh-pfc: Configure pins as GPIOs at request time when handled externally When a GPIO is handled by a separate driver the pinmux gpio_set_direction() handler won't be called. The pin mux type then need to be configured to GPIO at request time. Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/core.c | 37 ++++++++++++++------------------ drivers/pinctrl/sh-pfc/pinctrl.c | 11 ++++++++++ 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c index 97e6ea3147e0..ced9a95aa1fc 100644 --- a/drivers/pinctrl/sh-pfc/core.c +++ b/drivers/pinctrl/sh-pfc/core.c @@ -268,7 +268,7 @@ int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type) int ret; switch (pinmux_type) { - + case PINMUX_TYPE_GPIO: case PINMUX_TYPE_FUNCTION: range = NULL; break; @@ -297,6 +297,8 @@ int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type) enum_id = 0; field = 0; value = 0; + + /* Iterate over all the configuration fields we need to update. */ while (1) { pos = sh_pfc_mark_to_enum(pfc, mark, pos, &enum_id); if (pos < 0) @@ -305,18 +307,20 @@ int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type) if (!enum_id) break; - /* first check if this is a function enum */ + /* Check if the configuration field selects a function. If it + * doesn't, skip the field if it's not applicable to the + * requested pinmux type. + */ in_range = sh_pfc_enum_in_range(enum_id, &pfc->info->function); if (!in_range) { - /* not a function enum */ - if (range) { - /* - * other range exists, so this pin is - * a regular GPIO pin that now is being - * bound to a specific direction. - * - * for this case we only allow function enums - * and the enums that match the other range. + if (pinmux_type == PINMUX_TYPE_FUNCTION) { + /* Functions are allowed to modify all + * fields. + */ + in_range = 1; + } else if (pinmux_type != PINMUX_TYPE_GPIO) { + /* Input/output types can only modify fields + * that correspond to their respective ranges. */ in_range = sh_pfc_enum_in_range(enum_id, range); @@ -327,17 +331,8 @@ int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type) */ if (in_range && enum_id == range->force) continue; - } else { - /* - * no other range exists, so this pin - * must then be of the function type. - * - * allow function type pins to select - * any combination of function/in/out - * in their MARK lists. - */ - in_range = 1; } + /* GPIOs are only allowed to modify function fields. 
 		}
 
 		if (!in_range)
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index aef268bc17ba..3492ec9a33b7 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -182,6 +182,17 @@ static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev,
 		goto done;
 	}
 
+	if (!pfc->gpio) {
+		/* If GPIOs are handled externally the pin mux type needs to be
+		 * set to GPIO here.
+		 */
+		const struct sh_pfc_pin *pin = &pfc->info->pins[idx];
+
+		ret = sh_pfc_config_mux(pfc, pin->enum_id, PINMUX_TYPE_GPIO);
+		if (ret < 0)
+			goto done;
+	}
+
 	cfg->type = PINMUX_TYPE_GPIO;
 	ret = 0;
--
GitLab

From 37a72d074d9658172dfef69c56ea7c0e9a9f6d1e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart
Date: Sun, 10 Mar 2013 03:31:51 +0100
Subject: [PATCH 0365/3163] ARM: shmobile: r8a7779: Register GPIO devices

Move GPIO handling from the PFC device to separate GPIO devices.

Signed-off-by: Laurent Pinchart
Signed-off-by: Simon Horman
---
 arch/arm/mach-shmobile/setup-r8a7779.c | 58 +++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 042df35e71a0..a460ba3dedcb 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -68,11 +69,6 @@ static struct resource r8a7779_pfc_resources[] = {
 		.end	= 0xfffc023b,
 		.flags	= IORESOURCE_MEM,
 	},
-	[1] = {
-		.start	= 0xffc40000,
-		.end	= 0xffc46fff,
-		.flags	= IORESOURCE_MEM,
-	}
 };
 
 static struct platform_device r8a7779_pfc_device = {
@@ -82,9 +78,59 @@ static struct platform_device r8a7779_pfc_device = {
 	.num_resources	= ARRAY_SIZE(r8a7779_pfc_resources),
 };
 
+#define R8A7779_GPIO(idx, npins)					\
+static struct resource r8a7779_gpio##idx##_resources[] = {		\
+	[0] = {								\
+		.start	= 0xffc40000 + 0x1000 * (idx),			\
+		.end	= 0xffc4002b + 0x1000 * (idx),			\
+		.flags	= IORESOURCE_MEM,				\
+	},								\
+	[1] = {								\
+		.start	= gic_iid(0xad + (idx)),			\
+		.flags	= IORESOURCE_IRQ,				\
+	}								\
+};									\
+									\
+static struct gpio_rcar_config r8a7779_gpio##idx##_platform_data = {	\
+	.gpio_base	= 32 * (idx),					\
+	.irq_base	= 0,						\
+	.number_of_pins	= npins,					\
+	.pctl_name	= "pfc-r8a7779",				\
+};									\
+									\
+static struct platform_device r8a7779_gpio##idx##_device = {		\
+	.name		= "gpio_rcar",					\
+	.id		= idx,						\
+	.resource	= r8a7779_gpio##idx##_resources,		\
+	.num_resources	= ARRAY_SIZE(r8a7779_gpio##idx##_resources),	\
+	.dev		= {						\
+		.platform_data	= &r8a7779_gpio##idx##_platform_data,	\
+	},								\
+}
+
+R8A7779_GPIO(0, 32);
+R8A7779_GPIO(1, 32);
+R8A7779_GPIO(2, 32);
+R8A7779_GPIO(3, 32);
+R8A7779_GPIO(4, 32);
+R8A7779_GPIO(5, 32);
+R8A7779_GPIO(6, 9);
+
+static struct platform_device *r8a7779_pinctrl_devices[] __initdata = {
+	&r8a7779_pfc_device,
+	&r8a7779_gpio0_device,
+	&r8a7779_gpio1_device,
+	&r8a7779_gpio2_device,
+	&r8a7779_gpio3_device,
+	&r8a7779_gpio4_device,
+	&r8a7779_gpio5_device,
+	&r8a7779_gpio6_device,
+};
+
 void __init r8a7779_pinmux_init(void)
 {
-	platform_device_register(&r8a7779_pfc_device);
+	platform_add_devices(r8a7779_pinctrl_devices,
+			     ARRAY_SIZE(r8a7779_pinctrl_devices));
 }
 
 static struct plat_sci_port scif0_platform_data = {
--
GitLab

From 2b4b588299fa3c8672e1049ab33acc7b600a8990 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart
Date: Sun, 10 Mar 2013 04:02:15 +0100
Subject: [PATCH 0366/3163] sh-pfc: r8a7779: Remove GPIO data

GPIOs are now handled by a separate driver; remove the GPIO data from
the SoC information structure.
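[Editor's note, not part of the original commit message: the "separate
driver" is the gpio_rcar platform driver instantiated per bank in the
previous patch. As a sketch of what that helper generates, expanding
R8A7779_GPIO(6, 9) by hand (a purely mechanical expansion; every value
below follows from the macro arguments) yields:

static struct resource r8a7779_gpio6_resources[] = {
	[0] = {
		/* 0xffc40000 + 0x1000 * 6 .. 0xffc4002b + 0x1000 * 6 */
		.start	= 0xffc46000,
		.end	= 0xffc4602b,
		.flags	= IORESOURCE_MEM,
	},
	[1] = {
		/* gic_iid(0xad + 6) */
		.start	= gic_iid(0xb3),
		.flags	= IORESOURCE_IRQ,
	}
};

static struct gpio_rcar_config r8a7779_gpio6_platform_data = {
	.gpio_base	= 192,	/* 32 * 6 */
	.irq_base	= 0,
	.number_of_pins	= 9,
	.pctl_name	= "pfc-r8a7779",
};

With each bank's data registers owned by such a device, the PFC no
longer needs the GPIO tables removed below.]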
Signed-off-by: Laurent Pinchart Acked-by: Linus Walleij Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a7779.c | 58 +--------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c index 41d8bda45163..e448ff1f408f 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c @@ -85,18 +85,12 @@ } #define _GP_DATA(bank, pin, name, sfx) \ - PINMUX_DATA(name##_DATA, name##_FN, name##_IN, name##_OUT) - -#define _GP_INOUTSEL(bank, pin, name, sfx) name##_IN, name##_OUT -#define _GP_INDT(bank, pin, name, sfx) name##_DATA + PINMUX_DATA(name##_DATA, name##_FN) #define GP_ALL(str) CPU_ALL_PORT(_GP_PORT_ALL, str) #define PINMUX_GPIO_GP_ALL() CPU_ALL_PORT(_GP_GPIO, unused) #define PINMUX_DATA_GP_ALL() CPU_ALL_PORT(_GP_DATA, unused) -#define GP_INOUTSEL(bank) PORT_GP_32_REV(bank, _GP_INOUTSEL, unused) -#define GP_INDT(bank) PORT_GP_32_REV(bank, _GP_INDT, unused) - #define PINMUX_IPSR_DATA(ipsr, fn) PINMUX_DATA(fn##_MARK, FN_##ipsr, FN_##fn) #define PINMUX_IPSR_MODSEL_DATA(ipsr, fn, ms) PINMUX_DATA(fn##_MARK, FN_##ms, \ FN_##ipsr, FN_##fn) @@ -108,14 +102,6 @@ enum { GP_ALL(DATA), /* GP_0_0_DATA -> GP_6_8_DATA */ PINMUX_DATA_END, - PINMUX_INPUT_BEGIN, - GP_ALL(IN), /* GP_0_0_IN -> GP_6_8_IN */ - PINMUX_INPUT_END, - - PINMUX_OUTPUT_BEGIN, - GP_ALL(OUT), /* GP_0_0_OUT -> GP_6_8_OUT */ - PINMUX_OUTPUT_END, - PINMUX_FUNCTION_BEGIN, GP_ALL(FN), /* GP_0_0_FN -> GP_6_8_FN */ @@ -3549,45 +3535,6 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { /* SEL_I2C1 [2] */ FN_SEL_I2C1_0, FN_SEL_I2C1_1, FN_SEL_I2C1_2, FN_SEL_I2C1_3 } }, - { PINMUX_CFG_REG("INOUTSEL0", 0xffc40004, 32, 1) { GP_INOUTSEL(0) } }, - { PINMUX_CFG_REG("INOUTSEL1", 0xffc41004, 32, 1) { GP_INOUTSEL(1) } }, - { PINMUX_CFG_REG("INOUTSEL2", 0xffc42004, 32, 1) { GP_INOUTSEL(2) } }, - { PINMUX_CFG_REG("INOUTSEL3", 0xffc43004, 32, 1) { GP_INOUTSEL(3) } }, - { PINMUX_CFG_REG("INOUTSEL4", 0xffc44004, 32, 1) { GP_INOUTSEL(4) } }, - { PINMUX_CFG_REG("INOUTSEL5", 0xffc45004, 32, 1) { GP_INOUTSEL(5) } }, - { PINMUX_CFG_REG("INOUTSEL6", 0xffc46004, 32, 1) { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, - 0, 0, - 0, 0, - GP_6_8_IN, GP_6_8_OUT, - GP_6_7_IN, GP_6_7_OUT, - GP_6_6_IN, GP_6_6_OUT, - GP_6_5_IN, GP_6_5_OUT, - GP_6_4_IN, GP_6_4_OUT, - GP_6_3_IN, GP_6_3_OUT, - GP_6_2_IN, GP_6_2_OUT, - GP_6_1_IN, GP_6_1_OUT, - GP_6_0_IN, GP_6_0_OUT, } - }, - { }, -}; - -static const struct pinmux_data_reg pinmux_data_regs[] = { - { PINMUX_DATA_REG("INDT0", 0xffc40008, 32) { GP_INDT(0) } }, - { PINMUX_DATA_REG("INDT1", 0xffc41008, 32) { GP_INDT(1) } }, - { PINMUX_DATA_REG("INDT2", 0xffc42008, 32) { GP_INDT(2) } }, - { PINMUX_DATA_REG("INDT3", 0xffc43008, 32) { GP_INDT(3) } }, - { PINMUX_DATA_REG("INDT4", 0xffc44008, 32) { GP_INDT(4) } }, - { PINMUX_DATA_REG("INDT5", 0xffc45008, 32) { GP_INDT(5) } }, - { PINMUX_DATA_REG("INDT6", 0xffc46008, 32) { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, GP_6_8_DATA, - GP_6_7_DATA, GP_6_6_DATA, GP_6_5_DATA, GP_6_4_DATA, - GP_6_3_DATA, GP_6_2_DATA, GP_6_1_DATA, GP_6_0_DATA } - }, { }, }; @@ -3596,8 +3543,6 @@ const struct sh_pfc_soc_info r8a7779_pinmux_info = { .unlock_reg = 0xfffc0000, /* PMMR */ - .input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END }, - .output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END }, .function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END }, .pins 
= pinmux_pins,
@@ -3608,7 +3553,6 @@ const struct sh_pfc_soc_info r8a7779_pinmux_info = {
 	.nr_functions = ARRAY_SIZE(pinmux_functions),
 
 	.cfg_regs = pinmux_config_regs,
-	.data_regs = pinmux_data_regs,
 
 	.gpio_data = pinmux_data,
 	.gpio_data_size = ARRAY_SIZE(pinmux_data),
--
GitLab

From ba774cc7380e83f942c08564d3c142af2fbd05be Mon Sep 17 00:00:00 2001
From: Laurent Pinchart
Date: Wed, 27 Mar 2013 11:06:37 +0100
Subject: [PATCH 0367/3163] sh-pfc: r8a7779: Split DU input and output pixel clocks

The output pixel clocks can be used without the input pixel clocks.
Split them into separate groups.

Signed-off-by: Laurent Pinchart
Signed-off-by: Simon Horman
---
 drivers/pinctrl/sh-pfc/pfc-r8a7779.c | 60 ++++++++++++++++------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
index e448ff1f408f..62dcdcdec940 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
@@ -1498,19 +1498,26 @@ static const unsigned int du0_rgb888_mux[] = {
 	DU0_DB7_MARK, DU0_DB6_MARK, DU0_DB5_MARK, DU0_DB4_MARK,
 	DU0_DB3_MARK, DU0_DB2_MARK, DU0_DB1_MARK, DU0_DB0_MARK,
 };
-static const unsigned int du0_clk_0_pins[] = {
-	/* CLKIN, CLKOUT */
-	29, 180,
+static const unsigned int du0_clk_in_pins[] = {
+	/* CLKIN */
+	29,
 };
-static const unsigned int du0_clk_0_mux[] = {
-	DU0_DOTCLKIN_MARK, DU0_DOTCLKOUT0_MARK,
+static const unsigned int du0_clk_in_mux[] = {
+	DU0_DOTCLKIN_MARK,
 };
-static const unsigned int du0_clk_1_pins[] = {
-	/* CLKIN, CLKOUT */
-	29, 30,
+static const unsigned int du0_clk_out_0_pins[] = {
+	/* CLKOUT */
+	180,
 };
-static const unsigned int du0_clk_1_mux[] = {
-	DU0_DOTCLKIN_MARK, DU0_DOTCLKOUT1_MARK,
+static const unsigned int du0_clk_out_0_mux[] = {
+	DU0_DOTCLKOUT0_MARK,
+};
+static const unsigned int du0_clk_out_1_pins[] = {
+	/* CLKOUT */
+	30,
+};
+static const unsigned int du0_clk_out_1_mux[] = {
+	DU0_DOTCLKOUT1_MARK,
 };
 static const unsigned int du0_sync_0_pins[] = {
 	/* VSYNC, HSYNC, DISP */
@@ -1571,12 +1578,19 @@ static const unsigned int du1_rgb888_mux[] = {
 	DU1_DB7_MARK, DU1_DB6_MARK, DU1_DB5_MARK, DU1_DB4_MARK,
 	DU1_DB3_MARK, DU1_DB2_MARK, DU1_DB1_MARK, DU1_DB0_MARK,
 };
-static const unsigned int du1_clk_pins[] = {
-	/* CLKIN, CLKOUT */
-	58, 59,
+static const unsigned int du1_clk_in_pins[] = {
+	/* CLKIN */
+	58,
+};
+static const unsigned int du1_clk_in_mux[] = {
+	DU1_DOTCLKIN_MARK,
+};
+static const unsigned int du1_clk_out_pins[] = {
+	/* CLKOUT */
+	59,
 };
-static const unsigned int du1_clk_mux[] = {
-	DU1_DOTCLKIN_MARK, DU1_DOTCLKOUT_MARK,
+static const unsigned int du1_clk_out_mux[] = {
+	DU1_DOTCLKOUT_MARK,
 };
 static const unsigned int du1_sync_0_pins[] = {
 	/* VSYNC, HSYNC, DISP */
@@ -2369,15 +2383,17 @@ static const unsigned int usb2_mux[] = {
 static const struct sh_pfc_pin_group pinmux_groups[] = {
 	SH_PFC_PIN_GROUP(du0_rgb666),
 	SH_PFC_PIN_GROUP(du0_rgb888),
-	SH_PFC_PIN_GROUP(du0_clk_0),
-	SH_PFC_PIN_GROUP(du0_clk_1),
+	SH_PFC_PIN_GROUP(du0_clk_in),
+	SH_PFC_PIN_GROUP(du0_clk_out_0),
+	SH_PFC_PIN_GROUP(du0_clk_out_1),
 	SH_PFC_PIN_GROUP(du0_sync_0),
 	SH_PFC_PIN_GROUP(du0_sync_1),
 	SH_PFC_PIN_GROUP(du0_oddf),
 	SH_PFC_PIN_GROUP(du0_cde),
 	SH_PFC_PIN_GROUP(du1_rgb666),
 	SH_PFC_PIN_GROUP(du1_rgb888),
-	SH_PFC_PIN_GROUP(du1_clk),
+	SH_PFC_PIN_GROUP(du1_clk_in),
+	SH_PFC_PIN_GROUP(du1_clk_out),
 	SH_PFC_PIN_GROUP(du1_sync_0),
 	SH_PFC_PIN_GROUP(du1_sync_1),
 	SH_PFC_PIN_GROUP(du1_oddf),
@@ -2492,8 +2508,9 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
 static const char * const du0_groups[] = {
 	"du0_rgb666",
 	"du0_rgb888",
-	"du0_clk_0",
-	"du0_clk_1",
+	"du0_clk_in",
+	"du0_clk_out_0",
+	"du0_clk_out_1",
 	"du0_sync_0",
 	"du0_sync_1",
 	"du0_oddf",
@@ -2503,7 +2520,8 @@ static const char * const du0_groups[] = {
 static const char * const du1_groups[] = {
 	"du1_rgb666",
 	"du1_rgb888",
-	"du1_clk",
+	"du1_clk_in",
+	"du1_clk_out",
 	"du1_sync_0",
 	"du1_sync_1",
 	"du1_oddf",
--
GitLab

From c98f6c21afaf4692886cea0f5b63ead9945d85cc Mon Sep 17 00:00:00 2001
From: Magnus Damm
Date: Tue, 26 Mar 2013 22:49:49 +0900
Subject: [PATCH 0368/3163] sh-pfc: Add r8a73a4 pinmux support

Add initial PFC support for the r8a73a4 SoC. At this point only the
GPIO interface is supported; the move to the newer interfaces is
planned as incremental changes.

The original author is Morimoto-san, with help from Yoshii-san; thanks
to them for the heavy lifting. Adjusted by Magnus to work together with
the updated code in drivers/pinctrl.

Signed-off-by: Kuninori Morimoto
Signed-off-by: Takashi Yoshii
Signed-off-by: Magnus Damm
Acked-by: Linus Walleij
Signed-off-by: Laurent Pinchart
Signed-off-by: Simon Horman
---
 arch/arm/mach-shmobile/include/mach/r8a73a4.h |  918 ++++++
 drivers/pinctrl/sh-pfc/Kconfig                |    5 +
 drivers/pinctrl/sh-pfc/Makefile               |    1 +
 drivers/pinctrl/sh-pfc/core.c                 |    3 +
 drivers/pinctrl/sh-pfc/core.h                 |    1 +
 drivers/pinctrl/sh-pfc/pfc-r8a73a4.c          | 2826 +++++++++++++++++
 6 files changed, 3754 insertions(+)
 create mode 100644 drivers/pinctrl/sh-pfc/pfc-r8a73a4.c

diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h
index f043103e32c9..f0b1b4a962b3 100644
--- a/arch/arm/mach-shmobile/include/mach/r8a73a4.h
+++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h
@@ -1,6 +1,924 @@
 #ifndef __ASM_R8A73A4_H__
 #define __ASM_R8A73A4_H__
 
+/*
+ * Pin Function Controller:
+ *	GPIO_FN_xx - GPIO used to select pin function
+ *	GPIO_PORTxx - GPIO mapped to real I/O pin on CPU
+ */
+enum {
+
+	/* PORT */
+	GPIO_PORT0, GPIO_PORT1, GPIO_PORT2, GPIO_PORT3, GPIO_PORT4,
+	GPIO_PORT5, GPIO_PORT6, GPIO_PORT7, GPIO_PORT8, GPIO_PORT9,
+
+	GPIO_PORT10, GPIO_PORT11, GPIO_PORT12, GPIO_PORT13, GPIO_PORT14,
+	GPIO_PORT15, GPIO_PORT16, GPIO_PORT17, GPIO_PORT18, GPIO_PORT19,
+
+	GPIO_PORT20, GPIO_PORT21, GPIO_PORT22, GPIO_PORT23, GPIO_PORT24,
+	GPIO_PORT25, GPIO_PORT26, GPIO_PORT27, GPIO_PORT28, GPIO_PORT29,
+
+	GPIO_PORT30, GPIO_PORT32, GPIO_PORT33, GPIO_PORT34,
+	GPIO_PORT35, GPIO_PORT36, GPIO_PORT37, GPIO_PORT38, GPIO_PORT39,
+
+	GPIO_PORT40, GPIO_PORT64,
+	GPIO_PORT65, GPIO_PORT66, GPIO_PORT67, GPIO_PORT68, GPIO_PORT69,
+
+	GPIO_PORT70, GPIO_PORT71, GPIO_PORT72, GPIO_PORT73, GPIO_PORT74,
+	GPIO_PORT75, GPIO_PORT76, GPIO_PORT77, GPIO_PORT78, GPIO_PORT79,
+
+	GPIO_PORT80, GPIO_PORT81, GPIO_PORT82, GPIO_PORT83, GPIO_PORT84,
+	GPIO_PORT85, GPIO_PORT96, GPIO_PORT97, GPIO_PORT98, GPIO_PORT99,
+
+	GPIO_PORT100, GPIO_PORT101, GPIO_PORT102, GPIO_PORT103, GPIO_PORT104,
+	GPIO_PORT105, GPIO_PORT106, GPIO_PORT107, GPIO_PORT108, GPIO_PORT109,
+
+	GPIO_PORT110, GPIO_PORT111, GPIO_PORT112, GPIO_PORT113, GPIO_PORT114,
+	GPIO_PORT115, GPIO_PORT116, GPIO_PORT117, GPIO_PORT118, GPIO_PORT119,
+
+	GPIO_PORT120, GPIO_PORT121, GPIO_PORT122, GPIO_PORT123, GPIO_PORT124,
+	GPIO_PORT125, GPIO_PORT126, GPIO_PORT128, GPIO_PORT129,
+
+	GPIO_PORT130, GPIO_PORT131, GPIO_PORT132, GPIO_PORT133, GPIO_PORT134,
+
+	GPIO_PORT160, GPIO_PORT161, GPIO_PORT162, GPIO_PORT163, GPIO_PORT164,
+	GPIO_PORT165, GPIO_PORT166, GPIO_PORT167, GPIO_PORT168, GPIO_PORT169,
+
+	GPIO_PORT170, GPIO_PORT171, GPIO_PORT172, GPIO_PORT173,
GPIO_PORT174, + GPIO_PORT175, GPIO_PORT176, GPIO_PORT177, GPIO_PORT178, + + GPIO_PORT192, GPIO_PORT193, GPIO_PORT194, + GPIO_PORT195, GPIO_PORT196, GPIO_PORT197, GPIO_PORT198, GPIO_PORT199, + + GPIO_PORT200, GPIO_PORT201, GPIO_PORT202, GPIO_PORT203, GPIO_PORT204, + GPIO_PORT205, GPIO_PORT206, GPIO_PORT207, GPIO_PORT208, GPIO_PORT209, + + GPIO_PORT210, GPIO_PORT211, GPIO_PORT212, GPIO_PORT213, GPIO_PORT214, + GPIO_PORT215, GPIO_PORT216, GPIO_PORT217, GPIO_PORT218, GPIO_PORT219, + + GPIO_PORT220, GPIO_PORT221, GPIO_PORT222, GPIO_PORT224, + GPIO_PORT225, GPIO_PORT226, GPIO_PORT227, GPIO_PORT228, GPIO_PORT229, + + GPIO_PORT230, GPIO_PORT231, GPIO_PORT232, GPIO_PORT233, GPIO_PORT234, + GPIO_PORT235, GPIO_PORT236, GPIO_PORT237, GPIO_PORT238, GPIO_PORT239, + + GPIO_PORT240, GPIO_PORT241, GPIO_PORT242, GPIO_PORT243, GPIO_PORT244, + GPIO_PORT245, GPIO_PORT246, GPIO_PORT247, GPIO_PORT248, GPIO_PORT249, + + GPIO_PORT250, GPIO_PORT256, GPIO_PORT257, GPIO_PORT258, GPIO_PORT259, + + GPIO_PORT260, GPIO_PORT261, GPIO_PORT262, GPIO_PORT263, GPIO_PORT264, + GPIO_PORT265, GPIO_PORT266, GPIO_PORT267, GPIO_PORT268, GPIO_PORT269, + + GPIO_PORT270, GPIO_PORT271, GPIO_PORT272, GPIO_PORT273, GPIO_PORT274, + GPIO_PORT275, GPIO_PORT276, GPIO_PORT277, GPIO_PORT278, GPIO_PORT279, + + GPIO_PORT280, GPIO_PORT281, GPIO_PORT282, GPIO_PORT283, + GPIO_PORT288, GPIO_PORT289, + + GPIO_PORT290, GPIO_PORT291, GPIO_PORT292, GPIO_PORT293, GPIO_PORT294, + GPIO_PORT295, GPIO_PORT296, GPIO_PORT297, GPIO_PORT298, GPIO_PORT299, + + GPIO_PORT300, GPIO_PORT301, GPIO_PORT302, GPIO_PORT303, GPIO_PORT304, + GPIO_PORT305, GPIO_PORT306, GPIO_PORT307, GPIO_PORT308, + + GPIO_PORT320, GPIO_PORT321, GPIO_PORT322, GPIO_PORT323, GPIO_PORT324, + GPIO_PORT325, GPIO_PORT326, GPIO_PORT327, GPIO_PORT328, GPIO_PORT329, + + /* Port0 */ + GPIO_FN_LCDD0, + GPIO_FN_PDM2_CLK_0, + GPIO_FN_DU0_DR0, + GPIO_FN_IRQ0, + + /* Port1 */ + GPIO_FN_LCDD1, + GPIO_FN_PDM2_DATA_1, + GPIO_FN_DU0_DR19, + GPIO_FN_IRQ1, + + /* Port2 */ + GPIO_FN_LCDD2, + GPIO_FN_PDM3_CLK_2, + GPIO_FN_DU0_DR2, + GPIO_FN_IRQ2, + + /* Port3 */ + GPIO_FN_LCDD3, + GPIO_FN_PDM3_DATA_3, + GPIO_FN_DU0_DR3, + GPIO_FN_IRQ3, + + /* Port4 */ + GPIO_FN_LCDD4, + GPIO_FN_PDM4_CLK_4, + GPIO_FN_DU0_DR4, + GPIO_FN_IRQ4, + + /* Port5 */ + GPIO_FN_LCDD5, + GPIO_FN_PDM4_DATA_5, + GPIO_FN_DU0_DR5, + GPIO_FN_IRQ5, + + /* Port6 */ + GPIO_FN_LCDD6, + GPIO_FN_PDM0_OUTCLK_6, + GPIO_FN_DU0_DR6, + GPIO_FN_IRQ6, + + /* Port7 */ + GPIO_FN_LCDD7, + GPIO_FN_PDM0_OUTDATA_7, + GPIO_FN_DU0_DR7, + GPIO_FN_IRQ7, + + /* Port8 */ + GPIO_FN_LCDD8, + GPIO_FN_PDM1_OUTCLK_8, + GPIO_FN_DU0_DG0, + GPIO_FN_IRQ8, + + /* Port9 */ + GPIO_FN_LCDD9, + GPIO_FN_PDM1_OUTDATA_9, + GPIO_FN_DU0_DG1, + GPIO_FN_IRQ9, + + /* Port10 */ + GPIO_FN_LCDD10, + GPIO_FN_FSICCK, + GPIO_FN_DU0_DG2, + GPIO_FN_IRQ10, + + /* Port11 */ + GPIO_FN_LCDD11, + GPIO_FN_FSICISLD, + GPIO_FN_DU0_DG3, + GPIO_FN_IRQ11, + + /* Port12 */ + GPIO_FN_LCDD12, + GPIO_FN_FSICOMC, + GPIO_FN_DU0_DG4, + GPIO_FN_IRQ12, + + /* Port13 */ + GPIO_FN_LCDD13, + GPIO_FN_FSICOLR, + GPIO_FN_FSICILR, + GPIO_FN_DU0_DG5, + GPIO_FN_IRQ13, + + /* Port14 */ + GPIO_FN_LCDD14, + GPIO_FN_FSICOBT, + GPIO_FN_FSICIBT, + GPIO_FN_DU0_DG6, + GPIO_FN_IRQ14, + + /* Port15 */ + GPIO_FN_LCDD15, + GPIO_FN_FSICOSLD, + GPIO_FN_DU0_DG7, + GPIO_FN_IRQ15, + + /* Port16 */ + GPIO_FN_LCDD16, + GPIO_FN_TPU1TO1, + GPIO_FN_DU0_DB0, + + /* Port17 */ + GPIO_FN_LCDD17, + GPIO_FN_SF_IRQ_00, + GPIO_FN_DU0_DB1, + + /* Port18 */ + GPIO_FN_LCDD18, + GPIO_FN_SF_IRQ_01, + GPIO_FN_DU0_DB2, + + /* Port19 */ + GPIO_FN_LCDD19, + 
GPIO_FN_SCIFB3_RTS_19, + GPIO_FN_DU0_DB3, + + /* Port20 */ + GPIO_FN_LCDD20, + GPIO_FN_SCIFB3_CTS_20, + GPIO_FN_DU0_DB4, + + /* Port21 */ + GPIO_FN_LCDD21, + GPIO_FN_SCIFB3_TXD_21, + GPIO_FN_DU0_DB5, + + /* Port22 */ + GPIO_FN_LCDD22, + GPIO_FN_SCIFB3_RXD_22, + GPIO_FN_DU0_DB6, + + /* Port23 */ + GPIO_FN_LCDD23, + GPIO_FN_SCIFB3_SCK_23, + GPIO_FN_DU0_DB7, + + /* Port24 */ + GPIO_FN_LCDHSYN, + GPIO_FN_LCDCS, + GPIO_FN_SCIFB1_RTS_24, + GPIO_FN_DU0_EXHSYNC_N_CSYNC_N_HSYNC_N, + + /* Port25 */ + GPIO_FN_LCDVSYN, + GPIO_FN_SCIFB1_CTS_25, + GPIO_FN_DU0_EXVSYNC_N_VSYNC_N_CSYNC_N, + + /* Port26 */ + GPIO_FN_LCDDCK, + GPIO_FN_LCDWR, + GPIO_FN_SCIFB1_TXD_26, + GPIO_FN_DU0_DOTCLKIN, + + /* Port27 */ + GPIO_FN_LCDDISP, + GPIO_FN_LCDRS, + GPIO_FN_SCIFB1_RXD_27, + GPIO_FN_DU0_DOTCLKOUT, + + /* Port28 */ + GPIO_FN_LCDRD_N, + GPIO_FN_SCIFB1_SCK_28, + GPIO_FN_DU0_DOTCLKOUTB, + + /* Port29 */ + GPIO_FN_LCDLCLK, + GPIO_FN_SF_IRQ_02, + GPIO_FN_DU0_DISP_CSYNC_N_DE, + + /* Port30 */ + GPIO_FN_LCDDON, + GPIO_FN_SF_IRQ_03, + GPIO_FN_DU0_ODDF_N_CLAMP, + + /* Port32 */ + GPIO_FN_SCIFA0_RTS, + GPIO_FN_SIM0_DET, + GPIO_FN_CSCIF0_RTS, + + /* Port33 */ + GPIO_FN_SCIFA0_CTS, + GPIO_FN_SIM1_DET, + GPIO_FN_CSCIF0_CTS, + + /* Port34 */ + GPIO_FN_SCIFA0_SCK, + GPIO_FN_SIM0_PWRON, + GPIO_FN_CSCIF0_SCK, + + /* Port35 */ + GPIO_FN_SCIFA1_RTS, + GPIO_FN_CSCIF1_RTS, + + /* Port36 */ + GPIO_FN_SCIFA1_CTS, + GPIO_FN_CSCIF1_CTS, + + /* Port37 */ + GPIO_FN_SCIFA1_SCK, + GPIO_FN_CSCIF1_SCK, + + /* Port38 */ + GPIO_FN_SCIFB0_RTS, + GPIO_FN_TPU0TO1, + GPIO_FN_SCIFB3_RTS_38, + GPIO_FN_CHSCIF0_HRTS, + + /* Port39 */ + GPIO_FN_SCIFB0_CTS, + GPIO_FN_TPU0TO2, + GPIO_FN_SCIFB3_CTS_39, + GPIO_FN_CHSCIF0_HCTS, + + /* Port40 */ + GPIO_FN_SCIFB0_SCK, + GPIO_FN_TPU0TO3, + GPIO_FN_SCIFB3_SCK_40, + GPIO_FN_CHSCIF0_HSCK, + + /* Port64 */ + GPIO_FN_PDM0_DATA, + + /* Port65 */ + GPIO_FN_PDM1_DATA, + + /* Port66 */ + GPIO_FN_HSI_RX_WAKE, + GPIO_FN_SCIFB2_CTS_66, + GPIO_FN_MSIOF3_SYNC, + GPIO_FN_GenIO4, + GPIO_FN_IRQ40, + + /* Port67 */ + GPIO_FN_HSI_RX_READY, + GPIO_FN_SCIFB1_TXD_67, + GPIO_FN_GIO_OUT3_67, + GPIO_FN_CHSCIF1_HTX, + + /* Port68 */ + GPIO_FN_HSI_RX_FLAG, + GPIO_FN_SCIFB2_TXD_68, + GPIO_FN_MSIOF3_TXD, + GPIO_FN_GIO_OUT4_68, + + /* Port69 */ + GPIO_FN_HSI_RX_DATA, + GPIO_FN_SCIFB2_RXD_69, + GPIO_FN_MSIOF3_RXD, + GPIO_FN_GIO_OUT5_69, + + /* Port70 */ + GPIO_FN_HSI_TX_FLAG, + GPIO_FN_SCIFB1_RTS_70, + GPIO_FN_GIO_OUT1_70, + GPIO_FN_HSIC_TSTCLK0, + GPIO_FN_CHSCIF1_HRTS, + + /* Port71 */ + GPIO_FN_HSI_TX_DATA, + GPIO_FN_SCIFB1_CTS_71, + GPIO_FN_GIO_OUT2_71, + GPIO_FN_HSIC_TSTCLK1, + GPIO_FN_CHSCIF1_HCTS, + + /* Port72 */ + GPIO_FN_HSI_TX_WAKE, + GPIO_FN_SCIFB1_RXD_72, + GPIO_FN_GenIO8, + GPIO_FN_CHSCIF1_HRX, + + /* Port73 */ + GPIO_FN_HSI_TX_READY, + GPIO_FN_SCIFB2_RTS_73, + GPIO_FN_MSIOF3_SCK, + GPIO_FN_GIO_OUT0_73, + + /* Port74 - Port85 */ + GPIO_FN_IRDA_OUT, + GPIO_FN_IRDA_IN, + GPIO_FN_IRDA_FIRSEL, + GPIO_FN_TPU0TO0, + GPIO_FN_DIGRFEN, + GPIO_FN_GPS_TIMESTAMP, + GPIO_FN_TXP, + GPIO_FN_TXP2, + GPIO_FN_COEX_0, + GPIO_FN_COEX_1, + GPIO_FN_IRQ19, + GPIO_FN_IRQ18, + + /* Port96 - Port101 */ + GPIO_FN_KEYIN0, + GPIO_FN_KEYIN1, + GPIO_FN_KEYIN2, + GPIO_FN_KEYIN3, + GPIO_FN_KEYIN4, + GPIO_FN_KEYIN5, + + /* Port102 */ + GPIO_FN_KEYIN6, + GPIO_FN_IRQ41, + + /* Port103 */ + GPIO_FN_KEYIN7, + GPIO_FN_IRQ42, + + /* Port104 - Port108 */ + GPIO_FN_KEYOUT0, + GPIO_FN_KEYOUT1, + GPIO_FN_KEYOUT2, + GPIO_FN_KEYOUT3, + GPIO_FN_KEYOUT4, + + /* Port109 */ + GPIO_FN_KEYOUT5, + GPIO_FN_IRQ43, + + /* Port110 */ + GPIO_FN_KEYOUT6, + GPIO_FN_IRQ44, + + /* Port111 */ + 
GPIO_FN_KEYOUT7, + GPIO_FN_RFANAEN, + GPIO_FN_IRQ45, + + /* Port112 */ + GPIO_FN_KEYIN8, + GPIO_FN_KEYOUT8, + GPIO_FN_SF_IRQ_04, + GPIO_FN_IRQ46, + + /* Port113 */ + GPIO_FN_KEYIN9, + GPIO_FN_KEYOUT9, + GPIO_FN_SF_IRQ_05, + GPIO_FN_IRQ47, + + /* Port114 */ + GPIO_FN_KEYIN10, + GPIO_FN_KEYOUT10, + GPIO_FN_SF_IRQ_06, + GPIO_FN_IRQ48, + + /* Port115 */ + GPIO_FN_KEYIN11, + GPIO_FN_KEYOUT11, + GPIO_FN_SF_IRQ_07, + GPIO_FN_IRQ49, + + /* Port116 */ + GPIO_FN_SCIFA0_TXD, + GPIO_FN_CSCIF0_TX, + + /* Port117 */ + GPIO_FN_SCIFA0_RXD, + GPIO_FN_CSCIF0_RX, + + /* Port118 */ + GPIO_FN_SCIFA1_TXD, + GPIO_FN_CSCIF1_TX, + + /* Port119 */ + GPIO_FN_SCIFA1_RXD, + GPIO_FN_CSCIF1_RX, + + /* Port120 */ + GPIO_FN_SF_PORT_1_120, + GPIO_FN_SCIFB3_RXD_120, + GPIO_FN_DU0_CDE, + + /* Port121 */ + GPIO_FN_SF_PORT_0_121, + GPIO_FN_SCIFB3_TXD_121, + + /* Port122 */ + GPIO_FN_SCIFB0_TXD, + GPIO_FN_CHSCIF0_HTX, + + /* Port123 */ + GPIO_FN_SCIFB0_RXD, + GPIO_FN_CHSCIF0_HRX, + + /* Port124 */ + GPIO_FN_ISP_STROBE_124, + + /* Port125 */ + GPIO_FN_STP_ISD_0, + GPIO_FN_PDM4_CLK_125, + GPIO_FN_MSIOF2_TXD, + GPIO_FN_SIM0_VOLTSEL0, + + /* Port126 */ + GPIO_FN_TS_SDEN, + GPIO_FN_MSIOF7_SYNC, + GPIO_FN_STP_ISEN_1, + + /* Port128 */ + GPIO_FN_STP_ISEN_0, + GPIO_FN_PDM1_OUTDATA_128, + GPIO_FN_MSIOF2_SYNC, + GPIO_FN_SIM1_VOLTSEL1, + + /* Port129 */ + GPIO_FN_TS_SPSYNC, + GPIO_FN_MSIOF7_RXD, + GPIO_FN_STP_ISSYNC_1, + + /* Port130 */ + GPIO_FN_STP_ISSYNC_0, + GPIO_FN_PDM4_DATA_130, + GPIO_FN_MSIOF2_RXD, + GPIO_FN_SIM0_VOLTSEL1, + + /* Port131 */ + GPIO_FN_STP_OPWM_0, + GPIO_FN_SIM1_PWRON, + + /* Port132 */ + GPIO_FN_TS_SCK, + GPIO_FN_MSIOF7_SCK, + GPIO_FN_STP_ISCLK_1, + + /* Port133 */ + GPIO_FN_STP_ISCLK_0, + GPIO_FN_PDM1_OUTCLK_133, + GPIO_FN_MSIOF2_SCK, + GPIO_FN_SIM1_VOLTSEL0, + + /* Port134 */ + GPIO_FN_TS_SDAT, + GPIO_FN_MSIOF7_TXD, + GPIO_FN_STP_ISD_1, + + /* Port160 - Port178 */ + GPIO_FN_IRQ20, + GPIO_FN_IRQ21, + GPIO_FN_IRQ22, + GPIO_FN_IRQ23, + GPIO_FN_MMCD0_0, + GPIO_FN_MMCD0_1, + GPIO_FN_MMCD0_2, + GPIO_FN_MMCD0_3, + GPIO_FN_MMCD0_4, + GPIO_FN_MMCD0_5, + GPIO_FN_MMCD0_6, + GPIO_FN_MMCD0_7, + GPIO_FN_MMCCMD0, + GPIO_FN_MMCCLK0, + GPIO_FN_MMCRST, + GPIO_FN_IRQ24, + GPIO_FN_IRQ25, + GPIO_FN_IRQ26, + GPIO_FN_IRQ27, + + /* Port192 - Port200 FN1 */ + GPIO_FN_A10, + GPIO_FN_A9, + GPIO_FN_A8, + GPIO_FN_A7, + GPIO_FN_A6, + GPIO_FN_A5, + GPIO_FN_A4, + GPIO_FN_A3, + GPIO_FN_A2, + + /* Port192 - Port200 FN2 */ + GPIO_FN_MMCD1_7, + GPIO_FN_MMCD1_6, + GPIO_FN_MMCD1_5, + GPIO_FN_MMCD1_4, + GPIO_FN_MMCD1_3, + GPIO_FN_MMCD1_2, + GPIO_FN_MMCD1_1, + GPIO_FN_MMCD1_0, + GPIO_FN_MMCCMD1, + + /* Port192 - Port200 IRQ */ + GPIO_FN_IRQ31, + GPIO_FN_IRQ32, + GPIO_FN_IRQ33, + GPIO_FN_IRQ34, + GPIO_FN_IRQ35, + GPIO_FN_IRQ36, + GPIO_FN_IRQ37, + GPIO_FN_IRQ38, + GPIO_FN_IRQ39, + + /* Port201 */ + GPIO_FN_A1, + + /* Port202 */ + GPIO_FN_A0, + GPIO_FN_BS, + + /* Port203 */ + GPIO_FN_CKO, + GPIO_FN_MMCCLK1, + + /* Port204 */ + GPIO_FN_CS0_N, + GPIO_FN_SIM0_GPO1, + + /* Port205 */ + GPIO_FN_CS2_N, + GPIO_FN_SIM0_GPO2, + + /* Port206 */ + GPIO_FN_CS4_N, + GPIO_FN_VIO_VD, + GPIO_FN_SIM1_GPO0, + + /* Port207 - Port212 FN1 */ + GPIO_FN_D15, + GPIO_FN_D14, + GPIO_FN_D13, + GPIO_FN_D12, + GPIO_FN_D11, + GPIO_FN_D10, + + /* Port207 - Port212 FN5 */ + GPIO_FN_GIO_OUT15, + GPIO_FN_GIO_OUT14, + GPIO_FN_GIO_OUT13, + GPIO_FN_GIO_OUT12, + GPIO_FN_WGM_TXP2, + GPIO_FN_WGM_GPS_TIMEM_ASK_RFCLK, + + /* Port213 - Port222 FN1 */ + GPIO_FN_D9, + GPIO_FN_D8, + GPIO_FN_D7, + GPIO_FN_D6, + GPIO_FN_D5, + GPIO_FN_D4, + GPIO_FN_D3, + GPIO_FN_D2, + GPIO_FN_D1, + GPIO_FN_D0, + + /* 
Port213 - Port222 FN2 */ + GPIO_FN_VIO_D9, + GPIO_FN_VIO_D8, + GPIO_FN_VIO_D7, + GPIO_FN_VIO_D6, + GPIO_FN_VIO_D5, + GPIO_FN_VIO_D4, + GPIO_FN_VIO_D3, + GPIO_FN_VIO_D2, + GPIO_FN_VIO_D1, + GPIO_FN_VIO_D0, + + /* Port213 - Port222 FN5 */ + GPIO_FN_GIO_OUT9, + GPIO_FN_GIO_OUT8, + GPIO_FN_GIO_OUT7, + GPIO_FN_GIO_OUT6, + GPIO_FN_GIO_OUT5_217, + GPIO_FN_GIO_OUT4_218, + GPIO_FN_GIO_OUT3_219, + GPIO_FN_GIO_OUT2_220, + GPIO_FN_GIO_OUT1_221, + GPIO_FN_GIO_OUT0_222, + + /* Port224 */ + GPIO_FN_RDWR_224, + GPIO_FN_VIO_HD, + GPIO_FN_SIM1_GPO2, + + /* Port225 */ + GPIO_FN_RD_N, + + /* Port226 */ + GPIO_FN_WAIT_N, + GPIO_FN_VIO_CLK, + GPIO_FN_SIM1_GPO1, + + /* Port227 */ + GPIO_FN_WE0_N, + GPIO_FN_RDWR_227, + + /* Port228 */ + GPIO_FN_WE1_N, + GPIO_FN_SIM0_GPO0, + + /* Port229 */ + GPIO_FN_PWMO, + GPIO_FN_VIO_CKO1_229, + + /* Port230 */ + GPIO_FN_SLIM_CLK, + GPIO_FN_VIO_CKO4_230, + + /* Port231 */ + GPIO_FN_SLIM_DATA, + GPIO_FN_VIO_CKO5_231, + + /* Port232 */ + GPIO_FN_VIO_CKO2_232, + GPIO_FN_SF_PORT_0_232, + + /* Port233 */ + GPIO_FN_VIO_CKO3_233, + GPIO_FN_SF_PORT_1_233, + + /* Port234 */ + GPIO_FN_FSIACK, + GPIO_FN_PDM3_CLK_234, + GPIO_FN_ISP_IRIS1_234, + + /* Port235 */ + GPIO_FN_FSIAISLD, + GPIO_FN_PDM3_DATA_235, + + /* Port236 */ + GPIO_FN_FSIAOMC, + GPIO_FN_PDM0_OUTCLK_236, + GPIO_FN_ISP_IRIS0_236, + + /* Port237 */ + GPIO_FN_FSIAOLR, + GPIO_FN_FSIAILR, + + /* Port238 */ + GPIO_FN_FSIAOBT, + GPIO_FN_FSIAIBT, + + /* Port239 */ + GPIO_FN_FSIAOSLD, + GPIO_FN_PDM0_OUTDATA_239, + + /* Port240 */ + GPIO_FN_FSIBISLD, + + /* Port241 */ + GPIO_FN_FSIBOLR, + GPIO_FN_FSIBILR, + + /* Port242 */ + GPIO_FN_FSIBOMC, + GPIO_FN_ISP_SHUTTER1_242, + + /* Port243 */ + GPIO_FN_FSIBOBT, + GPIO_FN_FSIBIBT, + + /* Port244 */ + GPIO_FN_FSIBOSLD, + GPIO_FN_FSIASPDIF, + + /* Port245 */ + GPIO_FN_FSIBCK, + GPIO_FN_ISP_SHUTTER0_245, + + /* Port246 - Port250 FN1 */ + GPIO_FN_ISP_IRIS1_246, + GPIO_FN_ISP_IRIS0_247, + GPIO_FN_ISP_SHUTTER1_248, + GPIO_FN_ISP_SHUTTER0_249, + GPIO_FN_ISP_STROBE_250, + + /* Port256 - Port258 */ + GPIO_FN_MSIOF0_SYNC, + GPIO_FN_MSIOF0_RXD, + GPIO_FN_MSIOF0_SCK, + + /* Port259 */ + GPIO_FN_MSIOF0_SS2, + GPIO_FN_VIO_CKO3_259, + + /* Port260 */ + GPIO_FN_MSIOF0_TXD, + + /* Port261 */ + GPIO_FN_SCIFB1_SCK_261, + GPIO_FN_CHSCIF1_HSCK, + + /* Port262 */ + GPIO_FN_SCIFB2_SCK_262, + + /* Port263 - Port266 FN1 */ + GPIO_FN_MSIOF1_SS2, + GPIO_FN_MSIOF1_TXD, + GPIO_FN_MSIOF1_RXD, + GPIO_FN_MSIOF1_SS1, + + /* Port263 - Port266 FN4 */ + GPIO_FN_MSIOF5_SS2, + GPIO_FN_MSIOF5_TXD, + GPIO_FN_MSIOF5_RXD, + GPIO_FN_MSIOF5_SS1, + + /* Port267 */ + GPIO_FN_MSIOF0_SS1, + + /* Port268 */ + GPIO_FN_MSIOF1_SCK, + GPIO_FN_MSIOF5_SCK, + + /* Port269 */ + GPIO_FN_MSIOF1_SYNC, + GPIO_FN_MSIOF5_SYNC, + + /* Port270 - Port273 FN1 */ + GPIO_FN_MSIOF2_SS1, + GPIO_FN_MSIOF2_SS2, + GPIO_FN_MSIOF3_SS2, + GPIO_FN_MSIOF3_SS1, + + /* Port270 - Port273 FN3 */ + GPIO_FN_VIO_CKO5_270, + GPIO_FN_VIO_CKO2_271, + GPIO_FN_VIO_CKO1_272, + GPIO_FN_VIO_CKO4_273, + + /* Port274 */ + GPIO_FN_MSIOF4_SS2, + GPIO_FN_TPU1TO0, + + /* Port275 - Port280 */ + GPIO_FN_IC_DP, + GPIO_FN_SIM0_RST, + GPIO_FN_IC_DM, + GPIO_FN_SIM0_BSICOMP, + GPIO_FN_SIM0_CLK, + GPIO_FN_SIM0_IO, + + /* Port281 */ + GPIO_FN_SIM1_IO, + GPIO_FN_PDM2_DATA_281, + + /* Port282 */ + GPIO_FN_SIM1_CLK, + GPIO_FN_PDM2_CLK_282, + + /* Port283 */ + GPIO_FN_SIM1_RST, + + /* Port289 */ + GPIO_FN_SDHID1_0, + GPIO_FN_STMDATA0_2, + + /* Port290 */ + GPIO_FN_SDHID1_1, + GPIO_FN_STMDATA1_2, + GPIO_FN_IRQ51, + + /* Port291 - Port294 FN1 */ + GPIO_FN_SDHID1_2, + GPIO_FN_SDHID1_3, + GPIO_FN_SDHICLK1, + 
GPIO_FN_SDHICMD1, + + /* Port291 - Port294 FN3 */ + GPIO_FN_STMDATA2_2, + GPIO_FN_STMDATA3_2, + GPIO_FN_STMCLK_2, + GPIO_FN_STMSIDI_2, + + /* Port295 */ + GPIO_FN_SDHID2_0, + GPIO_FN_MSIOF4_TXD, + GPIO_FN_SCIFB2_TXD_295, + GPIO_FN_MSIOF6_TXD, + + /* Port296 */ + GPIO_FN_SDHID2_1, + GPIO_FN_MSIOF6_SS2, + GPIO_FN_IRQ52, + + /* Port297 - Port300 FN1 */ + GPIO_FN_SDHID2_2, + GPIO_FN_SDHID2_3, + GPIO_FN_SDHICLK2, + GPIO_FN_SDHICMD2, + + /* Port297 - Port300 FN2 */ + GPIO_FN_MSIOF4_RXD, + GPIO_FN_MSIOF4_SYNC, + GPIO_FN_MSIOF4_SCK, + GPIO_FN_MSIOF4_SS1, + + /* Port297 - Port300 FN3 */ + GPIO_FN_SCIFB2_RXD_297, + GPIO_FN_SCIFB2_CTS_298, + GPIO_FN_SCIFB2_SCK_299, + GPIO_FN_SCIFB2_RTS_300, + + /* Port297 - Port300 FN4 */ + GPIO_FN_MSIOF6_RXD, + GPIO_FN_MSIOF6_SYNC, + GPIO_FN_MSIOF6_SCK, + GPIO_FN_MSIOF6_SS1, + + /* Port301 */ + GPIO_FN_SDHICD0, + GPIO_FN_IRQ50, + + /* Port302 - Port306 FN1 */ + GPIO_FN_SDHID0_0, + GPIO_FN_SDHID0_1, + GPIO_FN_SDHID0_2, + GPIO_FN_SDHID0_3, + GPIO_FN_SDHICMD0, + + /* Port302 - Port306 FN3 */ + GPIO_FN_STMDATA0_1, + GPIO_FN_STMDATA1_1, + GPIO_FN_STMDATA2_1, + GPIO_FN_STMDATA3_1, + GPIO_FN_STMSIDI_1, + + /* Port307 */ + GPIO_FN_SDHIWP0, + + /* Port308 */ + GPIO_FN_SDHICLK0, + GPIO_FN_STMCLK_1, + + /* Port320 - Port329 */ + GPIO_FN_IRQ16, + GPIO_FN_IRQ17, + GPIO_FN_IRQ28, + GPIO_FN_IRQ29, + GPIO_FN_IRQ30, + GPIO_FN_IRQ53, + GPIO_FN_IRQ54, + GPIO_FN_IRQ55, + GPIO_FN_IRQ56, + GPIO_FN_IRQ57, +}; + void r8a73a4_add_standard_devices(void); void r8a73a4_clock_init(void); void r8a73a4_pinmux_init(void); diff --git a/drivers/pinctrl/sh-pfc/Kconfig b/drivers/pinctrl/sh-pfc/Kconfig index af16f8f6ab6c..0e1f99c33d47 100644 --- a/drivers/pinctrl/sh-pfc/Kconfig +++ b/drivers/pinctrl/sh-pfc/Kconfig @@ -22,6 +22,11 @@ config GPIO_SH_PFC This enables support for GPIOs within the SoC's pin function controller. 
+config PINCTRL_PFC_R8A73A4 + def_bool y + depends on ARCH_R8A73A4 + select PINCTRL_SH_PFC + config PINCTRL_PFC_R8A7740 def_bool y depends on ARCH_R8A7740 diff --git a/drivers/pinctrl/sh-pfc/Makefile b/drivers/pinctrl/sh-pfc/Makefile index e8b9562c47e1..211cd8e98a8a 100644 --- a/drivers/pinctrl/sh-pfc/Makefile +++ b/drivers/pinctrl/sh-pfc/Makefile @@ -3,6 +3,7 @@ ifeq ($(CONFIG_GPIO_SH_PFC),y) sh-pfc-objs += gpio.o endif obj-$(CONFIG_PINCTRL_SH_PFC) += sh-pfc.o +obj-$(CONFIG_PINCTRL_PFC_R8A73A4) += pfc-r8a73a4.o obj-$(CONFIG_PINCTRL_PFC_R8A7740) += pfc-r8a7740.o obj-$(CONFIG_PINCTRL_PFC_R8A7779) += pfc-r8a7779.o obj-$(CONFIG_PINCTRL_PFC_SH7203) += pfc-sh7203.o diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c index ced9a95aa1fc..b551336924a5 100644 --- a/drivers/pinctrl/sh-pfc/core.c +++ b/drivers/pinctrl/sh-pfc/core.c @@ -418,6 +418,9 @@ static int sh_pfc_remove(struct platform_device *pdev) } static const struct platform_device_id sh_pfc_id_table[] = { +#ifdef CONFIG_PINCTRL_PFC_R8A73A4 + { "pfc-r8a73a4", (kernel_ulong_t)&r8a73a4_pinmux_info }, +#endif #ifdef CONFIG_PINCTRL_PFC_R8A7740 { "pfc-r8a7740", (kernel_ulong_t)&r8a7740_pinmux_info }, #endif diff --git a/drivers/pinctrl/sh-pfc/core.h b/drivers/pinctrl/sh-pfc/core.h index 763d717ca979..89cb4289d761 100644 --- a/drivers/pinctrl/sh-pfc/core.h +++ b/drivers/pinctrl/sh-pfc/core.h @@ -54,6 +54,7 @@ void sh_pfc_write_raw_reg(void __iomem *mapped_reg, unsigned long reg_width, int sh_pfc_get_pin_index(struct sh_pfc *pfc, unsigned int pin); int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type); +extern const struct sh_pfc_soc_info r8a73a4_pinmux_info; extern const struct sh_pfc_soc_info r8a7740_pinmux_info; extern const struct sh_pfc_soc_info r8a7779_pinmux_info; extern const struct sh_pfc_soc_info sh7203_pinmux_info; diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c new file mode 100644 index 000000000000..47d75d5548eb --- /dev/null +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -0,0 +1,2826 @@ +/* + * Copyright (C) 2012-2013 Renesas Solutions Corp. + * Copyright (C) 2013 Magnus Damm + * Copyright (C) 2012 Kuninori Morimoto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include + +#include "sh_pfc.h" + +#define CPU_ALL_PORT(fn, pfx, sfx) \ + /* Port0 - Port30 */ \ + PORT_10(fn, pfx, sfx), \ + PORT_10(fn, pfx##1, sfx), \ + PORT_10(fn, pfx##2, sfx), \ + PORT_1(fn, pfx##30, sfx), \ + /* Port32 - Port40 */ \ + PORT_1(fn, pfx##32, sfx), PORT_1(fn, pfx##33, sfx), \ + PORT_1(fn, pfx##34, sfx), PORT_1(fn, pfx##35, sfx), \ + PORT_1(fn, pfx##36, sfx), PORT_1(fn, pfx##37, sfx), \ + PORT_1(fn, pfx##38, sfx), PORT_1(fn, pfx##39, sfx), \ + PORT_1(fn, pfx##40, sfx), \ + /* Port64 - Port85 */ \ + PORT_1(fn, pfx##64, sfx), PORT_1(fn, pfx##65, sfx), \ + PORT_1(fn, pfx##66, sfx), PORT_1(fn, pfx##67, sfx), \ + PORT_1(fn, pfx##68, sfx), PORT_1(fn, pfx##69, sfx), \ + PORT_10(fn, pfx##7, sfx), \ + PORT_1(fn, pfx##80, sfx), PORT_1(fn, pfx##81, sfx), \ + PORT_1(fn, pfx##82, sfx), PORT_1(fn, pfx##83, sfx), \ + PORT_1(fn, pfx##84, sfx), PORT_1(fn, pfx##85, sfx), \ + /* Port96 - Port126 */ \ + PORT_1(fn, pfx##96, sfx), PORT_1(fn, pfx##97, sfx), \ + PORT_1(fn, pfx##98, sfx), PORT_1(fn, pfx##99, sfx), \ + PORT_10(fn, pfx##10, sfx), \ + PORT_10(fn, pfx##11, sfx), \ + PORT_1(fn, pfx##120, sfx), PORT_1(fn, pfx##121, sfx), \ + PORT_1(fn, pfx##122, sfx), PORT_1(fn, pfx##123, sfx), \ + PORT_1(fn, pfx##124, sfx), PORT_1(fn, pfx##125, sfx), \ + PORT_1(fn, pfx##126, sfx), \ + /* Port128 - Port134 */ \ + PORT_1(fn, pfx##128, sfx), PORT_1(fn, pfx##129, sfx), \ + PORT_1(fn, pfx##130, sfx), PORT_1(fn, pfx##131, sfx), \ + PORT_1(fn, pfx##132, sfx), PORT_1(fn, pfx##133, sfx), \ + PORT_1(fn, pfx##134, sfx), \ + /* Port160 - Port178 */ \ + PORT_10(fn, pfx##16, sfx), \ + PORT_1(fn, pfx##170, sfx), PORT_1(fn, pfx##171, sfx), \ + PORT_1(fn, pfx##172, sfx), PORT_1(fn, pfx##173, sfx), \ + PORT_1(fn, pfx##174, sfx), PORT_1(fn, pfx##175, sfx), \ + PORT_1(fn, pfx##176, sfx), PORT_1(fn, pfx##177, sfx), \ + PORT_1(fn, pfx##178, sfx), \ + /* Port192 - Port222 */ \ + PORT_1(fn, pfx##192, sfx), PORT_1(fn, pfx##193, sfx), \ + PORT_1(fn, pfx##194, sfx), PORT_1(fn, pfx##195, sfx), \ + PORT_1(fn, pfx##196, sfx), PORT_1(fn, pfx##197, sfx), \ + PORT_1(fn, pfx##198, sfx), PORT_1(fn, pfx##199, sfx), \ + PORT_10(fn, pfx##20, sfx), \ + PORT_10(fn, pfx##21, sfx), \ + PORT_1(fn, pfx##220, sfx), PORT_1(fn, pfx##221, sfx), \ + PORT_1(fn, pfx##222, sfx), \ + /* Port224 - Port250 */ \ + PORT_1(fn, pfx##224, sfx), PORT_1(fn, pfx##225, sfx), \ + PORT_1(fn, pfx##226, sfx), PORT_1(fn, pfx##227, sfx), \ + PORT_1(fn, pfx##228, sfx), PORT_1(fn, pfx##229, sfx), \ + PORT_10(fn, pfx##23, sfx), \ + PORT_10(fn, pfx##24, sfx), \ + PORT_1(fn, pfx##250, sfx), \ + /* Port256 - Port283 */ \ + PORT_1(fn, pfx##256, sfx), PORT_1(fn, pfx##257, sfx), \ + PORT_1(fn, pfx##258, sfx), PORT_1(fn, pfx##259, sfx), \ + PORT_10(fn, pfx##26, sfx), \ + PORT_10(fn, pfx##27, sfx), \ + PORT_1(fn, pfx##280, sfx), PORT_1(fn, pfx##281, sfx), \ + PORT_1(fn, pfx##282, sfx), PORT_1(fn, pfx##283, sfx), \ + /* Port288 - Port308 */ \ + PORT_1(fn, pfx##288, sfx), PORT_1(fn, pfx##289, sfx), \ + PORT_10(fn, pfx##29, sfx), \ + PORT_1(fn, pfx##300, sfx), PORT_1(fn, pfx##301, sfx), \ + PORT_1(fn, pfx##302, sfx), PORT_1(fn, pfx##303, sfx), \ + PORT_1(fn, pfx##304, sfx), PORT_1(fn, pfx##305, sfx), \ + PORT_1(fn, pfx##306, sfx), PORT_1(fn, pfx##307, sfx), \ + PORT_1(fn, pfx##308, sfx), \ + /* Port320 - Port329 */ \ + PORT_10(fn, pfx##32, sfx) + 
+ +enum { + PINMUX_RESERVED = 0, + + /* PORT0_DATA -> PORT329_DATA */ + PINMUX_DATA_BEGIN, + PORT_ALL(DATA), + PINMUX_DATA_END, + + /* PORT0_IN -> PORT329_IN */ + PINMUX_INPUT_BEGIN, + PORT_ALL(IN), + PINMUX_INPUT_END, + + /* PORT0_IN_PU -> PORT329_IN_PU */ + PINMUX_INPUT_PULLUP_BEGIN, + PORT_ALL(IN_PU), + PINMUX_INPUT_PULLUP_END, + + /* PORT0_IN_PD -> PORT329_IN_PD */ + PINMUX_INPUT_PULLDOWN_BEGIN, + PORT_ALL(IN_PD), + PINMUX_INPUT_PULLDOWN_END, + + /* PORT0_OUT -> PORT329_OUT */ + PINMUX_OUTPUT_BEGIN, + PORT_ALL(OUT), + PINMUX_OUTPUT_END, + + PINMUX_FUNCTION_BEGIN, + PORT_ALL(FN_IN), /* PORT0_FN_IN -> PORT329_FN_IN */ + PORT_ALL(FN_OUT), /* PORT0_FN_OUT -> PORT329_FN_OUT */ + PORT_ALL(FN0), /* PORT0_FN0 -> PORT329_FN0 */ + PORT_ALL(FN1), /* PORT0_FN1 -> PORT329_FN1 */ + PORT_ALL(FN2), /* PORT0_FN2 -> PORT329_FN2 */ + PORT_ALL(FN3), /* PORT0_FN3 -> PORT329_FN3 */ + PORT_ALL(FN4), /* PORT0_FN4 -> PORT329_FN4 */ + PORT_ALL(FN5), /* PORT0_FN5 -> PORT329_FN5 */ + PORT_ALL(FN6), /* PORT0_FN6 -> PORT329_FN6 */ + PORT_ALL(FN7), /* PORT0_FN7 -> PORT329_FN7 */ + + MSEL1CR_31_0, MSEL1CR_31_1, + MSEL1CR_27_0, MSEL1CR_27_1, + MSEL1CR_25_0, MSEL1CR_25_1, + MSEL1CR_24_0, MSEL1CR_24_1, + MSEL1CR_22_0, MSEL1CR_22_1, + MSEL1CR_21_0, MSEL1CR_21_1, + MSEL1CR_20_0, MSEL1CR_20_1, + MSEL1CR_19_0, MSEL1CR_19_1, + MSEL1CR_18_0, MSEL1CR_18_1, + MSEL1CR_17_0, MSEL1CR_17_1, + MSEL1CR_16_0, MSEL1CR_16_1, + MSEL1CR_15_0, MSEL1CR_15_1, + MSEL1CR_14_0, MSEL1CR_14_1, + MSEL1CR_13_0, MSEL1CR_13_1, + MSEL1CR_12_0, MSEL1CR_12_1, + MSEL1CR_11_0, MSEL1CR_11_1, + MSEL1CR_10_0, MSEL1CR_10_1, + MSEL1CR_09_0, MSEL1CR_09_1, + MSEL1CR_08_0, MSEL1CR_08_1, + MSEL1CR_07_0, MSEL1CR_07_1, + MSEL1CR_06_0, MSEL1CR_06_1, + MSEL1CR_05_0, MSEL1CR_05_1, + MSEL1CR_04_0, MSEL1CR_04_1, + MSEL1CR_03_0, MSEL1CR_03_1, + MSEL1CR_02_0, MSEL1CR_02_1, + MSEL1CR_01_0, MSEL1CR_01_1, + MSEL1CR_00_0, MSEL1CR_00_1, + + MSEL3CR_31_0, MSEL3CR_31_1, + MSEL3CR_28_0, MSEL3CR_28_1, + MSEL3CR_27_0, MSEL3CR_27_1, + MSEL3CR_26_0, MSEL3CR_26_1, + MSEL3CR_23_0, MSEL3CR_23_1, + MSEL3CR_22_0, MSEL3CR_22_1, + MSEL3CR_21_0, MSEL3CR_21_1, + MSEL3CR_20_0, MSEL3CR_20_1, + MSEL3CR_19_0, MSEL3CR_19_1, + MSEL3CR_18_0, MSEL3CR_18_1, + MSEL3CR_17_0, MSEL3CR_17_1, + MSEL3CR_16_0, MSEL3CR_16_1, + MSEL3CR_15_0, MSEL3CR_15_1, + MSEL3CR_12_0, MSEL3CR_12_1, + MSEL3CR_11_0, MSEL3CR_11_1, + MSEL3CR_10_0, MSEL3CR_10_1, + MSEL3CR_09_0, MSEL3CR_09_1, + MSEL3CR_06_0, MSEL3CR_06_1, + MSEL3CR_03_0, MSEL3CR_03_1, + MSEL3CR_01_0, MSEL3CR_01_1, + MSEL3CR_00_0, MSEL3CR_00_1, + + MSEL4CR_30_0, MSEL4CR_30_1, + MSEL4CR_29_0, MSEL4CR_29_1, + MSEL4CR_28_0, MSEL4CR_28_1, + MSEL4CR_27_0, MSEL4CR_27_1, + MSEL4CR_26_0, MSEL4CR_26_1, + MSEL4CR_25_0, MSEL4CR_25_1, + MSEL4CR_24_0, MSEL4CR_24_1, + MSEL4CR_23_0, MSEL4CR_23_1, + MSEL4CR_22_0, MSEL4CR_22_1, + MSEL4CR_21_0, MSEL4CR_21_1, + MSEL4CR_20_0, MSEL4CR_20_1, + MSEL4CR_19_0, MSEL4CR_19_1, + MSEL4CR_18_0, MSEL4CR_18_1, + MSEL4CR_17_0, MSEL4CR_17_1, + MSEL4CR_16_0, MSEL4CR_16_1, + MSEL4CR_15_0, MSEL4CR_15_1, + MSEL4CR_14_0, MSEL4CR_14_1, + MSEL4CR_13_0, MSEL4CR_13_1, + MSEL4CR_12_0, MSEL4CR_12_1, + MSEL4CR_11_0, MSEL4CR_11_1, + MSEL4CR_10_0, MSEL4CR_10_1, + MSEL4CR_09_0, MSEL4CR_09_1, + MSEL4CR_07_0, MSEL4CR_07_1, + MSEL4CR_04_0, MSEL4CR_04_1, + MSEL4CR_01_0, MSEL4CR_01_1, + + MSEL5CR_31_0, MSEL5CR_31_1, + MSEL5CR_30_0, MSEL5CR_30_1, + MSEL5CR_29_0, MSEL5CR_29_1, + MSEL5CR_28_0, MSEL5CR_28_1, + MSEL5CR_27_0, MSEL5CR_27_1, + MSEL5CR_26_0, MSEL5CR_26_1, + MSEL5CR_25_0, MSEL5CR_25_1, + MSEL5CR_24_0, MSEL5CR_24_1, + MSEL5CR_23_0, MSEL5CR_23_1, + MSEL5CR_22_0, 
MSEL5CR_22_1, + MSEL5CR_21_0, MSEL5CR_21_1, + MSEL5CR_20_0, MSEL5CR_20_1, + MSEL5CR_19_0, MSEL5CR_19_1, + MSEL5CR_18_0, MSEL5CR_18_1, + MSEL5CR_17_0, MSEL5CR_17_1, + MSEL5CR_16_0, MSEL5CR_16_1, + MSEL5CR_15_0, MSEL5CR_15_1, + MSEL5CR_14_0, MSEL5CR_14_1, + MSEL5CR_13_0, MSEL5CR_13_1, + MSEL5CR_12_0, MSEL5CR_12_1, + MSEL5CR_11_0, MSEL5CR_11_1, + MSEL5CR_10_0, MSEL5CR_10_1, + MSEL5CR_09_0, MSEL5CR_09_1, + MSEL5CR_08_0, MSEL5CR_08_1, + MSEL5CR_07_0, MSEL5CR_07_1, + MSEL5CR_06_0, MSEL5CR_06_1, + + MSEL8CR_16_0, MSEL8CR_16_1, + MSEL8CR_01_0, MSEL8CR_01_1, + MSEL8CR_00_0, MSEL8CR_00_1, + + PINMUX_FUNCTION_END, + + PINMUX_MARK_BEGIN, + + +#define F1(a) a##_MARK +#define F2(a) a##_MARK +#define F3(a) a##_MARK +#define F4(a) a##_MARK +#define F5(a) a##_MARK +#define F6(a) a##_MARK +#define F7(a) a##_MARK +#define IRQ(a) IRQ##a##_MARK + + F1(LCDD0), F3(PDM2_CLK_0), F7(DU0_DR0), IRQ(0), /* Port0 */ + F1(LCDD1), F3(PDM2_DATA_1), F7(DU0_DR19), IRQ(1), + F1(LCDD2), F3(PDM3_CLK_2), F7(DU0_DR2), IRQ(2), + F1(LCDD3), F3(PDM3_DATA_3), F7(DU0_DR3), IRQ(3), + F1(LCDD4), F3(PDM4_CLK_4), F7(DU0_DR4), IRQ(4), + F1(LCDD5), F3(PDM4_DATA_5), F7(DU0_DR5), IRQ(5), + F1(LCDD6), F3(PDM0_OUTCLK_6), F7(DU0_DR6), IRQ(6), + F1(LCDD7), F3(PDM0_OUTDATA_7), F7(DU0_DR7), IRQ(7), + F1(LCDD8), F3(PDM1_OUTCLK_8), F7(DU0_DG0), IRQ(8), + F1(LCDD9), F3(PDM1_OUTDATA_9), F7(DU0_DG1), IRQ(9), + F1(LCDD10), F3(FSICCK), F7(DU0_DG2), IRQ(10), /* Port10 */ + F1(LCDD11), F3(FSICISLD), F7(DU0_DG3), IRQ(11), + F1(LCDD12), F3(FSICOMC), F7(DU0_DG4), IRQ(12), + F1(LCDD13), F3(FSICOLR), F4(FSICILR), F7(DU0_DG5), IRQ(13), + F1(LCDD14), F3(FSICOBT), F4(FSICIBT), F7(DU0_DG6), IRQ(14), + F1(LCDD15), F3(FSICOSLD), F7(DU0_DG7), IRQ(15), + F1(LCDD16), F4(TPU1TO1), F7(DU0_DB0), + F1(LCDD17), F4(SF_IRQ_00), F7(DU0_DB1), + F1(LCDD18), F4(SF_IRQ_01), F7(DU0_DB2), + F1(LCDD19), F3(SCIFB3_RTS_19), F7(DU0_DB3), + F1(LCDD20), F3(SCIFB3_CTS_20), F7(DU0_DB4), /* Port20 */ + F1(LCDD21), F3(SCIFB3_TXD_21), F7(DU0_DB5), + F1(LCDD22), F3(SCIFB3_RXD_22), F7(DU0_DB6), + F1(LCDD23), F3(SCIFB3_SCK_23), F7(DU0_DB7), + F1(LCDHSYN), F2(LCDCS), F3(SCIFB1_RTS_24), + F7(DU0_EXHSYNC_N_CSYNC_N_HSYNC_N), + F1(LCDVSYN), F3(SCIFB1_CTS_25), F7(DU0_EXVSYNC_N_VSYNC_N_CSYNC_N), + F1(LCDDCK), F2(LCDWR), F3(SCIFB1_TXD_26), F7(DU0_DOTCLKIN), + F1(LCDDISP), F2(LCDRS), F3(SCIFB1_RXD_27), F7(DU0_DOTCLKOUT), + F1(LCDRD_N), F3(SCIFB1_SCK_28), F7(DU0_DOTCLKOUTB), + F1(LCDLCLK), F4(SF_IRQ_02), F7(DU0_DISP_CSYNC_N_DE), + F1(LCDDON), F4(SF_IRQ_03), F7(DU0_ODDF_N_CLAMP), /* Port30 */ + + F1(SCIFA0_RTS), F5(SIM0_DET), F7(CSCIF0_RTS), /* Port32 */ + F1(SCIFA0_CTS), F5(SIM1_DET), F7(CSCIF0_CTS), + F1(SCIFA0_SCK), F5(SIM0_PWRON), F7(CSCIF0_SCK), + F1(SCIFA1_RTS), F7(CSCIF1_RTS), + F1(SCIFA1_CTS), F7(CSCIF1_CTS), + F1(SCIFA1_SCK), F7(CSCIF1_SCK), + F1(SCIFB0_RTS), F3(TPU0TO1), F4(SCIFB3_RTS_38), F7(CHSCIF0_HRTS), + F1(SCIFB0_CTS), F3(TPU0TO2), F4(SCIFB3_CTS_39), F7(CHSCIF0_HCTS), + F1(SCIFB0_SCK), F3(TPU0TO3), F4(SCIFB3_SCK_40), + F7(CHSCIF0_HSCK), /* Port40 */ + + F1(PDM0_DATA), /* Port64 */ + F1(PDM1_DATA), + F1(HSI_RX_WAKE), F2(SCIFB2_CTS_66), F3(MSIOF3_SYNC), F5(GenIO4), + IRQ(40), + F1(HSI_RX_READY), F2(SCIFB1_TXD_67), F5(GIO_OUT3_67), F7(CHSCIF1_HTX), + F1(HSI_RX_FLAG), F2(SCIFB2_TXD_68), F3(MSIOF3_TXD), F5(GIO_OUT4_68), + F1(HSI_RX_DATA), F2(SCIFB2_RXD_69), F3(MSIOF3_RXD), F5(GIO_OUT5_69), + F1(HSI_TX_FLAG), F2(SCIFB1_RTS_70), F5(GIO_OUT1_70), F6(HSIC_TSTCLK0), + F7(CHSCIF1_HRTS), /* Port70 */ + F1(HSI_TX_DATA), F2(SCIFB1_CTS_71), F5(GIO_OUT2_71), F6(HSIC_TSTCLK1), + F7(CHSCIF1_HCTS), + 
F1(HSI_TX_WAKE), F2(SCIFB1_RXD_72), F5(GenIO8), F7(CHSCIF1_HRX), + F1(HSI_TX_READY), F2(SCIFB2_RTS_73), F3(MSIOF3_SCK), F5(GIO_OUT0_73), + F1(IRDA_OUT), F1(IRDA_IN), F1(IRDA_FIRSEL), F1(TPU0TO0), + F1(DIGRFEN), F1(GPS_TIMESTAMP), F1(TXP), /* Port80 */ + F1(TXP2), F1(COEX_0), F1(COEX_1), IRQ(19), IRQ(18), /* Port85 */ + + F1(KEYIN0), /* Port96 */ + F1(KEYIN1), F1(KEYIN2), F1(KEYIN3), F1(KEYIN4), /* Port100 */ + F1(KEYIN5), F1(KEYIN6), IRQ(41), F1(KEYIN7), IRQ(42), + F2(KEYOUT0), F2(KEYOUT1), F2(KEYOUT2), F2(KEYOUT3), + F2(KEYOUT4), F2(KEYOUT5), IRQ(43), F2(KEYOUT6), IRQ(44), /* Port110 */ + F2(KEYOUT7), F5(RFANAEN), IRQ(45), + F1(KEYIN8), F2(KEYOUT8), F4(SF_IRQ_04), IRQ(46), + F1(KEYIN9), F2(KEYOUT9), F4(SF_IRQ_05), IRQ(47), + F1(KEYIN10), F2(KEYOUT10), F4(SF_IRQ_06), IRQ(48), + F1(KEYIN11), F2(KEYOUT11), F4(SF_IRQ_07), IRQ(49), + F1(SCIFA0_TXD), F7(CSCIF0_TX), F1(SCIFA0_RXD), F7(CSCIF0_RX), + F1(SCIFA1_TXD), F7(CSCIF1_TX), F1(SCIFA1_RXD), F7(CSCIF1_RX), + F3(SF_PORT_1_120), F4(SCIFB3_RXD_120), F7(DU0_CDE), /* Port120 */ + F3(SF_PORT_0_121), F4(SCIFB3_TXD_121), + F1(SCIFB0_TXD), F7(CHSCIF0_HTX), + F1(SCIFB0_RXD), F7(CHSCIF0_HRX), F3(ISP_STROBE_124), + F1(STP_ISD_0), F2(PDM4_CLK_125), F3(MSIOF2_TXD), F5(SIM0_VOLTSEL0), + F1(TS_SDEN), F2(MSIOF7_SYNC), F3(STP_ISEN_1), + F1(STP_ISEN_0), F2(PDM1_OUTDATA_128), F3(MSIOF2_SYNC), + F5(SIM1_VOLTSEL1), F1(TS_SPSYNC), F2(MSIOF7_RXD), F3(STP_ISSYNC_1), + F1(STP_ISSYNC_0), F2(PDM4_DATA_130), F3(MSIOF2_RXD), + F5(SIM0_VOLTSEL1), /* Port130 */ + F1(STP_OPWM_0), F5(SIM1_PWRON), F1(TS_SCK), F2(MSIOF7_SCK), + F3(STP_ISCLK_1), F1(STP_ISCLK_0), F2(PDM1_OUTCLK_133), F3(MSIOF2_SCK), + F5(SIM1_VOLTSEL0), F1(TS_SDAT), F2(MSIOF7_TXD), F3(STP_ISD_1), + IRQ(20), /* Port160 */ + IRQ(21), IRQ(22), IRQ(23), + F1(MMCD0_0), F1(MMCD0_1), F1(MMCD0_2), F1(MMCD0_3), + F1(MMCD0_4), F1(MMCD0_5), F1(MMCD0_6), /* Port170 */ + F1(MMCD0_7), F1(MMCCMD0), F1(MMCCLK0), F1(MMCRST), + IRQ(24), IRQ(25), IRQ(26), IRQ(27), + F1(A10), F2(MMCD1_7), IRQ(31), /* Port192 */ + F1(A9), F2(MMCD1_6), IRQ(32), + F1(A8), F2(MMCD1_5), IRQ(33), + F1(A7), F2(MMCD1_4), IRQ(34), + F1(A6), F2(MMCD1_3), IRQ(35), + F1(A5), F2(MMCD1_2), IRQ(36), + F1(A4), F2(MMCD1_1), IRQ(37), + F1(A3), F2(MMCD1_0), IRQ(38), + F1(A2), F2(MMCCMD1), IRQ(39), /* Port200 */ + F1(A1), + F1(A0), F2(BS), + F1(CKO), F2(MMCCLK1), + F1(CS0_N), F5(SIM0_GPO1), + F1(CS2_N), F5(SIM0_GPO2), + F1(CS4_N), F2(VIO_VD), F5(SIM1_GPO0), + F1(D15), F5(GIO_OUT15), + F1(D14), F5(GIO_OUT14), + F1(D13), F5(GIO_OUT13), + F1(D12), F5(GIO_OUT12), /* Port210 */ + F1(D11), F5(WGM_TXP2), + F1(D10), F5(WGM_GPS_TIMEM_ASK_RFCLK), + F1(D9), F2(VIO_D9), F5(GIO_OUT9), + F1(D8), F2(VIO_D8), F5(GIO_OUT8), + F1(D7), F2(VIO_D7), F5(GIO_OUT7), + F1(D6), F2(VIO_D6), F5(GIO_OUT6), + F1(D5), F2(VIO_D5), F5(GIO_OUT5_217), + F1(D4), F2(VIO_D4), F5(GIO_OUT4_218), + F1(D3), F2(VIO_D3), F5(GIO_OUT3_219), + F1(D2), F2(VIO_D2), F5(GIO_OUT2_220), /* Port220 */ + F1(D1), F2(VIO_D1), F5(GIO_OUT1_221), + F1(D0), F2(VIO_D0), F5(GIO_OUT0_222), + F1(RDWR_224), F2(VIO_HD), F5(SIM1_GPO2), + F1(RD_N), F1(WAIT_N), F2(VIO_CLK), F5(SIM1_GPO1), + F1(WE0_N), F2(RDWR_227), + F1(WE1_N), F5(SIM0_GPO0), + F1(PWMO), F2(VIO_CKO1_229), + F1(SLIM_CLK), F2(VIO_CKO4_230), /* Port230 */ + F1(SLIM_DATA), F2(VIO_CKO5_231), F2(VIO_CKO2_232), F4(SF_PORT_0_232), + F2(VIO_CKO3_233), F4(SF_PORT_1_233), + F1(FSIACK), F2(PDM3_CLK_234), F3(ISP_IRIS1_234), + F1(FSIAISLD), F2(PDM3_DATA_235), + F1(FSIAOMC), F2(PDM0_OUTCLK_236), F3(ISP_IRIS0_236), + F1(FSIAOLR), F2(FSIAILR), F1(FSIAOBT), F2(FSIAIBT), + F1(FSIAOSLD), 
F2(PDM0_OUTDATA_239), + F1(FSIBISLD), /* Port240 */ + F1(FSIBOLR), F2(FSIBILR), F1(FSIBOMC), F3(ISP_SHUTTER1_242), + F1(FSIBOBT), F2(FSIBIBT), F1(FSIBOSLD), F2(FSIASPDIF), + F1(FSIBCK), F3(ISP_SHUTTER0_245), + F1(ISP_IRIS1_246), F1(ISP_IRIS0_247), F1(ISP_SHUTTER1_248), + F1(ISP_SHUTTER0_249), F1(ISP_STROBE_250), /* Port250 */ + F1(MSIOF0_SYNC), F1(MSIOF0_RXD), F1(MSIOF0_SCK), F1(MSIOF0_SS2), + F3(VIO_CKO3_259), F1(MSIOF0_TXD), /* Port260 */ + F2(SCIFB1_SCK_261), F7(CHSCIF1_HSCK), F2(SCIFB2_SCK_262), + F1(MSIOF1_SS2), F4(MSIOF5_SS2), F1(MSIOF1_TXD), F4(MSIOF5_TXD), + F1(MSIOF1_RXD), F4(MSIOF5_RXD), F1(MSIOF1_SS1), F4(MSIOF5_SS1), + F1(MSIOF0_SS1), F1(MSIOF1_SCK), F4(MSIOF5_SCK), + F1(MSIOF1_SYNC), F4(MSIOF5_SYNC), + F1(MSIOF2_SS1), F3(VIO_CKO5_270), /* Port270 */ + F1(MSIOF2_SS2), F3(VIO_CKO2_271), F1(MSIOF3_SS2), F3(VIO_CKO1_272), + F1(MSIOF3_SS1), F3(VIO_CKO4_273), F1(MSIOF4_SS2), F4(TPU1TO0), + F1(IC_DP), F1(SIM0_RST), F1(IC_DM), F1(SIM0_BSICOMP), + F1(SIM0_CLK), F1(SIM0_IO), /* Port280 */ + F1(SIM1_IO), F2(PDM2_DATA_281), F1(SIM1_CLK), F2(PDM2_CLK_282), + F1(SIM1_RST), F1(SDHID1_0), F3(STMDATA0_2), + F1(SDHID1_1), F3(STMDATA1_2), IRQ(51), /* Port290 */ + F1(SDHID1_2), F3(STMDATA2_2), F1(SDHID1_3), F3(STMDATA3_2), + F1(SDHICLK1), F3(STMCLK_2), F1(SDHICMD1), F3(STMSIDI_2), + F1(SDHID2_0), F2(MSIOF4_TXD), F3(SCIFB2_TXD_295), F4(MSIOF6_TXD), + F1(SDHID2_1), F4(MSIOF6_SS2), IRQ(52), + F1(SDHID2_2), F2(MSIOF4_RXD), F3(SCIFB2_RXD_297), F4(MSIOF6_RXD), + F1(SDHID2_3), F2(MSIOF4_SYNC), F3(SCIFB2_CTS_298), F4(MSIOF6_SYNC), + F1(SDHICLK2), F2(MSIOF4_SCK), F3(SCIFB2_SCK_299), F4(MSIOF6_SCK), + F1(SDHICMD2), F2(MSIOF4_SS1), F3(SCIFB2_RTS_300), + F4(MSIOF6_SS1), /* Port300 */ + F1(SDHICD0), IRQ(50), F1(SDHID0_0), F3(STMDATA0_1), + F1(SDHID0_1), F3(STMDATA1_1), F1(SDHID0_2), F3(STMDATA2_1), + F1(SDHID0_3), F3(STMDATA3_1), F1(SDHICMD0), F3(STMSIDI_1), + F1(SDHIWP0), F1(SDHICLK0), F3(STMCLK_1), IRQ(16), /* Port320 */ + IRQ(17), IRQ(28), IRQ(29), IRQ(30), IRQ(53), IRQ(54), + IRQ(55), IRQ(56), IRQ(57), + PINMUX_MARK_END, +}; + +static const pinmux_enum_t pinmux_data[] = { + /* specify valid pin states for each pin in GPIO mode */ + + PORT_DATA_IO_PU_PD(0), PORT_DATA_IO_PU_PD(1), + PORT_DATA_IO_PU_PD(2), PORT_DATA_IO_PU_PD(3), + PORT_DATA_IO_PU_PD(4), PORT_DATA_IO_PU_PD(5), + PORT_DATA_IO_PU_PD(6), PORT_DATA_IO_PU_PD(7), + PORT_DATA_IO_PU_PD(8), PORT_DATA_IO_PU_PD(9), + + PORT_DATA_IO_PU_PD(10), PORT_DATA_IO_PU_PD(11), + PORT_DATA_IO_PU_PD(12), PORT_DATA_IO_PU_PD(13), + PORT_DATA_IO_PU_PD(14), PORT_DATA_IO_PU_PD(15), + PORT_DATA_IO_PU_PD(16), PORT_DATA_IO_PU_PD(17), + PORT_DATA_IO_PU_PD(18), PORT_DATA_IO_PU_PD(19), + + PORT_DATA_IO_PU_PD(20), PORT_DATA_IO_PU_PD(21), + PORT_DATA_IO_PU_PD(22), PORT_DATA_IO_PU_PD(23), + PORT_DATA_IO_PU_PD(24), PORT_DATA_IO_PU_PD(25), + PORT_DATA_IO_PU_PD(26), PORT_DATA_IO_PU_PD(27), + PORT_DATA_IO_PU_PD(28), PORT_DATA_IO_PU_PD(29), + + PORT_DATA_IO_PU_PD(30), PORT_DATA_IO_PU_PD(32), + PORT_DATA_IO_PU_PD(33), PORT_DATA_IO_PU_PD(34), + PORT_DATA_IO_PU_PD(35), PORT_DATA_IO_PU_PD(36), + PORT_DATA_IO_PU_PD(37), PORT_DATA_IO_PU_PD(38), + PORT_DATA_IO_PU_PD(39), PORT_DATA_IO_PU_PD(40), + + PORT_DATA_IO_PU_PD(64), PORT_DATA_IO_PU_PD(65), + PORT_DATA_IO_PU_PD(66), PORT_DATA_IO_PU_PD(67), + PORT_DATA_IO_PU_PD(68), PORT_DATA_IO_PU_PD(69), + + PORT_DATA_IO_PU_PD(70), PORT_DATA_IO_PU_PD(71), + PORT_DATA_IO_PU_PD(72), PORT_DATA_IO_PU_PD(73), + PORT_DATA_O(74), PORT_DATA_IO_PU_PD(75), + PORT_DATA_IO_PU_PD(76), PORT_DATA_IO_PU_PD(77), + PORT_DATA_IO_PU_PD(78), PORT_DATA_IO_PU_PD(79), + + 
PORT_DATA_IO_PU_PD(80), PORT_DATA_IO_PU_PD(81), + PORT_DATA_IO_PU_PD(82), PORT_DATA_IO_PU_PD(83), + PORT_DATA_IO_PU_PD(84), PORT_DATA_IO_PU_PD(85), + + PORT_DATA_IO_PU_PD(96), PORT_DATA_IO_PU_PD(97), + PORT_DATA_IO_PU_PD(98), PORT_DATA_IO_PU_PD(99), + + PORT_DATA_IO_PU_PD(100), PORT_DATA_IO_PU_PD(101), + PORT_DATA_IO_PU_PD(102), PORT_DATA_IO_PU_PD(103), + PORT_DATA_IO_PU_PD(104), PORT_DATA_IO_PU_PD(105), + PORT_DATA_IO_PU_PD(106), PORT_DATA_IO_PU_PD(107), + PORT_DATA_IO_PU_PD(108), PORT_DATA_IO_PU_PD(109), + + PORT_DATA_IO_PU_PD(110), PORT_DATA_IO_PU_PD(111), + PORT_DATA_IO_PU_PD(112), PORT_DATA_IO_PU_PD(113), + PORT_DATA_IO_PU_PD(114), PORT_DATA_IO_PU_PD(115), + PORT_DATA_IO_PU_PD(116), PORT_DATA_IO_PU_PD(117), + PORT_DATA_IO_PU_PD(118), PORT_DATA_IO_PU_PD(119), + + PORT_DATA_IO_PU_PD(120), PORT_DATA_IO_PU_PD(121), + PORT_DATA_IO_PU_PD(122), PORT_DATA_IO_PU_PD(123), + PORT_DATA_IO_PU_PD(124), PORT_DATA_IO_PU_PD(125), + PORT_DATA_IO_PU_PD(126), + PORT_DATA_IO_PU_PD(128), PORT_DATA_IO_PU_PD(129), + + PORT_DATA_IO_PU_PD(130), PORT_DATA_IO_PU_PD(131), + PORT_DATA_IO_PU_PD(132), PORT_DATA_IO_PU_PD(133), + PORT_DATA_IO_PU_PD(134), + + PORT_DATA_IO_PU_PD(160), PORT_DATA_IO_PU_PD(161), + PORT_DATA_IO_PU_PD(162), PORT_DATA_IO_PU_PD(163), + PORT_DATA_IO_PU_PD(164), PORT_DATA_IO_PU_PD(165), + PORT_DATA_IO_PU_PD(166), PORT_DATA_IO_PU_PD(167), + PORT_DATA_IO_PU_PD(168), PORT_DATA_IO_PU_PD(169), + + PORT_DATA_IO_PU_PD(170), PORT_DATA_IO_PU_PD(171), + PORT_DATA_IO_PU_PD(172), PORT_DATA_IO_PU_PD(173), + PORT_DATA_IO_PU_PD(174), PORT_DATA_IO_PU_PD(175), + PORT_DATA_IO_PU_PD(176), PORT_DATA_IO_PU_PD(177), + PORT_DATA_IO_PU_PD(178), + + PORT_DATA_IO_PU_PD(192), PORT_DATA_IO_PU_PD(193), + PORT_DATA_IO_PU_PD(194), PORT_DATA_IO_PU_PD(195), + PORT_DATA_IO_PU_PD(196), PORT_DATA_IO_PU_PD(197), + PORT_DATA_IO_PU_PD(198), PORT_DATA_IO_PU_PD(199), + + PORT_DATA_IO_PU_PD(200), PORT_DATA_IO_PU_PD(201), + PORT_DATA_IO_PU_PD(202), PORT_DATA_IO_PU_PD(203), + PORT_DATA_IO_PU_PD(204), PORT_DATA_IO_PU_PD(205), + PORT_DATA_IO_PU_PD(206), PORT_DATA_IO_PU_PD(207), + PORT_DATA_IO_PU_PD(208), PORT_DATA_IO_PU_PD(209), + + PORT_DATA_IO_PU_PD(210), PORT_DATA_IO_PU_PD(211), + PORT_DATA_IO_PU_PD(212), PORT_DATA_IO_PU_PD(213), + PORT_DATA_IO_PU_PD(214), PORT_DATA_IO_PU_PD(215), + PORT_DATA_IO_PU_PD(216), PORT_DATA_IO_PU_PD(217), + PORT_DATA_IO_PU_PD(218), PORT_DATA_IO_PU_PD(219), + + PORT_DATA_IO_PU_PD(220), PORT_DATA_IO_PU_PD(221), + PORT_DATA_IO_PU_PD(222), PORT_DATA_IO_PU_PD(224), + PORT_DATA_IO_PU_PD(225), PORT_DATA_IO_PU_PD(226), + PORT_DATA_IO_PU_PD(227), PORT_DATA_IO_PU_PD(228), + PORT_DATA_IO_PU_PD(229), + + PORT_DATA_IO_PU_PD(230), PORT_DATA_IO_PU_PD(231), + PORT_DATA_IO_PU_PD(232), PORT_DATA_IO_PU_PD(233), + PORT_DATA_IO_PU_PD(234), PORT_DATA_IO_PU_PD(235), + PORT_DATA_IO_PU_PD(236), PORT_DATA_IO_PU_PD(237), + PORT_DATA_IO_PU_PD(238), PORT_DATA_IO_PU_PD(239), + + PORT_DATA_IO_PU_PD(240), PORT_DATA_IO_PU_PD(241), + PORT_DATA_IO_PU_PD(242), PORT_DATA_IO_PU_PD(243), + PORT_DATA_IO_PU_PD(244), PORT_DATA_IO_PU_PD(245), + PORT_DATA_IO_PU_PD(246), PORT_DATA_IO_PU_PD(247), + PORT_DATA_IO_PU_PD(248), PORT_DATA_IO_PU_PD(249), + + PORT_DATA_IO_PU_PD(250), + PORT_DATA_IO_PU_PD(256), PORT_DATA_IO_PU_PD(257), + PORT_DATA_IO_PU_PD(258), PORT_DATA_IO_PU_PD(259), + + PORT_DATA_IO_PU_PD(260), PORT_DATA_IO_PU_PD(261), + PORT_DATA_IO_PU_PD(262), PORT_DATA_IO_PU_PD(263), + PORT_DATA_IO_PU_PD(264), PORT_DATA_IO_PU_PD(265), + PORT_DATA_IO_PU_PD(266), PORT_DATA_IO_PU_PD(267), + PORT_DATA_IO_PU_PD(268), PORT_DATA_IO_PU_PD(269), + + 
PORT_DATA_IO_PU_PD(270), PORT_DATA_IO_PU_PD(271), + PORT_DATA_IO_PU_PD(272), PORT_DATA_IO_PU_PD(273), + PORT_DATA_IO_PU_PD(274), PORT_DATA_IO_PU_PD(275), + PORT_DATA_IO_PU_PD(276), PORT_DATA_IO_PU_PD(277), + PORT_DATA_IO_PU_PD(278), PORT_DATA_IO_PU_PD(279), + + PORT_DATA_IO_PU_PD(280), PORT_DATA_IO_PU_PD(281), + PORT_DATA_IO_PU_PD(282), PORT_DATA_IO_PU_PD(283), + PORT_DATA_O(288), PORT_DATA_IO_PU_PD(289), + + PORT_DATA_IO_PU_PD(290), PORT_DATA_IO_PU_PD(291), + PORT_DATA_IO_PU_PD(292), PORT_DATA_IO_PU_PD(293), + PORT_DATA_IO_PU_PD(294), PORT_DATA_IO_PU_PD(295), + PORT_DATA_IO_PU_PD(296), PORT_DATA_IO_PU_PD(297), + PORT_DATA_IO_PU_PD(298), PORT_DATA_IO_PU_PD(299), + + PORT_DATA_IO_PU_PD(300), PORT_DATA_IO_PU_PD(301), + PORT_DATA_IO_PU_PD(302), PORT_DATA_IO_PU_PD(303), + PORT_DATA_IO_PU_PD(304), PORT_DATA_IO_PU_PD(305), + PORT_DATA_IO_PU_PD(306), PORT_DATA_IO_PU_PD(307), + PORT_DATA_IO_PU_PD(308), + + PORT_DATA_IO_PU_PD(320), PORT_DATA_IO_PU_PD(321), + PORT_DATA_IO_PU_PD(322), PORT_DATA_IO_PU_PD(323), + PORT_DATA_IO_PU_PD(324), PORT_DATA_IO_PU_PD(325), + PORT_DATA_IO_PU_PD(326), PORT_DATA_IO_PU_PD(327), + PORT_DATA_IO_PU_PD(328), PORT_DATA_IO_PU_PD(329), + + /* Port0 */ + PINMUX_DATA(LCDD0_MARK, PORT0_FN1), + PINMUX_DATA(PDM2_CLK_0_MARK, PORT0_FN3), + PINMUX_DATA(DU0_DR0_MARK, PORT0_FN7), + PINMUX_DATA(IRQ0_MARK, PORT0_FN0), + + /* Port1 */ + PINMUX_DATA(LCDD1_MARK, PORT1_FN1), + PINMUX_DATA(PDM2_DATA_1_MARK, PORT1_FN3, MSEL3CR_12_0), + PINMUX_DATA(DU0_DR19_MARK, PORT1_FN7), + PINMUX_DATA(IRQ1_MARK, PORT1_FN0), + + /* Port2 */ + PINMUX_DATA(LCDD2_MARK, PORT2_FN1), + PINMUX_DATA(PDM3_CLK_2_MARK, PORT2_FN3), + PINMUX_DATA(DU0_DR2_MARK, PORT2_FN7), + PINMUX_DATA(IRQ2_MARK, PORT2_FN0), + + /* Port3 */ + PINMUX_DATA(LCDD3_MARK, PORT3_FN1), + PINMUX_DATA(PDM3_DATA_3_MARK, PORT3_FN3, MSEL3CR_12_0), + PINMUX_DATA(DU0_DR3_MARK, PORT3_FN7), + PINMUX_DATA(IRQ3_MARK, PORT3_FN0), + + /* Port4 */ + PINMUX_DATA(LCDD4_MARK, PORT4_FN1), + PINMUX_DATA(PDM4_CLK_4_MARK, PORT4_FN3), + PINMUX_DATA(DU0_DR4_MARK, PORT4_FN7), + PINMUX_DATA(IRQ4_MARK, PORT4_FN0), + + /* Port5 */ + PINMUX_DATA(LCDD5_MARK, PORT5_FN1), + PINMUX_DATA(PDM4_DATA_5_MARK, PORT5_FN3, MSEL3CR_12_0), + PINMUX_DATA(DU0_DR5_MARK, PORT5_FN7), + PINMUX_DATA(IRQ5_MARK, PORT5_FN0), + + /* Port6 */ + PINMUX_DATA(LCDD6_MARK, PORT6_FN1), + PINMUX_DATA(PDM0_OUTCLK_6_MARK, PORT6_FN3), + PINMUX_DATA(DU0_DR6_MARK, PORT6_FN7), + PINMUX_DATA(IRQ6_MARK, PORT6_FN0), + + /* Port7 */ + PINMUX_DATA(LCDD7_MARK, PORT7_FN1), + PINMUX_DATA(PDM0_OUTDATA_7_MARK, PORT7_FN3), + PINMUX_DATA(DU0_DR7_MARK, PORT7_FN7), + PINMUX_DATA(IRQ7_MARK, PORT7_FN0), + + /* Port8 */ + PINMUX_DATA(LCDD8_MARK, PORT8_FN1), + PINMUX_DATA(PDM1_OUTCLK_8_MARK, PORT8_FN3), + PINMUX_DATA(DU0_DG0_MARK, PORT8_FN7), + PINMUX_DATA(IRQ8_MARK, PORT8_FN0), + + /* Port9 */ + PINMUX_DATA(LCDD9_MARK, PORT9_FN1), + PINMUX_DATA(PDM1_OUTDATA_9_MARK, PORT9_FN3), + PINMUX_DATA(DU0_DG1_MARK, PORT9_FN7), + PINMUX_DATA(IRQ9_MARK, PORT9_FN0), + + /* Port10 */ + PINMUX_DATA(LCDD10_MARK, PORT10_FN1), + PINMUX_DATA(FSICCK_MARK, PORT10_FN3), + PINMUX_DATA(DU0_DG2_MARK, PORT10_FN7), + PINMUX_DATA(IRQ10_MARK, PORT10_FN0), + + /* Port11 */ + PINMUX_DATA(LCDD11_MARK, PORT11_FN1), + PINMUX_DATA(FSICISLD_MARK, PORT11_FN3), + PINMUX_DATA(DU0_DG3_MARK, PORT11_FN7), + PINMUX_DATA(IRQ11_MARK, PORT11_FN0), + + /* Port12 */ + PINMUX_DATA(LCDD12_MARK, PORT12_FN1), + PINMUX_DATA(FSICOMC_MARK, PORT12_FN3), + PINMUX_DATA(DU0_DG4_MARK, PORT12_FN7), + PINMUX_DATA(IRQ12_MARK, PORT12_FN0), + + /* Port13 */ + PINMUX_DATA(LCDD13_MARK, 
PORT13_FN1), + PINMUX_DATA(FSICOLR_MARK, PORT13_FN3), + PINMUX_DATA(FSICILR_MARK, PORT13_FN4), + PINMUX_DATA(DU0_DG5_MARK, PORT13_FN7), + PINMUX_DATA(IRQ13_MARK, PORT13_FN0), + + /* Port14 */ + PINMUX_DATA(LCDD14_MARK, PORT14_FN1), + PINMUX_DATA(FSICOBT_MARK, PORT14_FN3), + PINMUX_DATA(FSICIBT_MARK, PORT14_FN4), + PINMUX_DATA(DU0_DG6_MARK, PORT14_FN7), + PINMUX_DATA(IRQ14_MARK, PORT14_FN0), + + /* Port15 */ + PINMUX_DATA(LCDD15_MARK, PORT15_FN1), + PINMUX_DATA(FSICOSLD_MARK, PORT15_FN3), + PINMUX_DATA(DU0_DG7_MARK, PORT15_FN7), + PINMUX_DATA(IRQ15_MARK, PORT15_FN0), + + /* Port16 */ + PINMUX_DATA(LCDD16_MARK, PORT16_FN1), + PINMUX_DATA(TPU1TO1_MARK, PORT16_FN4), + PINMUX_DATA(DU0_DB0_MARK, PORT16_FN7), + + /* Port17 */ + PINMUX_DATA(LCDD17_MARK, PORT17_FN1), + PINMUX_DATA(SF_IRQ_00_MARK, PORT17_FN4), + PINMUX_DATA(DU0_DB1_MARK, PORT17_FN7), + + /* Port18 */ + PINMUX_DATA(LCDD18_MARK, PORT18_FN1), + PINMUX_DATA(SF_IRQ_01_MARK, PORT18_FN4), + PINMUX_DATA(DU0_DB2_MARK, PORT18_FN7), + + /* Port19 */ + PINMUX_DATA(LCDD19_MARK, PORT19_FN1), + PINMUX_DATA(SCIFB3_RTS_19_MARK, PORT19_FN3), + PINMUX_DATA(DU0_DB3_MARK, PORT19_FN7), + + /* Port20 */ + PINMUX_DATA(LCDD20_MARK, PORT20_FN1), + PINMUX_DATA(SCIFB3_CTS_20_MARK, PORT20_FN3, MSEL3CR_09_0), + PINMUX_DATA(DU0_DB4_MARK, PORT20_FN7), + + /* Port21 */ + PINMUX_DATA(LCDD21_MARK, PORT21_FN1), + PINMUX_DATA(SCIFB3_TXD_21_MARK, PORT21_FN3, MSEL3CR_09_0), + PINMUX_DATA(DU0_DB5_MARK, PORT21_FN7), + + /* Port22 */ + PINMUX_DATA(LCDD22_MARK, PORT22_FN1), + PINMUX_DATA(SCIFB3_RXD_22_MARK, PORT22_FN3, MSEL3CR_09_0), + PINMUX_DATA(DU0_DB6_MARK, PORT22_FN7), + + /* Port23 */ + PINMUX_DATA(LCDD23_MARK, PORT23_FN1), + PINMUX_DATA(SCIFB3_SCK_23_MARK, PORT23_FN3), + PINMUX_DATA(DU0_DB7_MARK, PORT23_FN7), + + /* Port24 */ + PINMUX_DATA(LCDHSYN_MARK, PORT24_FN1), + PINMUX_DATA(LCDCS_MARK, PORT24_FN2), + PINMUX_DATA(SCIFB1_RTS_24_MARK, PORT24_FN3), + PINMUX_DATA(DU0_EXHSYNC_N_CSYNC_N_HSYNC_N_MARK, PORT24_FN7), + + /* Port25 */ + PINMUX_DATA(LCDVSYN_MARK, PORT25_FN1), + PINMUX_DATA(SCIFB1_CTS_25_MARK, PORT25_FN3, MSEL3CR_11_0), + PINMUX_DATA(DU0_EXVSYNC_N_VSYNC_N_CSYNC_N_MARK, PORT25_FN7), + + /* Port26 */ + PINMUX_DATA(LCDDCK_MARK, PORT26_FN1), + PINMUX_DATA(LCDWR_MARK, PORT26_FN2), + PINMUX_DATA(SCIFB1_TXD_26_MARK, PORT26_FN3, MSEL3CR_11_0), + PINMUX_DATA(DU0_DOTCLKIN_MARK, PORT26_FN7), + + /* Port27 */ + PINMUX_DATA(LCDDISP_MARK, PORT27_FN1), + PINMUX_DATA(LCDRS_MARK, PORT27_FN2), + PINMUX_DATA(SCIFB1_RXD_27_MARK, PORT27_FN3, MSEL3CR_11_0), + PINMUX_DATA(DU0_DOTCLKOUT_MARK, PORT27_FN7), + + /* Port28 */ + PINMUX_DATA(LCDRD_N_MARK, PORT28_FN1), + PINMUX_DATA(SCIFB1_SCK_28_MARK, PORT28_FN3), + PINMUX_DATA(DU0_DOTCLKOUTB_MARK, PORT28_FN7), + + /* Port29 */ + PINMUX_DATA(LCDLCLK_MARK, PORT29_FN1), + PINMUX_DATA(SF_IRQ_02_MARK, PORT29_FN4), + PINMUX_DATA(DU0_DISP_CSYNC_N_DE_MARK, PORT29_FN7), + + /* Port30 */ + PINMUX_DATA(LCDDON_MARK, PORT30_FN1), + PINMUX_DATA(SF_IRQ_03_MARK, PORT30_FN4), + PINMUX_DATA(DU0_ODDF_N_CLAMP_MARK, PORT30_FN7), + + /* Port32 */ + PINMUX_DATA(SCIFA0_RTS_MARK, PORT32_FN1), + PINMUX_DATA(SIM0_DET_MARK, PORT32_FN5), + PINMUX_DATA(CSCIF0_RTS_MARK, PORT32_FN7), + + /* Port33 */ + PINMUX_DATA(SCIFA0_CTS_MARK, PORT33_FN1), + PINMUX_DATA(SIM1_DET_MARK, PORT33_FN5), + PINMUX_DATA(CSCIF0_CTS_MARK, PORT33_FN7), + + /* Port34 */ + PINMUX_DATA(SCIFA0_SCK_MARK, PORT34_FN1), + PINMUX_DATA(SIM0_PWRON_MARK, PORT34_FN5), + PINMUX_DATA(CSCIF0_SCK_MARK, PORT34_FN7), + + /* Port35 */ + PINMUX_DATA(SCIFA1_RTS_MARK, PORT35_FN1), + PINMUX_DATA(CSCIF1_RTS_MARK, 
PORT35_FN7), + + /* Port36 */ + PINMUX_DATA(SCIFA1_CTS_MARK, PORT36_FN1), + PINMUX_DATA(CSCIF1_CTS_MARK, PORT36_FN7), + + /* Port37 */ + PINMUX_DATA(SCIFA1_SCK_MARK, PORT37_FN1), + PINMUX_DATA(CSCIF1_SCK_MARK, PORT37_FN7), + + /* Port38 */ + PINMUX_DATA(SCIFB0_RTS_MARK, PORT38_FN1), + PINMUX_DATA(TPU0TO1_MARK, PORT38_FN3), + PINMUX_DATA(SCIFB3_RTS_38_MARK, PORT38_FN4), + PINMUX_DATA(CHSCIF0_HRTS_MARK, PORT38_FN7), + + /* Port39 */ + PINMUX_DATA(SCIFB0_CTS_MARK, PORT39_FN1), + PINMUX_DATA(TPU0TO2_MARK, PORT39_FN3), + PINMUX_DATA(SCIFB3_CTS_39_MARK, PORT39_FN4, MSEL3CR_09_1), + PINMUX_DATA(CHSCIF0_HCTS_MARK, PORT39_FN7), + + /* Port40 */ + PINMUX_DATA(SCIFB0_SCK_MARK, PORT40_FN1), + PINMUX_DATA(TPU0TO3_MARK, PORT40_FN3), + PINMUX_DATA(SCIFB3_SCK_40_MARK, PORT40_FN4), + PINMUX_DATA(CHSCIF0_HSCK_MARK, PORT40_FN7), + + /* Port64 */ + PINMUX_DATA(PDM0_DATA_MARK, PORT64_FN1), + + /* Port65 */ + PINMUX_DATA(PDM1_DATA_MARK, PORT65_FN1), + + /* Port66 */ + PINMUX_DATA(HSI_RX_WAKE_MARK, PORT66_FN1), + PINMUX_DATA(SCIFB2_CTS_66_MARK, PORT66_FN2, MSEL3CR_10_0), + PINMUX_DATA(MSIOF3_SYNC_MARK, PORT66_FN3), + PINMUX_DATA(GenIO4_MARK, PORT66_FN5), + PINMUX_DATA(IRQ40_MARK, PORT66_FN0), + + /* Port67 */ + PINMUX_DATA(HSI_RX_READY_MARK, PORT67_FN1), + PINMUX_DATA(SCIFB1_TXD_67_MARK, PORT67_FN2, MSEL3CR_11_1), + PINMUX_DATA(GIO_OUT3_67_MARK, PORT67_FN5), + PINMUX_DATA(CHSCIF1_HTX_MARK, PORT67_FN7), + + /* Port68 */ + PINMUX_DATA(HSI_RX_FLAG_MARK, PORT68_FN1), + PINMUX_DATA(SCIFB2_TXD_68_MARK, PORT68_FN2, MSEL3CR_10_0), + PINMUX_DATA(MSIOF3_TXD_MARK, PORT68_FN3), + PINMUX_DATA(GIO_OUT4_68_MARK, PORT68_FN5), + + /* Port69 */ + PINMUX_DATA(HSI_RX_DATA_MARK, PORT69_FN1), + PINMUX_DATA(SCIFB2_RXD_69_MARK, PORT69_FN2, MSEL3CR_10_0), + PINMUX_DATA(MSIOF3_RXD_MARK, PORT69_FN3), + PINMUX_DATA(GIO_OUT5_69_MARK, PORT69_FN5), + + /* Port70 */ + PINMUX_DATA(HSI_TX_FLAG_MARK, PORT70_FN1), + PINMUX_DATA(SCIFB1_RTS_70_MARK, PORT70_FN2), + PINMUX_DATA(GIO_OUT1_70_MARK, PORT70_FN5), + PINMUX_DATA(HSIC_TSTCLK0_MARK, PORT70_FN6), + PINMUX_DATA(CHSCIF1_HRTS_MARK, PORT70_FN7), + + /* Port71 */ + PINMUX_DATA(HSI_TX_DATA_MARK, PORT71_FN1), + PINMUX_DATA(SCIFB1_CTS_71_MARK, PORT71_FN2, MSEL3CR_11_1), + PINMUX_DATA(GIO_OUT2_71_MARK, PORT71_FN5), + PINMUX_DATA(HSIC_TSTCLK1_MARK, PORT71_FN6), + PINMUX_DATA(CHSCIF1_HCTS_MARK, PORT71_FN7), + + /* Port72 */ + PINMUX_DATA(HSI_TX_WAKE_MARK, PORT72_FN1), + PINMUX_DATA(SCIFB1_RXD_72_MARK, PORT72_FN2, MSEL3CR_11_1), + PINMUX_DATA(GenIO8_MARK, PORT72_FN5), + PINMUX_DATA(CHSCIF1_HRX_MARK, PORT72_FN7), + + /* Port73 */ + PINMUX_DATA(HSI_TX_READY_MARK, PORT73_FN1), + PINMUX_DATA(SCIFB2_RTS_73_MARK, PORT73_FN2), + PINMUX_DATA(MSIOF3_SCK_MARK, PORT73_FN3), + PINMUX_DATA(GIO_OUT0_73_MARK, PORT73_FN5), + + /* Port74 - Port85 */ + PINMUX_DATA(IRDA_OUT_MARK, PORT74_FN1), + PINMUX_DATA(IRDA_IN_MARK, PORT75_FN1), + PINMUX_DATA(IRDA_FIRSEL_MARK, PORT76_FN1), + PINMUX_DATA(TPU0TO0_MARK, PORT77_FN1), + PINMUX_DATA(DIGRFEN_MARK, PORT78_FN1), + PINMUX_DATA(GPS_TIMESTAMP_MARK, PORT79_FN1), + PINMUX_DATA(TXP_MARK, PORT80_FN1), + PINMUX_DATA(TXP2_MARK, PORT81_FN1), + PINMUX_DATA(COEX_0_MARK, PORT82_FN1), + PINMUX_DATA(COEX_1_MARK, PORT83_FN1), + PINMUX_DATA(IRQ19_MARK, PORT84_FN0), + PINMUX_DATA(IRQ18_MARK, PORT85_FN0), + + /* Port96 - Port101 */ + PINMUX_DATA(KEYIN0_MARK, PORT96_FN1), + PINMUX_DATA(KEYIN1_MARK, PORT97_FN1), + PINMUX_DATA(KEYIN2_MARK, PORT98_FN1), + PINMUX_DATA(KEYIN3_MARK, PORT99_FN1), + PINMUX_DATA(KEYIN4_MARK, PORT100_FN1), + PINMUX_DATA(KEYIN5_MARK, PORT101_FN1), + + /* Port102 */ + 
PINMUX_DATA(KEYIN6_MARK, PORT102_FN1), + PINMUX_DATA(IRQ41_MARK, PORT102_FN0), + + /* Port103 */ + PINMUX_DATA(KEYIN7_MARK, PORT103_FN1), + PINMUX_DATA(IRQ42_MARK, PORT103_FN0), + + /* Port104 - Port108 */ + PINMUX_DATA(KEYOUT0_MARK, PORT104_FN2), + PINMUX_DATA(KEYOUT1_MARK, PORT105_FN2), + PINMUX_DATA(KEYOUT2_MARK, PORT106_FN2), + PINMUX_DATA(KEYOUT3_MARK, PORT107_FN2), + PINMUX_DATA(KEYOUT4_MARK, PORT108_FN2), + + /* Port109 */ + PINMUX_DATA(KEYOUT5_MARK, PORT109_FN2), + PINMUX_DATA(IRQ43_MARK, PORT109_FN0), + + /* Port110 */ + PINMUX_DATA(KEYOUT6_MARK, PORT110_FN2), + PINMUX_DATA(IRQ44_MARK, PORT110_FN0), + + /* Port111 */ + PINMUX_DATA(KEYOUT7_MARK, PORT111_FN2), + PINMUX_DATA(RFANAEN_MARK, PORT111_FN5), + PINMUX_DATA(IRQ45_MARK, PORT111_FN0), + + /* Port112 */ + PINMUX_DATA(KEYIN8_MARK, PORT112_FN1), + PINMUX_DATA(KEYOUT8_MARK, PORT112_FN2), + PINMUX_DATA(SF_IRQ_04_MARK, PORT112_FN4), + PINMUX_DATA(IRQ46_MARK, PORT112_FN0), + + /* Port113 */ + PINMUX_DATA(KEYIN9_MARK, PORT113_FN1), + PINMUX_DATA(KEYOUT9_MARK, PORT113_FN2), + PINMUX_DATA(SF_IRQ_05_MARK, PORT113_FN4), + PINMUX_DATA(IRQ47_MARK, PORT113_FN0), + + /* Port114 */ + PINMUX_DATA(KEYIN10_MARK, PORT114_FN1), + PINMUX_DATA(KEYOUT10_MARK, PORT114_FN2), + PINMUX_DATA(SF_IRQ_06_MARK, PORT114_FN4), + PINMUX_DATA(IRQ48_MARK, PORT114_FN0), + + /* Port115 */ + PINMUX_DATA(KEYIN11_MARK, PORT115_FN1), + PINMUX_DATA(KEYOUT11_MARK, PORT115_FN2), + PINMUX_DATA(SF_IRQ_07_MARK, PORT115_FN4), + PINMUX_DATA(IRQ49_MARK, PORT115_FN0), + + /* Port116 */ + PINMUX_DATA(SCIFA0_TXD_MARK, PORT116_FN1), + PINMUX_DATA(CSCIF0_TX_MARK, PORT116_FN7), + + /* Port117 */ + PINMUX_DATA(SCIFA0_RXD_MARK, PORT117_FN1), + PINMUX_DATA(CSCIF0_RX_MARK, PORT117_FN7), + + /* Port118 */ + PINMUX_DATA(SCIFA1_TXD_MARK, PORT118_FN1), + PINMUX_DATA(CSCIF1_TX_MARK, PORT118_FN7), + + /* Port119 */ + PINMUX_DATA(SCIFA1_RXD_MARK, PORT119_FN1), + PINMUX_DATA(CSCIF1_RX_MARK, PORT119_FN7), + + /* Port120 */ + PINMUX_DATA(SF_PORT_1_120_MARK, PORT120_FN3), + PINMUX_DATA(SCIFB3_RXD_120_MARK, PORT120_FN4, MSEL3CR_09_1), + PINMUX_DATA(DU0_CDE_MARK, PORT120_FN7), + + /* Port121 */ + PINMUX_DATA(SF_PORT_0_121_MARK, PORT121_FN3), + PINMUX_DATA(SCIFB3_TXD_121_MARK, PORT121_FN4, MSEL3CR_09_1), + + /* Port122 */ + PINMUX_DATA(SCIFB0_TXD_MARK, PORT122_FN1), + PINMUX_DATA(CHSCIF0_HTX_MARK, PORT122_FN7), + + /* Port123 */ + PINMUX_DATA(SCIFB0_RXD_MARK, PORT123_FN1), + PINMUX_DATA(CHSCIF0_HRX_MARK, PORT123_FN7), + + /* Port124 */ + PINMUX_DATA(ISP_STROBE_124_MARK, PORT124_FN3), + + /* Port125 */ + PINMUX_DATA(STP_ISD_0_MARK, PORT125_FN1), + PINMUX_DATA(PDM4_CLK_125_MARK, PORT125_FN2), + PINMUX_DATA(MSIOF2_TXD_MARK, PORT125_FN3), + PINMUX_DATA(SIM0_VOLTSEL0_MARK, PORT125_FN5), + + /* Port126 */ + PINMUX_DATA(TS_SDEN_MARK, PORT126_FN1), + PINMUX_DATA(MSIOF7_SYNC_MARK, PORT126_FN2), + PINMUX_DATA(STP_ISEN_1_MARK, PORT126_FN3), + + /* Port128 */ + PINMUX_DATA(STP_ISEN_0_MARK, PORT128_FN1), + PINMUX_DATA(PDM1_OUTDATA_128_MARK, PORT128_FN2), + PINMUX_DATA(MSIOF2_SYNC_MARK, PORT128_FN3), + PINMUX_DATA(SIM1_VOLTSEL1_MARK, PORT128_FN5), + + /* Port129 */ + PINMUX_DATA(TS_SPSYNC_MARK, PORT129_FN1), + PINMUX_DATA(MSIOF7_RXD_MARK, PORT129_FN2), + PINMUX_DATA(STP_ISSYNC_1_MARK, PORT129_FN3), + + /* Port130 */ + PINMUX_DATA(STP_ISSYNC_0_MARK, PORT130_FN1), + PINMUX_DATA(PDM4_DATA_130_MARK, PORT130_FN2, MSEL3CR_12_1), + PINMUX_DATA(MSIOF2_RXD_MARK, PORT130_FN3), + PINMUX_DATA(SIM0_VOLTSEL1_MARK, PORT130_FN5), + + /* Port131 */ + PINMUX_DATA(STP_OPWM_0_MARK, PORT131_FN1), + PINMUX_DATA(SIM1_PWRON_MARK, 
PORT131_FN5), + + /* Port132 */ + PINMUX_DATA(TS_SCK_MARK, PORT132_FN1), + PINMUX_DATA(MSIOF7_SCK_MARK, PORT132_FN2), + PINMUX_DATA(STP_ISCLK_1_MARK, PORT132_FN3), + + /* Port133 */ + PINMUX_DATA(STP_ISCLK_0_MARK, PORT133_FN1), + PINMUX_DATA(PDM1_OUTCLK_133_MARK, PORT133_FN2), + PINMUX_DATA(MSIOF2_SCK_MARK, PORT133_FN3), + PINMUX_DATA(SIM1_VOLTSEL0_MARK, PORT133_FN5), + + /* Port134 */ + PINMUX_DATA(TS_SDAT_MARK, PORT134_FN1), + PINMUX_DATA(MSIOF7_TXD_MARK, PORT134_FN2), + PINMUX_DATA(STP_ISD_1_MARK, PORT134_FN3), + + /* Port160 - Port178 */ + PINMUX_DATA(IRQ20_MARK, PORT160_FN0), + PINMUX_DATA(IRQ21_MARK, PORT161_FN0), + PINMUX_DATA(IRQ22_MARK, PORT162_FN0), + PINMUX_DATA(IRQ23_MARK, PORT163_FN0), + PINMUX_DATA(MMCD0_0_MARK, PORT164_FN1), + PINMUX_DATA(MMCD0_1_MARK, PORT165_FN1), + PINMUX_DATA(MMCD0_2_MARK, PORT166_FN1), + PINMUX_DATA(MMCD0_3_MARK, PORT167_FN1), + PINMUX_DATA(MMCD0_4_MARK, PORT168_FN1), + PINMUX_DATA(MMCD0_5_MARK, PORT169_FN1), + PINMUX_DATA(MMCD0_6_MARK, PORT170_FN1), + PINMUX_DATA(MMCD0_7_MARK, PORT171_FN1), + PINMUX_DATA(MMCCMD0_MARK, PORT172_FN1), + PINMUX_DATA(MMCCLK0_MARK, PORT173_FN1), + PINMUX_DATA(MMCRST_MARK, PORT174_FN1), + PINMUX_DATA(IRQ24_MARK, PORT175_FN0), + PINMUX_DATA(IRQ25_MARK, PORT176_FN0), + PINMUX_DATA(IRQ26_MARK, PORT177_FN0), + PINMUX_DATA(IRQ27_MARK, PORT178_FN0), + + /* Port192 - Port200 FN1 */ + PINMUX_DATA(A10_MARK, PORT192_FN1), + PINMUX_DATA(A9_MARK, PORT193_FN1), + PINMUX_DATA(A8_MARK, PORT194_FN1), + PINMUX_DATA(A7_MARK, PORT195_FN1), + PINMUX_DATA(A6_MARK, PORT196_FN1), + PINMUX_DATA(A5_MARK, PORT197_FN1), + PINMUX_DATA(A4_MARK, PORT198_FN1), + PINMUX_DATA(A3_MARK, PORT199_FN1), + PINMUX_DATA(A2_MARK, PORT200_FN1), + + /* Port192 - Port200 FN2 */ + PINMUX_DATA(MMCD1_7_MARK, PORT192_FN2), + PINMUX_DATA(MMCD1_6_MARK, PORT193_FN2), + PINMUX_DATA(MMCD1_5_MARK, PORT194_FN2), + PINMUX_DATA(MMCD1_4_MARK, PORT195_FN2), + PINMUX_DATA(MMCD1_3_MARK, PORT196_FN2), + PINMUX_DATA(MMCD1_2_MARK, PORT197_FN2), + PINMUX_DATA(MMCD1_1_MARK, PORT198_FN2), + PINMUX_DATA(MMCD1_0_MARK, PORT199_FN2), + PINMUX_DATA(MMCCMD1_MARK, PORT200_FN2), + + /* Port192 - Port200 IRQ */ + PINMUX_DATA(IRQ31_MARK, PORT192_FN0), + PINMUX_DATA(IRQ32_MARK, PORT193_FN0), + PINMUX_DATA(IRQ33_MARK, PORT194_FN0), + PINMUX_DATA(IRQ34_MARK, PORT195_FN0), + PINMUX_DATA(IRQ35_MARK, PORT196_FN0), + PINMUX_DATA(IRQ36_MARK, PORT197_FN0), + PINMUX_DATA(IRQ37_MARK, PORT198_FN0), + PINMUX_DATA(IRQ38_MARK, PORT199_FN0), + PINMUX_DATA(IRQ39_MARK, PORT200_FN0), + + /* Port201 */ + PINMUX_DATA(A1_MARK, PORT201_FN1), + + /* Port202 */ + PINMUX_DATA(A0_MARK, PORT202_FN1), + PINMUX_DATA(BS_MARK, PORT202_FN2), + + /* Port203 */ + PINMUX_DATA(CKO_MARK, PORT203_FN1), + PINMUX_DATA(MMCCLK1_MARK, PORT203_FN2), + + /* Port204 */ + PINMUX_DATA(CS0_N_MARK, PORT204_FN1), + PINMUX_DATA(SIM0_GPO1_MARK, PORT204_FN5), + + /* Port205 */ + PINMUX_DATA(CS2_N_MARK, PORT205_FN1), + PINMUX_DATA(SIM0_GPO2_MARK, PORT205_FN5), + + /* Port206 */ + PINMUX_DATA(CS4_N_MARK, PORT206_FN1), + PINMUX_DATA(VIO_VD_MARK, PORT206_FN2), + PINMUX_DATA(SIM1_GPO0_MARK, PORT206_FN5), + + /* Port207 - Port212 FN1 */ + PINMUX_DATA(D15_MARK, PORT207_FN1), + PINMUX_DATA(D14_MARK, PORT208_FN1), + PINMUX_DATA(D13_MARK, PORT209_FN1), + PINMUX_DATA(D12_MARK, PORT210_FN1), + PINMUX_DATA(D11_MARK, PORT211_FN1), + PINMUX_DATA(D10_MARK, PORT212_FN1), + + /* Port207 - Port212 FN5 */ + PINMUX_DATA(GIO_OUT15_MARK, PORT207_FN5), + PINMUX_DATA(GIO_OUT14_MARK, PORT208_FN5), + PINMUX_DATA(GIO_OUT13_MARK, PORT209_FN5), + PINMUX_DATA(GIO_OUT12_MARK, 
PORT210_FN5), + PINMUX_DATA(WGM_TXP2_MARK, PORT211_FN5), + PINMUX_DATA(WGM_GPS_TIMEM_ASK_RFCLK_MARK, PORT212_FN5), + + /* Port213 - Port222 FN1 */ + PINMUX_DATA(D9_MARK, PORT213_FN1), + PINMUX_DATA(D8_MARK, PORT214_FN1), + PINMUX_DATA(D7_MARK, PORT215_FN1), + PINMUX_DATA(D6_MARK, PORT216_FN1), + PINMUX_DATA(D5_MARK, PORT217_FN1), + PINMUX_DATA(D4_MARK, PORT218_FN1), + PINMUX_DATA(D3_MARK, PORT219_FN1), + PINMUX_DATA(D2_MARK, PORT220_FN1), + PINMUX_DATA(D1_MARK, PORT221_FN1), + PINMUX_DATA(D0_MARK, PORT222_FN1), + + /* Port213 - Port222 FN2 */ + PINMUX_DATA(VIO_D9_MARK, PORT213_FN2), + PINMUX_DATA(VIO_D8_MARK, PORT214_FN2), + PINMUX_DATA(VIO_D7_MARK, PORT215_FN2), + PINMUX_DATA(VIO_D6_MARK, PORT216_FN2), + PINMUX_DATA(VIO_D5_MARK, PORT217_FN2), + PINMUX_DATA(VIO_D4_MARK, PORT218_FN2), + PINMUX_DATA(VIO_D3_MARK, PORT219_FN2), + PINMUX_DATA(VIO_D2_MARK, PORT220_FN2), + PINMUX_DATA(VIO_D1_MARK, PORT221_FN2), + PINMUX_DATA(VIO_D0_MARK, PORT222_FN2), + + /* Port213 - Port222 FN5 */ + PINMUX_DATA(GIO_OUT9_MARK, PORT213_FN5), + PINMUX_DATA(GIO_OUT8_MARK, PORT214_FN5), + PINMUX_DATA(GIO_OUT7_MARK, PORT215_FN5), + PINMUX_DATA(GIO_OUT6_MARK, PORT216_FN5), + PINMUX_DATA(GIO_OUT5_217_MARK, PORT217_FN5), + PINMUX_DATA(GIO_OUT4_218_MARK, PORT218_FN5), + PINMUX_DATA(GIO_OUT3_219_MARK, PORT219_FN5), + PINMUX_DATA(GIO_OUT2_220_MARK, PORT220_FN5), + PINMUX_DATA(GIO_OUT1_221_MARK, PORT221_FN5), + PINMUX_DATA(GIO_OUT0_222_MARK, PORT222_FN5), + + /* Port224 */ + PINMUX_DATA(RDWR_224_MARK, PORT224_FN1), + PINMUX_DATA(VIO_HD_MARK, PORT224_FN2), + PINMUX_DATA(SIM1_GPO2_MARK, PORT224_FN5), + + /* Port225 */ + PINMUX_DATA(RD_N_MARK, PORT225_FN1), + + /* Port226 */ + PINMUX_DATA(WAIT_N_MARK, PORT226_FN1), + PINMUX_DATA(VIO_CLK_MARK, PORT226_FN2), + PINMUX_DATA(SIM1_GPO1_MARK, PORT226_FN5), + + /* Port227 */ + PINMUX_DATA(WE0_N_MARK, PORT227_FN1), + PINMUX_DATA(RDWR_227_MARK, PORT227_FN2), + + /* Port228 */ + PINMUX_DATA(WE1_N_MARK, PORT228_FN1), + PINMUX_DATA(SIM0_GPO0_MARK, PORT228_FN5), + + /* Port229 */ + PINMUX_DATA(PWMO_MARK, PORT229_FN1), + PINMUX_DATA(VIO_CKO1_229_MARK, PORT229_FN2), + + /* Port230 */ + PINMUX_DATA(SLIM_CLK_MARK, PORT230_FN1), + PINMUX_DATA(VIO_CKO4_230_MARK, PORT230_FN2), + + /* Port231 */ + PINMUX_DATA(SLIM_DATA_MARK, PORT231_FN1), + PINMUX_DATA(VIO_CKO5_231_MARK, PORT231_FN2), + + /* Port232 */ + PINMUX_DATA(VIO_CKO2_232_MARK, PORT232_FN2), + PINMUX_DATA(SF_PORT_0_232_MARK, PORT232_FN4), + + /* Port233 */ + PINMUX_DATA(VIO_CKO3_233_MARK, PORT233_FN2), + PINMUX_DATA(SF_PORT_1_233_MARK, PORT233_FN4), + + /* Port234 */ + PINMUX_DATA(FSIACK_MARK, PORT234_FN1), + PINMUX_DATA(PDM3_CLK_234_MARK, PORT234_FN2), + PINMUX_DATA(ISP_IRIS1_234_MARK, PORT234_FN3), + + /* Port235 */ + PINMUX_DATA(FSIAISLD_MARK, PORT235_FN1), + PINMUX_DATA(PDM3_DATA_235_MARK, PORT235_FN2, MSEL3CR_12_1), + + /* Port236 */ + PINMUX_DATA(FSIAOMC_MARK, PORT236_FN1), + PINMUX_DATA(PDM0_OUTCLK_236_MARK, PORT236_FN2), + PINMUX_DATA(ISP_IRIS0_236_MARK, PORT236_FN3), + + /* Port237 */ + PINMUX_DATA(FSIAOLR_MARK, PORT237_FN1), + PINMUX_DATA(FSIAILR_MARK, PORT237_FN2), + + /* Port238 */ + PINMUX_DATA(FSIAOBT_MARK, PORT238_FN1), + PINMUX_DATA(FSIAIBT_MARK, PORT238_FN2), + + /* Port239 */ + PINMUX_DATA(FSIAOSLD_MARK, PORT239_FN1), + PINMUX_DATA(PDM0_OUTDATA_239_MARK, PORT239_FN2), + + /* Port240 */ + PINMUX_DATA(FSIBISLD_MARK, PORT240_FN1), + + /* Port241 */ + PINMUX_DATA(FSIBOLR_MARK, PORT241_FN1), + PINMUX_DATA(FSIBILR_MARK, PORT241_FN2), + + /* Port242 */ + PINMUX_DATA(FSIBOMC_MARK, PORT242_FN1), + PINMUX_DATA(ISP_SHUTTER1_242_MARK, 
PORT242_FN3), + + /* Port243 */ + PINMUX_DATA(FSIBOBT_MARK, PORT243_FN1), + PINMUX_DATA(FSIBIBT_MARK, PORT243_FN2), + + /* Port244 */ + PINMUX_DATA(FSIBOSLD_MARK, PORT244_FN1), + PINMUX_DATA(FSIASPDIF_MARK, PORT244_FN2), + + /* Port245 */ + PINMUX_DATA(FSIBCK_MARK, PORT245_FN1), + PINMUX_DATA(ISP_SHUTTER0_245_MARK, PORT245_FN3), + + /* Port246 - Port250 FN1 */ + PINMUX_DATA(ISP_IRIS1_246_MARK, PORT246_FN1), + PINMUX_DATA(ISP_IRIS0_247_MARK, PORT247_FN1), + PINMUX_DATA(ISP_SHUTTER1_248_MARK, PORT248_FN1), + PINMUX_DATA(ISP_SHUTTER0_249_MARK, PORT249_FN1), + PINMUX_DATA(ISP_STROBE_250_MARK, PORT250_FN1), + + /* Port256 - Port258 */ + PINMUX_DATA(MSIOF0_SYNC_MARK, PORT256_FN1), + PINMUX_DATA(MSIOF0_RXD_MARK, PORT257_FN1), + PINMUX_DATA(MSIOF0_SCK_MARK, PORT258_FN1), + + /* Port259 */ + PINMUX_DATA(MSIOF0_SS2_MARK, PORT259_FN1), + PINMUX_DATA(VIO_CKO3_259_MARK, PORT259_FN3), + + /* Port260 */ + PINMUX_DATA(MSIOF0_TXD_MARK, PORT260_FN1), + + /* Port261 */ + PINMUX_DATA(SCIFB1_SCK_261_MARK, PORT261_FN2), + PINMUX_DATA(CHSCIF1_HSCK_MARK, PORT261_FN7), + + /* Port262 */ + PINMUX_DATA(SCIFB2_SCK_262_MARK, PORT262_FN2), + + /* Port263 - Port266 FN1 */ + PINMUX_DATA(MSIOF1_SS2_MARK, PORT263_FN1), + PINMUX_DATA(MSIOF1_TXD_MARK, PORT264_FN1), + PINMUX_DATA(MSIOF1_RXD_MARK, PORT265_FN1), + PINMUX_DATA(MSIOF1_SS1_MARK, PORT266_FN1), + + /* Port263 - Port266 FN4 */ + PINMUX_DATA(MSIOF5_SS2_MARK, PORT263_FN4), + PINMUX_DATA(MSIOF5_TXD_MARK, PORT264_FN4), + PINMUX_DATA(MSIOF5_RXD_MARK, PORT265_FN4), + PINMUX_DATA(MSIOF5_SS1_MARK, PORT266_FN4), + + /* Port267 */ + PINMUX_DATA(MSIOF0_SS1_MARK, PORT267_FN1), + + /* Port268 */ + PINMUX_DATA(MSIOF1_SCK_MARK, PORT268_FN1), + PINMUX_DATA(MSIOF5_SCK_MARK, PORT268_FN4), + + /* Port269 */ + PINMUX_DATA(MSIOF1_SYNC_MARK, PORT269_FN1), + PINMUX_DATA(MSIOF5_SYNC_MARK, PORT269_FN4), + + /* Port270 - Port273 FN1 */ + PINMUX_DATA(MSIOF2_SS1_MARK, PORT270_FN1), + PINMUX_DATA(MSIOF2_SS2_MARK, PORT271_FN1), + PINMUX_DATA(MSIOF3_SS2_MARK, PORT272_FN1), + PINMUX_DATA(MSIOF3_SS1_MARK, PORT273_FN1), + + /* Port270 - Port273 FN3 */ + PINMUX_DATA(VIO_CKO5_270_MARK, PORT270_FN3), + PINMUX_DATA(VIO_CKO2_271_MARK, PORT271_FN3), + PINMUX_DATA(VIO_CKO1_272_MARK, PORT272_FN3), + PINMUX_DATA(VIO_CKO4_273_MARK, PORT273_FN3), + + /* Port274 */ + PINMUX_DATA(MSIOF4_SS2_MARK, PORT274_FN1), + PINMUX_DATA(TPU1TO0_MARK, PORT274_FN4), + + /* Port275 - Port280 */ + PINMUX_DATA(IC_DP_MARK, PORT275_FN1), + PINMUX_DATA(SIM0_RST_MARK, PORT276_FN1), + PINMUX_DATA(IC_DM_MARK, PORT277_FN1), + PINMUX_DATA(SIM0_BSICOMP_MARK, PORT278_FN1), + PINMUX_DATA(SIM0_CLK_MARK, PORT279_FN1), + PINMUX_DATA(SIM0_IO_MARK, PORT280_FN1), + + /* Port281 */ + PINMUX_DATA(SIM1_IO_MARK, PORT281_FN1), + PINMUX_DATA(PDM2_DATA_281_MARK, PORT281_FN2, MSEL3CR_12_1), + + /* Port282 */ + PINMUX_DATA(SIM1_CLK_MARK, PORT282_FN1), + PINMUX_DATA(PDM2_CLK_282_MARK, PORT282_FN2), + + /* Port283 */ + PINMUX_DATA(SIM1_RST_MARK, PORT283_FN1), + + /* Port289 */ + PINMUX_DATA(SDHID1_0_MARK, PORT289_FN1), + PINMUX_DATA(STMDATA0_2_MARK, PORT289_FN3), + + /* Port290 */ + PINMUX_DATA(SDHID1_1_MARK, PORT290_FN1), + PINMUX_DATA(STMDATA1_2_MARK, PORT290_FN3), + PINMUX_DATA(IRQ51_MARK, PORT290_FN0), + + /* Port291 - Port294 FN1 */ + PINMUX_DATA(SDHID1_2_MARK, PORT291_FN1), + PINMUX_DATA(SDHID1_3_MARK, PORT292_FN1), + PINMUX_DATA(SDHICLK1_MARK, PORT293_FN1), + PINMUX_DATA(SDHICMD1_MARK, PORT294_FN1), + + /* Port291 - Port294 FN3 */ + PINMUX_DATA(STMDATA2_2_MARK, PORT291_FN3), + PINMUX_DATA(STMDATA3_2_MARK, PORT292_FN3), + PINMUX_DATA(STMCLK_2_MARK, 
PORT293_FN3), + PINMUX_DATA(STMSIDI_2_MARK, PORT294_FN3), + + /* Port295 */ + PINMUX_DATA(SDHID2_0_MARK, PORT295_FN1), + PINMUX_DATA(MSIOF4_TXD_MARK, PORT295_FN2), + PINMUX_DATA(SCIFB2_TXD_295_MARK, PORT295_FN3, MSEL3CR_10_1), + PINMUX_DATA(MSIOF6_TXD_MARK, PORT295_FN4), + + /* Port296 */ + PINMUX_DATA(SDHID2_1_MARK, PORT296_FN1), + PINMUX_DATA(MSIOF6_SS2_MARK, PORT296_FN4), + PINMUX_DATA(IRQ52_MARK, PORT296_FN0), + + /* Port297 - Port300 FN1 */ + PINMUX_DATA(SDHID2_2_MARK, PORT297_FN1), + PINMUX_DATA(SDHID2_3_MARK, PORT298_FN1), + PINMUX_DATA(SDHICLK2_MARK, PORT299_FN1), + PINMUX_DATA(SDHICMD2_MARK, PORT300_FN1), + + /* Port297 - Port300 FN2 */ + PINMUX_DATA(MSIOF4_RXD_MARK, PORT297_FN2), + PINMUX_DATA(MSIOF4_SYNC_MARK, PORT298_FN2), + PINMUX_DATA(MSIOF4_SCK_MARK, PORT299_FN2), + PINMUX_DATA(MSIOF4_SS1_MARK, PORT300_FN2), + + /* Port297 - Port300 FN3 */ + PINMUX_DATA(SCIFB2_RXD_297_MARK, PORT297_FN3, MSEL3CR_10_1), + PINMUX_DATA(SCIFB2_CTS_298_MARK, PORT298_FN3, MSEL3CR_10_1), + PINMUX_DATA(SCIFB2_SCK_299_MARK, PORT299_FN3), + PINMUX_DATA(SCIFB2_RTS_300_MARK, PORT300_FN3), + + /* Port297 - Port300 FN4 */ + PINMUX_DATA(MSIOF6_RXD_MARK, PORT297_FN4), + PINMUX_DATA(MSIOF6_SYNC_MARK, PORT298_FN4), + PINMUX_DATA(MSIOF6_SCK_MARK, PORT299_FN4), + PINMUX_DATA(MSIOF6_SS1_MARK, PORT300_FN4), + + /* Port301 */ + PINMUX_DATA(SDHICD0_MARK, PORT301_FN1), + PINMUX_DATA(IRQ50_MARK, PORT301_FN0), + + /* Port302 - Port306 FN1 */ + PINMUX_DATA(SDHID0_0_MARK, PORT302_FN1), + PINMUX_DATA(SDHID0_1_MARK, PORT303_FN1), + PINMUX_DATA(SDHID0_2_MARK, PORT304_FN1), + PINMUX_DATA(SDHID0_3_MARK, PORT305_FN1), + PINMUX_DATA(SDHICMD0_MARK, PORT306_FN1), + + /* Port302 - Port306 FN3 */ + PINMUX_DATA(STMDATA0_1_MARK, PORT302_FN3), + PINMUX_DATA(STMDATA1_1_MARK, PORT303_FN3), + PINMUX_DATA(STMDATA2_1_MARK, PORT304_FN3), + PINMUX_DATA(STMDATA3_1_MARK, PORT305_FN3), + PINMUX_DATA(STMSIDI_1_MARK, PORT306_FN3), + + /* Port307 */ + PINMUX_DATA(SDHIWP0_MARK, PORT307_FN1), + + /* Port308 */ + PINMUX_DATA(SDHICLK0_MARK, PORT308_FN1), + PINMUX_DATA(STMCLK_1_MARK, PORT308_FN3), + + /* Port320 - Port329 */ + PINMUX_DATA(IRQ16_MARK, PORT320_FN0), + PINMUX_DATA(IRQ17_MARK, PORT321_FN0), + PINMUX_DATA(IRQ28_MARK, PORT322_FN0), + PINMUX_DATA(IRQ29_MARK, PORT323_FN0), + PINMUX_DATA(IRQ30_MARK, PORT324_FN0), + PINMUX_DATA(IRQ53_MARK, PORT325_FN0), + PINMUX_DATA(IRQ54_MARK, PORT326_FN0), + PINMUX_DATA(IRQ55_MARK, PORT327_FN0), + PINMUX_DATA(IRQ56_MARK, PORT328_FN0), + PINMUX_DATA(IRQ57_MARK, PORT329_FN0), +}; + +static struct sh_pfc_pin pinmux_pins[] = { + GPIO_PORT_ALL(), +}; + +#define PINMUX_FN_BASE ARRAY_SIZE(pinmux_pins) + +static const struct pinmux_func pinmux_func_gpios[] = { + /* Port0 */ + GPIO_FN(LCDD0), + GPIO_FN(PDM2_CLK_0), + GPIO_FN(DU0_DR0), + GPIO_FN(IRQ0), + + /* Port1 */ + GPIO_FN(LCDD1), + GPIO_FN(PDM2_DATA_1), + GPIO_FN(DU0_DR19), + GPIO_FN(IRQ1), + + /* Port2 */ + GPIO_FN(LCDD2), + GPIO_FN(PDM3_CLK_2), + GPIO_FN(DU0_DR2), + GPIO_FN(IRQ2), + + /* Port3 */ + GPIO_FN(LCDD3), + GPIO_FN(PDM3_DATA_3), + GPIO_FN(DU0_DR3), + GPIO_FN(IRQ3), + + /* Port4 */ + GPIO_FN(LCDD4), + GPIO_FN(PDM4_CLK_4), + GPIO_FN(DU0_DR4), + GPIO_FN(IRQ4), + + /* Port5 */ + GPIO_FN(LCDD5), + GPIO_FN(PDM4_DATA_5), + GPIO_FN(DU0_DR5), + GPIO_FN(IRQ5), + + /* Port6 */ + GPIO_FN(LCDD6), + GPIO_FN(PDM0_OUTCLK_6), + GPIO_FN(DU0_DR6), + GPIO_FN(IRQ6), + + /* Port7 */ + GPIO_FN(LCDD7), + GPIO_FN(PDM0_OUTDATA_7), + GPIO_FN(DU0_DR7), + GPIO_FN(IRQ7), + + /* Port8 */ + GPIO_FN(LCDD8), + GPIO_FN(PDM1_OUTCLK_8), + GPIO_FN(DU0_DG0), + GPIO_FN(IRQ8), + + /* Port9 */ 
+ GPIO_FN(LCDD9), + GPIO_FN(PDM1_OUTDATA_9), + GPIO_FN(DU0_DG1), + GPIO_FN(IRQ9), + + /* Port10 */ + GPIO_FN(LCDD10), + GPIO_FN(FSICCK), + GPIO_FN(DU0_DG2), + GPIO_FN(IRQ10), + + /* Port11 */ + GPIO_FN(LCDD11), + GPIO_FN(FSICISLD), + GPIO_FN(DU0_DG3), + GPIO_FN(IRQ11), + + /* Port12 */ + GPIO_FN(LCDD12), + GPIO_FN(FSICOMC), + GPIO_FN(DU0_DG4), + GPIO_FN(IRQ12), + + /* Port13 */ + GPIO_FN(LCDD13), + GPIO_FN(FSICOLR), + GPIO_FN(FSICILR), + GPIO_FN(DU0_DG5), + GPIO_FN(IRQ13), + + /* Port14 */ + GPIO_FN(LCDD14), + GPIO_FN(FSICOBT), + GPIO_FN(FSICIBT), + GPIO_FN(DU0_DG6), + GPIO_FN(IRQ14), + + /* Port15 */ + GPIO_FN(LCDD15), + GPIO_FN(FSICOSLD), + GPIO_FN(DU0_DG7), + GPIO_FN(IRQ15), + + /* Port16 */ + GPIO_FN(LCDD16), + GPIO_FN(TPU1TO1), + GPIO_FN(DU0_DB0), + + /* Port17 */ + GPIO_FN(LCDD17), + GPIO_FN(SF_IRQ_00), + GPIO_FN(DU0_DB1), + + /* Port18 */ + GPIO_FN(LCDD18), + GPIO_FN(SF_IRQ_01), + GPIO_FN(DU0_DB2), + + /* Port19 */ + GPIO_FN(LCDD19), + GPIO_FN(SCIFB3_RTS_19), + GPIO_FN(DU0_DB3), + + /* Port20 */ + GPIO_FN(LCDD20), + GPIO_FN(SCIFB3_CTS_20), + GPIO_FN(DU0_DB4), + + /* Port21 */ + GPIO_FN(LCDD21), + GPIO_FN(SCIFB3_TXD_21), + GPIO_FN(DU0_DB5), + + /* Port22 */ + GPIO_FN(LCDD22), + GPIO_FN(SCIFB3_RXD_22), + GPIO_FN(DU0_DB6), + + /* Port23 */ + GPIO_FN(LCDD23), + GPIO_FN(SCIFB3_SCK_23), + GPIO_FN(DU0_DB7), + + /* Port24 */ + GPIO_FN(LCDHSYN), + GPIO_FN(LCDCS), + GPIO_FN(SCIFB1_RTS_24), + GPIO_FN(DU0_EXHSYNC_N_CSYNC_N_HSYNC_N), + + /* Port25 */ + GPIO_FN(LCDVSYN), + GPIO_FN(SCIFB1_CTS_25), + GPIO_FN(DU0_EXVSYNC_N_VSYNC_N_CSYNC_N), + + /* Port26 */ + GPIO_FN(LCDDCK), + GPIO_FN(LCDWR), + GPIO_FN(SCIFB1_TXD_26), + GPIO_FN(DU0_DOTCLKIN), + + /* Port27 */ + GPIO_FN(LCDDISP), + GPIO_FN(LCDRS), + GPIO_FN(SCIFB1_RXD_27), + GPIO_FN(DU0_DOTCLKOUT), + + /* Port28 */ + GPIO_FN(LCDRD_N), + GPIO_FN(SCIFB1_SCK_28), + GPIO_FN(DU0_DOTCLKOUTB), + + /* Port29 */ + GPIO_FN(LCDLCLK), + GPIO_FN(SF_IRQ_02), + GPIO_FN(DU0_DISP_CSYNC_N_DE), + + /* Port30 */ + GPIO_FN(LCDDON), + GPIO_FN(SF_IRQ_03), + GPIO_FN(DU0_ODDF_N_CLAMP), + + /* Port32 */ + GPIO_FN(SCIFA0_RTS), + GPIO_FN(SIM0_DET), + GPIO_FN(CSCIF0_RTS), + + /* Port33 */ + GPIO_FN(SCIFA0_CTS), + GPIO_FN(SIM1_DET), + GPIO_FN(CSCIF0_CTS), + + /* Port34 */ + GPIO_FN(SCIFA0_SCK), + GPIO_FN(SIM0_PWRON), + GPIO_FN(CSCIF0_SCK), + + /* Port35 */ + GPIO_FN(SCIFA1_RTS), + GPIO_FN(CSCIF1_RTS), + + /* Port36 */ + GPIO_FN(SCIFA1_CTS), + GPIO_FN(CSCIF1_CTS), + + /* Port37 */ + GPIO_FN(SCIFA1_SCK), + GPIO_FN(CSCIF1_SCK), + + /* Port38 */ + GPIO_FN(SCIFB0_RTS), + GPIO_FN(TPU0TO1), + GPIO_FN(SCIFB3_RTS_38), + GPIO_FN(CHSCIF0_HRTS), + + /* Port39 */ + GPIO_FN(SCIFB0_CTS), + GPIO_FN(TPU0TO2), + GPIO_FN(SCIFB3_CTS_39), + GPIO_FN(CHSCIF0_HCTS), + + /* Port40 */ + GPIO_FN(SCIFB0_SCK), + GPIO_FN(TPU0TO3), + GPIO_FN(SCIFB3_SCK_40), + GPIO_FN(CHSCIF0_HSCK), + + /* Port64 */ + GPIO_FN(PDM0_DATA), + + /* Port65 */ + GPIO_FN(PDM1_DATA), + + /* Port66 */ + GPIO_FN(HSI_RX_WAKE), + GPIO_FN(SCIFB2_CTS_66), + GPIO_FN(MSIOF3_SYNC), + GPIO_FN(GenIO4), + GPIO_FN(IRQ40), + + /* Port67 */ + GPIO_FN(HSI_RX_READY), + GPIO_FN(SCIFB1_TXD_67), + GPIO_FN(GIO_OUT3_67), + GPIO_FN(CHSCIF1_HTX), + + /* Port68 */ + GPIO_FN(HSI_RX_FLAG), + GPIO_FN(SCIFB2_TXD_68), + GPIO_FN(MSIOF3_TXD), + GPIO_FN(GIO_OUT4_68), + + /* Port69 */ + GPIO_FN(HSI_RX_DATA), + GPIO_FN(SCIFB2_RXD_69), + GPIO_FN(MSIOF3_RXD), + GPIO_FN(GIO_OUT5_69), + + /* Port70 */ + GPIO_FN(HSI_TX_FLAG), + GPIO_FN(SCIFB1_RTS_70), + GPIO_FN(GIO_OUT1_70), + GPIO_FN(HSIC_TSTCLK0), + GPIO_FN(CHSCIF1_HRTS), + + /* Port71 */ + GPIO_FN(HSI_TX_DATA), + 
GPIO_FN(SCIFB1_CTS_71), + GPIO_FN(GIO_OUT2_71), + GPIO_FN(HSIC_TSTCLK1), + GPIO_FN(CHSCIF1_HCTS), + + /* Port72 */ + GPIO_FN(HSI_TX_WAKE), + GPIO_FN(SCIFB1_RXD_72), + GPIO_FN(GenIO8), + GPIO_FN(CHSCIF1_HRX), + + /* Port73 */ + GPIO_FN(HSI_TX_READY), + GPIO_FN(SCIFB2_RTS_73), + GPIO_FN(MSIOF3_SCK), + GPIO_FN(GIO_OUT0_73), + + /* Port74 - Port85 */ + GPIO_FN(IRDA_OUT), + GPIO_FN(IRDA_IN), + GPIO_FN(IRDA_FIRSEL), + GPIO_FN(TPU0TO0), + GPIO_FN(DIGRFEN), + GPIO_FN(GPS_TIMESTAMP), + GPIO_FN(TXP), + GPIO_FN(TXP2), + GPIO_FN(COEX_0), + GPIO_FN(COEX_1), + GPIO_FN(IRQ19), + GPIO_FN(IRQ18), + + /* Port96 - Port101 */ + GPIO_FN(KEYIN0), + GPIO_FN(KEYIN1), + GPIO_FN(KEYIN2), + GPIO_FN(KEYIN3), + GPIO_FN(KEYIN4), + GPIO_FN(KEYIN5), + + /* Port102 */ + GPIO_FN(KEYIN6), + GPIO_FN(IRQ41), + + /* Port103 */ + GPIO_FN(KEYIN7), + GPIO_FN(IRQ42), + + /* Port104 - Port108 */ + GPIO_FN(KEYOUT0), + GPIO_FN(KEYOUT1), + GPIO_FN(KEYOUT2), + GPIO_FN(KEYOUT3), + GPIO_FN(KEYOUT4), + + /* Port109 */ + GPIO_FN(KEYOUT5), + GPIO_FN(IRQ43), + + /* Port110 */ + GPIO_FN(KEYOUT6), + GPIO_FN(IRQ44), + + /* Port111 */ + GPIO_FN(KEYOUT7), + GPIO_FN(RFANAEN), + GPIO_FN(IRQ45), + + /* Port112 */ + GPIO_FN(KEYIN8), + GPIO_FN(KEYOUT8), + GPIO_FN(SF_IRQ_04), + GPIO_FN(IRQ46), + + /* Port113 */ + GPIO_FN(KEYIN9), + GPIO_FN(KEYOUT9), + GPIO_FN(SF_IRQ_05), + GPIO_FN(IRQ47), + + /* Port114 */ + GPIO_FN(KEYIN10), + GPIO_FN(KEYOUT10), + GPIO_FN(SF_IRQ_06), + GPIO_FN(IRQ48), + + /* Port115 */ + GPIO_FN(KEYIN11), + GPIO_FN(KEYOUT11), + GPIO_FN(SF_IRQ_07), + GPIO_FN(IRQ49), + + /* Port116 */ + GPIO_FN(SCIFA0_TXD), + GPIO_FN(CSCIF0_TX), + + /* Port117 */ + GPIO_FN(SCIFA0_RXD), + GPIO_FN(CSCIF0_RX), + + /* Port118 */ + GPIO_FN(SCIFA1_TXD), + GPIO_FN(CSCIF1_TX), + + /* Port119 */ + GPIO_FN(SCIFA1_RXD), + GPIO_FN(CSCIF1_RX), + + /* Port120 */ + GPIO_FN(SF_PORT_1_120), + GPIO_FN(SCIFB3_RXD_120), + GPIO_FN(DU0_CDE), + + /* Port121 */ + GPIO_FN(SF_PORT_0_121), + GPIO_FN(SCIFB3_TXD_121), + + /* Port122 */ + GPIO_FN(SCIFB0_TXD), + GPIO_FN(CHSCIF0_HTX), + + /* Port123 */ + GPIO_FN(SCIFB0_RXD), + GPIO_FN(CHSCIF0_HRX), + + /* Port124 */ + GPIO_FN(ISP_STROBE_124), + + /* Port125 */ + GPIO_FN(STP_ISD_0), + GPIO_FN(PDM4_CLK_125), + GPIO_FN(MSIOF2_TXD), + GPIO_FN(SIM0_VOLTSEL0), + + /* Port126 */ + GPIO_FN(TS_SDEN), + GPIO_FN(MSIOF7_SYNC), + GPIO_FN(STP_ISEN_1), + + /* Port128 */ + GPIO_FN(STP_ISEN_0), + GPIO_FN(PDM1_OUTDATA_128), + GPIO_FN(MSIOF2_SYNC), + GPIO_FN(SIM1_VOLTSEL1), + + /* Port129 */ + GPIO_FN(TS_SPSYNC), + GPIO_FN(MSIOF7_RXD), + GPIO_FN(STP_ISSYNC_1), + + /* Port130 */ + GPIO_FN(STP_ISSYNC_0), + GPIO_FN(PDM4_DATA_130), + GPIO_FN(MSIOF2_RXD), + GPIO_FN(SIM0_VOLTSEL1), + + /* Port131 */ + GPIO_FN(STP_OPWM_0), + GPIO_FN(SIM1_PWRON), + + /* Port132 */ + GPIO_FN(TS_SCK), + GPIO_FN(MSIOF7_SCK), + GPIO_FN(STP_ISCLK_1), + + /* Port133 */ + GPIO_FN(STP_ISCLK_0), + GPIO_FN(PDM1_OUTCLK_133), + GPIO_FN(MSIOF2_SCK), + GPIO_FN(SIM1_VOLTSEL0), + + /* Port134 */ + GPIO_FN(TS_SDAT), + GPIO_FN(MSIOF7_TXD), + GPIO_FN(STP_ISD_1), + + /* Port160 - Port178 */ + GPIO_FN(IRQ20), + GPIO_FN(IRQ21), + GPIO_FN(IRQ22), + GPIO_FN(IRQ23), + GPIO_FN(MMCD0_0), + GPIO_FN(MMCD0_1), + GPIO_FN(MMCD0_2), + GPIO_FN(MMCD0_3), + GPIO_FN(MMCD0_4), + GPIO_FN(MMCD0_5), + GPIO_FN(MMCD0_6), + GPIO_FN(MMCD0_7), + GPIO_FN(MMCCMD0), + GPIO_FN(MMCCLK0), + GPIO_FN(MMCRST), + GPIO_FN(IRQ24), + GPIO_FN(IRQ25), + GPIO_FN(IRQ26), + GPIO_FN(IRQ27), + + /* Port192 - Port200 FN1 */ + GPIO_FN(A10), + GPIO_FN(A9), + GPIO_FN(A8), + GPIO_FN(A7), + GPIO_FN(A6), + GPIO_FN(A5), + GPIO_FN(A4), + GPIO_FN(A3), 
+ GPIO_FN(A2), + + /* Port192 - Port200 FN2 */ + GPIO_FN(MMCD1_7), + GPIO_FN(MMCD1_6), + GPIO_FN(MMCD1_5), + GPIO_FN(MMCD1_4), + GPIO_FN(MMCD1_3), + GPIO_FN(MMCD1_2), + GPIO_FN(MMCD1_1), + GPIO_FN(MMCD1_0), + GPIO_FN(MMCCMD1), + + /* Port192 - Port200 IRQ */ + GPIO_FN(IRQ31), + GPIO_FN(IRQ32), + GPIO_FN(IRQ33), + GPIO_FN(IRQ34), + GPIO_FN(IRQ35), + GPIO_FN(IRQ36), + GPIO_FN(IRQ37), + GPIO_FN(IRQ38), + GPIO_FN(IRQ39), + + /* Port201 */ + GPIO_FN(A1), + + /* Port202 */ + GPIO_FN(A0), + GPIO_FN(BS), + + /* Port203 */ + GPIO_FN(CKO), + GPIO_FN(MMCCLK1), + + /* Port204 */ + GPIO_FN(CS0_N), + GPIO_FN(SIM0_GPO1), + + /* Port205 */ + GPIO_FN(CS2_N), + GPIO_FN(SIM0_GPO2), + + /* Port206 */ + GPIO_FN(CS4_N), + GPIO_FN(VIO_VD), + GPIO_FN(SIM1_GPO0), + + /* Port207 - Port212 FN1 */ + GPIO_FN(D15), + GPIO_FN(D14), + GPIO_FN(D13), + GPIO_FN(D12), + GPIO_FN(D11), + GPIO_FN(D10), + + /* Port207 - Port212 FN5 */ + GPIO_FN(GIO_OUT15), + GPIO_FN(GIO_OUT14), + GPIO_FN(GIO_OUT13), + GPIO_FN(GIO_OUT12), + GPIO_FN(WGM_TXP2), + GPIO_FN(WGM_GPS_TIMEM_ASK_RFCLK), + + /* Port213 - Port222 FN1 */ + GPIO_FN(D9), + GPIO_FN(D8), + GPIO_FN(D7), + GPIO_FN(D6), + GPIO_FN(D5), + GPIO_FN(D4), + GPIO_FN(D3), + GPIO_FN(D2), + GPIO_FN(D1), + GPIO_FN(D0), + + /* Port213 - Port222 FN2 */ + GPIO_FN(VIO_D9), + GPIO_FN(VIO_D8), + GPIO_FN(VIO_D7), + GPIO_FN(VIO_D6), + GPIO_FN(VIO_D5), + GPIO_FN(VIO_D4), + GPIO_FN(VIO_D3), + GPIO_FN(VIO_D2), + GPIO_FN(VIO_D1), + GPIO_FN(VIO_D0), + + /* Port213 - Port222 FN5 */ + GPIO_FN(GIO_OUT9), + GPIO_FN(GIO_OUT8), + GPIO_FN(GIO_OUT7), + GPIO_FN(GIO_OUT6), + GPIO_FN(GIO_OUT5_217), + GPIO_FN(GIO_OUT4_218), + GPIO_FN(GIO_OUT3_219), + GPIO_FN(GIO_OUT2_220), + GPIO_FN(GIO_OUT1_221), + GPIO_FN(GIO_OUT0_222), + + /* Port224 */ + GPIO_FN(RDWR_224), + GPIO_FN(VIO_HD), + GPIO_FN(SIM1_GPO2), + + /* Port225 */ + GPIO_FN(RD_N), + + /* Port226 */ + GPIO_FN(WAIT_N), + GPIO_FN(VIO_CLK), + GPIO_FN(SIM1_GPO1), + + /* Port227 */ + GPIO_FN(WE0_N), + GPIO_FN(RDWR_227), + + /* Port228 */ + GPIO_FN(WE1_N), + GPIO_FN(SIM0_GPO0), + + /* Port229 */ + GPIO_FN(PWMO), + GPIO_FN(VIO_CKO1_229), + + /* Port230 */ + GPIO_FN(SLIM_CLK), + GPIO_FN(VIO_CKO4_230), + + /* Port231 */ + GPIO_FN(SLIM_DATA), + GPIO_FN(VIO_CKO5_231), + + /* Port232 */ + GPIO_FN(VIO_CKO2_232), + GPIO_FN(SF_PORT_0_232), + + /* Port233 */ + GPIO_FN(VIO_CKO3_233), + GPIO_FN(SF_PORT_1_233), + + /* Port234 */ + GPIO_FN(FSIACK), + GPIO_FN(PDM3_CLK_234), + GPIO_FN(ISP_IRIS1_234), + + /* Port235 */ + GPIO_FN(FSIAISLD), + GPIO_FN(PDM3_DATA_235), + + /* Port236 */ + GPIO_FN(FSIAOMC), + GPIO_FN(PDM0_OUTCLK_236), + GPIO_FN(ISP_IRIS0_236), + + /* Port237 */ + GPIO_FN(FSIAOLR), + GPIO_FN(FSIAILR), + + /* Port238 */ + GPIO_FN(FSIAOBT), + GPIO_FN(FSIAIBT), + + /* Port239 */ + GPIO_FN(FSIAOSLD), + GPIO_FN(PDM0_OUTDATA_239), + + /* Port240 */ + GPIO_FN(FSIBISLD), + + /* Port241 */ + GPIO_FN(FSIBOLR), + GPIO_FN(FSIBILR), + + /* Port242 */ + GPIO_FN(FSIBOMC), + GPIO_FN(ISP_SHUTTER1_242), + + /* Port243 */ + GPIO_FN(FSIBOBT), + GPIO_FN(FSIBIBT), + + /* Port244 */ + GPIO_FN(FSIBOSLD), + GPIO_FN(FSIASPDIF), + + /* Port245 */ + GPIO_FN(FSIBCK), + GPIO_FN(ISP_SHUTTER0_245), + + /* Port246 - Port250 FN1 */ + GPIO_FN(ISP_IRIS1_246), + GPIO_FN(ISP_IRIS0_247), + GPIO_FN(ISP_SHUTTER1_248), + GPIO_FN(ISP_SHUTTER0_249), + GPIO_FN(ISP_STROBE_250), + + /* Port256 - Port258 */ + GPIO_FN(MSIOF0_SYNC), + GPIO_FN(MSIOF0_RXD), + GPIO_FN(MSIOF0_SCK), + + /* Port259 */ + GPIO_FN(MSIOF0_SS2), + GPIO_FN(VIO_CKO3_259), + + /* Port260 */ + GPIO_FN(MSIOF0_TXD), + + /* Port261 */ + 
GPIO_FN(SCIFB1_SCK_261), + GPIO_FN(CHSCIF1_HSCK), + + /* Port262 */ + GPIO_FN(SCIFB2_SCK_262), + + /* Port263 - Port266 FN1 */ + GPIO_FN(MSIOF1_SS2), + GPIO_FN(MSIOF1_TXD), + GPIO_FN(MSIOF1_RXD), + GPIO_FN(MSIOF1_SS1), + + /* Port263 - Port266 FN4 */ + GPIO_FN(MSIOF5_SS2), + GPIO_FN(MSIOF5_TXD), + GPIO_FN(MSIOF5_RXD), + GPIO_FN(MSIOF5_SS1), + + /* Port267 */ + GPIO_FN(MSIOF0_SS1), + + /* Port268 */ + GPIO_FN(MSIOF1_SCK), + GPIO_FN(MSIOF5_SCK), + + /* Port269 */ + GPIO_FN(MSIOF1_SYNC), + GPIO_FN(MSIOF5_SYNC), + + /* Port270 - Port273 FN1 */ + GPIO_FN(MSIOF2_SS1), + GPIO_FN(MSIOF2_SS2), + GPIO_FN(MSIOF3_SS2), + GPIO_FN(MSIOF3_SS1), + + /* Port270 - Port273 FN3 */ + GPIO_FN(VIO_CKO5_270), + GPIO_FN(VIO_CKO2_271), + GPIO_FN(VIO_CKO1_272), + GPIO_FN(VIO_CKO4_273), + + /* Port274 */ + GPIO_FN(MSIOF4_SS2), + GPIO_FN(TPU1TO0), + + /* Port275 - Port280 */ + GPIO_FN(IC_DP), + GPIO_FN(SIM0_RST), + GPIO_FN(IC_DM), + GPIO_FN(SIM0_BSICOMP), + GPIO_FN(SIM0_CLK), + GPIO_FN(SIM0_IO), + + /* Port281 */ + GPIO_FN(SIM1_IO), + GPIO_FN(PDM2_DATA_281), + + /* Port282 */ + GPIO_FN(SIM1_CLK), + GPIO_FN(PDM2_CLK_282), + + /* Port283 */ + GPIO_FN(SIM1_RST), + + /* Port289 */ + GPIO_FN(SDHID1_0), + GPIO_FN(STMDATA0_2), + + /* Port290 */ + GPIO_FN(SDHID1_1), + GPIO_FN(STMDATA1_2), + GPIO_FN(IRQ51), + + /* Port291 - Port294 FN1 */ + GPIO_FN(SDHID1_2), + GPIO_FN(SDHID1_3), + GPIO_FN(SDHICLK1), + GPIO_FN(SDHICMD1), + + /* Port291 - Port294 FN3 */ + GPIO_FN(STMDATA2_2), + GPIO_FN(STMDATA3_2), + GPIO_FN(STMCLK_2), + GPIO_FN(STMSIDI_2), + + /* Port295 */ + GPIO_FN(SDHID2_0), + GPIO_FN(MSIOF4_TXD), + GPIO_FN(SCIFB2_TXD_295), + GPIO_FN(MSIOF6_TXD), + + /* Port296 */ + GPIO_FN(SDHID2_1), + GPIO_FN(MSIOF6_SS2), + GPIO_FN(IRQ52), + + /* Port297 - Port300 FN1 */ + GPIO_FN(SDHID2_2), + GPIO_FN(SDHID2_3), + GPIO_FN(SDHICLK2), + GPIO_FN(SDHICMD2), + + /* Port297 - Port300 FN2 */ + GPIO_FN(MSIOF4_RXD), + GPIO_FN(MSIOF4_SYNC), + GPIO_FN(MSIOF4_SCK), + GPIO_FN(MSIOF4_SS1), + + /* Port297 - Port300 FN3 */ + GPIO_FN(SCIFB2_RXD_297), + GPIO_FN(SCIFB2_CTS_298), + GPIO_FN(SCIFB2_SCK_299), + GPIO_FN(SCIFB2_RTS_300), + + /* Port297 - Port300 FN4 */ + GPIO_FN(MSIOF6_RXD), + GPIO_FN(MSIOF6_SYNC), + GPIO_FN(MSIOF6_SCK), + GPIO_FN(MSIOF6_SS1), + + /* Port301 */ + GPIO_FN(SDHICD0), + GPIO_FN(IRQ50), + + /* Port302 - Port306 FN1 */ + GPIO_FN(SDHID0_0), + GPIO_FN(SDHID0_1), + GPIO_FN(SDHID0_2), + GPIO_FN(SDHID0_3), + GPIO_FN(SDHICMD0), + + /* Port302 - Port306 FN3 */ + GPIO_FN(STMDATA0_1), + GPIO_FN(STMDATA1_1), + GPIO_FN(STMDATA2_1), + GPIO_FN(STMDATA3_1), + GPIO_FN(STMSIDI_1), + + /* Port307 */ + GPIO_FN(SDHIWP0), + + /* Port308 */ + GPIO_FN(SDHICLK0), + GPIO_FN(STMCLK_1), + + /* Port320 - Port329 */ + GPIO_FN(IRQ16), + GPIO_FN(IRQ17), + GPIO_FN(IRQ28), + GPIO_FN(IRQ29), + GPIO_FN(IRQ30), + GPIO_FN(IRQ53), + GPIO_FN(IRQ54), + GPIO_FN(IRQ55), + GPIO_FN(IRQ56), + GPIO_FN(IRQ57), +}; + +static const struct pinmux_cfg_reg pinmux_config_regs[] = { + + PORTCR(0, 0xe6050000), + PORTCR(1, 0xe6050001), + PORTCR(2, 0xe6050002), + PORTCR(3, 0xe6050003), + PORTCR(4, 0xe6050004), + PORTCR(5, 0xe6050005), + PORTCR(6, 0xe6050006), + PORTCR(7, 0xe6050007), + PORTCR(8, 0xe6050008), + PORTCR(9, 0xe6050009), + PORTCR(10, 0xe605000A), + PORTCR(11, 0xe605000B), + PORTCR(12, 0xe605000C), + PORTCR(13, 0xe605000D), + PORTCR(14, 0xe605000E), + PORTCR(15, 0xe605000F), + PORTCR(16, 0xe6050010), + PORTCR(17, 0xe6050011), + PORTCR(18, 0xe6050012), + PORTCR(19, 0xe6050013), + PORTCR(20, 0xe6050014), + PORTCR(21, 0xe6050015), + PORTCR(22, 0xe6050016), + PORTCR(23, 0xe6050017), 
+ PORTCR(24, 0xe6050018), + PORTCR(25, 0xe6050019), + PORTCR(26, 0xe605001A), + PORTCR(27, 0xe605001B), + PORTCR(28, 0xe605001C), + PORTCR(29, 0xe605001D), + PORTCR(30, 0xe605001E), + PORTCR(32, 0xe6051020), + PORTCR(33, 0xe6051021), + PORTCR(34, 0xe6051022), + PORTCR(35, 0xe6051023), + PORTCR(36, 0xe6051024), + PORTCR(37, 0xe6051025), + PORTCR(38, 0xe6051026), + PORTCR(39, 0xe6051027), + PORTCR(40, 0xe6051028), + PORTCR(64, 0xe6050040), + PORTCR(65, 0xe6050041), + PORTCR(66, 0xe6050042), + PORTCR(67, 0xe6050043), + PORTCR(68, 0xe6050044), + PORTCR(69, 0xe6050045), + PORTCR(70, 0xe6050046), + PORTCR(71, 0xe6050047), + PORTCR(72, 0xe6050048), + PORTCR(73, 0xe6050049), + PORTCR(74, 0xe605004A), + PORTCR(75, 0xe605004B), + PORTCR(76, 0xe605004C), + PORTCR(77, 0xe605004D), + PORTCR(78, 0xe605004E), + PORTCR(79, 0xe605004F), + PORTCR(80, 0xe6050050), + PORTCR(81, 0xe6050051), + PORTCR(82, 0xe6050052), + PORTCR(83, 0xe6050053), + PORTCR(84, 0xe6050054), + PORTCR(85, 0xe6050055), + PORTCR(96, 0xe6051060), + PORTCR(97, 0xe6051061), + PORTCR(98, 0xe6051062), + PORTCR(99, 0xe6051063), + PORTCR(100, 0xe6051064), + PORTCR(101, 0xe6051065), + PORTCR(102, 0xe6051066), + PORTCR(103, 0xe6051067), + PORTCR(104, 0xe6051068), + PORTCR(105, 0xe6051069), + PORTCR(106, 0xe605106A), + PORTCR(107, 0xe605106B), + PORTCR(108, 0xe605106C), + PORTCR(109, 0xe605106D), + PORTCR(110, 0xe605106E), + PORTCR(111, 0xe605106F), + PORTCR(112, 0xe6051070), + PORTCR(113, 0xe6051071), + PORTCR(114, 0xe6051072), + PORTCR(115, 0xe6051073), + PORTCR(116, 0xe6051074), + PORTCR(117, 0xe6051075), + PORTCR(118, 0xe6051076), + PORTCR(119, 0xe6051077), + PORTCR(120, 0xe6051078), + PORTCR(121, 0xe6051079), + PORTCR(122, 0xe605107A), + PORTCR(123, 0xe605107B), + PORTCR(124, 0xe605107C), + PORTCR(125, 0xe605107D), + PORTCR(126, 0xe605107E), + PORTCR(128, 0xe6051080), + PORTCR(129, 0xe6051081), + PORTCR(130, 0xe6051082), + PORTCR(131, 0xe6051083), + PORTCR(132, 0xe6051084), + PORTCR(133, 0xe6051085), + PORTCR(134, 0xe6051086), + PORTCR(160, 0xe60520A0), + PORTCR(161, 0xe60520A1), + PORTCR(162, 0xe60520A2), + PORTCR(163, 0xe60520A3), + PORTCR(164, 0xe60520A4), + PORTCR(165, 0xe60520A5), + PORTCR(166, 0xe60520A6), + PORTCR(167, 0xe60520A7), + PORTCR(168, 0xe60520A8), + PORTCR(169, 0xe60520A9), + PORTCR(170, 0xe60520AA), + PORTCR(171, 0xe60520AB), + PORTCR(172, 0xe60520AC), + PORTCR(173, 0xe60520AD), + PORTCR(174, 0xe60520AE), + PORTCR(175, 0xe60520AF), + PORTCR(176, 0xe60520B0), + PORTCR(177, 0xe60520B1), + PORTCR(178, 0xe60520B2), + PORTCR(192, 0xe60520C0), + PORTCR(193, 0xe60520C1), + PORTCR(194, 0xe60520C2), + PORTCR(195, 0xe60520C3), + PORTCR(196, 0xe60520C4), + PORTCR(197, 0xe60520C5), + PORTCR(198, 0xe60520C6), + PORTCR(199, 0xe60520C7), + PORTCR(200, 0xe60520C8), + PORTCR(201, 0xe60520C9), + PORTCR(202, 0xe60520CA), + PORTCR(203, 0xe60520CB), + PORTCR(204, 0xe60520CC), + PORTCR(205, 0xe60520CD), + PORTCR(206, 0xe60520CE), + PORTCR(207, 0xe60520CF), + PORTCR(208, 0xe60520D0), + PORTCR(209, 0xe60520D1), + PORTCR(210, 0xe60520D2), + PORTCR(211, 0xe60520D3), + PORTCR(212, 0xe60520D4), + PORTCR(213, 0xe60520D5), + PORTCR(214, 0xe60520D6), + PORTCR(215, 0xe60520D7), + PORTCR(216, 0xe60520D8), + PORTCR(217, 0xe60520D9), + PORTCR(218, 0xe60520DA), + PORTCR(219, 0xe60520DB), + PORTCR(220, 0xe60520DC), + PORTCR(221, 0xe60520DD), + PORTCR(222, 0xe60520DE), + PORTCR(224, 0xe60520E0), + PORTCR(225, 0xe60520E1), + PORTCR(226, 0xe60520E2), + PORTCR(227, 0xe60520E3), + PORTCR(228, 0xe60520E4), + PORTCR(229, 0xe60520E5), + PORTCR(230, 0xe60520e6), + 
PORTCR(231, 0xe60520E7), + PORTCR(232, 0xe60520E8), + PORTCR(233, 0xe60520E9), + PORTCR(234, 0xe60520EA), + PORTCR(235, 0xe60520EB), + PORTCR(236, 0xe60520EC), + PORTCR(237, 0xe60520ED), + PORTCR(238, 0xe60520EE), + PORTCR(239, 0xe60520EF), + PORTCR(240, 0xe60520F0), + PORTCR(241, 0xe60520F1), + PORTCR(242, 0xe60520F2), + PORTCR(243, 0xe60520F3), + PORTCR(244, 0xe60520F4), + PORTCR(245, 0xe60520F5), + PORTCR(246, 0xe60520F6), + PORTCR(247, 0xe60520F7), + PORTCR(248, 0xe60520F8), + PORTCR(249, 0xe60520F9), + PORTCR(250, 0xe60520FA), + PORTCR(256, 0xe6052100), + PORTCR(257, 0xe6052101), + PORTCR(258, 0xe6052102), + PORTCR(259, 0xe6052103), + PORTCR(260, 0xe6052104), + PORTCR(261, 0xe6052105), + PORTCR(262, 0xe6052106), + PORTCR(263, 0xe6052107), + PORTCR(264, 0xe6052108), + PORTCR(265, 0xe6052109), + PORTCR(266, 0xe605210A), + PORTCR(267, 0xe605210B), + PORTCR(268, 0xe605210C), + PORTCR(269, 0xe605210D), + PORTCR(270, 0xe605210E), + PORTCR(271, 0xe605210F), + PORTCR(272, 0xe6052110), + PORTCR(273, 0xe6052111), + PORTCR(274, 0xe6052112), + PORTCR(275, 0xe6052113), + PORTCR(276, 0xe6052114), + PORTCR(277, 0xe6052115), + PORTCR(278, 0xe6052116), + PORTCR(279, 0xe6052117), + PORTCR(280, 0xe6052118), + PORTCR(281, 0xe6052119), + PORTCR(282, 0xe605211A), + PORTCR(283, 0xe605211B), + PORTCR(288, 0xe6053120), + PORTCR(289, 0xe6053121), + PORTCR(290, 0xe6053122), + PORTCR(291, 0xe6053123), + PORTCR(292, 0xe6053124), + PORTCR(293, 0xe6053125), + PORTCR(294, 0xe6053126), + PORTCR(295, 0xe6053127), + PORTCR(296, 0xe6053128), + PORTCR(297, 0xe6053129), + PORTCR(298, 0xe605312A), + PORTCR(299, 0xe605312B), + PORTCR(300, 0xe605312C), + PORTCR(301, 0xe605312D), + PORTCR(302, 0xe605312E), + PORTCR(303, 0xe605312F), + PORTCR(304, 0xe6053130), + PORTCR(305, 0xe6053131), + PORTCR(306, 0xe6053132), + PORTCR(307, 0xe6053133), + PORTCR(308, 0xe6053134), + PORTCR(320, 0xe6053140), + PORTCR(321, 0xe6053141), + PORTCR(322, 0xe6053142), + PORTCR(323, 0xe6053143), + PORTCR(324, 0xe6053144), + PORTCR(325, 0xe6053145), + PORTCR(326, 0xe6053146), + PORTCR(327, 0xe6053147), + PORTCR(328, 0xe6053148), + PORTCR(329, 0xe6053149), + + { PINMUX_CFG_REG("MSEL1CR", 0xe605800c, 32, 1) { + MSEL1CR_31_0, MSEL1CR_31_1, + 0, 0, + 0, 0, + 0, 0, + MSEL1CR_27_0, MSEL1CR_27_1, + 0, 0, + MSEL1CR_25_0, MSEL1CR_25_1, + MSEL1CR_24_0, MSEL1CR_24_1, + 0, 0, + MSEL1CR_22_0, MSEL1CR_22_1, + MSEL1CR_21_0, MSEL1CR_21_1, + MSEL1CR_20_0, MSEL1CR_20_1, + MSEL1CR_19_0, MSEL1CR_19_1, + MSEL1CR_18_0, MSEL1CR_18_1, + MSEL1CR_17_0, MSEL1CR_17_1, + MSEL1CR_16_0, MSEL1CR_16_1, + MSEL1CR_15_0, MSEL1CR_15_1, + MSEL1CR_14_0, MSEL1CR_14_1, + MSEL1CR_13_0, MSEL1CR_13_1, + MSEL1CR_12_0, MSEL1CR_12_1, + MSEL1CR_11_0, MSEL1CR_11_1, + MSEL1CR_10_0, MSEL1CR_10_1, + MSEL1CR_09_0, MSEL1CR_09_1, + MSEL1CR_08_0, MSEL1CR_08_1, + MSEL1CR_07_0, MSEL1CR_07_1, + MSEL1CR_06_0, MSEL1CR_06_1, + MSEL1CR_05_0, MSEL1CR_05_1, + MSEL1CR_04_0, MSEL1CR_04_1, + MSEL1CR_03_0, MSEL1CR_03_1, + MSEL1CR_02_0, MSEL1CR_02_1, + MSEL1CR_01_0, MSEL1CR_01_1, + MSEL1CR_00_0, MSEL1CR_00_1, + } + }, + { PINMUX_CFG_REG("MSEL3CR", 0xe6058020, 32, 1) { + MSEL3CR_31_0, MSEL3CR_31_1, + 0, 0, + 0, 0, + MSEL3CR_28_0, MSEL3CR_28_1, + MSEL3CR_27_0, MSEL3CR_27_1, + MSEL3CR_26_0, MSEL3CR_26_1, + 0, 0, + 0, 0, + MSEL3CR_23_0, MSEL3CR_23_1, + MSEL3CR_22_0, MSEL3CR_22_1, + MSEL3CR_21_0, MSEL3CR_21_1, + MSEL3CR_20_0, MSEL3CR_20_1, + MSEL3CR_19_0, MSEL3CR_19_1, + MSEL3CR_18_0, MSEL3CR_18_1, + MSEL3CR_17_0, MSEL3CR_17_1, + MSEL3CR_16_0, MSEL3CR_16_1, + MSEL3CR_15_0, MSEL3CR_15_1, + 0, 0, + 0, 0, + MSEL3CR_12_0, 
MSEL3CR_12_1, + MSEL3CR_11_0, MSEL3CR_11_1, + MSEL3CR_10_0, MSEL3CR_10_1, + MSEL3CR_09_0, MSEL3CR_09_1, + 0, 0, + 0, 0, + MSEL3CR_06_0, MSEL3CR_06_1, + 0, 0, + 0, 0, + MSEL3CR_03_0, MSEL3CR_03_1, + 0, 0, + MSEL3CR_01_0, MSEL3CR_01_1, + MSEL3CR_00_0, MSEL3CR_00_1, + } + }, + { PINMUX_CFG_REG("MSEL4CR", 0xe6058024, 32, 1) { + 0, 0, + MSEL4CR_30_0, MSEL4CR_30_1, + MSEL4CR_29_0, MSEL4CR_29_1, + MSEL4CR_28_0, MSEL4CR_28_1, + MSEL4CR_27_0, MSEL4CR_27_1, + MSEL4CR_26_0, MSEL4CR_26_1, + MSEL4CR_25_0, MSEL4CR_25_1, + MSEL4CR_24_0, MSEL4CR_24_1, + MSEL4CR_23_0, MSEL4CR_23_1, + MSEL4CR_22_0, MSEL4CR_22_1, + MSEL4CR_21_0, MSEL4CR_21_1, + MSEL4CR_20_0, MSEL4CR_20_1, + MSEL4CR_19_0, MSEL4CR_19_1, + MSEL4CR_18_0, MSEL4CR_18_1, + MSEL4CR_17_0, MSEL4CR_17_1, + MSEL4CR_16_0, MSEL4CR_16_1, + MSEL4CR_15_0, MSEL4CR_15_1, + MSEL4CR_14_0, MSEL4CR_14_1, + MSEL4CR_13_0, MSEL4CR_13_1, + MSEL4CR_12_0, MSEL4CR_12_1, + MSEL4CR_11_0, MSEL4CR_11_1, + MSEL4CR_10_0, MSEL4CR_10_1, + MSEL4CR_09_0, MSEL4CR_09_1, + 0, 0, + MSEL4CR_07_0, MSEL4CR_07_1, + 0, 0, + 0, 0, + MSEL4CR_04_0, MSEL4CR_04_1, + 0, 0, + 0, 0, + MSEL4CR_01_0, MSEL4CR_01_1, + 0, 0, + } + }, + { PINMUX_CFG_REG("MSEL5CR", 0xe6058028, 32, 1) { + MSEL5CR_31_0, MSEL5CR_31_1, + MSEL5CR_30_0, MSEL5CR_30_1, + MSEL5CR_29_0, MSEL5CR_29_1, + MSEL5CR_28_0, MSEL5CR_28_1, + MSEL5CR_27_0, MSEL5CR_27_1, + MSEL5CR_26_0, MSEL5CR_26_1, + MSEL5CR_25_0, MSEL5CR_25_1, + MSEL5CR_24_0, MSEL5CR_24_1, + MSEL5CR_23_0, MSEL5CR_23_1, + MSEL5CR_22_0, MSEL5CR_22_1, + MSEL5CR_21_0, MSEL5CR_21_1, + MSEL5CR_20_0, MSEL5CR_20_1, + MSEL5CR_19_0, MSEL5CR_19_1, + MSEL5CR_18_0, MSEL5CR_18_1, + MSEL5CR_17_0, MSEL5CR_17_1, + MSEL5CR_16_0, MSEL5CR_16_1, + MSEL5CR_15_0, MSEL5CR_15_1, + MSEL5CR_14_0, MSEL5CR_14_1, + MSEL5CR_13_0, MSEL5CR_13_1, + MSEL5CR_12_0, MSEL5CR_12_1, + MSEL5CR_11_0, MSEL5CR_11_1, + MSEL5CR_10_0, MSEL5CR_10_1, + MSEL5CR_09_0, MSEL5CR_09_1, + MSEL5CR_08_0, MSEL5CR_08_1, + MSEL5CR_07_0, MSEL5CR_07_1, + MSEL5CR_06_0, MSEL5CR_06_1, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + } + }, + { PINMUX_CFG_REG("MSEL8CR", 0xe6058034, 32, 1) { + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + MSEL8CR_16_0, MSEL8CR_16_1, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + MSEL8CR_01_0, MSEL8CR_01_1, + MSEL8CR_00_0, MSEL8CR_00_1, + } + }, + { }, +}; + +static const struct pinmux_data_reg pinmux_data_regs[] = { + + { PINMUX_DATA_REG("PORTL031_000DR", 0xe6054000, 32) { + 0, PORT30_DATA, PORT29_DATA, PORT28_DATA, + PORT27_DATA, PORT26_DATA, PORT25_DATA, PORT24_DATA, + PORT23_DATA, PORT22_DATA, PORT21_DATA, PORT20_DATA, + PORT19_DATA, PORT18_DATA, PORT17_DATA, PORT16_DATA, + PORT15_DATA, PORT14_DATA, PORT13_DATA, PORT12_DATA, + PORT11_DATA, PORT10_DATA, PORT9_DATA, PORT8_DATA, + PORT7_DATA, PORT6_DATA, PORT5_DATA, PORT4_DATA, + PORT3_DATA, PORT2_DATA, PORT1_DATA, PORT0_DATA, + } + }, + { PINMUX_DATA_REG("PORTD063_032DR", 0xe6055000, 32) { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, PORT40_DATA, + PORT39_DATA, PORT38_DATA, PORT37_DATA, PORT36_DATA, + PORT35_DATA, PORT34_DATA, PORT33_DATA, PORT32_DATA, + } + }, + { PINMUX_DATA_REG("PORTL095_064DR", 0xe6054004, 32) { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, PORT85_DATA, PORT84_DATA, + PORT83_DATA, PORT82_DATA, PORT81_DATA, PORT80_DATA, + PORT79_DATA, PORT78_DATA, PORT77_DATA, PORT76_DATA, + PORT75_DATA, PORT74_DATA, PORT73_DATA, PORT72_DATA, + PORT71_DATA, PORT70_DATA, PORT69_DATA, 
PORT68_DATA, + PORT67_DATA, PORT66_DATA, PORT65_DATA, PORT64_DATA, + } + }, + { PINMUX_DATA_REG("PORTD127_096DR", 0xe6055004, 32) { + 0, PORT126_DATA, PORT125_DATA, PORT124_DATA, + PORT123_DATA, PORT122_DATA, PORT121_DATA, PORT120_DATA, + PORT119_DATA, PORT118_DATA, PORT117_DATA, PORT116_DATA, + PORT115_DATA, PORT114_DATA, PORT113_DATA, PORT112_DATA, + PORT111_DATA, PORT110_DATA, PORT109_DATA, PORT108_DATA, + PORT107_DATA, PORT106_DATA, PORT105_DATA, PORT104_DATA, + PORT103_DATA, PORT102_DATA, PORT101_DATA, PORT100_DATA, + PORT99_DATA, PORT98_DATA, PORT97_DATA, PORT96_DATA, + } + }, + { PINMUX_DATA_REG("PORTD159_128DR", 0xe6055008, 32) { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, PORT134_DATA, PORT133_DATA, PORT132_DATA, + PORT131_DATA, PORT130_DATA, PORT129_DATA, PORT128_DATA, + } + }, + { PINMUX_DATA_REG("PORTR191_160DR", 0xe6056000, 32) { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, PORT178_DATA, PORT177_DATA, PORT176_DATA, + PORT175_DATA, PORT174_DATA, PORT173_DATA, PORT172_DATA, + PORT171_DATA, PORT170_DATA, PORT169_DATA, PORT168_DATA, + PORT167_DATA, PORT166_DATA, PORT165_DATA, PORT164_DATA, + PORT163_DATA, PORT162_DATA, PORT161_DATA, PORT160_DATA, + } + }, + { PINMUX_DATA_REG("PORTR223_192DR", 0xe6056004, 32) { + 0, PORT222_DATA, PORT221_DATA, PORT220_DATA, + PORT219_DATA, PORT218_DATA, PORT217_DATA, PORT216_DATA, + PORT215_DATA, PORT214_DATA, PORT213_DATA, PORT212_DATA, + PORT211_DATA, PORT210_DATA, PORT209_DATA, PORT208_DATA, + PORT207_DATA, PORT206_DATA, PORT205_DATA, PORT204_DATA, + PORT203_DATA, PORT202_DATA, PORT201_DATA, PORT200_DATA, + PORT199_DATA, PORT198_DATA, PORT197_DATA, PORT196_DATA, + PORT195_DATA, PORT194_DATA, PORT193_DATA, PORT192_DATA, + } + }, + { PINMUX_DATA_REG("PORTR255_224DR", 0xe6056008, 32) { + 0, 0, 0, 0, + 0, PORT250_DATA, PORT249_DATA, PORT248_DATA, + PORT247_DATA, PORT246_DATA, PORT245_DATA, PORT244_DATA, + PORT243_DATA, PORT242_DATA, PORT241_DATA, PORT240_DATA, + PORT239_DATA, PORT238_DATA, PORT237_DATA, PORT236_DATA, + PORT235_DATA, PORT234_DATA, PORT233_DATA, PORT232_DATA, + PORT231_DATA, PORT230_DATA, PORT229_DATA, PORT228_DATA, + PORT227_DATA, PORT226_DATA, PORT225_DATA, PORT224_DATA, + } + }, + { PINMUX_DATA_REG("PORTR287_256DR", 0xe605600C, 32) { + 0, 0, 0, 0, + PORT283_DATA, PORT282_DATA, PORT281_DATA, PORT280_DATA, + PORT279_DATA, PORT278_DATA, PORT277_DATA, PORT276_DATA, + PORT275_DATA, PORT274_DATA, PORT273_DATA, PORT272_DATA, + PORT271_DATA, PORT270_DATA, PORT269_DATA, PORT268_DATA, + PORT267_DATA, PORT266_DATA, PORT265_DATA, PORT264_DATA, + PORT263_DATA, PORT262_DATA, PORT261_DATA, PORT260_DATA, + PORT259_DATA, PORT258_DATA, PORT257_DATA, PORT256_DATA, + } + }, + { PINMUX_DATA_REG("PORTU319_288DR", 0xe6057000, 32) { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, PORT308_DATA, + PORT307_DATA, PORT306_DATA, PORT305_DATA, PORT304_DATA, + PORT303_DATA, PORT302_DATA, PORT301_DATA, PORT300_DATA, + PORT299_DATA, PORT298_DATA, PORT297_DATA, PORT296_DATA, + PORT295_DATA, PORT294_DATA, PORT293_DATA, PORT292_DATA, + PORT291_DATA, PORT290_DATA, PORT289_DATA, PORT288_DATA, + } + }, + { PINMUX_DATA_REG("PORTU351_320DR", 0xe6057004, 32) { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, PORT329_DATA, PORT328_DATA, + PORT327_DATA, PORT326_DATA, PORT325_DATA, PORT324_DATA, + PORT323_DATA, PORT322_DATA, PORT321_DATA, PORT320_DATA, + } + }, + { }, +}; + +const struct sh_pfc_soc_info r8a73a4_pinmux_info = { + .name = "r8a73a4_pfc", + + .input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END }, + 
.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END }, + .input_pd = { PINMUX_INPUT_PULLDOWN_BEGIN, PINMUX_INPUT_PULLDOWN_END }, + .output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END }, + .function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END }, + + .pins = pinmux_pins, + .nr_pins = ARRAY_SIZE(pinmux_pins), + .func_gpios = pinmux_func_gpios, + .nr_func_gpios = ARRAY_SIZE(pinmux_func_gpios), + + .cfg_regs = pinmux_config_regs, + .data_regs = pinmux_data_regs, + + .gpio_data = pinmux_data, + .gpio_data_size = ARRAY_SIZE(pinmux_data), +}; -- GitLab From f365bfcc8723e41b9110c5e7eb292a39b26ad8ba Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:49:59 +0900 Subject: [PATCH 0369/3163] sh-pfc: r8a73a4: Support sparse GPIO numbers The r8a73a4 SoC has sparse GPIO numbers. Declare ranges for pin numbers in the PFC SoC data. Pin numbers shall be used with the GPIO API from this point on. Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/r8a73a4.h | 2 +- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h index f0b1b4a962b3..2d4af4af3634 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a73a4.h +++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h @@ -86,7 +86,7 @@ enum { GPIO_PORT325, GPIO_PORT326, GPIO_PORT327, GPIO_PORT328, GPIO_PORT329, /* Port0 */ - GPIO_FN_LCDD0, + GPIO_FN_LCDD0 = 330, GPIO_FN_PDM2_CLK_0, GPIO_FN_DU0_DR0, GPIO_FN_IRQ0, diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 47d75d5548eb..5dd68fb96274 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -1424,6 +1424,20 @@ static struct sh_pfc_pin pinmux_pins[] = { GPIO_PORT_ALL(), }; +static const struct pinmux_range pinmux_ranges[] = { + {.begin = 0, .end = 30,}, + {.begin = 32, .end = 40,}, + {.begin = 64, .end = 85,}, + {.begin = 96, .end = 126,}, + {.begin = 128, .end = 134,}, + {.begin = 160, .end = 178,}, + {.begin = 192, .end = 222,}, + {.begin = 224, .end = 250,}, + {.begin = 256, .end = 283,}, + {.begin = 288, .end = 308,}, + {.begin = 320, .end = 329,}, +}; + #define PINMUX_FN_BASE ARRAY_SIZE(pinmux_pins) static const struct pinmux_func pinmux_func_gpios[] = { @@ -2815,6 +2829,8 @@ const struct sh_pfc_soc_info r8a73a4_pinmux_info = { .pins = pinmux_pins, .nr_pins = ARRAY_SIZE(pinmux_pins), + .ranges = pinmux_ranges, + .nr_ranges = ARRAY_SIZE(pinmux_ranges), .func_gpios = pinmux_func_gpios, .nr_func_gpios = ARRAY_SIZE(pinmux_func_gpios), -- GitLab From c96931ca88b583a2d7c8bb02a33871c982bd5b68 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:50:09 +0900 Subject: [PATCH 0370/3163] sh-pfc: r8a73a4: GPIO IRQ support This is V2 of the code that adds GPIO -> IRQ mappings to the PFC table for the r8a73a4 SoC. It requires the IRQs to be mapped at a fixed location in Linux IRQ space. The actual IRQs are not handled by the PFC; instead, the IRQC interrupt controller is used on r8a73a4.
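(For illustration only, not part of this patch: a minimal sketch of how a fixed pin-to-IRQ table like the one added below can be searched. The struct layout and helper name are assumptions made for the sketch, not the actual PFC API.)

#include <stddef.h>

struct pin_irq_map {
        int irq;        /* external IRQ number, fixed in Linux IRQ space */
        int pin;        /* pin that can drive this IRQ */
};

/* Return the external IRQ number for a pin, or -1 if the pin has none. */
int sketch_pin_to_irq(const struct pin_irq_map *map, size_t count, int pin)
{
        size_t i;

        /* A linear scan is fine here: the r8a73a4 table has 58 entries. */
        for (i = 0; i < count; i++)
                if (map[i].pin == pin)
                        return map[i].irq;
        return -1;
}

Looking up pin 66 in such a table would return the value of irq_pin(40), matching the PINMUX_IRQ(irq_pin(40), 66) entry below.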
Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 63 ++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 5dd68fb96274..01773291ea91 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -2818,6 +2818,66 @@ static const struct pinmux_data_reg pinmux_data_regs[] = { { }, }; +static const struct pinmux_irq pinmux_irqs[] = { + PINMUX_IRQ(irq_pin(0), 0), + PINMUX_IRQ(irq_pin(1), 1), + PINMUX_IRQ(irq_pin(2), 2), + PINMUX_IRQ(irq_pin(3), 3), + PINMUX_IRQ(irq_pin(4), 4), + PINMUX_IRQ(irq_pin(5), 5), + PINMUX_IRQ(irq_pin(6), 6), + PINMUX_IRQ(irq_pin(7), 7), + PINMUX_IRQ(irq_pin(8), 8), + PINMUX_IRQ(irq_pin(9), 9), + PINMUX_IRQ(irq_pin(10), 10), + PINMUX_IRQ(irq_pin(11), 11), + PINMUX_IRQ(irq_pin(12), 12), + PINMUX_IRQ(irq_pin(13), 13), + PINMUX_IRQ(irq_pin(14), 14), + PINMUX_IRQ(irq_pin(15), 15), + PINMUX_IRQ(irq_pin(16), 320), + PINMUX_IRQ(irq_pin(17), 321), + PINMUX_IRQ(irq_pin(18), 85), + PINMUX_IRQ(irq_pin(19), 84), + PINMUX_IRQ(irq_pin(20), 160), + PINMUX_IRQ(irq_pin(21), 161), + PINMUX_IRQ(irq_pin(22), 162), + PINMUX_IRQ(irq_pin(23), 163), + PINMUX_IRQ(irq_pin(24), 175), + PINMUX_IRQ(irq_pin(25), 176), + PINMUX_IRQ(irq_pin(26), 177), + PINMUX_IRQ(irq_pin(27), 178), + PINMUX_IRQ(irq_pin(28), 322), + PINMUX_IRQ(irq_pin(29), 323), + PINMUX_IRQ(irq_pin(30), 324), + PINMUX_IRQ(irq_pin(31), 192), + PINMUX_IRQ(irq_pin(32), 193), + PINMUX_IRQ(irq_pin(33), 194), + PINMUX_IRQ(irq_pin(34), 195), + PINMUX_IRQ(irq_pin(35), 196), + PINMUX_IRQ(irq_pin(36), 197), + PINMUX_IRQ(irq_pin(37), 198), + PINMUX_IRQ(irq_pin(38), 199), + PINMUX_IRQ(irq_pin(39), 200), + PINMUX_IRQ(irq_pin(40), 66), + PINMUX_IRQ(irq_pin(41), 102), + PINMUX_IRQ(irq_pin(42), 103), + PINMUX_IRQ(irq_pin(43), 109), + PINMUX_IRQ(irq_pin(44), 110), + PINMUX_IRQ(irq_pin(45), 111), + PINMUX_IRQ(irq_pin(46), 112), + PINMUX_IRQ(irq_pin(47), 113), + PINMUX_IRQ(irq_pin(48), 114), + PINMUX_IRQ(irq_pin(49), 115), + PINMUX_IRQ(irq_pin(50), 301), + PINMUX_IRQ(irq_pin(51), 290), + PINMUX_IRQ(irq_pin(52), 296), + PINMUX_IRQ(irq_pin(53), 325), + PINMUX_IRQ(irq_pin(54), 326), + PINMUX_IRQ(irq_pin(55), 327), + PINMUX_IRQ(irq_pin(56), 328), + PINMUX_IRQ(irq_pin(57), 329), +}; const struct sh_pfc_soc_info r8a73a4_pinmux_info = { .name = "r8a73a4_pfc", @@ -2839,4 +2899,7 @@ const struct sh_pfc_soc_info r8a73a4_pinmux_info = { .gpio_data = pinmux_data, .gpio_data_size = ARRAY_SIZE(pinmux_data), + + .gpio_irq = pinmux_irqs, + .gpio_irq_size = ARRAY_SIZE(pinmux_irqs), }; -- GitLab From 57ef73b469e7e6f7d15e5467649c787bc7070819 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:50:27 +0900 Subject: [PATCH 0371/3163] sh-pfc: r8a73a4: Add bias (pull-up/down) pinconf support Implement pull-up/down support for r8a73a4 similar to the implementation for sh73a0. 
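(For illustration only, not part of this patch: a sketch of the sh73a0-style bias control the message refers to, in which each pin has a byte-wide PORTnCR register whose pull-mode field selects off, pull-down or pull-up. The field position and encoding below are assumptions made for the sketch.)

#include <stdint.h>

#define SKETCH_PULMD_MASK       (3 << 6)        /* assumed pull-mode field */
#define SKETCH_PULMD_OFF        (0 << 6)
#define SKETCH_PULMD_DOWN       (2 << 6)
#define SKETCH_PULMD_UP         (3 << 6)

/* Read-modify-write the pull-mode field of one PORTnCR register. */
void sketch_set_bias(volatile uint8_t *portcr, unsigned int pulmd)
{
        unsigned int value = *portcr;

        value &= ~SKETCH_PULMD_MASK;    /* clear the current pull mode */
        value |= pulmd;                 /* install the requested one */
        *portcr = (uint8_t)value;
}

The per-pin capability flags added in the diff below (SH_PFC_PIN_CFG_PULL_UP and SH_PFC_PIN_CFG_PULL_DOWN) let the pinconf core check that a pin actually supports a given bias before a write like this is attempted.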
Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 197 ++++++++++++++++++++++++++- 1 file changed, 196 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 01773291ea91..86a5fd33a479 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -17,10 +17,13 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include +#include #include #include +#include "core.h" #include "sh_pfc.h" #define CPU_ALL_PORT(fn, pfx, sfx) \ @@ -1420,8 +1423,143 @@ static const pinmux_enum_t pinmux_data[] = { PINMUX_DATA(IRQ57_MARK, PORT329_FN0), }; +#define R8A73A4_PIN(pin, cfgs) \ + { \ + .name = __stringify(PORT##pin), \ + .enum_id = PORT##pin##_DATA, \ + .configs = cfgs, \ + } + +#define __O (SH_PFC_PIN_CFG_OUTPUT) +#define __IO (SH_PFC_PIN_CFG_INPUT | SH_PFC_PIN_CFG_OUTPUT) +#define __PUD (SH_PFC_PIN_CFG_PULL_DOWN | SH_PFC_PIN_CFG_PULL_UP) + +#define R8A73A4_PIN_IO_PU_PD(pin) R8A73A4_PIN(pin, __IO | __PUD) +#define R8A73A4_PIN_O(pin) R8A73A4_PIN(pin, __O) + static struct sh_pfc_pin pinmux_pins[] = { - GPIO_PORT_ALL(), + R8A73A4_PIN_IO_PU_PD(0), R8A73A4_PIN_IO_PU_PD(1), + R8A73A4_PIN_IO_PU_PD(2), R8A73A4_PIN_IO_PU_PD(3), + R8A73A4_PIN_IO_PU_PD(4), R8A73A4_PIN_IO_PU_PD(5), + R8A73A4_PIN_IO_PU_PD(6), R8A73A4_PIN_IO_PU_PD(7), + R8A73A4_PIN_IO_PU_PD(8), R8A73A4_PIN_IO_PU_PD(9), + R8A73A4_PIN_IO_PU_PD(10), R8A73A4_PIN_IO_PU_PD(11), + R8A73A4_PIN_IO_PU_PD(12), R8A73A4_PIN_IO_PU_PD(13), + R8A73A4_PIN_IO_PU_PD(14), R8A73A4_PIN_IO_PU_PD(15), + R8A73A4_PIN_IO_PU_PD(16), R8A73A4_PIN_IO_PU_PD(17), + R8A73A4_PIN_IO_PU_PD(18), R8A73A4_PIN_IO_PU_PD(19), + R8A73A4_PIN_IO_PU_PD(20), R8A73A4_PIN_IO_PU_PD(21), + R8A73A4_PIN_IO_PU_PD(22), R8A73A4_PIN_IO_PU_PD(23), + R8A73A4_PIN_IO_PU_PD(24), R8A73A4_PIN_IO_PU_PD(25), + R8A73A4_PIN_IO_PU_PD(26), R8A73A4_PIN_IO_PU_PD(27), + R8A73A4_PIN_IO_PU_PD(28), R8A73A4_PIN_IO_PU_PD(29), + R8A73A4_PIN_IO_PU_PD(30), + R8A73A4_PIN_IO_PU_PD(32), R8A73A4_PIN_IO_PU_PD(33), + R8A73A4_PIN_IO_PU_PD(34), R8A73A4_PIN_IO_PU_PD(35), + R8A73A4_PIN_IO_PU_PD(36), R8A73A4_PIN_IO_PU_PD(37), + R8A73A4_PIN_IO_PU_PD(38), R8A73A4_PIN_IO_PU_PD(39), + R8A73A4_PIN_IO_PU_PD(40), + R8A73A4_PIN_IO_PU_PD(64), R8A73A4_PIN_IO_PU_PD(65), + R8A73A4_PIN_IO_PU_PD(66), R8A73A4_PIN_IO_PU_PD(67), + R8A73A4_PIN_IO_PU_PD(68), R8A73A4_PIN_IO_PU_PD(69), + R8A73A4_PIN_IO_PU_PD(70), R8A73A4_PIN_IO_PU_PD(71), + R8A73A4_PIN_IO_PU_PD(72), R8A73A4_PIN_IO_PU_PD(73), + R8A73A4_PIN_O(74), R8A73A4_PIN_IO_PU_PD(75), + R8A73A4_PIN_IO_PU_PD(76), R8A73A4_PIN_IO_PU_PD(77), + R8A73A4_PIN_IO_PU_PD(78), R8A73A4_PIN_IO_PU_PD(79), + R8A73A4_PIN_IO_PU_PD(80), R8A73A4_PIN_IO_PU_PD(81), + R8A73A4_PIN_IO_PU_PD(82), R8A73A4_PIN_IO_PU_PD(83), + R8A73A4_PIN_IO_PU_PD(84), R8A73A4_PIN_IO_PU_PD(85), + R8A73A4_PIN_IO_PU_PD(96), R8A73A4_PIN_IO_PU_PD(97), + R8A73A4_PIN_IO_PU_PD(98), R8A73A4_PIN_IO_PU_PD(99), + R8A73A4_PIN_IO_PU_PD(100), R8A73A4_PIN_IO_PU_PD(101), + R8A73A4_PIN_IO_PU_PD(102), R8A73A4_PIN_IO_PU_PD(103), + R8A73A4_PIN_IO_PU_PD(104), R8A73A4_PIN_IO_PU_PD(105), + R8A73A4_PIN_IO_PU_PD(106), R8A73A4_PIN_IO_PU_PD(107), + R8A73A4_PIN_IO_PU_PD(108), R8A73A4_PIN_IO_PU_PD(109), + R8A73A4_PIN_IO_PU_PD(110), R8A73A4_PIN_IO_PU_PD(111), + R8A73A4_PIN_IO_PU_PD(112), R8A73A4_PIN_IO_PU_PD(113), + R8A73A4_PIN_IO_PU_PD(114), R8A73A4_PIN_IO_PU_PD(115), + 
R8A73A4_PIN_IO_PU_PD(116), R8A73A4_PIN_IO_PU_PD(117), + R8A73A4_PIN_IO_PU_PD(118), R8A73A4_PIN_IO_PU_PD(119), + R8A73A4_PIN_IO_PU_PD(120), R8A73A4_PIN_IO_PU_PD(121), + R8A73A4_PIN_IO_PU_PD(122), R8A73A4_PIN_IO_PU_PD(123), + R8A73A4_PIN_IO_PU_PD(124), R8A73A4_PIN_IO_PU_PD(125), + R8A73A4_PIN_IO_PU_PD(126), + R8A73A4_PIN_IO_PU_PD(128), R8A73A4_PIN_IO_PU_PD(129), + R8A73A4_PIN_IO_PU_PD(130), R8A73A4_PIN_IO_PU_PD(131), + R8A73A4_PIN_IO_PU_PD(132), R8A73A4_PIN_IO_PU_PD(133), + R8A73A4_PIN_IO_PU_PD(134), + R8A73A4_PIN_IO_PU_PD(160), R8A73A4_PIN_IO_PU_PD(161), + R8A73A4_PIN_IO_PU_PD(162), R8A73A4_PIN_IO_PU_PD(163), + R8A73A4_PIN_IO_PU_PD(164), R8A73A4_PIN_IO_PU_PD(165), + R8A73A4_PIN_IO_PU_PD(166), R8A73A4_PIN_IO_PU_PD(167), + R8A73A4_PIN_IO_PU_PD(168), R8A73A4_PIN_IO_PU_PD(169), + R8A73A4_PIN_IO_PU_PD(170), R8A73A4_PIN_IO_PU_PD(171), + R8A73A4_PIN_IO_PU_PD(172), R8A73A4_PIN_IO_PU_PD(173), + R8A73A4_PIN_IO_PU_PD(174), R8A73A4_PIN_IO_PU_PD(175), + R8A73A4_PIN_IO_PU_PD(176), R8A73A4_PIN_IO_PU_PD(177), + R8A73A4_PIN_IO_PU_PD(178), + R8A73A4_PIN_IO_PU_PD(192), R8A73A4_PIN_IO_PU_PD(193), + R8A73A4_PIN_IO_PU_PD(194), R8A73A4_PIN_IO_PU_PD(195), + R8A73A4_PIN_IO_PU_PD(196), R8A73A4_PIN_IO_PU_PD(197), + R8A73A4_PIN_IO_PU_PD(198), R8A73A4_PIN_IO_PU_PD(199), + R8A73A4_PIN_IO_PU_PD(200), R8A73A4_PIN_IO_PU_PD(201), + R8A73A4_PIN_IO_PU_PD(202), R8A73A4_PIN_IO_PU_PD(203), + R8A73A4_PIN_IO_PU_PD(204), R8A73A4_PIN_IO_PU_PD(205), + R8A73A4_PIN_IO_PU_PD(206), R8A73A4_PIN_IO_PU_PD(207), + R8A73A4_PIN_IO_PU_PD(208), R8A73A4_PIN_IO_PU_PD(209), + R8A73A4_PIN_IO_PU_PD(210), R8A73A4_PIN_IO_PU_PD(211), + R8A73A4_PIN_IO_PU_PD(212), R8A73A4_PIN_IO_PU_PD(213), + R8A73A4_PIN_IO_PU_PD(214), R8A73A4_PIN_IO_PU_PD(215), + R8A73A4_PIN_IO_PU_PD(216), R8A73A4_PIN_IO_PU_PD(217), + R8A73A4_PIN_IO_PU_PD(218), R8A73A4_PIN_IO_PU_PD(219), + R8A73A4_PIN_IO_PU_PD(220), R8A73A4_PIN_IO_PU_PD(221), + R8A73A4_PIN_IO_PU_PD(222), + R8A73A4_PIN_IO_PU_PD(224), R8A73A4_PIN_IO_PU_PD(225), + R8A73A4_PIN_IO_PU_PD(226), R8A73A4_PIN_IO_PU_PD(227), + R8A73A4_PIN_IO_PU_PD(228), R8A73A4_PIN_IO_PU_PD(229), + R8A73A4_PIN_IO_PU_PD(230), R8A73A4_PIN_IO_PU_PD(231), + R8A73A4_PIN_IO_PU_PD(232), R8A73A4_PIN_IO_PU_PD(233), + R8A73A4_PIN_IO_PU_PD(234), R8A73A4_PIN_IO_PU_PD(235), + R8A73A4_PIN_IO_PU_PD(236), R8A73A4_PIN_IO_PU_PD(237), + R8A73A4_PIN_IO_PU_PD(238), R8A73A4_PIN_IO_PU_PD(239), + R8A73A4_PIN_IO_PU_PD(240), R8A73A4_PIN_IO_PU_PD(241), + R8A73A4_PIN_IO_PU_PD(242), R8A73A4_PIN_IO_PU_PD(243), + R8A73A4_PIN_IO_PU_PD(244), R8A73A4_PIN_IO_PU_PD(245), + R8A73A4_PIN_IO_PU_PD(246), R8A73A4_PIN_IO_PU_PD(247), + R8A73A4_PIN_IO_PU_PD(248), R8A73A4_PIN_IO_PU_PD(249), + R8A73A4_PIN_IO_PU_PD(250), + R8A73A4_PIN_IO_PU_PD(256), R8A73A4_PIN_IO_PU_PD(257), + R8A73A4_PIN_IO_PU_PD(258), R8A73A4_PIN_IO_PU_PD(259), + R8A73A4_PIN_IO_PU_PD(260), R8A73A4_PIN_IO_PU_PD(261), + R8A73A4_PIN_IO_PU_PD(262), R8A73A4_PIN_IO_PU_PD(263), + R8A73A4_PIN_IO_PU_PD(264), R8A73A4_PIN_IO_PU_PD(265), + R8A73A4_PIN_IO_PU_PD(266), R8A73A4_PIN_IO_PU_PD(267), + R8A73A4_PIN_IO_PU_PD(268), R8A73A4_PIN_IO_PU_PD(269), + R8A73A4_PIN_IO_PU_PD(270), R8A73A4_PIN_IO_PU_PD(271), + R8A73A4_PIN_IO_PU_PD(272), R8A73A4_PIN_IO_PU_PD(273), + R8A73A4_PIN_IO_PU_PD(274), R8A73A4_PIN_IO_PU_PD(275), + R8A73A4_PIN_IO_PU_PD(276), R8A73A4_PIN_IO_PU_PD(277), + R8A73A4_PIN_IO_PU_PD(278), R8A73A4_PIN_IO_PU_PD(279), + R8A73A4_PIN_IO_PU_PD(280), R8A73A4_PIN_IO_PU_PD(281), + R8A73A4_PIN_IO_PU_PD(282), R8A73A4_PIN_IO_PU_PD(283), + R8A73A4_PIN_O(288), R8A73A4_PIN_IO_PU_PD(289), + R8A73A4_PIN_IO_PU_PD(290), R8A73A4_PIN_IO_PU_PD(291), + 
R8A73A4_PIN_IO_PU_PD(292), R8A73A4_PIN_IO_PU_PD(293), + R8A73A4_PIN_IO_PU_PD(294), R8A73A4_PIN_IO_PU_PD(295), + R8A73A4_PIN_IO_PU_PD(296), R8A73A4_PIN_IO_PU_PD(297), + R8A73A4_PIN_IO_PU_PD(298), R8A73A4_PIN_IO_PU_PD(299), + R8A73A4_PIN_IO_PU_PD(300), R8A73A4_PIN_IO_PU_PD(301), + R8A73A4_PIN_IO_PU_PD(302), R8A73A4_PIN_IO_PU_PD(303), + R8A73A4_PIN_IO_PU_PD(304), R8A73A4_PIN_IO_PU_PD(305), + R8A73A4_PIN_IO_PU_PD(306), R8A73A4_PIN_IO_PU_PD(307), + R8A73A4_PIN_IO_PU_PD(308), + R8A73A4_PIN_IO_PU_PD(320), R8A73A4_PIN_IO_PU_PD(321), + R8A73A4_PIN_IO_PU_PD(322), R8A73A4_PIN_IO_PU_PD(323), + R8A73A4_PIN_IO_PU_PD(324), R8A73A4_PIN_IO_PU_PD(325), + R8A73A4_PIN_IO_PU_PD(326), R8A73A4_PIN_IO_PU_PD(327), + R8A73A4_PIN_IO_PU_PD(328), R8A73A4_PIN_IO_PU_PD(329), }; static const struct pinmux_range pinmux_ranges[] = { @@ -2878,8 +3016,65 @@ static const struct pinmux_irq pinmux_irqs[] = { PINMUX_IRQ(irq_pin(56), 328), PINMUX_IRQ(irq_pin(57), 329), }; + +#define PORTCR_PULMD_OFF (0 << 6) +#define PORTCR_PULMD_DOWN (2 << 6) +#define PORTCR_PULMD_UP (3 << 6) +#define PORTCR_PULMD_MASK (3 << 6) + +static const unsigned int r8a73a4_portcr_offsets[] = { + 0x00000000, 0x00001000, 0x00000000, 0x00001000, + 0x00001000, 0x00002000, 0x00002000, 0x00002000, + 0x00002000, 0x00003000, 0x00003000, +}; + +static unsigned int r8a73a4_pinmux_get_bias(struct sh_pfc *pfc, + unsigned int pin) +{ + void __iomem *addr; + + addr = pfc->window->virt + r8a73a4_portcr_offsets[pin >> 5] + pin; + + switch (ioread8(addr) & PORTCR_PULMD_MASK) { + case PORTCR_PULMD_UP: + return PIN_CONFIG_BIAS_PULL_UP; + case PORTCR_PULMD_DOWN: + return PIN_CONFIG_BIAS_PULL_DOWN; + case PORTCR_PULMD_OFF: + default: + return PIN_CONFIG_BIAS_DISABLE; + } +} + +static void r8a73a4_pinmux_set_bias(struct sh_pfc *pfc, unsigned int pin, + unsigned int bias) +{ + void __iomem *addr; + u32 value; + + addr = pfc->window->virt + r8a73a4_portcr_offsets[pin >> 5] + pin; + value = ioread8(addr) & ~PORTCR_PULMD_MASK; + + switch (bias) { + case PIN_CONFIG_BIAS_PULL_UP: + value |= PORTCR_PULMD_UP; + break; + case PIN_CONFIG_BIAS_PULL_DOWN: + value |= PORTCR_PULMD_DOWN; + break; + } + + iowrite8(value, addr); +} + +static const struct sh_pfc_soc_operations r8a73a4_pinmux_ops = { + .get_bias = r8a73a4_pinmux_get_bias, + .set_bias = r8a73a4_pinmux_set_bias, +}; + const struct sh_pfc_soc_info r8a73a4_pinmux_info = { .name = "r8a73a4_pfc", + .ops = &r8a73a4_pinmux_ops, .input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END }, .input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END }, -- GitLab From 172fd616dd46a181be5c9c17bd0a84dd8ae0ce94 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:50:36 +0900 Subject: [PATCH 0372/3163] sh-pfc: r8a73a4: Add SCIF pin groups and functions Add PINCTRL support for r8a73a4 SCIF ports SCIFA0->SCIFA1 and SCIFB0->SCIFB3. 
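For reference, a minimal sketch of the board-side mapping that selects one of the new groups; the serial and PFC device names are illustrative, while the group and function names ("scifa0_data", "scifa0") match the tables added below:

	#include <linux/kernel.h>
	#include <linux/init.h>
	#include <linux/pinctrl/machine.h>

	static const struct pinctrl_map example_scif_map[] __initconst = {
		/* route SCIFA0 RX/TX (pins 117/116) to the SCIFA0 function */
		PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.0", "pfc-r8a73a4",
					  "scifa0_data", "scifa0"),
		PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.0", "pfc-r8a73a4",
					  "scifa0_ctrl", "scifa0"),
	};

	static int __init example_scif_pinmux_init(void)
	{
		return pinctrl_register_mappings(example_scif_map,
						 ARRAY_SIZE(example_scif_map));
	}

Splitting data, clock, and control lines into separate groups lets a board pick only the signals it has wired up, instead of claiming every pin a port could use.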
Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 287 +++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 86a5fd33a479..cd5c5fd74ba4 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -1576,6 +1576,286 @@ static const struct pinmux_range pinmux_ranges[] = { {.begin = 320, .end = 329,}, }; +/* - SCIFA0 ----------------------------------------------------------------- */ +static const unsigned int scifa0_data_pins[] = { + /* SCIFA0_RXD, SCIFA0_TXD */ + 117, 116, +}; +static const unsigned int scifa0_data_mux[] = { + SCIFA0_RXD_MARK, SCIFA0_TXD_MARK, +}; +static const unsigned int scifa0_clk_pins[] = { + /* SCIFA0_SCK */ + 34, +}; +static const unsigned int scifa0_clk_mux[] = { + SCIFA0_SCK_MARK, +}; +static const unsigned int scifa0_ctrl_pins[] = { + /* SCIFA0_RTS, SCIFA0_CTS */ + 32, 33, +}; +static const unsigned int scifa0_ctrl_mux[] = { + SCIFA0_RTS_MARK, SCIFA0_CTS_MARK, +}; +/* - SCIFA1 ----------------------------------------------------------------- */ +static const unsigned int scifa1_data_pins[] = { + /* SCIFA1_RXD, SCIFA1_TXD */ + 119, 118, +}; +static const unsigned int scifa1_data_mux[] = { + SCIFA1_RXD_MARK, SCIFA1_TXD_MARK, +}; +static const unsigned int scifa1_clk_pins[] = { + /* SCIFA1_SCK */ + 37, +}; +static const unsigned int scifa1_clk_mux[] = { + SCIFA1_SCK_MARK, +}; +static const unsigned int scifa1_ctrl_pins[] = { + /* SCIFA1_RTS, SCIFA1_CTS */ + 35, 36, +}; +static const unsigned int scifa1_ctrl_mux[] = { + SCIFA1_RTS_MARK, SCIFA1_CTS_MARK, +}; +/* - SCIFB0 ----------------------------------------------------------------- */ +static const unsigned int scifb0_data_pins[] = { + /* SCIFB0_RXD, SCIFB0_TXD */ + 123, 122, +}; +static const unsigned int scifb0_data_mux[] = { + SCIFB0_RXD_MARK, SCIFB0_TXD_MARK, +}; +static const unsigned int scifb0_clk_pins[] = { + /* SCIFB0_SCK */ + 40, +}; +static const unsigned int scifb0_clk_mux[] = { + SCIFB0_SCK_MARK, +}; +static const unsigned int scifb0_ctrl_pins[] = { + /* SCIFB0_RTS, SCIFB0_CTS */ + 38, 39, +}; +static const unsigned int scifb0_ctrl_mux[] = { + SCIFB0_RTS_MARK, SCIFB0_CTS_MARK, +}; +/* - SCIFB1 ----------------------------------------------------------------- */ +static const unsigned int scifb1_data_pins[] = { + /* SCIFB1_RXD, SCIFB1_TXD */ + 27, 26, +}; +static const unsigned int scifb1_data_mux[] = { + SCIFB1_RXD_27_MARK, SCIFB1_TXD_26_MARK, +}; +static const unsigned int scifb1_clk_pins[] = { + /* SCIFB1_SCK */ + 28, +}; +static const unsigned int scifb1_clk_mux[] = { + SCIFB1_SCK_28_MARK, +}; +static const unsigned int scifb1_ctrl_pins[] = { + /* SCIFB1_RTS, SCIFB1_CTS */ + 24, 25, +}; +static const unsigned int scifb1_ctrl_mux[] = { + SCIFB1_RTS_24_MARK, SCIFB1_CTS_25_MARK, +}; +static const unsigned int scifb1_data_b_pins[] = { + /* SCIFB1_RXD, SCIFB1_TXD */ + 72, 67, +}; +static const unsigned int scifb1_data_b_mux[] = { + SCIFB1_RXD_72_MARK, SCIFB1_TXD_67_MARK, +}; +static const unsigned int scifb1_clk_b_pins[] = { + /* SCIFB1_SCK */ + 261, +}; +static const unsigned int scifb1_clk_b_mux[] = { + SCIFB1_SCK_261_MARK, +}; +static const unsigned int scifb1_ctrl_b_pins[] = { + /* SCIFB1_RTS, SCIFB1_CTS */ + 70, 71, +}; +static const unsigned int scifb1_ctrl_b_mux[] = { + SCIFB1_RTS_70_MARK, SCIFB1_CTS_71_MARK, +}; +/* - SCIFB2 
----------------------------------------------------------------- */ +static const unsigned int scifb2_data_pins[] = { + /* SCIFB2_RXD, SCIFB2_TXD */ + 69, 68, +}; +static const unsigned int scifb2_data_mux[] = { + SCIFB2_RXD_69_MARK, SCIFB2_TXD_68_MARK, +}; +static const unsigned int scifb2_clk_pins[] = { + /* SCIFB2_SCK */ + 262, +}; +static const unsigned int scifb2_clk_mux[] = { + SCIFB2_SCK_262_MARK, +}; +static const unsigned int scifb2_ctrl_pins[] = { + /* SCIFB2_RTS, SCIFB2_CTS */ + 73, 66, +}; +static const unsigned int scifb2_ctrl_mux[] = { + SCIFB2_RTS_73_MARK, SCIFB2_CTS_66_MARK, +}; +static const unsigned int scifb2_data_b_pins[] = { + /* SCIFB2_RXD, SCIFB2_TXD */ + 297, 295, +}; +static const unsigned int scifb2_data_b_mux[] = { + SCIFB2_RXD_297_MARK, SCIFB2_TXD_295_MARK, +}; +static const unsigned int scifb2_clk_b_pins[] = { + /* SCIFB2_SCK */ + 299, +}; +static const unsigned int scifb2_clk_b_mux[] = { + SCIFB2_SCK_299_MARK, +}; +static const unsigned int scifb2_ctrl_b_pins[] = { + /* SCIFB2_RTS, SCIFB2_CTS */ + 300, 298, +}; +static const unsigned int scifb2_ctrl_b_mux[] = { + SCIFB2_RTS_300_MARK, SCIFB2_CTS_298_MARK, +}; +/* - SCIFB3 ----------------------------------------------------------------- */ +static const unsigned int scifb3_data_pins[] = { + /* SCIFB3_RXD, SCIFB3_TXD */ + 22, 21, +}; +static const unsigned int scifb3_data_mux[] = { + SCIFB3_RXD_22_MARK, SCIFB3_TXD_21_MARK, +}; +static const unsigned int scifb3_clk_pins[] = { + /* SCIFB3_SCK */ + 23, +}; +static const unsigned int scifb3_clk_mux[] = { + SCIFB3_SCK_23_MARK, +}; +static const unsigned int scifb3_ctrl_pins[] = { + /* SCIFB3_RTS, SCIFB3_CTS */ + 19, 20, +}; +static const unsigned int scifb3_ctrl_mux[] = { + SCIFB3_RTS_19_MARK, SCIFB3_CTS_20_MARK, +}; +static const unsigned int scifb3_data_b_pins[] = { + /* SCIFB3_RXD, SCIFB3_TXD */ + 120, 121, +}; +static const unsigned int scifb3_data_b_mux[] = { + SCIFB3_RXD_120_MARK, SCIFB3_TXD_121_MARK, +}; +static const unsigned int scifb3_clk_b_pins[] = { + /* SCIFB3_SCK */ + 40, +}; +static const unsigned int scifb3_clk_b_mux[] = { + SCIFB3_SCK_40_MARK, +}; +static const unsigned int scifb3_ctrl_b_pins[] = { + /* SCIFB3_RTS, SCIFB3_CTS */ + 38, 39, +}; +static const unsigned int scifb3_ctrl_b_mux[] = { + SCIFB3_RTS_38_MARK, SCIFB3_CTS_39_MARK, +}; + +static const struct sh_pfc_pin_group pinmux_groups[] = { + SH_PFC_PIN_GROUP(scifa0_data), + SH_PFC_PIN_GROUP(scifa0_clk), + SH_PFC_PIN_GROUP(scifa0_ctrl), + SH_PFC_PIN_GROUP(scifa1_data), + SH_PFC_PIN_GROUP(scifa1_clk), + SH_PFC_PIN_GROUP(scifa1_ctrl), + SH_PFC_PIN_GROUP(scifb0_data), + SH_PFC_PIN_GROUP(scifb0_clk), + SH_PFC_PIN_GROUP(scifb0_ctrl), + SH_PFC_PIN_GROUP(scifb1_data), + SH_PFC_PIN_GROUP(scifb1_clk), + SH_PFC_PIN_GROUP(scifb1_ctrl), + SH_PFC_PIN_GROUP(scifb1_data_b), + SH_PFC_PIN_GROUP(scifb1_clk_b), + SH_PFC_PIN_GROUP(scifb1_ctrl_b), + SH_PFC_PIN_GROUP(scifb2_data), + SH_PFC_PIN_GROUP(scifb2_clk), + SH_PFC_PIN_GROUP(scifb2_ctrl), + SH_PFC_PIN_GROUP(scifb2_data_b), + SH_PFC_PIN_GROUP(scifb2_clk_b), + SH_PFC_PIN_GROUP(scifb2_ctrl_b), + SH_PFC_PIN_GROUP(scifb3_data), + SH_PFC_PIN_GROUP(scifb3_clk), + SH_PFC_PIN_GROUP(scifb3_ctrl), + SH_PFC_PIN_GROUP(scifb3_data_b), + SH_PFC_PIN_GROUP(scifb3_clk_b), + SH_PFC_PIN_GROUP(scifb3_ctrl_b), +}; + +static const char * const scifa0_groups[] = { + "scifa0_data", + "scifa0_clk", + "scifa0_ctrl", +}; + +static const char * const scifa1_groups[] = { + "scifa1_data", + "scifa1_clk", + "scifa1_ctrl", +}; + +static const char * const scifb0_groups[] = { + "scifb0_data", 
+ "scifb0_clk", + "scifb0_ctrl", +}; + +static const char * const scifb1_groups[] = { + "scifb1_data", + "scifb1_clk", + "scifb1_ctrl", + "scifb1_data_b", + "scifb1_clk_b", + "scifb1_ctrl_b", +}; + +static const char * const scifb2_groups[] = { + "scifb2_data", + "scifb2_clk", + "scifb2_ctrl", + "scifb2_data_b", + "scifb2_clk_b", + "scifb2_ctrl_b", +}; + +static const char * const scifb3_groups[] = { + "scifb3_data", + "scifb3_clk", + "scifb3_ctrl", + "scifb3_data_b", + "scifb3_clk_b", + "scifb3_ctrl_b", +}; + +static const struct sh_pfc_function pinmux_functions[] = { + SH_PFC_FUNCTION(scifa0), + SH_PFC_FUNCTION(scifa1), + SH_PFC_FUNCTION(scifb0), + SH_PFC_FUNCTION(scifb1), + SH_PFC_FUNCTION(scifb2), + SH_PFC_FUNCTION(scifb3), +}; + #define PINMUX_FN_BASE ARRAY_SIZE(pinmux_pins) static const struct pinmux_func pinmux_func_gpios[] = { @@ -3084,8 +3364,15 @@ const struct sh_pfc_soc_info r8a73a4_pinmux_info = { .pins = pinmux_pins, .nr_pins = ARRAY_SIZE(pinmux_pins), + .ranges = pinmux_ranges, .nr_ranges = ARRAY_SIZE(pinmux_ranges), + + .groups = pinmux_groups, + .nr_groups = ARRAY_SIZE(pinmux_groups), + .functions = pinmux_functions, + .nr_functions = ARRAY_SIZE(pinmux_functions), + .func_gpios = pinmux_func_gpios, .nr_func_gpios = ARRAY_SIZE(pinmux_func_gpios), -- GitLab From 515a828f773ba7389d77db90b980565382ece977 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 28 Mar 2013 00:16:37 +0900 Subject: [PATCH 0373/3163] sh-pfc: r8a73a4: Add IRQC pin groups and functions V2 of PINCTRL support for r8a73a4 IRQC hardware and in particular the external pins IRQ0 -> IRQ57. Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 186 +++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index cd5c5fd74ba4..8a5288c99a83 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -1576,6 +1576,72 @@ static const struct pinmux_range pinmux_ranges[] = { {.begin = 320, .end = 329,}, }; +/* - IRQC ------------------------------------------------------------------- */ +#define IRQC_PINS_MUX(pin, irq_mark) \ +static const unsigned int irqc_irq##irq_mark##_pins[] = { \ + pin, \ +}; \ +static const unsigned int irqc_irq##irq_mark##_mux[] = { \ + IRQ##irq_mark##_MARK, \ +} +IRQC_PINS_MUX(0, 0); +IRQC_PINS_MUX(1, 1); +IRQC_PINS_MUX(2, 2); +IRQC_PINS_MUX(3, 3); +IRQC_PINS_MUX(4, 4); +IRQC_PINS_MUX(5, 5); +IRQC_PINS_MUX(6, 6); +IRQC_PINS_MUX(7, 7); +IRQC_PINS_MUX(8, 8); +IRQC_PINS_MUX(9, 9); +IRQC_PINS_MUX(10, 10); +IRQC_PINS_MUX(11, 11); +IRQC_PINS_MUX(12, 12); +IRQC_PINS_MUX(13, 13); +IRQC_PINS_MUX(14, 14); +IRQC_PINS_MUX(15, 15); +IRQC_PINS_MUX(66, 40); +IRQC_PINS_MUX(84, 19); +IRQC_PINS_MUX(85, 18); +IRQC_PINS_MUX(102, 41); +IRQC_PINS_MUX(103, 42); +IRQC_PINS_MUX(109, 43); +IRQC_PINS_MUX(110, 44); +IRQC_PINS_MUX(111, 45); +IRQC_PINS_MUX(112, 46); +IRQC_PINS_MUX(113, 47); +IRQC_PINS_MUX(114, 48); +IRQC_PINS_MUX(115, 49); +IRQC_PINS_MUX(160, 20); +IRQC_PINS_MUX(161, 21); +IRQC_PINS_MUX(162, 22); +IRQC_PINS_MUX(163, 23); +IRQC_PINS_MUX(175, 24); +IRQC_PINS_MUX(176, 25); +IRQC_PINS_MUX(177, 26); +IRQC_PINS_MUX(178, 27); +IRQC_PINS_MUX(192, 31); +IRQC_PINS_MUX(193, 32); +IRQC_PINS_MUX(194, 33); +IRQC_PINS_MUX(195, 34); +IRQC_PINS_MUX(196, 35); +IRQC_PINS_MUX(197, 36); +IRQC_PINS_MUX(198, 37); +IRQC_PINS_MUX(199, 38); +IRQC_PINS_MUX(200, 39); +IRQC_PINS_MUX(290, 51); 
+IRQC_PINS_MUX(296, 52); +IRQC_PINS_MUX(301, 50); +IRQC_PINS_MUX(320, 16); +IRQC_PINS_MUX(321, 17); +IRQC_PINS_MUX(322, 28); +IRQC_PINS_MUX(323, 29); +IRQC_PINS_MUX(324, 30); +IRQC_PINS_MUX(325, 53); +IRQC_PINS_MUX(326, 54); +IRQC_PINS_MUX(327, 55); +IRQC_PINS_MUX(328, 56); +IRQC_PINS_MUX(329, 57); /* - SCIFA0 ----------------------------------------------------------------- */ static const unsigned int scifa0_data_pins[] = { /* SCIFA0_RXD, SCIFA0_TXD */ @@ -1773,6 +1839,64 @@ static const unsigned int scifb3_ctrl_b_mux[] = { }; static const struct sh_pfc_pin_group pinmux_groups[] = { + SH_PFC_PIN_GROUP(irqc_irq0), + SH_PFC_PIN_GROUP(irqc_irq1), + SH_PFC_PIN_GROUP(irqc_irq2), + SH_PFC_PIN_GROUP(irqc_irq3), + SH_PFC_PIN_GROUP(irqc_irq4), + SH_PFC_PIN_GROUP(irqc_irq5), + SH_PFC_PIN_GROUP(irqc_irq6), + SH_PFC_PIN_GROUP(irqc_irq7), + SH_PFC_PIN_GROUP(irqc_irq8), + SH_PFC_PIN_GROUP(irqc_irq9), + SH_PFC_PIN_GROUP(irqc_irq10), + SH_PFC_PIN_GROUP(irqc_irq11), + SH_PFC_PIN_GROUP(irqc_irq12), + SH_PFC_PIN_GROUP(irqc_irq13), + SH_PFC_PIN_GROUP(irqc_irq14), + SH_PFC_PIN_GROUP(irqc_irq15), + SH_PFC_PIN_GROUP(irqc_irq16), + SH_PFC_PIN_GROUP(irqc_irq17), + SH_PFC_PIN_GROUP(irqc_irq18), + SH_PFC_PIN_GROUP(irqc_irq19), + SH_PFC_PIN_GROUP(irqc_irq20), + SH_PFC_PIN_GROUP(irqc_irq21), + SH_PFC_PIN_GROUP(irqc_irq22), + SH_PFC_PIN_GROUP(irqc_irq23), + SH_PFC_PIN_GROUP(irqc_irq24), + SH_PFC_PIN_GROUP(irqc_irq25), + SH_PFC_PIN_GROUP(irqc_irq26), + SH_PFC_PIN_GROUP(irqc_irq27), + SH_PFC_PIN_GROUP(irqc_irq28), + SH_PFC_PIN_GROUP(irqc_irq29), + SH_PFC_PIN_GROUP(irqc_irq30), + SH_PFC_PIN_GROUP(irqc_irq31), + SH_PFC_PIN_GROUP(irqc_irq32), + SH_PFC_PIN_GROUP(irqc_irq33), + SH_PFC_PIN_GROUP(irqc_irq34), + SH_PFC_PIN_GROUP(irqc_irq35), + SH_PFC_PIN_GROUP(irqc_irq36), + SH_PFC_PIN_GROUP(irqc_irq37), + SH_PFC_PIN_GROUP(irqc_irq38), + SH_PFC_PIN_GROUP(irqc_irq39), + SH_PFC_PIN_GROUP(irqc_irq40), + SH_PFC_PIN_GROUP(irqc_irq41), + SH_PFC_PIN_GROUP(irqc_irq42), + SH_PFC_PIN_GROUP(irqc_irq43), + SH_PFC_PIN_GROUP(irqc_irq44), + SH_PFC_PIN_GROUP(irqc_irq45), + SH_PFC_PIN_GROUP(irqc_irq46), + SH_PFC_PIN_GROUP(irqc_irq47), + SH_PFC_PIN_GROUP(irqc_irq48), + SH_PFC_PIN_GROUP(irqc_irq49), + SH_PFC_PIN_GROUP(irqc_irq50), + SH_PFC_PIN_GROUP(irqc_irq51), + SH_PFC_PIN_GROUP(irqc_irq52), + SH_PFC_PIN_GROUP(irqc_irq53), + SH_PFC_PIN_GROUP(irqc_irq54), + SH_PFC_PIN_GROUP(irqc_irq55), + SH_PFC_PIN_GROUP(irqc_irq56), + SH_PFC_PIN_GROUP(irqc_irq57), SH_PFC_PIN_GROUP(scifa0_data), SH_PFC_PIN_GROUP(scifa0_clk), SH_PFC_PIN_GROUP(scifa0_ctrl), @@ -1802,6 +1926,67 @@ static const struct sh_pfc_pin_group pinmux_groups[] = { SH_PFC_PIN_GROUP(scifb3_ctrl_b), }; +static const char * const irqc_groups[] = { + "irqc_irq0", + "irqc_irq1", + "irqc_irq2", + "irqc_irq3", + "irqc_irq4", + "irqc_irq5", + "irqc_irq6", + "irqc_irq7", + "irqc_irq8", + "irqc_irq9", + "irqc_irq10", + "irqc_irq11", + "irqc_irq12", + "irqc_irq13", + "irqc_irq14", + "irqc_irq15", + "irqc_irq16", + "irqc_irq17", + "irqc_irq18", + "irqc_irq19", + "irqc_irq20", + "irqc_irq21", + "irqc_irq22", + "irqc_irq23", + "irqc_irq24", + "irqc_irq25", + "irqc_irq26", + "irqc_irq27", + "irqc_irq28", + "irqc_irq29", + "irqc_irq30", + "irqc_irq31", + "irqc_irq32", + "irqc_irq33", + "irqc_irq34", + "irqc_irq35", + "irqc_irq36", + "irqc_irq37", + "irqc_irq38", + "irqc_irq39", + "irqc_irq40", + "irqc_irq41", + "irqc_irq42", + "irqc_irq43", + "irqc_irq44", + "irqc_irq45", + "irqc_irq46", + "irqc_irq47", + "irqc_irq48", + "irqc_irq49", + "irqc_irq50", + "irqc_irq51", + "irqc_irq52", + "irqc_irq53", + 
"irqc_irq54", + "irqc_irq55", + "irqc_irq56", + "irqc_irq57", +}; + static const char * const scifa0_groups[] = { "scifa0_data", "scifa0_clk", @@ -1848,6 +2033,7 @@ static const char * const scifb3_groups[] = { }; static const struct sh_pfc_function pinmux_functions[] = { + SH_PFC_FUNCTION(irqc), SH_PFC_FUNCTION(scifa0), SH_PFC_FUNCTION(scifa1), SH_PFC_FUNCTION(scifb0), -- GitLab From 504e584aa1937d4819859ecadf0140ea252f3a84 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:50:55 +0900 Subject: [PATCH 0374/3163] sh-pfc: r8a73a4: Remove SCIF function GPIOS The r8a73a4 board support will use the pinctrl API to control the SCIF pins, remove the corresponding unused function GPIOS. Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 45 ---------------------------- 1 file changed, 45 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 8a5288c99a83..470b18f1a910 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -2160,55 +2160,45 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port19 */ GPIO_FN(LCDD19), - GPIO_FN(SCIFB3_RTS_19), GPIO_FN(DU0_DB3), /* Port20 */ GPIO_FN(LCDD20), - GPIO_FN(SCIFB3_CTS_20), GPIO_FN(DU0_DB4), /* Port21 */ GPIO_FN(LCDD21), - GPIO_FN(SCIFB3_TXD_21), GPIO_FN(DU0_DB5), /* Port22 */ GPIO_FN(LCDD22), - GPIO_FN(SCIFB3_RXD_22), GPIO_FN(DU0_DB6), /* Port23 */ GPIO_FN(LCDD23), - GPIO_FN(SCIFB3_SCK_23), GPIO_FN(DU0_DB7), /* Port24 */ GPIO_FN(LCDHSYN), GPIO_FN(LCDCS), - GPIO_FN(SCIFB1_RTS_24), GPIO_FN(DU0_EXHSYNC_N_CSYNC_N_HSYNC_N), /* Port25 */ GPIO_FN(LCDVSYN), - GPIO_FN(SCIFB1_CTS_25), GPIO_FN(DU0_EXVSYNC_N_VSYNC_N_CSYNC_N), /* Port26 */ GPIO_FN(LCDDCK), GPIO_FN(LCDWR), - GPIO_FN(SCIFB1_TXD_26), GPIO_FN(DU0_DOTCLKIN), /* Port27 */ GPIO_FN(LCDDISP), GPIO_FN(LCDRS), - GPIO_FN(SCIFB1_RXD_27), GPIO_FN(DU0_DOTCLKOUT), /* Port28 */ GPIO_FN(LCDRD_N), - GPIO_FN(SCIFB1_SCK_28), GPIO_FN(DU0_DOTCLKOUTB), /* Port29 */ @@ -2222,48 +2212,36 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(DU0_ODDF_N_CLAMP), /* Port32 */ - GPIO_FN(SCIFA0_RTS), GPIO_FN(SIM0_DET), GPIO_FN(CSCIF0_RTS), /* Port33 */ - GPIO_FN(SCIFA0_CTS), GPIO_FN(SIM1_DET), GPIO_FN(CSCIF0_CTS), /* Port34 */ - GPIO_FN(SCIFA0_SCK), GPIO_FN(SIM0_PWRON), GPIO_FN(CSCIF0_SCK), /* Port35 */ - GPIO_FN(SCIFA1_RTS), GPIO_FN(CSCIF1_RTS), /* Port36 */ - GPIO_FN(SCIFA1_CTS), GPIO_FN(CSCIF1_CTS), /* Port37 */ - GPIO_FN(SCIFA1_SCK), GPIO_FN(CSCIF1_SCK), /* Port38 */ - GPIO_FN(SCIFB0_RTS), GPIO_FN(TPU0TO1), - GPIO_FN(SCIFB3_RTS_38), GPIO_FN(CHSCIF0_HRTS), /* Port39 */ - GPIO_FN(SCIFB0_CTS), GPIO_FN(TPU0TO2), - GPIO_FN(SCIFB3_CTS_39), GPIO_FN(CHSCIF0_HCTS), /* Port40 */ - GPIO_FN(SCIFB0_SCK), GPIO_FN(TPU0TO3), - GPIO_FN(SCIFB3_SCK_40), GPIO_FN(CHSCIF0_HSCK), /* Port64 */ @@ -2274,52 +2252,44 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port66 */ GPIO_FN(HSI_RX_WAKE), - GPIO_FN(SCIFB2_CTS_66), GPIO_FN(MSIOF3_SYNC), GPIO_FN(GenIO4), GPIO_FN(IRQ40), /* Port67 */ GPIO_FN(HSI_RX_READY), - GPIO_FN(SCIFB1_TXD_67), GPIO_FN(GIO_OUT3_67), GPIO_FN(CHSCIF1_HTX), /* Port68 */ GPIO_FN(HSI_RX_FLAG), - GPIO_FN(SCIFB2_TXD_68), GPIO_FN(MSIOF3_TXD), GPIO_FN(GIO_OUT4_68), /* Port69 */ GPIO_FN(HSI_RX_DATA), - GPIO_FN(SCIFB2_RXD_69), GPIO_FN(MSIOF3_RXD), GPIO_FN(GIO_OUT5_69), /* Port70 */ GPIO_FN(HSI_TX_FLAG), - GPIO_FN(SCIFB1_RTS_70), GPIO_FN(GIO_OUT1_70), GPIO_FN(HSIC_TSTCLK0), GPIO_FN(CHSCIF1_HRTS), 
/* Port71 */ GPIO_FN(HSI_TX_DATA), - GPIO_FN(SCIFB1_CTS_71), GPIO_FN(GIO_OUT2_71), GPIO_FN(HSIC_TSTCLK1), GPIO_FN(CHSCIF1_HCTS), /* Port72 */ GPIO_FN(HSI_TX_WAKE), - GPIO_FN(SCIFB1_RXD_72), GPIO_FN(GenIO8), GPIO_FN(CHSCIF1_HRX), /* Port73 */ GPIO_FN(HSI_TX_READY), - GPIO_FN(SCIFB2_RTS_73), GPIO_FN(MSIOF3_SCK), GPIO_FN(GIO_OUT0_73), @@ -2398,36 +2368,28 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(IRQ49), /* Port116 */ - GPIO_FN(SCIFA0_TXD), GPIO_FN(CSCIF0_TX), /* Port117 */ - GPIO_FN(SCIFA0_RXD), GPIO_FN(CSCIF0_RX), /* Port118 */ - GPIO_FN(SCIFA1_TXD), GPIO_FN(CSCIF1_TX), /* Port119 */ - GPIO_FN(SCIFA1_RXD), GPIO_FN(CSCIF1_RX), /* Port120 */ GPIO_FN(SF_PORT_1_120), - GPIO_FN(SCIFB3_RXD_120), GPIO_FN(DU0_CDE), /* Port121 */ GPIO_FN(SF_PORT_0_121), - GPIO_FN(SCIFB3_TXD_121), /* Port122 */ - GPIO_FN(SCIFB0_TXD), GPIO_FN(CHSCIF0_HTX), /* Port123 */ - GPIO_FN(SCIFB0_RXD), GPIO_FN(CHSCIF0_HRX), /* Port124 */ @@ -2721,11 +2683,9 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(MSIOF0_TXD), /* Port261 */ - GPIO_FN(SCIFB1_SCK_261), GPIO_FN(CHSCIF1_HSCK), /* Port262 */ - GPIO_FN(SCIFB2_SCK_262), /* Port263 - Port266 FN1 */ GPIO_FN(MSIOF1_SS2), @@ -2809,7 +2769,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port295 */ GPIO_FN(SDHID2_0), GPIO_FN(MSIOF4_TXD), - GPIO_FN(SCIFB2_TXD_295), GPIO_FN(MSIOF6_TXD), /* Port296 */ @@ -2830,10 +2789,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(MSIOF4_SS1), /* Port297 - Port300 FN3 */ - GPIO_FN(SCIFB2_RXD_297), - GPIO_FN(SCIFB2_CTS_298), - GPIO_FN(SCIFB2_SCK_299), - GPIO_FN(SCIFB2_RTS_300), /* Port297 - Port300 FN4 */ GPIO_FN(MSIOF6_RXD), -- GitLab From f91663ff5619d73588fc91c3e7483bcb85570372 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:51:05 +0900 Subject: [PATCH 0375/3163] sh-pfc: r8a73a4: Remove IRQC function GPIOS The r8a73a4 board support will use the pinctrl API to control the external IRQ pins so remove the unused function GPIOS. 
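For reference, a minimal sketch of the migration this enables; the first snippet shows the legacy function-GPIO idiom being retired (GPIO_FN_IRQ16 is the pre-series enum value), and the device names in the replacement mapping are illustrative:

	#include <linux/gpio.h>
	#include <linux/pinctrl/machine.h>

	/* before: pin function selected through a function GPIO */
	static int __init legacy_setup(void)
	{
		return gpio_request(GPIO_FN_IRQ16, NULL);
	}

	/* after: the same selection as a pinctrl mux mapping,
	 * using the irqc_irq16 group/function added earlier
	 */
	static const struct pinctrl_map example_irq_map[] = {
		PIN_MAP_MUX_GROUP_DEFAULT("renesas_irqc.0", "pfc-r8a73a4",
					  "irqc_irq16", "irqc"),
	};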
Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 58 ---------------------------- 1 file changed, 58 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 470b18f1a910..66fc7478a17e 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -2049,99 +2049,83 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(LCDD0), GPIO_FN(PDM2_CLK_0), GPIO_FN(DU0_DR0), - GPIO_FN(IRQ0), /* Port1 */ GPIO_FN(LCDD1), GPIO_FN(PDM2_DATA_1), GPIO_FN(DU0_DR19), - GPIO_FN(IRQ1), /* Port2 */ GPIO_FN(LCDD2), GPIO_FN(PDM3_CLK_2), GPIO_FN(DU0_DR2), - GPIO_FN(IRQ2), /* Port3 */ GPIO_FN(LCDD3), GPIO_FN(PDM3_DATA_3), GPIO_FN(DU0_DR3), - GPIO_FN(IRQ3), /* Port4 */ GPIO_FN(LCDD4), GPIO_FN(PDM4_CLK_4), GPIO_FN(DU0_DR4), - GPIO_FN(IRQ4), /* Port5 */ GPIO_FN(LCDD5), GPIO_FN(PDM4_DATA_5), GPIO_FN(DU0_DR5), - GPIO_FN(IRQ5), /* Port6 */ GPIO_FN(LCDD6), GPIO_FN(PDM0_OUTCLK_6), GPIO_FN(DU0_DR6), - GPIO_FN(IRQ6), /* Port7 */ GPIO_FN(LCDD7), GPIO_FN(PDM0_OUTDATA_7), GPIO_FN(DU0_DR7), - GPIO_FN(IRQ7), /* Port8 */ GPIO_FN(LCDD8), GPIO_FN(PDM1_OUTCLK_8), GPIO_FN(DU0_DG0), - GPIO_FN(IRQ8), /* Port9 */ GPIO_FN(LCDD9), GPIO_FN(PDM1_OUTDATA_9), GPIO_FN(DU0_DG1), - GPIO_FN(IRQ9), /* Port10 */ GPIO_FN(LCDD10), GPIO_FN(FSICCK), GPIO_FN(DU0_DG2), - GPIO_FN(IRQ10), /* Port11 */ GPIO_FN(LCDD11), GPIO_FN(FSICISLD), GPIO_FN(DU0_DG3), - GPIO_FN(IRQ11), /* Port12 */ GPIO_FN(LCDD12), GPIO_FN(FSICOMC), GPIO_FN(DU0_DG4), - GPIO_FN(IRQ12), /* Port13 */ GPIO_FN(LCDD13), GPIO_FN(FSICOLR), GPIO_FN(FSICILR), GPIO_FN(DU0_DG5), - GPIO_FN(IRQ13), /* Port14 */ GPIO_FN(LCDD14), GPIO_FN(FSICOBT), GPIO_FN(FSICIBT), GPIO_FN(DU0_DG6), - GPIO_FN(IRQ14), /* Port15 */ GPIO_FN(LCDD15), GPIO_FN(FSICOSLD), GPIO_FN(DU0_DG7), - GPIO_FN(IRQ15), /* Port16 */ GPIO_FN(LCDD16), @@ -2254,7 +2238,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(HSI_RX_WAKE), GPIO_FN(MSIOF3_SYNC), GPIO_FN(GenIO4), - GPIO_FN(IRQ40), /* Port67 */ GPIO_FN(HSI_RX_READY), @@ -2304,8 +2287,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(TXP2), GPIO_FN(COEX_0), GPIO_FN(COEX_1), - GPIO_FN(IRQ19), - GPIO_FN(IRQ18), /* Port96 - Port101 */ GPIO_FN(KEYIN0), @@ -2317,11 +2298,9 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port102 */ GPIO_FN(KEYIN6), - GPIO_FN(IRQ41), /* Port103 */ GPIO_FN(KEYIN7), - GPIO_FN(IRQ42), /* Port104 - Port108 */ GPIO_FN(KEYOUT0), @@ -2332,40 +2311,33 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port109 */ GPIO_FN(KEYOUT5), - GPIO_FN(IRQ43), /* Port110 */ GPIO_FN(KEYOUT6), - GPIO_FN(IRQ44), /* Port111 */ GPIO_FN(KEYOUT7), GPIO_FN(RFANAEN), - GPIO_FN(IRQ45), /* Port112 */ GPIO_FN(KEYIN8), GPIO_FN(KEYOUT8), GPIO_FN(SF_IRQ_04), - GPIO_FN(IRQ46), /* Port113 */ GPIO_FN(KEYIN9), GPIO_FN(KEYOUT9), GPIO_FN(SF_IRQ_05), - GPIO_FN(IRQ47), /* Port114 */ GPIO_FN(KEYIN10), GPIO_FN(KEYOUT10), GPIO_FN(SF_IRQ_06), - GPIO_FN(IRQ48), /* Port115 */ GPIO_FN(KEYIN11), GPIO_FN(KEYOUT11), GPIO_FN(SF_IRQ_07), - GPIO_FN(IRQ49), /* Port116 */ GPIO_FN(CSCIF0_TX), @@ -2444,10 +2416,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(STP_ISD_1), /* Port160 - Port178 */ - GPIO_FN(IRQ20), - GPIO_FN(IRQ21), - GPIO_FN(IRQ22), - GPIO_FN(IRQ23), GPIO_FN(MMCD0_0), GPIO_FN(MMCD0_1), GPIO_FN(MMCD0_2), @@ -2459,10 +2427,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(MMCCMD0), GPIO_FN(MMCCLK0), 
GPIO_FN(MMCRST), - GPIO_FN(IRQ24), - GPIO_FN(IRQ25), - GPIO_FN(IRQ26), - GPIO_FN(IRQ27), /* Port192 - Port200 FN1 */ GPIO_FN(A10), @@ -2487,15 +2451,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(MMCCMD1), /* Port192 - Port200 IRQ */ - GPIO_FN(IRQ31), - GPIO_FN(IRQ32), - GPIO_FN(IRQ33), - GPIO_FN(IRQ34), - GPIO_FN(IRQ35), - GPIO_FN(IRQ36), - GPIO_FN(IRQ37), - GPIO_FN(IRQ38), - GPIO_FN(IRQ39), /* Port201 */ GPIO_FN(A1), @@ -2752,7 +2707,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port290 */ GPIO_FN(SDHID1_1), GPIO_FN(STMDATA1_2), - GPIO_FN(IRQ51), /* Port291 - Port294 FN1 */ GPIO_FN(SDHID1_2), @@ -2774,7 +2728,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port296 */ GPIO_FN(SDHID2_1), GPIO_FN(MSIOF6_SS2), - GPIO_FN(IRQ52), /* Port297 - Port300 FN1 */ GPIO_FN(SDHID2_2), @@ -2798,7 +2751,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { /* Port301 */ GPIO_FN(SDHICD0), - GPIO_FN(IRQ50), /* Port302 - Port306 FN1 */ GPIO_FN(SDHID0_0), @@ -2822,16 +2774,6 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(STMCLK_1), /* Port320 - Port329 */ - GPIO_FN(IRQ16), - GPIO_FN(IRQ17), - GPIO_FN(IRQ28), - GPIO_FN(IRQ29), - GPIO_FN(IRQ30), - GPIO_FN(IRQ53), - GPIO_FN(IRQ54), - GPIO_FN(IRQ55), - GPIO_FN(IRQ56), - GPIO_FN(IRQ57), }; static const struct pinmux_cfg_reg pinmux_config_regs[] = { -- GitLab From 9fdec7b1fe2b6a6566e92eb1f88800e06b555255 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:51:15 +0900 Subject: [PATCH 0376/3163] ARM: shmobile: r8a73a4: Remove SCIF function GPIOs Remove SCIF function GPIOs that have been deprecated by the pinctrl API. Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/r8a73a4.h | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h index 2d4af4af3634..703387a813db 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a73a4.h +++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h @@ -200,55 +200,45 @@ enum { /* Port19 */ GPIO_FN_LCDD19, - GPIO_FN_SCIFB3_RTS_19, GPIO_FN_DU0_DB3, /* Port20 */ GPIO_FN_LCDD20, - GPIO_FN_SCIFB3_CTS_20, GPIO_FN_DU0_DB4, /* Port21 */ GPIO_FN_LCDD21, - GPIO_FN_SCIFB3_TXD_21, GPIO_FN_DU0_DB5, /* Port22 */ GPIO_FN_LCDD22, - GPIO_FN_SCIFB3_RXD_22, GPIO_FN_DU0_DB6, /* Port23 */ GPIO_FN_LCDD23, - GPIO_FN_SCIFB3_SCK_23, GPIO_FN_DU0_DB7, /* Port24 */ GPIO_FN_LCDHSYN, GPIO_FN_LCDCS, - GPIO_FN_SCIFB1_RTS_24, GPIO_FN_DU0_EXHSYNC_N_CSYNC_N_HSYNC_N, /* Port25 */ GPIO_FN_LCDVSYN, - GPIO_FN_SCIFB1_CTS_25, GPIO_FN_DU0_EXVSYNC_N_VSYNC_N_CSYNC_N, /* Port26 */ GPIO_FN_LCDDCK, GPIO_FN_LCDWR, - GPIO_FN_SCIFB1_TXD_26, GPIO_FN_DU0_DOTCLKIN, /* Port27 */ GPIO_FN_LCDDISP, GPIO_FN_LCDRS, - GPIO_FN_SCIFB1_RXD_27, GPIO_FN_DU0_DOTCLKOUT, /* Port28 */ GPIO_FN_LCDRD_N, - GPIO_FN_SCIFB1_SCK_28, GPIO_FN_DU0_DOTCLKOUTB, /* Port29 */ @@ -262,48 +252,36 @@ enum { GPIO_FN_DU0_ODDF_N_CLAMP, /* Port32 */ - GPIO_FN_SCIFA0_RTS, GPIO_FN_SIM0_DET, GPIO_FN_CSCIF0_RTS, /* Port33 */ - GPIO_FN_SCIFA0_CTS, GPIO_FN_SIM1_DET, GPIO_FN_CSCIF0_CTS, /* Port34 */ - GPIO_FN_SCIFA0_SCK, GPIO_FN_SIM0_PWRON, GPIO_FN_CSCIF0_SCK, /* Port35 */ - GPIO_FN_SCIFA1_RTS, GPIO_FN_CSCIF1_RTS, /* Port36 */ - GPIO_FN_SCIFA1_CTS, GPIO_FN_CSCIF1_CTS, /* Port37 */ - GPIO_FN_SCIFA1_SCK, GPIO_FN_CSCIF1_SCK, /* Port38 */ - GPIO_FN_SCIFB0_RTS, GPIO_FN_TPU0TO1, - GPIO_FN_SCIFB3_RTS_38, GPIO_FN_CHSCIF0_HRTS, /* Port39 */ - 
GPIO_FN_SCIFB0_CTS, GPIO_FN_TPU0TO2, - GPIO_FN_SCIFB3_CTS_39, GPIO_FN_CHSCIF0_HCTS, /* Port40 */ - GPIO_FN_SCIFB0_SCK, GPIO_FN_TPU0TO3, - GPIO_FN_SCIFB3_SCK_40, GPIO_FN_CHSCIF0_HSCK, /* Port64 */ @@ -314,52 +292,44 @@ enum { /* Port66 */ GPIO_FN_HSI_RX_WAKE, - GPIO_FN_SCIFB2_CTS_66, GPIO_FN_MSIOF3_SYNC, GPIO_FN_GenIO4, GPIO_FN_IRQ40, /* Port67 */ GPIO_FN_HSI_RX_READY, - GPIO_FN_SCIFB1_TXD_67, GPIO_FN_GIO_OUT3_67, GPIO_FN_CHSCIF1_HTX, /* Port68 */ GPIO_FN_HSI_RX_FLAG, - GPIO_FN_SCIFB2_TXD_68, GPIO_FN_MSIOF3_TXD, GPIO_FN_GIO_OUT4_68, /* Port69 */ GPIO_FN_HSI_RX_DATA, - GPIO_FN_SCIFB2_RXD_69, GPIO_FN_MSIOF3_RXD, GPIO_FN_GIO_OUT5_69, /* Port70 */ GPIO_FN_HSI_TX_FLAG, - GPIO_FN_SCIFB1_RTS_70, GPIO_FN_GIO_OUT1_70, GPIO_FN_HSIC_TSTCLK0, GPIO_FN_CHSCIF1_HRTS, /* Port71 */ GPIO_FN_HSI_TX_DATA, - GPIO_FN_SCIFB1_CTS_71, GPIO_FN_GIO_OUT2_71, GPIO_FN_HSIC_TSTCLK1, GPIO_FN_CHSCIF1_HCTS, /* Port72 */ GPIO_FN_HSI_TX_WAKE, - GPIO_FN_SCIFB1_RXD_72, GPIO_FN_GenIO8, GPIO_FN_CHSCIF1_HRX, /* Port73 */ GPIO_FN_HSI_TX_READY, - GPIO_FN_SCIFB2_RTS_73, GPIO_FN_MSIOF3_SCK, GPIO_FN_GIO_OUT0_73, @@ -438,36 +408,28 @@ enum { GPIO_FN_IRQ49, /* Port116 */ - GPIO_FN_SCIFA0_TXD, GPIO_FN_CSCIF0_TX, /* Port117 */ - GPIO_FN_SCIFA0_RXD, GPIO_FN_CSCIF0_RX, /* Port118 */ - GPIO_FN_SCIFA1_TXD, GPIO_FN_CSCIF1_TX, /* Port119 */ - GPIO_FN_SCIFA1_RXD, GPIO_FN_CSCIF1_RX, /* Port120 */ GPIO_FN_SF_PORT_1_120, - GPIO_FN_SCIFB3_RXD_120, GPIO_FN_DU0_CDE, /* Port121 */ GPIO_FN_SF_PORT_0_121, - GPIO_FN_SCIFB3_TXD_121, /* Port122 */ - GPIO_FN_SCIFB0_TXD, GPIO_FN_CHSCIF0_HTX, /* Port123 */ - GPIO_FN_SCIFB0_RXD, GPIO_FN_CHSCIF0_HRX, /* Port124 */ @@ -761,11 +723,9 @@ enum { GPIO_FN_MSIOF0_TXD, /* Port261 */ - GPIO_FN_SCIFB1_SCK_261, GPIO_FN_CHSCIF1_HSCK, /* Port262 */ - GPIO_FN_SCIFB2_SCK_262, /* Port263 - Port266 FN1 */ GPIO_FN_MSIOF1_SS2, @@ -849,7 +809,6 @@ enum { /* Port295 */ GPIO_FN_SDHID2_0, GPIO_FN_MSIOF4_TXD, - GPIO_FN_SCIFB2_TXD_295, GPIO_FN_MSIOF6_TXD, /* Port296 */ @@ -870,10 +829,6 @@ enum { GPIO_FN_MSIOF4_SS1, /* Port297 - Port300 FN3 */ - GPIO_FN_SCIFB2_RXD_297, - GPIO_FN_SCIFB2_CTS_298, - GPIO_FN_SCIFB2_SCK_299, - GPIO_FN_SCIFB2_RTS_300, /* Port297 - Port300 FN4 */ GPIO_FN_MSIOF6_RXD, -- GitLab From 5260a7a36356b0b0b34aeaff8e9fc6a5a7fff9e7 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:51:24 +0900 Subject: [PATCH 0377/3163] ARM: shmobile: r8a73a4: Remove IRQC function GPIOs Remove IRQ pin function GPIOs that have been deprecated by the pinctrl API. 
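For reference, after this cleanup boards refer to the external IRQs by their fixed Linux IRQ numbers rather than through function GPIOs; a minimal sketch, assuming the irq_pin() helper from mach/irqs.h and an illustrative device pairing:

	#include <linux/ioport.h>
	#include <mach/irqs.h>

	/* port 102 carries external IRQ 41 per the pinmux_irqs[] table */
	static struct resource example_keypad_irq =
		DEFINE_RES_IRQ(irq_pin(41));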
Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/r8a73a4.h | 58 ------------------- 1 file changed, 58 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h index 703387a813db..9162e8645110 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a73a4.h +++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h @@ -89,99 +89,83 @@ enum { GPIO_FN_LCDD0 = 330, GPIO_FN_PDM2_CLK_0, GPIO_FN_DU0_DR0, - GPIO_FN_IRQ0, /* Port1 */ GPIO_FN_LCDD1, GPIO_FN_PDM2_DATA_1, GPIO_FN_DU0_DR19, - GPIO_FN_IRQ1, /* Port2 */ GPIO_FN_LCDD2, GPIO_FN_PDM3_CLK_2, GPIO_FN_DU0_DR2, - GPIO_FN_IRQ2, /* Port3 */ GPIO_FN_LCDD3, GPIO_FN_PDM3_DATA_3, GPIO_FN_DU0_DR3, - GPIO_FN_IRQ3, /* Port4 */ GPIO_FN_LCDD4, GPIO_FN_PDM4_CLK_4, GPIO_FN_DU0_DR4, - GPIO_FN_IRQ4, /* Port5 */ GPIO_FN_LCDD5, GPIO_FN_PDM4_DATA_5, GPIO_FN_DU0_DR5, - GPIO_FN_IRQ5, /* Port6 */ GPIO_FN_LCDD6, GPIO_FN_PDM0_OUTCLK_6, GPIO_FN_DU0_DR6, - GPIO_FN_IRQ6, /* Port7 */ GPIO_FN_LCDD7, GPIO_FN_PDM0_OUTDATA_7, GPIO_FN_DU0_DR7, - GPIO_FN_IRQ7, /* Port8 */ GPIO_FN_LCDD8, GPIO_FN_PDM1_OUTCLK_8, GPIO_FN_DU0_DG0, - GPIO_FN_IRQ8, /* Port9 */ GPIO_FN_LCDD9, GPIO_FN_PDM1_OUTDATA_9, GPIO_FN_DU0_DG1, - GPIO_FN_IRQ9, /* Port10 */ GPIO_FN_LCDD10, GPIO_FN_FSICCK, GPIO_FN_DU0_DG2, - GPIO_FN_IRQ10, /* Port11 */ GPIO_FN_LCDD11, GPIO_FN_FSICISLD, GPIO_FN_DU0_DG3, - GPIO_FN_IRQ11, /* Port12 */ GPIO_FN_LCDD12, GPIO_FN_FSICOMC, GPIO_FN_DU0_DG4, - GPIO_FN_IRQ12, /* Port13 */ GPIO_FN_LCDD13, GPIO_FN_FSICOLR, GPIO_FN_FSICILR, GPIO_FN_DU0_DG5, - GPIO_FN_IRQ13, /* Port14 */ GPIO_FN_LCDD14, GPIO_FN_FSICOBT, GPIO_FN_FSICIBT, GPIO_FN_DU0_DG6, - GPIO_FN_IRQ14, /* Port15 */ GPIO_FN_LCDD15, GPIO_FN_FSICOSLD, GPIO_FN_DU0_DG7, - GPIO_FN_IRQ15, /* Port16 */ GPIO_FN_LCDD16, @@ -294,7 +278,6 @@ enum { GPIO_FN_HSI_RX_WAKE, GPIO_FN_MSIOF3_SYNC, GPIO_FN_GenIO4, - GPIO_FN_IRQ40, /* Port67 */ GPIO_FN_HSI_RX_READY, @@ -344,8 +327,6 @@ enum { GPIO_FN_TXP2, GPIO_FN_COEX_0, GPIO_FN_COEX_1, - GPIO_FN_IRQ19, - GPIO_FN_IRQ18, /* Port96 - Port101 */ GPIO_FN_KEYIN0, @@ -357,11 +338,9 @@ enum { /* Port102 */ GPIO_FN_KEYIN6, - GPIO_FN_IRQ41, /* Port103 */ GPIO_FN_KEYIN7, - GPIO_FN_IRQ42, /* Port104 - Port108 */ GPIO_FN_KEYOUT0, @@ -372,40 +351,33 @@ enum { /* Port109 */ GPIO_FN_KEYOUT5, - GPIO_FN_IRQ43, /* Port110 */ GPIO_FN_KEYOUT6, - GPIO_FN_IRQ44, /* Port111 */ GPIO_FN_KEYOUT7, GPIO_FN_RFANAEN, - GPIO_FN_IRQ45, /* Port112 */ GPIO_FN_KEYIN8, GPIO_FN_KEYOUT8, GPIO_FN_SF_IRQ_04, - GPIO_FN_IRQ46, /* Port113 */ GPIO_FN_KEYIN9, GPIO_FN_KEYOUT9, GPIO_FN_SF_IRQ_05, - GPIO_FN_IRQ47, /* Port114 */ GPIO_FN_KEYIN10, GPIO_FN_KEYOUT10, GPIO_FN_SF_IRQ_06, - GPIO_FN_IRQ48, /* Port115 */ GPIO_FN_KEYIN11, GPIO_FN_KEYOUT11, GPIO_FN_SF_IRQ_07, - GPIO_FN_IRQ49, /* Port116 */ GPIO_FN_CSCIF0_TX, @@ -484,10 +456,6 @@ enum { GPIO_FN_STP_ISD_1, /* Port160 - Port178 */ - GPIO_FN_IRQ20, - GPIO_FN_IRQ21, - GPIO_FN_IRQ22, - GPIO_FN_IRQ23, GPIO_FN_MMCD0_0, GPIO_FN_MMCD0_1, GPIO_FN_MMCD0_2, @@ -499,10 +467,6 @@ enum { GPIO_FN_MMCCMD0, GPIO_FN_MMCCLK0, GPIO_FN_MMCRST, - GPIO_FN_IRQ24, - GPIO_FN_IRQ25, - GPIO_FN_IRQ26, - GPIO_FN_IRQ27, /* Port192 - Port200 FN1 */ GPIO_FN_A10, @@ -527,15 +491,6 @@ enum { GPIO_FN_MMCCMD1, /* Port192 - Port200 IRQ */ - GPIO_FN_IRQ31, - GPIO_FN_IRQ32, - GPIO_FN_IRQ33, - GPIO_FN_IRQ34, - GPIO_FN_IRQ35, - GPIO_FN_IRQ36, - GPIO_FN_IRQ37, - GPIO_FN_IRQ38, - GPIO_FN_IRQ39, /* Port201 */ GPIO_FN_A1, @@ -792,7 +747,6 @@ enum { /* Port290 */ GPIO_FN_SDHID1_1, 
GPIO_FN_STMDATA1_2, - GPIO_FN_IRQ51, /* Port291 - Port294 FN1 */ GPIO_FN_SDHID1_2, @@ -814,7 +768,6 @@ enum { /* Port296 */ GPIO_FN_SDHID2_1, GPIO_FN_MSIOF6_SS2, - GPIO_FN_IRQ52, /* Port297 - Port300 FN1 */ GPIO_FN_SDHID2_2, @@ -838,7 +791,6 @@ enum { /* Port301 */ GPIO_FN_SDHICD0, - GPIO_FN_IRQ50, /* Port302 - Port306 FN1 */ GPIO_FN_SDHID0_0, @@ -862,16 +814,6 @@ enum { GPIO_FN_STMCLK_1, /* Port320 - Port329 */ - GPIO_FN_IRQ16, - GPIO_FN_IRQ17, - GPIO_FN_IRQ28, - GPIO_FN_IRQ29, - GPIO_FN_IRQ30, - GPIO_FN_IRQ53, - GPIO_FN_IRQ54, - GPIO_FN_IRQ55, - GPIO_FN_IRQ56, - GPIO_FN_IRQ57, }; void r8a73a4_add_standard_devices(void); -- GitLab From 3e36ab671c36d0e9471c75d6e8b78926dc153f6b Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:51:34 +0900 Subject: [PATCH 0378/3163] sh-pfc: r8a73a4: Remove function GPIOs All r8a73a4 platforms use the pinctrl API to control pin functions. Function GPIOs are no longer needed. Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 737 --------------------------- 1 file changed, 737 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 66fc7478a17e..464c5f9ce9fa 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -2042,740 +2042,6 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(scifb3), }; -#define PINMUX_FN_BASE ARRAY_SIZE(pinmux_pins) - -static const struct pinmux_func pinmux_func_gpios[] = { - /* Port0 */ - GPIO_FN(LCDD0), - GPIO_FN(PDM2_CLK_0), - GPIO_FN(DU0_DR0), - - /* Port1 */ - GPIO_FN(LCDD1), - GPIO_FN(PDM2_DATA_1), - GPIO_FN(DU0_DR19), - - /* Port2 */ - GPIO_FN(LCDD2), - GPIO_FN(PDM3_CLK_2), - GPIO_FN(DU0_DR2), - - /* Port3 */ - GPIO_FN(LCDD3), - GPIO_FN(PDM3_DATA_3), - GPIO_FN(DU0_DR3), - - /* Port4 */ - GPIO_FN(LCDD4), - GPIO_FN(PDM4_CLK_4), - GPIO_FN(DU0_DR4), - - /* Port5 */ - GPIO_FN(LCDD5), - GPIO_FN(PDM4_DATA_5), - GPIO_FN(DU0_DR5), - - /* Port6 */ - GPIO_FN(LCDD6), - GPIO_FN(PDM0_OUTCLK_6), - GPIO_FN(DU0_DR6), - - /* Port7 */ - GPIO_FN(LCDD7), - GPIO_FN(PDM0_OUTDATA_7), - GPIO_FN(DU0_DR7), - - /* Port8 */ - GPIO_FN(LCDD8), - GPIO_FN(PDM1_OUTCLK_8), - GPIO_FN(DU0_DG0), - - /* Port9 */ - GPIO_FN(LCDD9), - GPIO_FN(PDM1_OUTDATA_9), - GPIO_FN(DU0_DG1), - - /* Port10 */ - GPIO_FN(LCDD10), - GPIO_FN(FSICCK), - GPIO_FN(DU0_DG2), - - /* Port11 */ - GPIO_FN(LCDD11), - GPIO_FN(FSICISLD), - GPIO_FN(DU0_DG3), - - /* Port12 */ - GPIO_FN(LCDD12), - GPIO_FN(FSICOMC), - GPIO_FN(DU0_DG4), - - /* Port13 */ - GPIO_FN(LCDD13), - GPIO_FN(FSICOLR), - GPIO_FN(FSICILR), - GPIO_FN(DU0_DG5), - - /* Port14 */ - GPIO_FN(LCDD14), - GPIO_FN(FSICOBT), - GPIO_FN(FSICIBT), - GPIO_FN(DU0_DG6), - - /* Port15 */ - GPIO_FN(LCDD15), - GPIO_FN(FSICOSLD), - GPIO_FN(DU0_DG7), - - /* Port16 */ - GPIO_FN(LCDD16), - GPIO_FN(TPU1TO1), - GPIO_FN(DU0_DB0), - - /* Port17 */ - GPIO_FN(LCDD17), - GPIO_FN(SF_IRQ_00), - GPIO_FN(DU0_DB1), - - /* Port18 */ - GPIO_FN(LCDD18), - GPIO_FN(SF_IRQ_01), - GPIO_FN(DU0_DB2), - - /* Port19 */ - GPIO_FN(LCDD19), - GPIO_FN(DU0_DB3), - - /* Port20 */ - GPIO_FN(LCDD20), - GPIO_FN(DU0_DB4), - - /* Port21 */ - GPIO_FN(LCDD21), - GPIO_FN(DU0_DB5), - - /* Port22 */ - GPIO_FN(LCDD22), - GPIO_FN(DU0_DB6), - - /* Port23 */ - GPIO_FN(LCDD23), - GPIO_FN(DU0_DB7), - - /* Port24 */ - GPIO_FN(LCDHSYN), - GPIO_FN(LCDCS), - GPIO_FN(DU0_EXHSYNC_N_CSYNC_N_HSYNC_N), - - /* Port25 */ - GPIO_FN(LCDVSYN), - 
GPIO_FN(DU0_EXVSYNC_N_VSYNC_N_CSYNC_N), - - /* Port26 */ - GPIO_FN(LCDDCK), - GPIO_FN(LCDWR), - GPIO_FN(DU0_DOTCLKIN), - - /* Port27 */ - GPIO_FN(LCDDISP), - GPIO_FN(LCDRS), - GPIO_FN(DU0_DOTCLKOUT), - - /* Port28 */ - GPIO_FN(LCDRD_N), - GPIO_FN(DU0_DOTCLKOUTB), - - /* Port29 */ - GPIO_FN(LCDLCLK), - GPIO_FN(SF_IRQ_02), - GPIO_FN(DU0_DISP_CSYNC_N_DE), - - /* Port30 */ - GPIO_FN(LCDDON), - GPIO_FN(SF_IRQ_03), - GPIO_FN(DU0_ODDF_N_CLAMP), - - /* Port32 */ - GPIO_FN(SIM0_DET), - GPIO_FN(CSCIF0_RTS), - - /* Port33 */ - GPIO_FN(SIM1_DET), - GPIO_FN(CSCIF0_CTS), - - /* Port34 */ - GPIO_FN(SIM0_PWRON), - GPIO_FN(CSCIF0_SCK), - - /* Port35 */ - GPIO_FN(CSCIF1_RTS), - - /* Port36 */ - GPIO_FN(CSCIF1_CTS), - - /* Port37 */ - GPIO_FN(CSCIF1_SCK), - - /* Port38 */ - GPIO_FN(TPU0TO1), - GPIO_FN(CHSCIF0_HRTS), - - /* Port39 */ - GPIO_FN(TPU0TO2), - GPIO_FN(CHSCIF0_HCTS), - - /* Port40 */ - GPIO_FN(TPU0TO3), - GPIO_FN(CHSCIF0_HSCK), - - /* Port64 */ - GPIO_FN(PDM0_DATA), - - /* Port65 */ - GPIO_FN(PDM1_DATA), - - /* Port66 */ - GPIO_FN(HSI_RX_WAKE), - GPIO_FN(MSIOF3_SYNC), - GPIO_FN(GenIO4), - - /* Port67 */ - GPIO_FN(HSI_RX_READY), - GPIO_FN(GIO_OUT3_67), - GPIO_FN(CHSCIF1_HTX), - - /* Port68 */ - GPIO_FN(HSI_RX_FLAG), - GPIO_FN(MSIOF3_TXD), - GPIO_FN(GIO_OUT4_68), - - /* Port69 */ - GPIO_FN(HSI_RX_DATA), - GPIO_FN(MSIOF3_RXD), - GPIO_FN(GIO_OUT5_69), - - /* Port70 */ - GPIO_FN(HSI_TX_FLAG), - GPIO_FN(GIO_OUT1_70), - GPIO_FN(HSIC_TSTCLK0), - GPIO_FN(CHSCIF1_HRTS), - - /* Port71 */ - GPIO_FN(HSI_TX_DATA), - GPIO_FN(GIO_OUT2_71), - GPIO_FN(HSIC_TSTCLK1), - GPIO_FN(CHSCIF1_HCTS), - - /* Port72 */ - GPIO_FN(HSI_TX_WAKE), - GPIO_FN(GenIO8), - GPIO_FN(CHSCIF1_HRX), - - /* Port73 */ - GPIO_FN(HSI_TX_READY), - GPIO_FN(MSIOF3_SCK), - GPIO_FN(GIO_OUT0_73), - - /* Port74 - Port85 */ - GPIO_FN(IRDA_OUT), - GPIO_FN(IRDA_IN), - GPIO_FN(IRDA_FIRSEL), - GPIO_FN(TPU0TO0), - GPIO_FN(DIGRFEN), - GPIO_FN(GPS_TIMESTAMP), - GPIO_FN(TXP), - GPIO_FN(TXP2), - GPIO_FN(COEX_0), - GPIO_FN(COEX_1), - - /* Port96 - Port101 */ - GPIO_FN(KEYIN0), - GPIO_FN(KEYIN1), - GPIO_FN(KEYIN2), - GPIO_FN(KEYIN3), - GPIO_FN(KEYIN4), - GPIO_FN(KEYIN5), - - /* Port102 */ - GPIO_FN(KEYIN6), - - /* Port103 */ - GPIO_FN(KEYIN7), - - /* Port104 - Port108 */ - GPIO_FN(KEYOUT0), - GPIO_FN(KEYOUT1), - GPIO_FN(KEYOUT2), - GPIO_FN(KEYOUT3), - GPIO_FN(KEYOUT4), - - /* Port109 */ - GPIO_FN(KEYOUT5), - - /* Port110 */ - GPIO_FN(KEYOUT6), - - /* Port111 */ - GPIO_FN(KEYOUT7), - GPIO_FN(RFANAEN), - - /* Port112 */ - GPIO_FN(KEYIN8), - GPIO_FN(KEYOUT8), - GPIO_FN(SF_IRQ_04), - - /* Port113 */ - GPIO_FN(KEYIN9), - GPIO_FN(KEYOUT9), - GPIO_FN(SF_IRQ_05), - - /* Port114 */ - GPIO_FN(KEYIN10), - GPIO_FN(KEYOUT10), - GPIO_FN(SF_IRQ_06), - - /* Port115 */ - GPIO_FN(KEYIN11), - GPIO_FN(KEYOUT11), - GPIO_FN(SF_IRQ_07), - - /* Port116 */ - GPIO_FN(CSCIF0_TX), - - /* Port117 */ - GPIO_FN(CSCIF0_RX), - - /* Port118 */ - GPIO_FN(CSCIF1_TX), - - /* Port119 */ - GPIO_FN(CSCIF1_RX), - - /* Port120 */ - GPIO_FN(SF_PORT_1_120), - GPIO_FN(DU0_CDE), - - /* Port121 */ - GPIO_FN(SF_PORT_0_121), - - /* Port122 */ - GPIO_FN(CHSCIF0_HTX), - - /* Port123 */ - GPIO_FN(CHSCIF0_HRX), - - /* Port124 */ - GPIO_FN(ISP_STROBE_124), - - /* Port125 */ - GPIO_FN(STP_ISD_0), - GPIO_FN(PDM4_CLK_125), - GPIO_FN(MSIOF2_TXD), - GPIO_FN(SIM0_VOLTSEL0), - - /* Port126 */ - GPIO_FN(TS_SDEN), - GPIO_FN(MSIOF7_SYNC), - GPIO_FN(STP_ISEN_1), - - /* Port128 */ - GPIO_FN(STP_ISEN_0), - GPIO_FN(PDM1_OUTDATA_128), - GPIO_FN(MSIOF2_SYNC), - GPIO_FN(SIM1_VOLTSEL1), - - /* Port129 */ - GPIO_FN(TS_SPSYNC), - 
GPIO_FN(MSIOF7_RXD), - GPIO_FN(STP_ISSYNC_1), - - /* Port130 */ - GPIO_FN(STP_ISSYNC_0), - GPIO_FN(PDM4_DATA_130), - GPIO_FN(MSIOF2_RXD), - GPIO_FN(SIM0_VOLTSEL1), - - /* Port131 */ - GPIO_FN(STP_OPWM_0), - GPIO_FN(SIM1_PWRON), - - /* Port132 */ - GPIO_FN(TS_SCK), - GPIO_FN(MSIOF7_SCK), - GPIO_FN(STP_ISCLK_1), - - /* Port133 */ - GPIO_FN(STP_ISCLK_0), - GPIO_FN(PDM1_OUTCLK_133), - GPIO_FN(MSIOF2_SCK), - GPIO_FN(SIM1_VOLTSEL0), - - /* Port134 */ - GPIO_FN(TS_SDAT), - GPIO_FN(MSIOF7_TXD), - GPIO_FN(STP_ISD_1), - - /* Port160 - Port178 */ - GPIO_FN(MMCD0_0), - GPIO_FN(MMCD0_1), - GPIO_FN(MMCD0_2), - GPIO_FN(MMCD0_3), - GPIO_FN(MMCD0_4), - GPIO_FN(MMCD0_5), - GPIO_FN(MMCD0_6), - GPIO_FN(MMCD0_7), - GPIO_FN(MMCCMD0), - GPIO_FN(MMCCLK0), - GPIO_FN(MMCRST), - - /* Port192 - Port200 FN1 */ - GPIO_FN(A10), - GPIO_FN(A9), - GPIO_FN(A8), - GPIO_FN(A7), - GPIO_FN(A6), - GPIO_FN(A5), - GPIO_FN(A4), - GPIO_FN(A3), - GPIO_FN(A2), - - /* Port192 - Port200 FN2 */ - GPIO_FN(MMCD1_7), - GPIO_FN(MMCD1_6), - GPIO_FN(MMCD1_5), - GPIO_FN(MMCD1_4), - GPIO_FN(MMCD1_3), - GPIO_FN(MMCD1_2), - GPIO_FN(MMCD1_1), - GPIO_FN(MMCD1_0), - GPIO_FN(MMCCMD1), - - /* Port192 - Port200 IRQ */ - - /* Port201 */ - GPIO_FN(A1), - - /* Port202 */ - GPIO_FN(A0), - GPIO_FN(BS), - - /* Port203 */ - GPIO_FN(CKO), - GPIO_FN(MMCCLK1), - - /* Port204 */ - GPIO_FN(CS0_N), - GPIO_FN(SIM0_GPO1), - - /* Port205 */ - GPIO_FN(CS2_N), - GPIO_FN(SIM0_GPO2), - - /* Port206 */ - GPIO_FN(CS4_N), - GPIO_FN(VIO_VD), - GPIO_FN(SIM1_GPO0), - - /* Port207 - Port212 FN1 */ - GPIO_FN(D15), - GPIO_FN(D14), - GPIO_FN(D13), - GPIO_FN(D12), - GPIO_FN(D11), - GPIO_FN(D10), - - /* Port207 - Port212 FN5 */ - GPIO_FN(GIO_OUT15), - GPIO_FN(GIO_OUT14), - GPIO_FN(GIO_OUT13), - GPIO_FN(GIO_OUT12), - GPIO_FN(WGM_TXP2), - GPIO_FN(WGM_GPS_TIMEM_ASK_RFCLK), - - /* Port213 - Port222 FN1 */ - GPIO_FN(D9), - GPIO_FN(D8), - GPIO_FN(D7), - GPIO_FN(D6), - GPIO_FN(D5), - GPIO_FN(D4), - GPIO_FN(D3), - GPIO_FN(D2), - GPIO_FN(D1), - GPIO_FN(D0), - - /* Port213 - Port222 FN2 */ - GPIO_FN(VIO_D9), - GPIO_FN(VIO_D8), - GPIO_FN(VIO_D7), - GPIO_FN(VIO_D6), - GPIO_FN(VIO_D5), - GPIO_FN(VIO_D4), - GPIO_FN(VIO_D3), - GPIO_FN(VIO_D2), - GPIO_FN(VIO_D1), - GPIO_FN(VIO_D0), - - /* Port213 - Port222 FN5 */ - GPIO_FN(GIO_OUT9), - GPIO_FN(GIO_OUT8), - GPIO_FN(GIO_OUT7), - GPIO_FN(GIO_OUT6), - GPIO_FN(GIO_OUT5_217), - GPIO_FN(GIO_OUT4_218), - GPIO_FN(GIO_OUT3_219), - GPIO_FN(GIO_OUT2_220), - GPIO_FN(GIO_OUT1_221), - GPIO_FN(GIO_OUT0_222), - - /* Port224 */ - GPIO_FN(RDWR_224), - GPIO_FN(VIO_HD), - GPIO_FN(SIM1_GPO2), - - /* Port225 */ - GPIO_FN(RD_N), - - /* Port226 */ - GPIO_FN(WAIT_N), - GPIO_FN(VIO_CLK), - GPIO_FN(SIM1_GPO1), - - /* Port227 */ - GPIO_FN(WE0_N), - GPIO_FN(RDWR_227), - - /* Port228 */ - GPIO_FN(WE1_N), - GPIO_FN(SIM0_GPO0), - - /* Port229 */ - GPIO_FN(PWMO), - GPIO_FN(VIO_CKO1_229), - - /* Port230 */ - GPIO_FN(SLIM_CLK), - GPIO_FN(VIO_CKO4_230), - - /* Port231 */ - GPIO_FN(SLIM_DATA), - GPIO_FN(VIO_CKO5_231), - - /* Port232 */ - GPIO_FN(VIO_CKO2_232), - GPIO_FN(SF_PORT_0_232), - - /* Port233 */ - GPIO_FN(VIO_CKO3_233), - GPIO_FN(SF_PORT_1_233), - - /* Port234 */ - GPIO_FN(FSIACK), - GPIO_FN(PDM3_CLK_234), - GPIO_FN(ISP_IRIS1_234), - - /* Port235 */ - GPIO_FN(FSIAISLD), - GPIO_FN(PDM3_DATA_235), - - /* Port236 */ - GPIO_FN(FSIAOMC), - GPIO_FN(PDM0_OUTCLK_236), - GPIO_FN(ISP_IRIS0_236), - - /* Port237 */ - GPIO_FN(FSIAOLR), - GPIO_FN(FSIAILR), - - /* Port238 */ - GPIO_FN(FSIAOBT), - GPIO_FN(FSIAIBT), - - /* Port239 */ - GPIO_FN(FSIAOSLD), - GPIO_FN(PDM0_OUTDATA_239), - - /* 
Port240 */ - GPIO_FN(FSIBISLD), - - /* Port241 */ - GPIO_FN(FSIBOLR), - GPIO_FN(FSIBILR), - - /* Port242 */ - GPIO_FN(FSIBOMC), - GPIO_FN(ISP_SHUTTER1_242), - - /* Port243 */ - GPIO_FN(FSIBOBT), - GPIO_FN(FSIBIBT), - - /* Port244 */ - GPIO_FN(FSIBOSLD), - GPIO_FN(FSIASPDIF), - - /* Port245 */ - GPIO_FN(FSIBCK), - GPIO_FN(ISP_SHUTTER0_245), - - /* Port246 - Port250 FN1 */ - GPIO_FN(ISP_IRIS1_246), - GPIO_FN(ISP_IRIS0_247), - GPIO_FN(ISP_SHUTTER1_248), - GPIO_FN(ISP_SHUTTER0_249), - GPIO_FN(ISP_STROBE_250), - - /* Port256 - Port258 */ - GPIO_FN(MSIOF0_SYNC), - GPIO_FN(MSIOF0_RXD), - GPIO_FN(MSIOF0_SCK), - - /* Port259 */ - GPIO_FN(MSIOF0_SS2), - GPIO_FN(VIO_CKO3_259), - - /* Port260 */ - GPIO_FN(MSIOF0_TXD), - - /* Port261 */ - GPIO_FN(CHSCIF1_HSCK), - - /* Port262 */ - - /* Port263 - Port266 FN1 */ - GPIO_FN(MSIOF1_SS2), - GPIO_FN(MSIOF1_TXD), - GPIO_FN(MSIOF1_RXD), - GPIO_FN(MSIOF1_SS1), - - /* Port263 - Port266 FN4 */ - GPIO_FN(MSIOF5_SS2), - GPIO_FN(MSIOF5_TXD), - GPIO_FN(MSIOF5_RXD), - GPIO_FN(MSIOF5_SS1), - - /* Port267 */ - GPIO_FN(MSIOF0_SS1), - - /* Port268 */ - GPIO_FN(MSIOF1_SCK), - GPIO_FN(MSIOF5_SCK), - - /* Port269 */ - GPIO_FN(MSIOF1_SYNC), - GPIO_FN(MSIOF5_SYNC), - - /* Port270 - Port273 FN1 */ - GPIO_FN(MSIOF2_SS1), - GPIO_FN(MSIOF2_SS2), - GPIO_FN(MSIOF3_SS2), - GPIO_FN(MSIOF3_SS1), - - /* Port270 - Port273 FN3 */ - GPIO_FN(VIO_CKO5_270), - GPIO_FN(VIO_CKO2_271), - GPIO_FN(VIO_CKO1_272), - GPIO_FN(VIO_CKO4_273), - - /* Port274 */ - GPIO_FN(MSIOF4_SS2), - GPIO_FN(TPU1TO0), - - /* Port275 - Port280 */ - GPIO_FN(IC_DP), - GPIO_FN(SIM0_RST), - GPIO_FN(IC_DM), - GPIO_FN(SIM0_BSICOMP), - GPIO_FN(SIM0_CLK), - GPIO_FN(SIM0_IO), - - /* Port281 */ - GPIO_FN(SIM1_IO), - GPIO_FN(PDM2_DATA_281), - - /* Port282 */ - GPIO_FN(SIM1_CLK), - GPIO_FN(PDM2_CLK_282), - - /* Port283 */ - GPIO_FN(SIM1_RST), - - /* Port289 */ - GPIO_FN(SDHID1_0), - GPIO_FN(STMDATA0_2), - - /* Port290 */ - GPIO_FN(SDHID1_1), - GPIO_FN(STMDATA1_2), - - /* Port291 - Port294 FN1 */ - GPIO_FN(SDHID1_2), - GPIO_FN(SDHID1_3), - GPIO_FN(SDHICLK1), - GPIO_FN(SDHICMD1), - - /* Port291 - Port294 FN3 */ - GPIO_FN(STMDATA2_2), - GPIO_FN(STMDATA3_2), - GPIO_FN(STMCLK_2), - GPIO_FN(STMSIDI_2), - - /* Port295 */ - GPIO_FN(SDHID2_0), - GPIO_FN(MSIOF4_TXD), - GPIO_FN(MSIOF6_TXD), - - /* Port296 */ - GPIO_FN(SDHID2_1), - GPIO_FN(MSIOF6_SS2), - - /* Port297 - Port300 FN1 */ - GPIO_FN(SDHID2_2), - GPIO_FN(SDHID2_3), - GPIO_FN(SDHICLK2), - GPIO_FN(SDHICMD2), - - /* Port297 - Port300 FN2 */ - GPIO_FN(MSIOF4_RXD), - GPIO_FN(MSIOF4_SYNC), - GPIO_FN(MSIOF4_SCK), - GPIO_FN(MSIOF4_SS1), - - /* Port297 - Port300 FN3 */ - - /* Port297 - Port300 FN4 */ - GPIO_FN(MSIOF6_RXD), - GPIO_FN(MSIOF6_SYNC), - GPIO_FN(MSIOF6_SCK), - GPIO_FN(MSIOF6_SS1), - - /* Port301 */ - GPIO_FN(SDHICD0), - - /* Port302 - Port306 FN1 */ - GPIO_FN(SDHID0_0), - GPIO_FN(SDHID0_1), - GPIO_FN(SDHID0_2), - GPIO_FN(SDHID0_3), - GPIO_FN(SDHICMD0), - - /* Port302 - Port306 FN3 */ - GPIO_FN(STMDATA0_1), - GPIO_FN(STMDATA1_1), - GPIO_FN(STMDATA2_1), - GPIO_FN(STMDATA3_1), - GPIO_FN(STMSIDI_1), - - /* Port307 */ - GPIO_FN(SDHIWP0), - - /* Port308 */ - GPIO_FN(SDHICLK0), - GPIO_FN(STMCLK_1), - - /* Port320 - Port329 */ -}; - static const struct pinmux_cfg_reg pinmux_config_regs[] = { PORTCR(0, 0xe6050000), @@ -3456,9 +2722,6 @@ const struct sh_pfc_soc_info r8a73a4_pinmux_info = { .functions = pinmux_functions, .nr_functions = ARRAY_SIZE(pinmux_functions), - .func_gpios = pinmux_func_gpios, - .nr_func_gpios = ARRAY_SIZE(pinmux_func_gpios), - .cfg_regs = pinmux_config_regs, .data_regs 
= pinmux_data_regs, -- GitLab From 17924ac1b9d42da19320b35daa0fe6ccddd3c3c2 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:51:43 +0900 Subject: [PATCH 0379/3163] ARM: shmobile: r8a73a4: Remove all GPIO enums Function GPIOs are not used anymore, and all code uses the GPIO numbers directly. Remove the GPIO enumeration. Signed-off-by: Magnus Damm Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/r8a73a4.h | 815 ------------------ 1 file changed, 815 deletions(-) diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h index 9162e8645110..f043103e32c9 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a73a4.h +++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h @@ -1,821 +1,6 @@ #ifndef __ASM_R8A73A4_H__ #define __ASM_R8A73A4_H__ -/* - * Pin Function Controller: - * GPIO_FN_xx - GPIO used to select pin function - * GPIO_PORTxx - GPIO mapped to real I/O pin on CPU - */ -enum { - - /* PORT */ - GPIO_PORT0, GPIO_PORT1, GPIO_PORT2, GPIO_PORT3, GPIO_PORT4, - GPIO_PORT5, GPIO_PORT6, GPIO_PORT7, GPIO_PORT8, GPIO_PORT9, - - GPIO_PORT10, GPIO_PORT11, GPIO_PORT12, GPIO_PORT13, GPIO_PORT14, - GPIO_PORT15, GPIO_PORT16, GPIO_PORT17, GPIO_PORT18, GPIO_PORT19, - - GPIO_PORT20, GPIO_PORT21, GPIO_PORT22, GPIO_PORT23, GPIO_PORT24, - GPIO_PORT25, GPIO_PORT26, GPIO_PORT27, GPIO_PORT28, GPIO_PORT29, - - GPIO_PORT30, GPIO_PORT32, GPIO_PORT33, GPIO_PORT34, - GPIO_PORT35, GPIO_PORT36, GPIO_PORT37, GPIO_PORT38, GPIO_PORT39, - - GPIO_PORT40, GPIO_PORT64, - GPIO_PORT65, GPIO_PORT66, GPIO_PORT67, GPIO_PORT68, GPIO_PORT69, - - GPIO_PORT70, GPIO_PORT71, GPIO_PORT72, GPIO_PORT73, GPIO_PORT74, - GPIO_PORT75, GPIO_PORT76, GPIO_PORT77, GPIO_PORT78, GPIO_PORT79, - - GPIO_PORT80, GPIO_PORT81, GPIO_PORT82, GPIO_PORT83, GPIO_PORT84, - GPIO_PORT85, GPIO_PORT96, GPIO_PORT97, GPIO_PORT98, GPIO_PORT99, - - GPIO_PORT100, GPIO_PORT101, GPIO_PORT102, GPIO_PORT103, GPIO_PORT104, - GPIO_PORT105, GPIO_PORT106, GPIO_PORT107, GPIO_PORT108, GPIO_PORT109, - - GPIO_PORT110, GPIO_PORT111, GPIO_PORT112, GPIO_PORT113, GPIO_PORT114, - GPIO_PORT115, GPIO_PORT116, GPIO_PORT117, GPIO_PORT118, GPIO_PORT119, - - GPIO_PORT120, GPIO_PORT121, GPIO_PORT122, GPIO_PORT123, GPIO_PORT124, - GPIO_PORT125, GPIO_PORT126, GPIO_PORT128, GPIO_PORT129, - - GPIO_PORT130, GPIO_PORT131, GPIO_PORT132, GPIO_PORT133, GPIO_PORT134, - - GPIO_PORT160, GPIO_PORT161, GPIO_PORT162, GPIO_PORT163, GPIO_PORT164, - GPIO_PORT165, GPIO_PORT166, GPIO_PORT167, GPIO_PORT168, GPIO_PORT169, - - GPIO_PORT170, GPIO_PORT171, GPIO_PORT172, GPIO_PORT173, GPIO_PORT174, - GPIO_PORT175, GPIO_PORT176, GPIO_PORT177, GPIO_PORT178, - - GPIO_PORT192, GPIO_PORT193, GPIO_PORT194, - GPIO_PORT195, GPIO_PORT196, GPIO_PORT197, GPIO_PORT198, GPIO_PORT199, - - GPIO_PORT200, GPIO_PORT201, GPIO_PORT202, GPIO_PORT203, GPIO_PORT204, - GPIO_PORT205, GPIO_PORT206, GPIO_PORT207, GPIO_PORT208, GPIO_PORT209, - - GPIO_PORT210, GPIO_PORT211, GPIO_PORT212, GPIO_PORT213, GPIO_PORT214, - GPIO_PORT215, GPIO_PORT216, GPIO_PORT217, GPIO_PORT218, GPIO_PORT219, - - GPIO_PORT220, GPIO_PORT221, GPIO_PORT222, GPIO_PORT224, - GPIO_PORT225, GPIO_PORT226, GPIO_PORT227, GPIO_PORT228, GPIO_PORT229, - - GPIO_PORT230, GPIO_PORT231, GPIO_PORT232, GPIO_PORT233, GPIO_PORT234, - GPIO_PORT235, GPIO_PORT236, GPIO_PORT237, GPIO_PORT238, GPIO_PORT239, - - GPIO_PORT240, GPIO_PORT241, GPIO_PORT242, GPIO_PORT243, GPIO_PORT244, - GPIO_PORT245, GPIO_PORT246, GPIO_PORT247, GPIO_PORT248, GPIO_PORT249, - - GPIO_PORT250, GPIO_PORT256,
GPIO_PORT257, GPIO_PORT258, GPIO_PORT259, - - GPIO_PORT260, GPIO_PORT261, GPIO_PORT262, GPIO_PORT263, GPIO_PORT264, - GPIO_PORT265, GPIO_PORT266, GPIO_PORT267, GPIO_PORT268, GPIO_PORT269, - - GPIO_PORT270, GPIO_PORT271, GPIO_PORT272, GPIO_PORT273, GPIO_PORT274, - GPIO_PORT275, GPIO_PORT276, GPIO_PORT277, GPIO_PORT278, GPIO_PORT279, - - GPIO_PORT280, GPIO_PORT281, GPIO_PORT282, GPIO_PORT283, - GPIO_PORT288, GPIO_PORT289, - - GPIO_PORT290, GPIO_PORT291, GPIO_PORT292, GPIO_PORT293, GPIO_PORT294, - GPIO_PORT295, GPIO_PORT296, GPIO_PORT297, GPIO_PORT298, GPIO_PORT299, - - GPIO_PORT300, GPIO_PORT301, GPIO_PORT302, GPIO_PORT303, GPIO_PORT304, - GPIO_PORT305, GPIO_PORT306, GPIO_PORT307, GPIO_PORT308, - - GPIO_PORT320, GPIO_PORT321, GPIO_PORT322, GPIO_PORT323, GPIO_PORT324, - GPIO_PORT325, GPIO_PORT326, GPIO_PORT327, GPIO_PORT328, GPIO_PORT329, - - /* Port0 */ - GPIO_FN_LCDD0 = 330, - GPIO_FN_PDM2_CLK_0, - GPIO_FN_DU0_DR0, - - /* Port1 */ - GPIO_FN_LCDD1, - GPIO_FN_PDM2_DATA_1, - GPIO_FN_DU0_DR19, - - /* Port2 */ - GPIO_FN_LCDD2, - GPIO_FN_PDM3_CLK_2, - GPIO_FN_DU0_DR2, - - /* Port3 */ - GPIO_FN_LCDD3, - GPIO_FN_PDM3_DATA_3, - GPIO_FN_DU0_DR3, - - /* Port4 */ - GPIO_FN_LCDD4, - GPIO_FN_PDM4_CLK_4, - GPIO_FN_DU0_DR4, - - /* Port5 */ - GPIO_FN_LCDD5, - GPIO_FN_PDM4_DATA_5, - GPIO_FN_DU0_DR5, - - /* Port6 */ - GPIO_FN_LCDD6, - GPIO_FN_PDM0_OUTCLK_6, - GPIO_FN_DU0_DR6, - - /* Port7 */ - GPIO_FN_LCDD7, - GPIO_FN_PDM0_OUTDATA_7, - GPIO_FN_DU0_DR7, - - /* Port8 */ - GPIO_FN_LCDD8, - GPIO_FN_PDM1_OUTCLK_8, - GPIO_FN_DU0_DG0, - - /* Port9 */ - GPIO_FN_LCDD9, - GPIO_FN_PDM1_OUTDATA_9, - GPIO_FN_DU0_DG1, - - /* Port10 */ - GPIO_FN_LCDD10, - GPIO_FN_FSICCK, - GPIO_FN_DU0_DG2, - - /* Port11 */ - GPIO_FN_LCDD11, - GPIO_FN_FSICISLD, - GPIO_FN_DU0_DG3, - - /* Port12 */ - GPIO_FN_LCDD12, - GPIO_FN_FSICOMC, - GPIO_FN_DU0_DG4, - - /* Port13 */ - GPIO_FN_LCDD13, - GPIO_FN_FSICOLR, - GPIO_FN_FSICILR, - GPIO_FN_DU0_DG5, - - /* Port14 */ - GPIO_FN_LCDD14, - GPIO_FN_FSICOBT, - GPIO_FN_FSICIBT, - GPIO_FN_DU0_DG6, - - /* Port15 */ - GPIO_FN_LCDD15, - GPIO_FN_FSICOSLD, - GPIO_FN_DU0_DG7, - - /* Port16 */ - GPIO_FN_LCDD16, - GPIO_FN_TPU1TO1, - GPIO_FN_DU0_DB0, - - /* Port17 */ - GPIO_FN_LCDD17, - GPIO_FN_SF_IRQ_00, - GPIO_FN_DU0_DB1, - - /* Port18 */ - GPIO_FN_LCDD18, - GPIO_FN_SF_IRQ_01, - GPIO_FN_DU0_DB2, - - /* Port19 */ - GPIO_FN_LCDD19, - GPIO_FN_DU0_DB3, - - /* Port20 */ - GPIO_FN_LCDD20, - GPIO_FN_DU0_DB4, - - /* Port21 */ - GPIO_FN_LCDD21, - GPIO_FN_DU0_DB5, - - /* Port22 */ - GPIO_FN_LCDD22, - GPIO_FN_DU0_DB6, - - /* Port23 */ - GPIO_FN_LCDD23, - GPIO_FN_DU0_DB7, - - /* Port24 */ - GPIO_FN_LCDHSYN, - GPIO_FN_LCDCS, - GPIO_FN_DU0_EXHSYNC_N_CSYNC_N_HSYNC_N, - - /* Port25 */ - GPIO_FN_LCDVSYN, - GPIO_FN_DU0_EXVSYNC_N_VSYNC_N_CSYNC_N, - - /* Port26 */ - GPIO_FN_LCDDCK, - GPIO_FN_LCDWR, - GPIO_FN_DU0_DOTCLKIN, - - /* Port27 */ - GPIO_FN_LCDDISP, - GPIO_FN_LCDRS, - GPIO_FN_DU0_DOTCLKOUT, - - /* Port28 */ - GPIO_FN_LCDRD_N, - GPIO_FN_DU0_DOTCLKOUTB, - - /* Port29 */ - GPIO_FN_LCDLCLK, - GPIO_FN_SF_IRQ_02, - GPIO_FN_DU0_DISP_CSYNC_N_DE, - - /* Port30 */ - GPIO_FN_LCDDON, - GPIO_FN_SF_IRQ_03, - GPIO_FN_DU0_ODDF_N_CLAMP, - - /* Port32 */ - GPIO_FN_SIM0_DET, - GPIO_FN_CSCIF0_RTS, - - /* Port33 */ - GPIO_FN_SIM1_DET, - GPIO_FN_CSCIF0_CTS, - - /* Port34 */ - GPIO_FN_SIM0_PWRON, - GPIO_FN_CSCIF0_SCK, - - /* Port35 */ - GPIO_FN_CSCIF1_RTS, - - /* Port36 */ - GPIO_FN_CSCIF1_CTS, - - /* Port37 */ - GPIO_FN_CSCIF1_SCK, - - /* Port38 */ - GPIO_FN_TPU0TO1, - GPIO_FN_CHSCIF0_HRTS, - - /* Port39 */ - GPIO_FN_TPU0TO2, - 
GPIO_FN_CHSCIF0_HCTS, - - /* Port40 */ - GPIO_FN_TPU0TO3, - GPIO_FN_CHSCIF0_HSCK, - - /* Port64 */ - GPIO_FN_PDM0_DATA, - - /* Port65 */ - GPIO_FN_PDM1_DATA, - - /* Port66 */ - GPIO_FN_HSI_RX_WAKE, - GPIO_FN_MSIOF3_SYNC, - GPIO_FN_GenIO4, - - /* Port67 */ - GPIO_FN_HSI_RX_READY, - GPIO_FN_GIO_OUT3_67, - GPIO_FN_CHSCIF1_HTX, - - /* Port68 */ - GPIO_FN_HSI_RX_FLAG, - GPIO_FN_MSIOF3_TXD, - GPIO_FN_GIO_OUT4_68, - - /* Port69 */ - GPIO_FN_HSI_RX_DATA, - GPIO_FN_MSIOF3_RXD, - GPIO_FN_GIO_OUT5_69, - - /* Port70 */ - GPIO_FN_HSI_TX_FLAG, - GPIO_FN_GIO_OUT1_70, - GPIO_FN_HSIC_TSTCLK0, - GPIO_FN_CHSCIF1_HRTS, - - /* Port71 */ - GPIO_FN_HSI_TX_DATA, - GPIO_FN_GIO_OUT2_71, - GPIO_FN_HSIC_TSTCLK1, - GPIO_FN_CHSCIF1_HCTS, - - /* Port72 */ - GPIO_FN_HSI_TX_WAKE, - GPIO_FN_GenIO8, - GPIO_FN_CHSCIF1_HRX, - - /* Port73 */ - GPIO_FN_HSI_TX_READY, - GPIO_FN_MSIOF3_SCK, - GPIO_FN_GIO_OUT0_73, - - /* Port74 - Port85 */ - GPIO_FN_IRDA_OUT, - GPIO_FN_IRDA_IN, - GPIO_FN_IRDA_FIRSEL, - GPIO_FN_TPU0TO0, - GPIO_FN_DIGRFEN, - GPIO_FN_GPS_TIMESTAMP, - GPIO_FN_TXP, - GPIO_FN_TXP2, - GPIO_FN_COEX_0, - GPIO_FN_COEX_1, - - /* Port96 - Port101 */ - GPIO_FN_KEYIN0, - GPIO_FN_KEYIN1, - GPIO_FN_KEYIN2, - GPIO_FN_KEYIN3, - GPIO_FN_KEYIN4, - GPIO_FN_KEYIN5, - - /* Port102 */ - GPIO_FN_KEYIN6, - - /* Port103 */ - GPIO_FN_KEYIN7, - - /* Port104 - Port108 */ - GPIO_FN_KEYOUT0, - GPIO_FN_KEYOUT1, - GPIO_FN_KEYOUT2, - GPIO_FN_KEYOUT3, - GPIO_FN_KEYOUT4, - - /* Port109 */ - GPIO_FN_KEYOUT5, - - /* Port110 */ - GPIO_FN_KEYOUT6, - - /* Port111 */ - GPIO_FN_KEYOUT7, - GPIO_FN_RFANAEN, - - /* Port112 */ - GPIO_FN_KEYIN8, - GPIO_FN_KEYOUT8, - GPIO_FN_SF_IRQ_04, - - /* Port113 */ - GPIO_FN_KEYIN9, - GPIO_FN_KEYOUT9, - GPIO_FN_SF_IRQ_05, - - /* Port114 */ - GPIO_FN_KEYIN10, - GPIO_FN_KEYOUT10, - GPIO_FN_SF_IRQ_06, - - /* Port115 */ - GPIO_FN_KEYIN11, - GPIO_FN_KEYOUT11, - GPIO_FN_SF_IRQ_07, - - /* Port116 */ - GPIO_FN_CSCIF0_TX, - - /* Port117 */ - GPIO_FN_CSCIF0_RX, - - /* Port118 */ - GPIO_FN_CSCIF1_TX, - - /* Port119 */ - GPIO_FN_CSCIF1_RX, - - /* Port120 */ - GPIO_FN_SF_PORT_1_120, - GPIO_FN_DU0_CDE, - - /* Port121 */ - GPIO_FN_SF_PORT_0_121, - - /* Port122 */ - GPIO_FN_CHSCIF0_HTX, - - /* Port123 */ - GPIO_FN_CHSCIF0_HRX, - - /* Port124 */ - GPIO_FN_ISP_STROBE_124, - - /* Port125 */ - GPIO_FN_STP_ISD_0, - GPIO_FN_PDM4_CLK_125, - GPIO_FN_MSIOF2_TXD, - GPIO_FN_SIM0_VOLTSEL0, - - /* Port126 */ - GPIO_FN_TS_SDEN, - GPIO_FN_MSIOF7_SYNC, - GPIO_FN_STP_ISEN_1, - - /* Port128 */ - GPIO_FN_STP_ISEN_0, - GPIO_FN_PDM1_OUTDATA_128, - GPIO_FN_MSIOF2_SYNC, - GPIO_FN_SIM1_VOLTSEL1, - - /* Port129 */ - GPIO_FN_TS_SPSYNC, - GPIO_FN_MSIOF7_RXD, - GPIO_FN_STP_ISSYNC_1, - - /* Port130 */ - GPIO_FN_STP_ISSYNC_0, - GPIO_FN_PDM4_DATA_130, - GPIO_FN_MSIOF2_RXD, - GPIO_FN_SIM0_VOLTSEL1, - - /* Port131 */ - GPIO_FN_STP_OPWM_0, - GPIO_FN_SIM1_PWRON, - - /* Port132 */ - GPIO_FN_TS_SCK, - GPIO_FN_MSIOF7_SCK, - GPIO_FN_STP_ISCLK_1, - - /* Port133 */ - GPIO_FN_STP_ISCLK_0, - GPIO_FN_PDM1_OUTCLK_133, - GPIO_FN_MSIOF2_SCK, - GPIO_FN_SIM1_VOLTSEL0, - - /* Port134 */ - GPIO_FN_TS_SDAT, - GPIO_FN_MSIOF7_TXD, - GPIO_FN_STP_ISD_1, - - /* Port160 - Port178 */ - GPIO_FN_MMCD0_0, - GPIO_FN_MMCD0_1, - GPIO_FN_MMCD0_2, - GPIO_FN_MMCD0_3, - GPIO_FN_MMCD0_4, - GPIO_FN_MMCD0_5, - GPIO_FN_MMCD0_6, - GPIO_FN_MMCD0_7, - GPIO_FN_MMCCMD0, - GPIO_FN_MMCCLK0, - GPIO_FN_MMCRST, - - /* Port192 - Port200 FN1 */ - GPIO_FN_A10, - GPIO_FN_A9, - GPIO_FN_A8, - GPIO_FN_A7, - GPIO_FN_A6, - GPIO_FN_A5, - GPIO_FN_A4, - GPIO_FN_A3, - GPIO_FN_A2, - - /* Port192 - Port200 FN2 */ - GPIO_FN_MMCD1_7, - 
GPIO_FN_MMCD1_6, - GPIO_FN_MMCD1_5, - GPIO_FN_MMCD1_4, - GPIO_FN_MMCD1_3, - GPIO_FN_MMCD1_2, - GPIO_FN_MMCD1_1, - GPIO_FN_MMCD1_0, - GPIO_FN_MMCCMD1, - - /* Port192 - Port200 IRQ */ - - /* Port201 */ - GPIO_FN_A1, - - /* Port202 */ - GPIO_FN_A0, - GPIO_FN_BS, - - /* Port203 */ - GPIO_FN_CKO, - GPIO_FN_MMCCLK1, - - /* Port204 */ - GPIO_FN_CS0_N, - GPIO_FN_SIM0_GPO1, - - /* Port205 */ - GPIO_FN_CS2_N, - GPIO_FN_SIM0_GPO2, - - /* Port206 */ - GPIO_FN_CS4_N, - GPIO_FN_VIO_VD, - GPIO_FN_SIM1_GPO0, - - /* Port207 - Port212 FN1 */ - GPIO_FN_D15, - GPIO_FN_D14, - GPIO_FN_D13, - GPIO_FN_D12, - GPIO_FN_D11, - GPIO_FN_D10, - - /* Port207 - Port212 FN5 */ - GPIO_FN_GIO_OUT15, - GPIO_FN_GIO_OUT14, - GPIO_FN_GIO_OUT13, - GPIO_FN_GIO_OUT12, - GPIO_FN_WGM_TXP2, - GPIO_FN_WGM_GPS_TIMEM_ASK_RFCLK, - - /* Port213 - Port222 FN1 */ - GPIO_FN_D9, - GPIO_FN_D8, - GPIO_FN_D7, - GPIO_FN_D6, - GPIO_FN_D5, - GPIO_FN_D4, - GPIO_FN_D3, - GPIO_FN_D2, - GPIO_FN_D1, - GPIO_FN_D0, - - /* Port213 - Port222 FN2 */ - GPIO_FN_VIO_D9, - GPIO_FN_VIO_D8, - GPIO_FN_VIO_D7, - GPIO_FN_VIO_D6, - GPIO_FN_VIO_D5, - GPIO_FN_VIO_D4, - GPIO_FN_VIO_D3, - GPIO_FN_VIO_D2, - GPIO_FN_VIO_D1, - GPIO_FN_VIO_D0, - - /* Port213 - Port222 FN5 */ - GPIO_FN_GIO_OUT9, - GPIO_FN_GIO_OUT8, - GPIO_FN_GIO_OUT7, - GPIO_FN_GIO_OUT6, - GPIO_FN_GIO_OUT5_217, - GPIO_FN_GIO_OUT4_218, - GPIO_FN_GIO_OUT3_219, - GPIO_FN_GIO_OUT2_220, - GPIO_FN_GIO_OUT1_221, - GPIO_FN_GIO_OUT0_222, - - /* Port224 */ - GPIO_FN_RDWR_224, - GPIO_FN_VIO_HD, - GPIO_FN_SIM1_GPO2, - - /* Port225 */ - GPIO_FN_RD_N, - - /* Port226 */ - GPIO_FN_WAIT_N, - GPIO_FN_VIO_CLK, - GPIO_FN_SIM1_GPO1, - - /* Port227 */ - GPIO_FN_WE0_N, - GPIO_FN_RDWR_227, - - /* Port228 */ - GPIO_FN_WE1_N, - GPIO_FN_SIM0_GPO0, - - /* Port229 */ - GPIO_FN_PWMO, - GPIO_FN_VIO_CKO1_229, - - /* Port230 */ - GPIO_FN_SLIM_CLK, - GPIO_FN_VIO_CKO4_230, - - /* Port231 */ - GPIO_FN_SLIM_DATA, - GPIO_FN_VIO_CKO5_231, - - /* Port232 */ - GPIO_FN_VIO_CKO2_232, - GPIO_FN_SF_PORT_0_232, - - /* Port233 */ - GPIO_FN_VIO_CKO3_233, - GPIO_FN_SF_PORT_1_233, - - /* Port234 */ - GPIO_FN_FSIACK, - GPIO_FN_PDM3_CLK_234, - GPIO_FN_ISP_IRIS1_234, - - /* Port235 */ - GPIO_FN_FSIAISLD, - GPIO_FN_PDM3_DATA_235, - - /* Port236 */ - GPIO_FN_FSIAOMC, - GPIO_FN_PDM0_OUTCLK_236, - GPIO_FN_ISP_IRIS0_236, - - /* Port237 */ - GPIO_FN_FSIAOLR, - GPIO_FN_FSIAILR, - - /* Port238 */ - GPIO_FN_FSIAOBT, - GPIO_FN_FSIAIBT, - - /* Port239 */ - GPIO_FN_FSIAOSLD, - GPIO_FN_PDM0_OUTDATA_239, - - /* Port240 */ - GPIO_FN_FSIBISLD, - - /* Port241 */ - GPIO_FN_FSIBOLR, - GPIO_FN_FSIBILR, - - /* Port242 */ - GPIO_FN_FSIBOMC, - GPIO_FN_ISP_SHUTTER1_242, - - /* Port243 */ - GPIO_FN_FSIBOBT, - GPIO_FN_FSIBIBT, - - /* Port244 */ - GPIO_FN_FSIBOSLD, - GPIO_FN_FSIASPDIF, - - /* Port245 */ - GPIO_FN_FSIBCK, - GPIO_FN_ISP_SHUTTER0_245, - - /* Port246 - Port250 FN1 */ - GPIO_FN_ISP_IRIS1_246, - GPIO_FN_ISP_IRIS0_247, - GPIO_FN_ISP_SHUTTER1_248, - GPIO_FN_ISP_SHUTTER0_249, - GPIO_FN_ISP_STROBE_250, - - /* Port256 - Port258 */ - GPIO_FN_MSIOF0_SYNC, - GPIO_FN_MSIOF0_RXD, - GPIO_FN_MSIOF0_SCK, - - /* Port259 */ - GPIO_FN_MSIOF0_SS2, - GPIO_FN_VIO_CKO3_259, - - /* Port260 */ - GPIO_FN_MSIOF0_TXD, - - /* Port261 */ - GPIO_FN_CHSCIF1_HSCK, - - /* Port262 */ - - /* Port263 - Port266 FN1 */ - GPIO_FN_MSIOF1_SS2, - GPIO_FN_MSIOF1_TXD, - GPIO_FN_MSIOF1_RXD, - GPIO_FN_MSIOF1_SS1, - - /* Port263 - Port266 FN4 */ - GPIO_FN_MSIOF5_SS2, - GPIO_FN_MSIOF5_TXD, - GPIO_FN_MSIOF5_RXD, - GPIO_FN_MSIOF5_SS1, - - /* Port267 */ - GPIO_FN_MSIOF0_SS1, - - /* Port268 */ - GPIO_FN_MSIOF1_SCK, - 
GPIO_FN_MSIOF5_SCK, - - /* Port269 */ - GPIO_FN_MSIOF1_SYNC, - GPIO_FN_MSIOF5_SYNC, - - /* Port270 - Port273 FN1 */ - GPIO_FN_MSIOF2_SS1, - GPIO_FN_MSIOF2_SS2, - GPIO_FN_MSIOF3_SS2, - GPIO_FN_MSIOF3_SS1, - - /* Port270 - Port273 FN3 */ - GPIO_FN_VIO_CKO5_270, - GPIO_FN_VIO_CKO2_271, - GPIO_FN_VIO_CKO1_272, - GPIO_FN_VIO_CKO4_273, - - /* Port274 */ - GPIO_FN_MSIOF4_SS2, - GPIO_FN_TPU1TO0, - - /* Port275 - Port280 */ - GPIO_FN_IC_DP, - GPIO_FN_SIM0_RST, - GPIO_FN_IC_DM, - GPIO_FN_SIM0_BSICOMP, - GPIO_FN_SIM0_CLK, - GPIO_FN_SIM0_IO, - - /* Port281 */ - GPIO_FN_SIM1_IO, - GPIO_FN_PDM2_DATA_281, - - /* Port282 */ - GPIO_FN_SIM1_CLK, - GPIO_FN_PDM2_CLK_282, - - /* Port283 */ - GPIO_FN_SIM1_RST, - - /* Port289 */ - GPIO_FN_SDHID1_0, - GPIO_FN_STMDATA0_2, - - /* Port290 */ - GPIO_FN_SDHID1_1, - GPIO_FN_STMDATA1_2, - - /* Port291 - Port294 FN1 */ - GPIO_FN_SDHID1_2, - GPIO_FN_SDHID1_3, - GPIO_FN_SDHICLK1, - GPIO_FN_SDHICMD1, - - /* Port291 - Port294 FN3 */ - GPIO_FN_STMDATA2_2, - GPIO_FN_STMDATA3_2, - GPIO_FN_STMCLK_2, - GPIO_FN_STMSIDI_2, - - /* Port295 */ - GPIO_FN_SDHID2_0, - GPIO_FN_MSIOF4_TXD, - GPIO_FN_MSIOF6_TXD, - - /* Port296 */ - GPIO_FN_SDHID2_1, - GPIO_FN_MSIOF6_SS2, - - /* Port297 - Port300 FN1 */ - GPIO_FN_SDHID2_2, - GPIO_FN_SDHID2_3, - GPIO_FN_SDHICLK2, - GPIO_FN_SDHICMD2, - - /* Port297 - Port300 FN2 */ - GPIO_FN_MSIOF4_RXD, - GPIO_FN_MSIOF4_SYNC, - GPIO_FN_MSIOF4_SCK, - GPIO_FN_MSIOF4_SS1, - - /* Port297 - Port300 FN3 */ - - /* Port297 - Port300 FN4 */ - GPIO_FN_MSIOF6_RXD, - GPIO_FN_MSIOF6_SYNC, - GPIO_FN_MSIOF6_SCK, - GPIO_FN_MSIOF6_SS1, - - /* Port301 */ - GPIO_FN_SDHICD0, - - /* Port302 - Port306 FN1 */ - GPIO_FN_SDHID0_0, - GPIO_FN_SDHID0_1, - GPIO_FN_SDHID0_2, - GPIO_FN_SDHID0_3, - GPIO_FN_SDHICMD0, - - /* Port302 - Port306 FN3 */ - GPIO_FN_STMDATA0_1, - GPIO_FN_STMDATA1_1, - GPIO_FN_STMDATA2_1, - GPIO_FN_STMDATA3_1, - GPIO_FN_STMSIDI_1, - - /* Port307 */ - GPIO_FN_SDHIWP0, - - /* Port308 */ - GPIO_FN_SDHICLK0, - GPIO_FN_STMCLK_1, - - /* Port320 - Port329 */ -}; - void r8a73a4_add_standard_devices(void); void r8a73a4_clock_init(void); void r8a73a4_pinmux_init(void); -- GitLab From 202ac6a21a79500ef5aab4cd8665be2597e9345c Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Mar 2013 22:51:53 +0900 Subject: [PATCH 0380/3163] sh-pfc: r8a73a4: Remove unused GPIO bias data Remove unused pull-up/down data from the r8a73a4 PFC code. 
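For reference, the per-port bias entries removed here differ from plain I/O entries only in which pin states they enumerate. A hedged sketch of the two macros, reconstructed from the surrounding definitions rather than quoted from sh_pfc.h (where the exact bodies live and may differ in argument order):

	/* Old, bias-aware entry: lists pull-up/pull-down input states in
	 * addition to plain input and output (sketch, not a verbatim quote). */
	#define PORT_DATA_IO_PU_PD(nr)					\
		PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,		\
			    PORT##nr##_OUT, PORT##nr##_IN,		\
			    PORT##nr##_IN_PD, PORT##nr##_IN_PU)

	/* New entry generated per port by CPU_ALL_PORT(): only plain input
	 * and output states, so the unused bias states disappear. */
	#define PORT_DATA_IO(nr)					\
		PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,		\
			    PORT##nr##_OUT, PORT##nr##_IN)

With that, the hundreds of hand-written PORT_DATA_IO_PU_PD() lines below collapse into the single PINMUX_DATA_ALL() invocation this patch introduces.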
Signed-off-by: Magnus Damm Acked-by: Linus Walleij Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman --- drivers/pinctrl/sh-pfc/pfc-r8a73a4.c | 176 +++------------------------ 1 file changed, 15 insertions(+), 161 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c index 464c5f9ce9fa..bbff5596e922 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a73a4.c @@ -115,16 +115,6 @@ enum { PORT_ALL(IN), PINMUX_INPUT_END, - /* PORT0_IN_PU -> PORT329_IN_PU */ - PINMUX_INPUT_PULLUP_BEGIN, - PORT_ALL(IN_PU), - PINMUX_INPUT_PULLUP_END, - - /* PORT0_IN_PD -> PORT329_IN_PD */ - PINMUX_INPUT_PULLDOWN_BEGIN, - PORT_ALL(IN_PD), - PINMUX_INPUT_PULLDOWN_END, - /* PORT0_OUT -> PORT329_OUT */ PINMUX_OUTPUT_BEGIN, PORT_ALL(OUT), @@ -438,156 +428,12 @@ enum { PINMUX_MARK_END, }; +#define _PORT_DATA(pfx, sfx) PORT_DATA_IO(pfx) +#define PINMUX_DATA_ALL() CPU_ALL_PORT(_PORT_DATA, , unused) + static const pinmux_enum_t pinmux_data[] = { /* specify valid pin states for each pin in GPIO mode */ - - PORT_DATA_IO_PU_PD(0), PORT_DATA_IO_PU_PD(1), - PORT_DATA_IO_PU_PD(2), PORT_DATA_IO_PU_PD(3), - PORT_DATA_IO_PU_PD(4), PORT_DATA_IO_PU_PD(5), - PORT_DATA_IO_PU_PD(6), PORT_DATA_IO_PU_PD(7), - PORT_DATA_IO_PU_PD(8), PORT_DATA_IO_PU_PD(9), - - PORT_DATA_IO_PU_PD(10), PORT_DATA_IO_PU_PD(11), - PORT_DATA_IO_PU_PD(12), PORT_DATA_IO_PU_PD(13), - PORT_DATA_IO_PU_PD(14), PORT_DATA_IO_PU_PD(15), - PORT_DATA_IO_PU_PD(16), PORT_DATA_IO_PU_PD(17), - PORT_DATA_IO_PU_PD(18), PORT_DATA_IO_PU_PD(19), - - PORT_DATA_IO_PU_PD(20), PORT_DATA_IO_PU_PD(21), - PORT_DATA_IO_PU_PD(22), PORT_DATA_IO_PU_PD(23), - PORT_DATA_IO_PU_PD(24), PORT_DATA_IO_PU_PD(25), - PORT_DATA_IO_PU_PD(26), PORT_DATA_IO_PU_PD(27), - PORT_DATA_IO_PU_PD(28), PORT_DATA_IO_PU_PD(29), - - PORT_DATA_IO_PU_PD(30), PORT_DATA_IO_PU_PD(32), - PORT_DATA_IO_PU_PD(33), PORT_DATA_IO_PU_PD(34), - PORT_DATA_IO_PU_PD(35), PORT_DATA_IO_PU_PD(36), - PORT_DATA_IO_PU_PD(37), PORT_DATA_IO_PU_PD(38), - PORT_DATA_IO_PU_PD(39), PORT_DATA_IO_PU_PD(40), - - PORT_DATA_IO_PU_PD(64), PORT_DATA_IO_PU_PD(65), - PORT_DATA_IO_PU_PD(66), PORT_DATA_IO_PU_PD(67), - PORT_DATA_IO_PU_PD(68), PORT_DATA_IO_PU_PD(69), - - PORT_DATA_IO_PU_PD(70), PORT_DATA_IO_PU_PD(71), - PORT_DATA_IO_PU_PD(72), PORT_DATA_IO_PU_PD(73), - PORT_DATA_O(74), PORT_DATA_IO_PU_PD(75), - PORT_DATA_IO_PU_PD(76), PORT_DATA_IO_PU_PD(77), - PORT_DATA_IO_PU_PD(78), PORT_DATA_IO_PU_PD(79), - - PORT_DATA_IO_PU_PD(80), PORT_DATA_IO_PU_PD(81), - PORT_DATA_IO_PU_PD(82), PORT_DATA_IO_PU_PD(83), - PORT_DATA_IO_PU_PD(84), PORT_DATA_IO_PU_PD(85), - - PORT_DATA_IO_PU_PD(96), PORT_DATA_IO_PU_PD(97), - PORT_DATA_IO_PU_PD(98), PORT_DATA_IO_PU_PD(99), - - PORT_DATA_IO_PU_PD(100), PORT_DATA_IO_PU_PD(101), - PORT_DATA_IO_PU_PD(102), PORT_DATA_IO_PU_PD(103), - PORT_DATA_IO_PU_PD(104), PORT_DATA_IO_PU_PD(105), - PORT_DATA_IO_PU_PD(106), PORT_DATA_IO_PU_PD(107), - PORT_DATA_IO_PU_PD(108), PORT_DATA_IO_PU_PD(109), - - PORT_DATA_IO_PU_PD(110), PORT_DATA_IO_PU_PD(111), - PORT_DATA_IO_PU_PD(112), PORT_DATA_IO_PU_PD(113), - PORT_DATA_IO_PU_PD(114), PORT_DATA_IO_PU_PD(115), - PORT_DATA_IO_PU_PD(116), PORT_DATA_IO_PU_PD(117), - PORT_DATA_IO_PU_PD(118), PORT_DATA_IO_PU_PD(119), - - PORT_DATA_IO_PU_PD(120), PORT_DATA_IO_PU_PD(121), - PORT_DATA_IO_PU_PD(122), PORT_DATA_IO_PU_PD(123), - PORT_DATA_IO_PU_PD(124), PORT_DATA_IO_PU_PD(125), - PORT_DATA_IO_PU_PD(126), - PORT_DATA_IO_PU_PD(128), PORT_DATA_IO_PU_PD(129), - - PORT_DATA_IO_PU_PD(130), PORT_DATA_IO_PU_PD(131), - 
PORT_DATA_IO_PU_PD(132), PORT_DATA_IO_PU_PD(133), - PORT_DATA_IO_PU_PD(134), - - PORT_DATA_IO_PU_PD(160), PORT_DATA_IO_PU_PD(161), - PORT_DATA_IO_PU_PD(162), PORT_DATA_IO_PU_PD(163), - PORT_DATA_IO_PU_PD(164), PORT_DATA_IO_PU_PD(165), - PORT_DATA_IO_PU_PD(166), PORT_DATA_IO_PU_PD(167), - PORT_DATA_IO_PU_PD(168), PORT_DATA_IO_PU_PD(169), - - PORT_DATA_IO_PU_PD(170), PORT_DATA_IO_PU_PD(171), - PORT_DATA_IO_PU_PD(172), PORT_DATA_IO_PU_PD(173), - PORT_DATA_IO_PU_PD(174), PORT_DATA_IO_PU_PD(175), - PORT_DATA_IO_PU_PD(176), PORT_DATA_IO_PU_PD(177), - PORT_DATA_IO_PU_PD(178), - - PORT_DATA_IO_PU_PD(192), PORT_DATA_IO_PU_PD(193), - PORT_DATA_IO_PU_PD(194), PORT_DATA_IO_PU_PD(195), - PORT_DATA_IO_PU_PD(196), PORT_DATA_IO_PU_PD(197), - PORT_DATA_IO_PU_PD(198), PORT_DATA_IO_PU_PD(199), - - PORT_DATA_IO_PU_PD(200), PORT_DATA_IO_PU_PD(201), - PORT_DATA_IO_PU_PD(202), PORT_DATA_IO_PU_PD(203), - PORT_DATA_IO_PU_PD(204), PORT_DATA_IO_PU_PD(205), - PORT_DATA_IO_PU_PD(206), PORT_DATA_IO_PU_PD(207), - PORT_DATA_IO_PU_PD(208), PORT_DATA_IO_PU_PD(209), - - PORT_DATA_IO_PU_PD(210), PORT_DATA_IO_PU_PD(211), - PORT_DATA_IO_PU_PD(212), PORT_DATA_IO_PU_PD(213), - PORT_DATA_IO_PU_PD(214), PORT_DATA_IO_PU_PD(215), - PORT_DATA_IO_PU_PD(216), PORT_DATA_IO_PU_PD(217), - PORT_DATA_IO_PU_PD(218), PORT_DATA_IO_PU_PD(219), - - PORT_DATA_IO_PU_PD(220), PORT_DATA_IO_PU_PD(221), - PORT_DATA_IO_PU_PD(222), PORT_DATA_IO_PU_PD(224), - PORT_DATA_IO_PU_PD(225), PORT_DATA_IO_PU_PD(226), - PORT_DATA_IO_PU_PD(227), PORT_DATA_IO_PU_PD(228), - PORT_DATA_IO_PU_PD(229), - - PORT_DATA_IO_PU_PD(230), PORT_DATA_IO_PU_PD(231), - PORT_DATA_IO_PU_PD(232), PORT_DATA_IO_PU_PD(233), - PORT_DATA_IO_PU_PD(234), PORT_DATA_IO_PU_PD(235), - PORT_DATA_IO_PU_PD(236), PORT_DATA_IO_PU_PD(237), - PORT_DATA_IO_PU_PD(238), PORT_DATA_IO_PU_PD(239), - - PORT_DATA_IO_PU_PD(240), PORT_DATA_IO_PU_PD(241), - PORT_DATA_IO_PU_PD(242), PORT_DATA_IO_PU_PD(243), - PORT_DATA_IO_PU_PD(244), PORT_DATA_IO_PU_PD(245), - PORT_DATA_IO_PU_PD(246), PORT_DATA_IO_PU_PD(247), - PORT_DATA_IO_PU_PD(248), PORT_DATA_IO_PU_PD(249), - - PORT_DATA_IO_PU_PD(250), - PORT_DATA_IO_PU_PD(256), PORT_DATA_IO_PU_PD(257), - PORT_DATA_IO_PU_PD(258), PORT_DATA_IO_PU_PD(259), - - PORT_DATA_IO_PU_PD(260), PORT_DATA_IO_PU_PD(261), - PORT_DATA_IO_PU_PD(262), PORT_DATA_IO_PU_PD(263), - PORT_DATA_IO_PU_PD(264), PORT_DATA_IO_PU_PD(265), - PORT_DATA_IO_PU_PD(266), PORT_DATA_IO_PU_PD(267), - PORT_DATA_IO_PU_PD(268), PORT_DATA_IO_PU_PD(269), - - PORT_DATA_IO_PU_PD(270), PORT_DATA_IO_PU_PD(271), - PORT_DATA_IO_PU_PD(272), PORT_DATA_IO_PU_PD(273), - PORT_DATA_IO_PU_PD(274), PORT_DATA_IO_PU_PD(275), - PORT_DATA_IO_PU_PD(276), PORT_DATA_IO_PU_PD(277), - PORT_DATA_IO_PU_PD(278), PORT_DATA_IO_PU_PD(279), - - PORT_DATA_IO_PU_PD(280), PORT_DATA_IO_PU_PD(281), - PORT_DATA_IO_PU_PD(282), PORT_DATA_IO_PU_PD(283), - PORT_DATA_O(288), PORT_DATA_IO_PU_PD(289), - - PORT_DATA_IO_PU_PD(290), PORT_DATA_IO_PU_PD(291), - PORT_DATA_IO_PU_PD(292), PORT_DATA_IO_PU_PD(293), - PORT_DATA_IO_PU_PD(294), PORT_DATA_IO_PU_PD(295), - PORT_DATA_IO_PU_PD(296), PORT_DATA_IO_PU_PD(297), - PORT_DATA_IO_PU_PD(298), PORT_DATA_IO_PU_PD(299), - - PORT_DATA_IO_PU_PD(300), PORT_DATA_IO_PU_PD(301), - PORT_DATA_IO_PU_PD(302), PORT_DATA_IO_PU_PD(303), - PORT_DATA_IO_PU_PD(304), PORT_DATA_IO_PU_PD(305), - PORT_DATA_IO_PU_PD(306), PORT_DATA_IO_PU_PD(307), - PORT_DATA_IO_PU_PD(308), - - PORT_DATA_IO_PU_PD(320), PORT_DATA_IO_PU_PD(321), - PORT_DATA_IO_PU_PD(322), PORT_DATA_IO_PU_PD(323), - PORT_DATA_IO_PU_PD(324), PORT_DATA_IO_PU_PD(325), - 
PORT_DATA_IO_PU_PD(326), PORT_DATA_IO_PU_PD(327), - PORT_DATA_IO_PU_PD(328), PORT_DATA_IO_PU_PD(329), + PINMUX_DATA_ALL(), /* Port0 */ PINMUX_DATA(LCDD0_MARK, PORT0_FN1), @@ -2042,8 +1888,18 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(scifb3), }; -static const struct pinmux_cfg_reg pinmux_config_regs[] = { +#undef PORTCR +#define PORTCR(nr, reg) \ + { \ + PINMUX_CFG_REG("PORT" nr "CR", reg, 8, 4) { \ + _PCRH(PORT##nr##_IN, 0, 0, PORT##nr##_OUT), \ + PORT##nr##_FN0, PORT##nr##_FN1, \ + PORT##nr##_FN2, PORT##nr##_FN3, \ + PORT##nr##_FN4, PORT##nr##_FN5, \ + PORT##nr##_FN6, PORT##nr##_FN7 } \ + } +static const struct pinmux_cfg_reg pinmux_config_regs[] = { PORTCR(0, 0xe6050000), PORTCR(1, 0xe6050001), PORTCR(2, 0xe6050002), @@ -2706,8 +2562,6 @@ const struct sh_pfc_soc_info r8a73a4_pinmux_info = { .ops = &r8a73a4_pinmux_ops, .input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END }, - .input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END }, - .input_pd = { PINMUX_INPUT_PULLDOWN_BEGIN, PINMUX_INPUT_PULLDOWN_END }, .output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END }, .function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END }, -- GitLab From 53e42c2974feaa269bc485267d0a4df0ef55e549 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 21 Mar 2013 03:03:38 -0700 Subject: [PATCH 0381/3163] ARM: shmobile: add R-Car M1A Bock-W platform support Add basic Bock-W board support. More devices will be added on top of this patch after PINCTRL and clock framework are in better shape. Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/r8a7778-bockw.dts | 32 ++++++++++++++ arch/arm/configs/bockw_defconfig | 66 ++++++++++++++++++++++++++++ arch/arm/mach-shmobile/Kconfig | 6 +++ arch/arm/mach-shmobile/Makefile | 1 + arch/arm/mach-shmobile/board-bockw.c | 43 ++++++++++++++++++ 6 files changed, 149 insertions(+) create mode 100644 arch/arm/boot/dts/r8a7778-bockw.dts create mode 100644 arch/arm/configs/bockw_defconfig create mode 100644 arch/arm/mach-shmobile/board-bockw.c diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index ee9fbe4d9912..363fa5070310 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -136,6 +136,7 @@ dtb-$(CONFIG_ARCH_U8500) += snowball.dtb \ ccu9540.dtb dtb-$(CONFIG_ARCH_SHMOBILE) += emev2-kzm9d.dtb \ r8a7740-armadillo800eva.dtb \ + r8a7778-bockw.dtb \ r8a7779-marzen-reference.dtb \ sh73a0-kzm9g.dtb \ sh73a0-kzm9g-reference.dtb \ sh7372-mackerel.dtb dtb-$(CONFIG_ARCH_SOCFPGA) += socfpga_cyclone5.dtb \ socfpga_vt.dtb diff --git a/arch/arm/boot/dts/r8a7778-bockw.dts b/arch/arm/boot/dts/r8a7778-bockw.dts new file mode 100644 index 000000000000..735c964920f9 --- /dev/null +++ b/arch/arm/boot/dts/r8a7778-bockw.dts @@ -0,0 +1,32 @@ +/* + * Reference Device Tree Source for the Bock-W board + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Kuninori Morimoto + * + * based on r8a7779 + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Simon Horman + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied.
+ */ + +/dts-v1/; +/include/ "r8a7778.dtsi" + +/ { + model = "bockw"; + compatible = "renesas,bockw", "renesas,r8a7778"; + + chosen { + bootargs = "console=ttySC0,115200 ignore_loglevel"; + }; + + memory { + device_type = "memory"; + reg = <0x60000000 0x10000000>; + }; +}; diff --git a/arch/arm/configs/bockw_defconfig b/arch/arm/configs/bockw_defconfig new file mode 100644 index 000000000000..6037705b7b67 --- /dev/null +++ b/arch/arm/configs/bockw_defconfig @@ -0,0 +1,66 @@ +# CONFIG_ARM_PATCH_PHYS_VIRT is not set +CONFIG_KERNEL_LZMA=y +CONFIG_NO_HZ=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_SYSCTL_SYSCALL=y +CONFIG_EMBEDDED=y +CONFIG_SLAB=y +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_SHMOBILE=y +CONFIG_ARCH_R8A7778=y +CONFIG_MACH_BOCKW=y +CONFIG_MEMORY_START=0x60000000 +CONFIG_MEMORY_SIZE=0x10000000 +CONFIG_SHMOBILE_TIMER_HZ=1024 +# CONFIG_SH_TIMER_CMT is not set +# CONFIG_EM_TIMER_STI is not set +CONFIG_ARM_ERRATA_430973=y +CONFIG_ARM_ERRATA_458693=y +CONFIG_ARM_ERRATA_460075=y +CONFIG_ARM_ERRATA_743622=y +CONFIG_ARM_ERRATA_754322=y +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +CONFIG_HIGHMEM=y +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_ARM_APPENDED_DTB=y +CONFIG_CMDLINE="console=ttySC0,115200 ignore_loglevel" +CONFIG_CMDLINE_FORCE=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_SUSPEND is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_SH_SCI=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=6 +CONFIG_SERIAL_SH_SCI_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_UIO=y +CONFIG_UIO_PDRV_GENIRQ=y +# CONFIG_IOMMU_SUPPORT is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_FTRACE is not set +# CONFIG_ARM_UNWIND is not set +CONFIG_KEYS=y +CONFIG_CRYPTO=y +CONFIG_AVERAGE=y diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 3933a315adf2..18712ad7ccc4 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -124,6 +124,12 @@ config MACH_ARMADILLO800EVA select SND_SOC_WM8978 if SND_SIMPLE_CARD select USE_OF +config MACH_BOCKW + bool "BOCK-W platform" + depends on ARCH_R8A7778 + select ARCH_REQUIRE_GPIOLIB + select USE_OF + config MACH_MARZEN bool "MARZEN board" depends on ARCH_R8A7779 diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index 0c9a2901370e..5eada967ec6b 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_MACH_AG5EVM) += board-ag5evm.o obj-$(CONFIG_MACH_MACKEREL) += board-mackerel.o obj-$(CONFIG_MACH_KOTA2) += board-kota2.o obj-$(CONFIG_MACH_BONITO) += board-bonito.o +obj-$(CONFIG_MACH_BOCKW) += board-bockw.o obj-$(CONFIG_MACH_MARZEN) += board-marzen.o obj-$(CONFIG_MACH_MARZEN_REFERENCE) += board-marzen-reference.o obj-$(CONFIG_MACH_ARMADILLO800EVA) += board-armadillo800eva.o diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c new file mode 100644 
index 000000000000..56ab56ed5f59 --- /dev/null +++ b/arch/arm/mach-shmobile/board-bockw.c @@ -0,0 +1,43 @@ +/* + * Bock-W board support + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Kuninori Morimoto + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include + +static void __init bockw_init(void) +{ + r8a7778_clock_init(); + r8a7778_add_standard_devices(); +} + +static const char *bockw_boards_compat_dt[] __initdata = { + "renesas,bockw", + NULL, +}; + +DT_MACHINE_START(BOCKW_DT, "bockw") + .init_early = r8a7778_init_delay, + .init_irq = r8a7778_init_irq_dt, + .init_machine = bockw_init, + .init_time = shmobile_timer_init, + .dt_compat = bockw_boards_compat_dt, +MACHINE_END -- GitLab From 8c1de8ee030e929f96d38415da7551e7fdd847b6 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 21 Mar 2013 17:05:44 +0100 Subject: [PATCH 0382/3163] ARM: shmobile: kzm9g-reference: add ethernet support Add a DT node for the SMSC 9221 ethernet chip, found on kzm9g, to its reference implementation. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Simon Horman --- arch/arm/boot/dts/sh73a0-kzm9g-reference.dts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts b/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts index f33b5ccb7446..5972abb55f9c 100644 --- a/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts +++ b/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts @@ -44,6 +44,19 @@ regulator-always-on; regulator-boot-on; }; + + lan9220@10000000 { + compatible = "smsc,lan9220", "smsc,lan9115"; + reg = <0x10000000 0x100>; + phy-mode = "mii"; + interrupt-parent = <&irqpin0>; + interrupts = <3 0>; /* active low */ + reg-io-width = <4>; + smsc,irq-push-pull; + smsc,save-mac-address; + vddvario-supply = <®_1p8v>; + vdd33a-supply = <®_3p3v>; + }; }; &mmcif { -- GitLab From 78ded16886f539830ed58d9bc043656c1785a082 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 29 Mar 2013 17:00:02 +0900 Subject: [PATCH 0383/3163] ARM: shmobile: APE6EVM base support V3 of APE6EVM base board support making use of 1 GiB of memory, the SCIFA0 serial port and ARM architected timer. 
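The 1 GiB bank is described with two-cell addresses and sizes, so each 64-bit quantity in the memory node's reg property is split into a <high low> pair. Annotated for clarity (a restatement of the DTS added by this patch, assuming #address-cells = <2> and #size-cells = <2> in r8a73a4.dtsi):

	memory@40000000 {
		device_type = "memory";
		/* <addr-hi addr-lo size-hi size-lo>:
		 * base 0x0_40000000, size 0x0_40000000 = 1 GiB */
		reg = <0 0x40000000 0 0x40000000>;
	};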
Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/r8a73a4-ape6evm.dts | 31 +++++++++++++++++ arch/arm/mach-shmobile/Kconfig | 8 ++++- arch/arm/mach-shmobile/Makefile | 1 + arch/arm/mach-shmobile/board-ape6evm.c | 46 ++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 arch/arm/boot/dts/r8a73a4-ape6evm.dts create mode 100644 arch/arm/mach-shmobile/board-ape6evm.c diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 363fa5070310..a99b0f7baa72 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -140,6 +140,7 @@ dtb-$(CONFIG_ARCH_SHMOBILE) += emev2-kzm9d.dtb \ r8a7779-marzen-reference.dtb \ sh73a0-kzm9g.dtb \ sh73a0-kzm9g-reference.dtb \ + r8a73a4-ape6evm.dtb \ sh7372-mackerel.dtb dtb-$(CONFIG_ARCH_SOCFPGA) += socfpga_cyclone5.dtb \ socfpga_vt.dtb diff --git a/arch/arm/boot/dts/r8a73a4-ape6evm.dts b/arch/arm/boot/dts/r8a73a4-ape6evm.dts new file mode 100644 index 000000000000..833f703f9be5 --- /dev/null +++ b/arch/arm/boot/dts/r8a73a4-ape6evm.dts @@ -0,0 +1,31 @@ +/* + * Device Tree Source for the APE6EVM board + * + * Copyright (C) 2013 Renesas Solutions Corp. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +/dts-v1/; +/include/ "r8a73a4.dtsi" + +/ { + model = "APE6EVM"; + compatible = "renesas,ape6evm", "renesas,r8a73a4"; + + chosen { + bootargs = "console=ttySC0,115200 ignore_loglevel"; + }; + + memory@40000000 { + device_type = "memory"; + reg = <0 0x40000000 0 0x40000000>; + }; + + lbsc { + #address-cells = <1>; + #size-cells = <1>; + }; +}; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 18712ad7ccc4..c0da6afa8a51 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -96,6 +96,11 @@ config MACH_AG5EVM select REGULATOR_FIXED_VOLTAGE if REGULATOR select SH_LCD_MIPI_DSI +config MACH_APE6EVM + bool "APE6EVM board" + depends on ARCH_R8A73A4 + select USE_OF + config MACH_MACKEREL bool "mackerel board" depends on ARCH_SH7372 @@ -189,7 +194,7 @@ config MEMORY_START hex "Physical memory start address" default "0x40000000" if MACH_AP4EVB || MACH_AG5EVM || \ MACH_MACKEREL || MACH_BONITO || \ - MACH_ARMADILLO800EVA + MACH_ARMADILLO800EVA || MACH_APE6EVM default "0x41000000" if MACH_KOTA2 default "0x00000000" ---help--- @@ -199,6 +204,7 @@ config MEMORY_START config MEMORY_SIZE hex "Physical memory size" + default "0x40000000" if MACH_APE6EVM default "0x20000000" if MACH_AG5EVM || MACH_BONITO || \ MACH_ARMADILLO800EVA default "0x1e000000" if MACH_KOTA2 diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index 5eada967ec6b..ec2524e43a47 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_ARCH_SH73A0) += pm-sh73a0.o # Board objects obj-$(CONFIG_MACH_AP4EVB) += board-ap4evb.o obj-$(CONFIG_MACH_AG5EVM) += board-ag5evm.o +obj-$(CONFIG_MACH_APE6EVM) += board-ape6evm.o obj-$(CONFIG_MACH_MACKEREL) += board-mackerel.o obj-$(CONFIG_MACH_KOTA2) += board-kota2.o obj-$(CONFIG_MACH_BONITO) += board-bonito.o diff --git a/arch/arm/mach-shmobile/board-ape6evm.c b/arch/arm/mach-shmobile/board-ape6evm.c new file mode 100644 index 000000000000..9234d4f567ba --- /dev/null +++ b/arch/arm/mach-shmobile/board-ape6evm.c @@ -0,0 +1,46 @@ +/* + * APE6EVM board support + * + * 
Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static void __init ape6evm_add_standard_devices(void) +{ + r8a73a4_clock_init(); + r8a73a4_add_standard_devices(); +} + +static const char *ape6evm_boards_compat_dt[] __initdata = { + "renesas,ape6evm", + NULL, +}; + +DT_MACHINE_START(APE6EVM_DT, "ape6evm") + .init_irq = irqchip_init, + .init_time = shmobile_timer_init, + .init_machine = ape6evm_add_standard_devices, + .dt_compat = ape6evm_boards_compat_dt, +MACHINE_END -- GitLab From 2c56055c46502f24097de93aabc8a4c00e12dab4 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 29 Mar 2013 17:00:12 +0900 Subject: [PATCH 0384/3163] ARM: shmobile: APE6EVM PFC support Start using the r8a73a4 PFC on the APE6EVM board and configure the SCIFA0 console signals in the PFC via PINCTRL. Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-ape6evm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/mach-shmobile/board-ape6evm.c b/arch/arm/mach-shmobile/board-ape6evm.c index 9234d4f567ba..33a8a62437f1 100644 --- a/arch/arm/mach-shmobile/board-ape6evm.c +++ b/arch/arm/mach-shmobile/board-ape6evm.c @@ -21,15 +21,25 @@ #include #include #include +#include #include #include #include #include #include +static const struct pinctrl_map ape6evm_pinctrl_map[] = { + /* SCIFA0 console */ + PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.0", "pfc-r8a73a4", + "scifa0_data", "scifa0"), +}; + static void __init ape6evm_add_standard_devices(void) { r8a73a4_clock_init(); + pinctrl_register_mappings(ape6evm_pinctrl_map, + ARRAY_SIZE(ape6evm_pinctrl_map)); + r8a73a4_pinmux_init(); r8a73a4_add_standard_devices(); } -- GitLab From 15351a7a22c207b2e9ce83b01782a2324006014a Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 29 Mar 2013 17:00:21 +0900 Subject: [PATCH 0385/3163] ARM: shmobile: APE6EVM LAN9220 support Add LAN9220 support to the APE6EVM board using C and DT. At this point the PFC driver lacks DT bindings so to configure the PFC we use PINCTRL in C board code. 
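For context, PIN_MAP_MUX_GROUP_DEFAULT() registers a struct pinctrl_map entry bound to the "default" state; the SMSC mapping added below expands to roughly the following (a sketch of the expansion; example_map is an illustrative name, not part of the patch):

	/* What PIN_MAP_MUX_GROUP_DEFAULT("smsc911x", "pfc-r8a73a4",
	 *                                "irqc_irq40", "irqc") builds: */
	static const struct pinctrl_map example_map = {
		.dev_name = "smsc911x",		/* consumer device */
		.name = PINCTRL_STATE_DEFAULT,	/* the "default" state */
		.type = PIN_MAP_TYPE_MUX_GROUP,
		.ctrl_dev_name = "pfc-r8a73a4",	/* the pin controller */
		.data.mux = {
			.group = "irqc_irq40",	/* pin group to mux */
			.function = "irqc",	/* function to select */
		},
	};

When the smsc911x device probes, the pinctrl core looks the entry up by device name and muxes the IRQ pin before the driver requests its interrupt.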
Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a73a4-ape6evm.dts | 23 +++++++++++++++- arch/arm/mach-shmobile/board-ape6evm.c | 38 ++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a73a4-ape6evm.dts b/arch/arm/boot/dts/r8a73a4-ape6evm.dts index 833f703f9be5..f603c6946c29 100644 --- a/arch/arm/boot/dts/r8a73a4-ape6evm.dts +++ b/arch/arm/boot/dts/r8a73a4-ape6evm.dts @@ -16,7 +16,7 @@ compatible = "renesas,ape6evm", "renesas,r8a73a4"; chosen { - bootargs = "console=ttySC0,115200 ignore_loglevel"; + bootargs = "console=ttySC0,115200 ignore_loglevel root=/dev/nfs ip=dhcp"; }; memory@40000000 { @@ -24,8 +24,29 @@ reg = <0 0x40000000 0 0x40000000>; }; + ape6evm_fixed_3v3: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + lbsc { #address-cells = <1>; #size-cells = <1>; + + ethernet@8000000 { + compatible = "smsc,lan9118", "smsc,lan9115"; + reg = <0x08000000 0x1000>; + interrupt-parent = <&irqc1>; + interrupts = <8 0x4>; + phy-mode = "mii"; + reg-io-width = <4>; + smsc,irq-active-high; + smsc,irq-push-pull; + vdd33a-supply = <&ape6evm_fixed_3v3>; + vddvario-supply = <&ape6evm_fixed_3v3>; + }; }; }; diff --git a/arch/arm/mach-shmobile/board-ape6evm.c b/arch/arm/mach-shmobile/board-ape6evm.c index 33a8a62437f1..55b8c9fef954 100644 --- a/arch/arm/mach-shmobile/board-ape6evm.c +++ b/arch/arm/mach-shmobile/board-ape6evm.c @@ -18,20 +18,49 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include #include #include #include #include +#include +#include +#include #include +#include #include #include #include +/* Dummy supplies, where voltage doesn't matter */ +static struct regulator_consumer_supply dummy_supplies[] = { + REGULATOR_SUPPLY("vddvario", "smsc911x"), + REGULATOR_SUPPLY("vdd33a", "smsc911x"), +}; + +/* SMSC LAN9220 */ +static const struct resource lan9220_res[] = { + DEFINE_RES_MEM(0x08000000, 0x1000), + { + .start = irq_pin(40), /* IRQ40 */ + .flags = IORESOURCE_IRQ | IRQF_TRIGGER_HIGH, + }, +}; + +static const struct smsc911x_platform_config lan9220_data = { + .flags = SMSC911X_USE_32BIT, + .irq_type = SMSC911X_IRQ_TYPE_PUSH_PULL, + .irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_HIGH, +}; + static const struct pinctrl_map ape6evm_pinctrl_map[] = { /* SCIFA0 console */ PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.0", "pfc-r8a73a4", "scifa0_data", "scifa0"), + /* SMSC */ + PIN_MAP_MUX_GROUP_DEFAULT("smsc911x", "pfc-r8a73a4", + "irqc_irq40", "irqc"), }; static void __init ape6evm_add_standard_devices(void) @@ -41,6 +70,15 @@ static void __init ape6evm_add_standard_devices(void) ARRAY_SIZE(ape6evm_pinctrl_map)); r8a73a4_pinmux_init(); r8a73a4_add_standard_devices(); + + /* LAN9220 ethernet */ + gpio_request_one(270, GPIOF_OUT_INIT_HIGH, NULL); /* smsc9220 RESET */ + + regulator_register_fixed(0, dummy_supplies, ARRAY_SIZE(dummy_supplies)); + + platform_device_register_resndata(&platform_bus, "smsc911x", -1, + lan9220_res, ARRAY_SIZE(lan9220_res), + &lan9220_data, sizeof(lan9220_data)); } static const char *ape6evm_boards_compat_dt[] __initdata = { -- GitLab From 3cc828fdb32281cc8166d3a40bee32b90ce3cad8 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 29 Mar 2013 17:49:37 +0900 Subject: [PATCH 0386/3163] ARM: shmobile: Initial r8a7790 Lager board support Lager base board support making use of 2 GiB of memory, the r8a7790 SoC with the SCIF0 
serial port and CA15 with ARM architected timer. Signed-off-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/r8a7790-lager.dts | 31 +++++++++++++++++++ arch/arm/mach-shmobile/Kconfig | 9 +++++- arch/arm/mach-shmobile/Makefile | 1 + arch/arm/mach-shmobile/board-lager.c | 46 ++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 arch/arm/boot/dts/r8a7790-lager.dts create mode 100644 arch/arm/mach-shmobile/board-lager.c diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index a99b0f7baa72..47b6be7cf1e8 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -138,6 +138,7 @@ dtb-$(CONFIG_ARCH_SHMOBILE) += emev2-kzm9d.dtb \ r8a7740-armadillo800eva.dtb \ r8a7778-bockw.dtb \ r8a7779-marzen-reference.dtb \ + r8a7790-lager.dtb \ sh73a0-kzm9g.dtb \ sh73a0-kzm9g-reference.dtb \ r8a73a4-ape6evm.dtb \ diff --git a/arch/arm/boot/dts/r8a7790-lager.dts b/arch/arm/boot/dts/r8a7790-lager.dts new file mode 100644 index 000000000000..09a84fce89d6 --- /dev/null +++ b/arch/arm/boot/dts/r8a7790-lager.dts @@ -0,0 +1,31 @@ +/* + * Device Tree Source for the Lager board + * + * Copyright (C) 2013 Renesas Solutions Corp. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +/dts-v1/; +/include/ "r8a7790.dtsi" + +/ { + model = "Lager"; + compatible = "renesas,lager", "renesas,r8a7790"; + + chosen { + bootargs = "console=ttySC6,115200 ignore_loglevel"; + }; + + memory@40000000 { + device_type = "memory"; + reg = <0 0x40000000 0 0x80000000>; + }; + + lbsc { + #address-cells = <1>; + #size-cells = <1>; + }; +}; diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index c0da6afa8a51..549e05985745 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -154,6 +154,11 @@ config MACH_MARZEN_REFERENCE This is intended to aid developers +config MACH_LAGER + bool "Lager board" + depends on ARCH_R8A7790 + select USE_OF + config MACH_KZM9D bool "KZM9D board" depends on ARCH_EMEV2 @@ -194,7 +199,8 @@ config MEMORY_START hex "Physical memory start address" default "0x40000000" if MACH_AP4EVB || MACH_AG5EVM || \ MACH_MACKEREL || MACH_BONITO || \ - MACH_ARMADILLO800EVA || MACH_APE6EVM + MACH_ARMADILLO800EVA || MACH_APE6EVM || \ + MACH_LAGER default "0x41000000" if MACH_KOTA2 default "0x00000000" ---help--- @@ -204,6 +210,7 @@ config MEMORY_START config MEMORY_SIZE hex "Physical memory size" + default "0x80000000" if MACH_LAGER default "0x40000000" if MACH_APE6EVM default "0x20000000" if MACH_AG5EVM || MACH_BONITO || \ MACH_ARMADILLO800EVA diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index ec2524e43a47..068f1dadc46b 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_MACH_BONITO) += board-bonito.o obj-$(CONFIG_MACH_BOCKW) += board-bockw.o obj-$(CONFIG_MACH_MARZEN) += board-marzen.o obj-$(CONFIG_MACH_MARZEN_REFERENCE) += board-marzen-reference.o +obj-$(CONFIG_MACH_LAGER) += board-lager.o obj-$(CONFIG_MACH_ARMADILLO800EVA) += board-armadillo800eva.o obj-$(CONFIG_MACH_KZM9D) += board-kzm9d.o obj-$(CONFIG_MACH_KZM9G) += board-kzm9g.o diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c new file mode 100644 index 000000000000..ea6f9e5d3500 --- /dev/null +++ 
b/arch/arm/mach-shmobile/board-lager.c @@ -0,0 +1,46 @@ +/* + * Lager board support + * + * Copyright (C) 2013 Renesas Solutions Corp. + * Copyright (C) 2013 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static void __init lager_add_standard_devices(void) +{ + r8a7790_clock_init(); + r8a7790_add_standard_devices(); +} + +static const char *lager_boards_compat_dt[] __initdata = { + "renesas,lager", + NULL, +}; + +DT_MACHINE_START(LAGER_DT, "lager") + .init_irq = irqchip_init, + .init_time = shmobile_timer_init, + .init_machine = lager_add_standard_devices, + .dt_compat = lager_boards_compat_dt, +MACHINE_END -- GitLab From fb7a89c4afa43240af705db44e3e41233016adf9 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 29 Mar 2013 22:05:48 +0100 Subject: [PATCH 0387/3163] ARM: shmobile: mackerel: SDHI resources do not have to be numbered The SDHI driver doesn't care about platform resource order, explicit resource numbering is redundant. Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-mackerel.c | 33 +++++++++---------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 1a9c75383ca0..c4f80d4e4026 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -982,21 +982,18 @@ static struct sh_mobile_sdhi_info sdhi0_info = { }; static struct resource sdhi0_resources[] = { - [0] = { + { .name = "SDHI0", .start = 0xe6850000, .end = 0xe68500ff, .flags = IORESOURCE_MEM, - }, - [1] = { + }, { .start = evt2irq(0x0e00) /* SDHI0_SDHI0I0 */, .flags = IORESOURCE_IRQ, - }, - [2] = { + }, { .start = evt2irq(0x0e20) /* SDHI0_SDHI0I1 */, .flags = IORESOURCE_IRQ, - }, - [3] = { + }, { .start = evt2irq(0x0e40) /* SDHI0_SDHI0I2 */, .flags = IORESOURCE_IRQ, }, @@ -1025,23 +1022,20 @@ static struct sh_mobile_sdhi_info sdhi1_info = { }; static struct resource sdhi1_resources[] = { - [0] = { + { .name = "SDHI1", .start = 0xe6860000, .end = 0xe68600ff, .flags = IORESOURCE_MEM, - }, - [1] = { + }, { .name = SH_MOBILE_SDHI_IRQ_CARD_DETECT, .start = evt2irq(0x0e80), /* SDHI1_SDHI1I0 */ .flags = IORESOURCE_IRQ, - }, - [2] = { + }, { .name = SH_MOBILE_SDHI_IRQ_SDCARD, .start = evt2irq(0x0ea0), /* SDHI1_SDHI1I1 */ .flags = IORESOURCE_IRQ, - }, - [3] = { + }, { .name = SH_MOBILE_SDHI_IRQ_SDIO, .start = evt2irq(0x0ec0), /* SDHI1_SDHI1I2 */ .flags = IORESOURCE_IRQ, @@ -1079,23 +1073,20 @@ static struct sh_mobile_sdhi_info sdhi2_info = { }; static struct resource sdhi2_resources[] = { - [0] = { + { .name = "SDHI2", .start = 0xe6870000, .end = 0xe68700ff, .flags = IORESOURCE_MEM, - }, - [1] = { + }, { .name = SH_MOBILE_SDHI_IRQ_CARD_DETECT, .start = evt2irq(0x1200), /* SDHI2_SDHI2I0 
*/ .flags = IORESOURCE_IRQ, - }, - [2] = { + }, { .name = SH_MOBILE_SDHI_IRQ_SDCARD, .start = evt2irq(0x1220), /* SDHI2_SDHI2I1 */ .flags = IORESOURCE_IRQ, - }, - [3] = { + }, { .name = SH_MOBILE_SDHI_IRQ_SDIO, .start = evt2irq(0x1240), /* SDHI2_SDHI2I2 */ .flags = IORESOURCE_IRQ, -- GitLab From 3f9efeecb4876661b952746c8bbe460682dce1ad Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 29 Mar 2013 22:05:49 +0100 Subject: [PATCH 0388/3163] ARM: shmobile: mackerel: remove OCR masks, where regulators are used Both SDHI and MMCIF drivers ignore their OCR platform values, when available voltages can be retrieved from regulators. Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-mackerel.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index c4f80d4e4026..734565d4b0f0 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1014,7 +1014,6 @@ static struct platform_device sdhi0_device = { static struct sh_mobile_sdhi_info sdhi1_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI1_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI1_RX, - .tmio_ocr_mask = MMC_VDD_165_195, .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE, .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_NEEDS_POLL, @@ -1125,7 +1124,6 @@ static struct resource sh_mmcif_resources[] = { static struct sh_mmcif_plat_data sh_mmcif_plat = { .sup_pclk = 0, - .ocr = MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34, .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA | MMC_CAP_NEEDS_POLL, -- GitLab From fe0a14417bc95492621b45a3cf088ce53ee05c36 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 29 Mar 2013 22:05:50 +0100 Subject: [PATCH 0389/3163] ARM: shmobile: mackerel: switch SDHI and MMCIF interfaces to slot-gpio Both SDHI and MMCIF drivers can use the standard slot-gpio card-detection functions. Switch mackerel to using them instead of platform callbacks. Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-mackerel.c | 39 ++++++++----------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 734565d4b0f0..a47a11160693 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -963,15 +963,6 @@ static struct platform_device nand_flash_device = { }, }; -/* - * The card detect pin of the top SD/MMC slot (CN7) is active low and is - * connected to GPIO A22 of SH7372 (GPIO 41). 
- */ -static int slot_cn7_get_cd(struct platform_device *pdev) -{ - return !gpio_get_value(41); -} - /* SDHI0 */ static struct sh_mobile_sdhi_info sdhi0_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI0_TX, @@ -1011,13 +1002,15 @@ static struct platform_device sdhi0_device = { #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) /* SDHI1 */ + +/* GPIO 41 can trigger IRQ8, but it is used by USBHS1, we have to poll */ static struct sh_mobile_sdhi_info sdhi1_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI1_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI1_RX, - .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_USE_GPIO_CD, .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_NEEDS_POLL, - .get_cd = slot_cn7_get_cd, + .cd_gpio = 41, }; static struct resource sdhi1_resources[] = { @@ -1052,23 +1045,19 @@ static struct platform_device sdhi1_device = { }; #endif +/* SDHI2 */ + /* * The card detect pin of the top SD/MMC slot (CN23) is active low and is - * connected to GPIO SCIFB_SCK of SH7372 (162). + * connected to GPIO SCIFB_SCK of SH7372 (GPIO 162). */ -static int slot_cn23_get_cd(struct platform_device *pdev) -{ - return !gpio_get_value(162); -} - -/* SDHI2 */ static struct sh_mobile_sdhi_info sdhi2_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI2_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI2_RX, - .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_USE_GPIO_CD, .tmio_caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_NEEDS_POLL, - .get_cd = slot_cn23_get_cd, + .cd_gpio = 162, }; static struct resource sdhi2_resources[] = { @@ -1127,7 +1116,9 @@ static struct sh_mmcif_plat_data sh_mmcif_plat = { .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA | MMC_CAP_NEEDS_POLL, - .get_cd = slot_cn7_get_cd, + .use_cd_gpio = true, + /* card detect pin for SD/MMC slot (CN7) */ + .cd_gpio = 41, .slave_id_tx = SHDMA_SLAVE_MMCIF_TX, .slave_id_rx = SHDMA_SLAVE_MMCIF_RX, }; @@ -1475,12 +1466,6 @@ static void __init mackerel_init(void) /* SDHI0 PORT172 card-detect IRQ26 */ gpio_request(GPIO_FN_IRQ26_172, NULL); - /* card detect pin for MMC slot (CN7) */ - gpio_request_one(41, GPIOF_IN, NULL); - - /* card detect pin for microSD slot (CN23) */ - gpio_request_one(162, GPIOF_IN, NULL); - /* FLCTL */ gpio_request(GPIO_FN_D0_NAF0, NULL); gpio_request(GPIO_FN_D1_NAF1, NULL); -- GitLab From 757688c8b51073450b77940dcb94f33bf4c3de88 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 29 Mar 2013 22:05:51 +0100 Subject: [PATCH 0390/3163] ARM: shmobile: mackerel: add interrupt names for SDHI0 To unify with SDHI1 also use named IRQs for SDHI0. This also clarifies which specific IRQs are used. 
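Named IRQ resources let a driver fetch each interrupt explicitly instead of relying on array order. A minimal sketch of the lookup side, assuming a probe routine along these lines (example_get_sdcard_irq is an illustrative helper, not quoted from the sh_mobile_sdhi driver):

	/* Look the SD-card interrupt up by name rather than by index;
	 * SH_MOBILE_SDHI_IRQ_SDCARD is the string key used in the
	 * resource tables touched by this patch. */
	static int example_get_sdcard_irq(struct platform_device *pdev)
	{
		struct resource *res = platform_get_resource_byname(pdev,
				IORESOURCE_IRQ, SH_MOBILE_SDHI_IRQ_SDCARD);

		return res ? res->start : -ENXIO;
	}

Unnamed IRQ resources remain reachable through platform_get_irq(), so boards that never adopted the names keep working.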
Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-mackerel.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index a47a11160693..b976ad682016 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -979,12 +979,11 @@ static struct resource sdhi0_resources[] = { .end = 0xe68500ff, .flags = IORESOURCE_MEM, }, { - .start = evt2irq(0x0e00) /* SDHI0_SDHI0I0 */, - .flags = IORESOURCE_IRQ, - }, { + .name = SH_MOBILE_SDHI_IRQ_SDCARD, .start = evt2irq(0x0e20) /* SDHI0_SDHI0I1 */, .flags = IORESOURCE_IRQ, }, { + .name = SH_MOBILE_SDHI_IRQ_SDIO, .start = evt2irq(0x0e40) /* SDHI0_SDHI0I2 */, .flags = IORESOURCE_IRQ, }, @@ -1019,10 +1018,6 @@ static struct resource sdhi1_resources[] = { .start = 0xe6860000, .end = 0xe68600ff, .flags = IORESOURCE_MEM, - }, { - .name = SH_MOBILE_SDHI_IRQ_CARD_DETECT, - .start = evt2irq(0x0e80), /* SDHI1_SDHI1I0 */ - .flags = IORESOURCE_IRQ, }, { .name = SH_MOBILE_SDHI_IRQ_SDCARD, .start = evt2irq(0x0ea0), /* SDHI1_SDHI1I1 */ .flags = IORESOURCE_IRQ, -- GitLab From 00ae962f8051bb143fa1c43c6a9c8274c3a9c593 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 29 Mar 2013 22:05:52 +0100 Subject: [PATCH 0391/3163] ARM: shmobile: mackerel: clean up MMCIF vs. SDHI1 selection MMCIF and SDHI1 share the same slot on mackerel. One of them is selected by a jumper, which cannot be queried from software. Currently, one of the two interfaces is selected in software, depending on whether or not the MMCIF driver is enabled. This is not optimal, since the kernel has to be rebuilt to switch from one interface to another. Still, so far there isn't a better option. At least make this selection consistent.
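The cleanup below also modernizes the configuration guards: IS_ENABLED(), from the kconfig headers, evaluates to 1 when an option is built in or modular, so the open-coded pair of defined() tests collapses into one expression. A before/after sketch of the guard used throughout this patch:

	/* Before: true only when MMCIF is neither built in nor a module. */
	#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)

	/* After: the same condition via the kconfig helper. */
	#if !IS_ENABLED(CONFIG_MMC_SH_MMCIF)

Using one guard everywhere means the SDHI1/MMCIF choice is made identically at every site: the device list, the pinctrl maps and the power-domain table.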
Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-mackerel.c | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index b976ad682016..5b452e1e7d8a 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -999,7 +999,7 @@ static struct platform_device sdhi0_device = { }, }; -#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) +#if !IS_ENABLED(CONFIG_MMC_SH_MMCIF) /* SDHI1 */ /* GPIO 41 can trigger IRQ8, but it is used by USBHS1, we have to poll */ @@ -1061,10 +1061,6 @@ static struct resource sdhi2_resources[] = { .start = 0xe6870000, .end = 0xe68700ff, .flags = IORESOURCE_MEM, - }, { - .name = SH_MOBILE_SDHI_IRQ_CARD_DETECT, - .start = evt2irq(0x1200), /* SDHI2_SDHI2I0 */ - .flags = IORESOURCE_IRQ, }, { .name = SH_MOBILE_SDHI_IRQ_SDCARD, .start = evt2irq(0x1220), /* SDHI2_SDHI2I1 */ @@ -1087,6 +1083,7 @@ static struct platform_device sdhi2_device = { }; /* SH_MMCIF */ +#if IS_ENABLED(CONFIG_MMC_SH_MMCIF) static struct resource sh_mmcif_resources[] = { [0] = { .name = "MMCIF", @@ -1118,7 +1115,7 @@ static struct sh_mmcif_plat_data sh_mmcif_plat = { .slave_id_rx = SHDMA_SLAVE_MMCIF_RX, }; -static struct platform_device sh_mmcif_device __maybe_unused = { +static struct platform_device sh_mmcif_device = { .name = "sh_mmcif", .id = 0, .dev = { @@ -1129,7 +1126,7 @@ static struct platform_device sh_mmcif_device __maybe_unused = { .num_resources = ARRAY_SIZE(sh_mmcif_resources), .resource = sh_mmcif_resources, }; - +#endif static int mackerel_camera_add(struct soc_camera_device *icd); static void mackerel_camera_del(struct soc_camera_device *icd); @@ -1236,11 +1233,12 @@ static struct platform_device *mackerel_devices[] __initdata = { &fsi_hdmi_device, &nand_flash_device, &sdhi0_device, -#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) +#if !IS_ENABLED(CONFIG_MMC_SH_MMCIF) &sdhi1_device, +#else + &sh_mmcif_device, #endif &sdhi2_device, - &sh_mmcif_device, &ceu_device, &mackerel_camera, &hdmi_device, @@ -1305,11 +1303,6 @@ static struct i2c_board_info i2c1_devices[] = { }; static const struct pinctrl_map mackerel_pinctrl_map[] = { - /* MMCIF */ - PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh7372", - "mmc0_data8_0", "mmc0"), - PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh7372", - "mmc0_ctrl_0", "mmc0"), /* SDHI0 */ PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh7372", "sdhi0_data4", "sdhi0"), @@ -1318,11 +1311,17 @@ static const struct pinctrl_map mackerel_pinctrl_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh7372", "sdhi0_wp", "sdhi0"), /* SDHI1 */ -#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) +#if !IS_ENABLED(CONFIG_MMC_SH_MMCIF) PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh7372", "sdhi1_data4", "sdhi1"), PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh7372", "sdhi1_ctrl", "sdhi1"), +#else + /* MMCIF */ + PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh7372", + "mmc0_data8_0", "mmc0"), + PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh7372", + "mmc0_ctrl_0", "mmc0"), #endif /* SDHI2 */ PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.2", "pfc-sh7372", @@ -1347,10 +1346,11 @@ static void __init mackerel_init(void) { "A3SP", &usbhs0_device, }, { "A3SP", &usbhs1_device, }, { "A3SP", &nand_flash_device, }, - { "A3SP", &sh_mmcif_device, }, { "A3SP", 
&sdhi0_device, }, -#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) +#if !IS_ENABLED(CONFIG_MMC_SH_MMCIF) { "A3SP", &sdhi1_device, }, +#else + { "A3SP", &sh_mmcif_device, }, #endif { "A3SP", &sdhi2_device, }, { "A4R", &ceu_device, }, -- GitLab From 5c773ba33a29c0dcddac7cfaa39fc63a7137130d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:30:04 +0900 Subject: [PATCH 0392/3163] f2fs: do not use duplicate names in a macro A macro should not use duplicate parameter names. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e399bd4d3af8..c0d774076ab9 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -23,13 +23,13 @@ ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ (t == CURSEG_WARM_NODE)) -#define IS_CURSEG(sbi, segno) \ - ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) +#define IS_CURSEG(sbi, seg) \ + ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) #define IS_CURSEC(sbi, secno) \ ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ -- GitLab From 53cf95222fad7a962cc03fb61a33e37bcf4f5c9d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:39:49 +0900 Subject: [PATCH 0393/3163] f2fs: introduce TOTAL_SECS macro Let's use a macro to get the total number of sections. 
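With the macro in place, a walk over all sections reads, for example (illustrative only):

    unsigned int secno;

    for (secno = 0; secno < TOTAL_SECS(sbi); secno++)
        /* operate on section secno */;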
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++---- fs/f2fs/segment.c | 19 +++++++++---------- fs/f2fs/segment.h | 1 + 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 025b9e2f935d..20b8794ec8f6 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -106,7 +106,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) } } mutex_unlock(&sit_i->sentry_lock); - dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100; + dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; si->bimodal = bimodal / dist; if (si->dirty_count) si->avg_vblocks = total_vblocks / ndirty; @@ -138,14 +138,13 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); if (sbi->segs_per_sec > 1) - si->base_mem += sbi->total_sections * - sizeof(struct sec_entry); + si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); /* build free segmap */ si->base_mem += sizeof(struct free_segmap_info); si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += f2fs_bitmap_size(sbi->total_sections); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); /* build curseg */ si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 17581495bafb..179a13e86f69 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -348,9 +348,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, unsigned int *newseg, bool new_sec, int dir) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int total_secs = sbi->total_sections; unsigned int segno, secno, zoneno; - unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone; + unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; unsigned int hint = *newseg / sbi->segs_per_sec; unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); unsigned int left_start = hint; @@ -367,12 +366,12 @@ static void get_new_segment(struct f2fs_sb_info *sbi, goto got_it; } find_other_zone: - secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint); - if (secno >= total_secs) { + secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); + if (secno >= TOTAL_SECS(sbi)) { if (dir == ALLOC_RIGHT) { secno = find_next_zero_bit(free_i->free_secmap, - total_secs, 0); - BUG_ON(secno >= total_secs); + TOTAL_SECS(sbi), 0); + BUG_ON(secno >= TOTAL_SECS(sbi)); } else { go_left = 1; left_start = hint - 1; @@ -387,8 +386,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, continue; } left_start = find_next_zero_bit(free_i->free_secmap, - total_secs, 0); - BUG_ON(left_start >= total_secs); + TOTAL_SECS(sbi), 0); + BUG_ON(left_start >= TOTAL_SECS(sbi)); break; } secno = left_start; @@ -1390,7 +1389,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) } if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = vzalloc(sbi->total_sections * + sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * sizeof(struct sec_entry)); if (!sit_i->sec_entries) return -ENOMEM; @@ -1441,7 +1440,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) if (!free_i->free_segmap) return -ENOMEM; - sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections); + sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 
c0d774076ab9..fea9245d4774 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -81,6 +81,7 @@ #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) +#define TOTAL_SECS(sbi) (sbi->total_sections) #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) -- GitLab From 56ae674cc27230ea86ab25db7fcf1f32dfe17ec1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:47:20 +0900 Subject: [PATCH 0394/3163] f2fs: remove redundant lock_page calls In get_node_page, we do not need to call lock_page all the time. If the node page is cached as uptodate, 1. grab_cache_page locks the page, 2. read_node_page unlocks the page, and 3. lock_page is called for further process. Let's avoid this. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 40 +++++++++++++++++++++++++--------------- fs/f2fs/node.h | 3 +++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 10cbee9dc3d1..8510c5ed402e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -847,6 +847,12 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) return ERR_PTR(err); } +/* + * Caller should do after getting the following values. + * 0: f2fs_put_page(page, 0) + * LOCKED_PAGE: f2fs_put_page(page, 1) + * error: nothing + */ static int read_node_page(struct page *page, int type) { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); @@ -859,10 +865,8 @@ static int read_node_page(struct page *page, int type) return -ENOENT; } - if (PageUptodate(page)) { - unlock_page(page); - return 0; - } + if (PageUptodate(page)) + return LOCKED_PAGE; return f2fs_readpage(sbi, page, ni.blk_addr, type); } @@ -874,6 +878,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { struct address_space *mapping = sbi->node_inode->i_mapping; struct page *apage; + int err; apage = find_get_page(mapping, nid); if (apage && PageUptodate(apage)) { @@ -886,30 +891,36 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (!apage) return; - if (read_node_page(apage, READA) == 0) + err = read_node_page(apage, READA); + if (err == 0) f2fs_put_page(apage, 0); + else if (err == LOCKED_PAGE) + f2fs_put_page(apage, 1); return; } struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { - int err; - struct page *page; struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *page; + int err; page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); err = read_node_page(page, READ_SYNC); - if (err) + if (err < 0) return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto got_it; lock_page(page); if (!PageUptodate(page)) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } +got_it: BUG_ON(nid != nid_of_node(page)); mark_page_accessed(page); return page; @@ -923,10 +934,9 @@ struct page *get_node_page_ra(struct page *parent, int start) { struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); struct address_space *mapping = sbi->node_inode->i_mapping; - int i, end; - int err = 0; - nid_t nid; struct page *page; + int err, i, end; + nid_t nid; /* First, try getting the desired direct node. 
*/ nid = get_nid(parent, start, false); @@ -936,12 +946,12 @@ static struct page *get_node_page_ra(struct page *parent, int start) page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); - else if (PageUptodate(page)) - goto page_hit; err = read_node_page(page, READ_SYNC); - if (err) + if (err < 0) return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto page_hit; /* Then, try readahead for siblings of the desired node */ end = start + MAX_RA_NODE; @@ -956,7 +966,7 @@ static struct page *get_node_page_ra(struct page *parent, int start) lock_page(page); page_hit: - if (PageError(page)) { + if (!PageUptodate(page)) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index d009cdfd2679..271a61c25601 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -29,6 +29,9 @@ /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 +/* return value for read_node_page */ +#define LOCKED_PAGE 1 + /* * For node information */ -- GitLab From 33afa7fde0defbb362328233e600e052d0a22cd5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:59:53 +0900 Subject: [PATCH 0395/3163] f2fs: allocate new segment aligned with sections When allocating a new segment under the LFS mode, we should keep the section boundary. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 179a13e86f69..b3486f34af78 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -362,7 +362,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, TOTAL_SEGS(sbi), *newseg + 1); - if (segno < TOTAL_SEGS(sbi)) + if (segno - *newseg < sbi->segs_per_sec - + (*newseg % sbi->segs_per_sec)) goto got_it; } find_other_zone: -- GitLab From 5ec4e49f9bd753e2a6857a96e01f8ae5ff00b459 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 13:26:03 +0900 Subject: [PATCH 0396/3163] f2fs: change GC bitmaps to apply the section granularity This patch removes a bitmap for victim segments selected by foreground GC, and modifies the other bitmap for victim segments selected by background GC. 1) foreground GC bitmap : We don't need to manage this, since we now keep just one previous victim section number instead of the whole victim history. f2fs uses the victim section number in order not to allocate the currently GC'ed section to the current active logs. 2) background GC bitmap : This bitmap is used to avoid selecting victims repeatedly by background GCs. In addition, the victims are able to be selected by foreground GCs, since there is no need to read victim blocks during foreground GCs. Since the foreground GC reclaims segments in section units, it'd be better to manage this bitmap at the section granularity.
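Conceptually, a section is skipped as a victim candidate whenever it is busy; the sec_usage_check() helper introduced below boils down to:

    if (IS_CURSEC(sbi, secno) || sbi->cur_victim_sec == secno)
        continue; /* section is in use, try the next one */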
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 -- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 43 ++++++++++++++++------------- fs/f2fs/segment.c | 66 +++++++++++++++++++++----------------------- fs/f2fs/segment.h | 10 ++++++- fs/f2fs/super.c | 2 ++ 7 files changed, 68 insertions(+), 59 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d947e66ee8a8..93fd57d491ac 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -748,8 +748,6 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) flush_nat_entries(sbi); flush_sit_entries(sbi); - reset_victim_segmap(sbi); - /* unlock all the fs_lock[] in do_checkpoint() */ do_checkpoint(sbi, is_umount); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 20b8794ec8f6..c3bf343b0b82 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -153,7 +153,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build dirty segmap */ si->base_mem += sizeof(struct dirty_seglist_info); si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); /* buld nm */ si->base_mem += sizeof(struct f2fs_nm_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 77e2eb061bfa..71eacd373916 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -410,6 +410,7 @@ struct f2fs_sb_info { /* for cleaning operations */ struct mutex gc_mutex; /* mutex for GC */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ + unsigned int cur_victim_sec; /* current victim section num */ /* * for stat information. @@ -979,7 +980,6 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *, int, unsigned int, int); void flush_sit_entries(struct f2fs_sb_info *); int build_segment_manager(struct f2fs_sb_info *); -void reset_victim_segmap(struct f2fs_sb_info *); void destroy_segment_manager(struct f2fs_sb_info *); /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2e3eb2d4fc30..09b8a907400b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -160,18 +160,21 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno; + unsigned int hint = 0; + unsigned int secno; /* * If the gc_type is FG_GC, we can select victim segments * selected by background GC before. * Those segments guarantee they have small valid blocks. 
*/ - segno = find_next_bit(dirty_i->victim_segmap[BG_GC], - TOTAL_SEGS(sbi), 0); - if (segno < TOTAL_SEGS(sbi)) { - clear_bit(segno, dirty_i->victim_segmap[BG_GC]); - return segno; +next: + secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); + if (secno < TOTAL_SECS(sbi)) { + if (sec_usage_check(sbi, secno)) + goto next; + clear_bit(secno, dirty_i->victim_secmap); + return secno * sbi->segs_per_sec; } return NULL_SEGNO; } @@ -234,7 +237,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct victim_sel_policy p; - unsigned int segno; + unsigned int secno; int nsearched = 0; p.alloc_mode = alloc_mode; @@ -253,6 +256,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, while (1) { unsigned long cost; + unsigned int segno; segno = find_next_bit(p.dirty_segmap, TOTAL_SEGS(sbi), p.offset); @@ -265,13 +269,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, break; } p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + secno = GET_SECNO(sbi, segno); - if (test_bit(segno, dirty_i->victim_segmap[FG_GC])) + if (sec_usage_check(sbi, secno)) continue; - if (gc_type == BG_GC && - test_bit(segno, dirty_i->victim_segmap[BG_GC])) - continue; - if (IS_CURSEC(sbi, GET_SECNO(sbi, segno))) + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) continue; cost = get_gc_cost(sbi, segno, &p); @@ -291,13 +293,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } got_it: if (p.min_segno != NULL_SEGNO) { - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; if (p.alloc_mode == LFS) { - int i; - for (i = 0; i < p.ofs_unit; i++) - set_bit(*result + i, - dirty_i->victim_segmap[gc_type]); + secno = GET_SECNO(sbi, p.min_segno); + if (gc_type == FG_GC) + sbi->cur_victim_sec = secno; + else + set_bit(secno, dirty_i->victim_secmap); } + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; } mutex_unlock(&dirty_i->seglist_lock); @@ -662,9 +665,11 @@ int f2fs_gc(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->segs_per_sec; i++) do_garbage_collect(sbi, segno + i, &ilist, gc_type); - if (gc_type == FG_GC && - get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + if (gc_type == FG_GC) { + sbi->cur_victim_sec = NULL_SEGNO; nfree++; + WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); + } if (has_not_enough_free_secs(sbi, nfree)) goto gc_more; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b3486f34af78..d5244f6765a9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -69,8 +69,9 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) dirty_i->nr_dirty[dirty_type]--; - clear_bit(segno, dirty_i->victim_segmap[FG_GC]); - clear_bit(segno, dirty_i->victim_segmap[BG_GC]); + if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + clear_bit(GET_SECNO(sbi, segno), + dirty_i->victim_secmap); } } @@ -296,13 +297,12 @@ static void write_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } -static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, - int ofs_unit, int type) +static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; - unsigned int segno, next_segno, i; - int ofs = 0; + unsigned int segno; + unsigned int ofs = 0; /* * If there is not enough reserved sections, @@ -318,23 +318,30 @@ static unsigned int 
check_prefree_segments(struct f2fs_sb_info *sbi, if (IS_NODESEG(type)) return NULL_SEGNO; next: - segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++); - ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit; + segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); + ofs += sbi->segs_per_sec; + if (segno < TOTAL_SEGS(sbi)) { + int i; + /* skip intermediate segments in a section */ - if (segno % ofs_unit) + if (segno % sbi->segs_per_sec) goto next; - /* skip if whole section is not prefree */ - next_segno = find_next_zero_bit(prefree_segmap, - TOTAL_SEGS(sbi), segno + 1); - if (next_segno - segno < ofs_unit) + /* skip if the section is currently used */ + if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) goto next; + /* skip if whole section is not prefree */ + for (i = 1; i < sbi->segs_per_sec; i++) + if (!test_bit(segno + i, prefree_segmap)) + goto next; + /* skip if whole section was not free at the last checkpoint */ - for (i = 0; i < ofs_unit; i++) - if (get_seg_entry(sbi, segno)->ckpt_valid_blocks) + for (i = 0; i < sbi->segs_per_sec; i++) + if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) goto next; + return segno; } return NULL_SEGNO; @@ -561,15 +568,13 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, int type, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int ofs_unit; if (force) { new_curseg(sbi, type, true); goto out; } - ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec; - curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type); + curseg->next_segno = check_prefree_segments(sbi, type); if (curseg->next_segno != NULL_SEGNO) change_curseg(sbi, type, false); @@ -1558,14 +1563,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) } } -static int init_victim_segmap(struct f2fs_sb_info *sbi) +static int init_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); - dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL); - dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL); - if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC]) + dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->victim_secmap) return -ENOMEM; return 0; } @@ -1592,7 +1596,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) } init_dirty_segmap(sbi); - return init_victim_segmap(sbi); + return init_victim_secmap(sbi); } /* @@ -1679,18 +1683,10 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi, mutex_unlock(&dirty_i->seglist_lock); } -void reset_victim_segmap(struct f2fs_sb_info *sbi) -{ - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); - memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size); -} - -static void destroy_victim_segmap(struct f2fs_sb_info *sbi) +static void destroy_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - - kfree(dirty_i->victim_segmap[FG_GC]); - kfree(dirty_i->victim_segmap[BG_GC]); + kfree(dirty_i->victim_secmap); } static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) @@ -1705,7 +1701,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) for (i = 0; i < NR_DIRTY_TYPE; i++) discard_dirty_segmap(sbi, i); - destroy_victim_segmap(sbi); + destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; kfree(dirty_i); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index fea9245d4774..994bb7bd7b70 100644 
--- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -10,6 +10,7 @@ */ /* constant macro */ #define NULL_SEGNO ((unsigned int)(~0)) +#define NULL_SECNO ((unsigned int)(~0)) /* V: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) @@ -214,7 +215,7 @@ struct dirty_seglist_info { unsigned long *dirty_segmap[NR_DIRTY_TYPE]; struct mutex seglist_lock; /* lock for segment bitmaps */ int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ - unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */ + unsigned long *victim_secmap; /* background GC victims */ }; /* victim selection function for cleaning and SSR */ @@ -616,3 +617,10 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) - (base + 1) + type; } + +static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) +{ + if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + return true; + return false; +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 252890ef8dbc..728c20a8e456 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -26,6 +26,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include "xattr.h" static struct kmem_cache *f2fs_inode_cachep; @@ -458,6 +459,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + sbi->cur_victim_sec = NULL_SECNO; for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); -- GitLab From 4ebefc4443898f5429185ef96d85cfce0fbcc16a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 13:49:18 +0900 Subject: [PATCH 0397/3163] f2fs: check completion of foreground GC The foreground GCs are triggered under not enough free sections. So, we should not skip moving valid blocks in the victim segments. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 09b8a907400b..136c0f7a670b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -131,7 +131,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - if (p->alloc_mode) { + if (p->alloc_mode == SSR) { p->gc_mode = GC_GREEDY; p->dirty_segmap = dirty_i->dirty_segmap[type]; p->ofs_unit = 1; @@ -404,8 +404,14 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, continue; /* set page dirty and write it */ - if (!PageWriteback(node_page)) + if (gc_type == FG_GC) { + f2fs_submit_bio(sbi, NODE, true); + wait_on_page_writeback(node_page); set_page_dirty(node_page); + } else { + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } f2fs_put_page(node_page, 1); stat_inc_node_blk_count(sbi, 1); } @@ -421,6 +427,13 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, .for_reclaim = 0, }; sync_node_pages(sbi, 0, &wbc); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. 
+ */ + if (get_valid_blocks(sbi, segno, 1) != 0) + goto next_step; } } @@ -484,20 +497,19 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static void move_data_page(struct inode *inode, struct page *page, int gc_type) { - if (page->mapping != inode->i_mapping) - goto out; - - if (inode != page->mapping->host) - goto out; - - if (PageWriteback(page)) - goto out; - if (gc_type == BG_GC) { + if (PageWriteback(page)) + goto out; set_page_dirty(page); set_cold_data(page); } else { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + if (PageWriteback(page)) { + f2fs_submit_bio(sbi, DATA, true); + wait_on_page_writeback(page); + } + mutex_lock_op(sbi, DATA_WRITE); if (clear_page_dirty_for_io(page) && S_ISDIR(inode->i_mode)) { @@ -594,8 +606,18 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (++phase < 4) goto next_step; - if (gc_type == FG_GC) + if (gc_type == FG_GC) { f2fs_submit_bio(sbi, DATA, true); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. + */ + if (get_valid_blocks(sbi, segno, 1) != 0) { + phase = 2; + goto next_step; + } + } } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, -- GitLab From 60374688a1a1cc8ef173d3dab42574719b851ac4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 13:58:51 +0900 Subject: [PATCH 0398/3163] f2fs: allocate remained free segments in the LFS mode This patch adds a new condition that allocates free segments in the current active section even if SSR is needed. Otherwise, f2fs cannot allocate the remaining free segments in the section since SSR finds dirty segments only. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d5244f6765a9..fe520d3448e0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -347,6 +347,17 @@ static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) return NULL_SEGNO; } +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) + return !test_bit(segno + 1, free_i->free_segmap); + return 0; +} + /* * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG @@ -580,6 +591,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, change_curseg(sbi, type, false); else if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type, true); else -- GitLab From b74737541c5190ab2ad3ee0d7b323e860b988df1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Apr 2013 08:32:21 +0900 Subject: [PATCH 0399/3163] f2fs: avoid race for summary information In order to do GC more reliably, I'd like to lock the victim summary page until its GC is completed, and also prevent any checkpoint process.
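The summary page is therefore kept locked across the whole GC pass; a sketch of the resulting flow in do_garbage_collect():

    sum_page = get_sum_page(sbi, segno);  /* page comes back locked */
    /* ... migrate the node or data blocks of this segment ... */
    f2fs_put_page(sum_page, 1);           /* unlock and release when GC is done */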
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 +------- fs/f2fs/node.c | 2 +- fs/f2fs/super.c | 7 +++++-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 136c0f7a670b..e97f30157aa6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -642,12 -642,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, if (IS_ERR(sum_page)) return; - /* - * CP needs to lock sum_page. In this time, we don't need - * to lock this page, because this summary page is not gone anywhere. - * Also, this page is not gonna be updated before GC is done. - */ - unlock_page(sum_page); sum = page_address(sum_page); switch (GET_SUM_TYPE((&sum->footer))) { @@ -661,7 +655,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); stat_inc_call_count(sbi->stat_info); - f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 1); } int f2fs_gc(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8510c5ed402e..95298ef68262 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1149,7 +1149,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, /* First check balancing cached NAT entries */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { - write_checkpoint(sbi, false); + f2fs_sync_fs(sbi->sb, true); return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 728c20a8e456..ca5413346653 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -137,10 +137,13 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) return 0; - if (sync) + if (sync) { + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, false); - else + mutex_unlock(&sbi->gc_mutex); + } else { f2fs_balance_fs(sbi); + } return 0; } -- GitLab From b2f2c390c5612df97f0403e1ef1e4e41c24b7d4f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Apr 2013 13:52:09 +0900 Subject: [PATCH 0400/3163] f2fs: fix the bitmap consistency of dirty segments Like below, there are 8 segment bitmaps for SSR victim candidates. enum dirty_type { DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ DIRTY, /* to count # of dirty segments */ PRE, /* to count # of entirely obsolete segments */ NR_DIRTY_TYPE }; The upper 6 bitmaps indicate segments dirtied by the respective active log areas, and the DIRTY bitmap integrates all 6 of them. For example, o DIRTY_HOT_DATA : 1010000 o DIRTY_WARM_DATA: 0100000 o DIRTY_COLD_DATA: 0001000 o DIRTY_HOT_NODE : 0000010 o DIRTY_WARM_NODE: 0000001 o DIRTY_COLD_NODE: 0000000 In this case, o DIRTY : 1111011, which means that we should guarantee the consistency between DIRTY and the other bitmaps concretely. However, the SSR mode selects victims freely from any log types, which can set multiple bits across the various bitmap types. So, this patch eliminates this inconsistency.
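The invariant being restored can be stated per segment roughly as: the DIRTY bit equals the OR of the six per-type bits, and at most one per-type bit is set:

    test_bit(segno, dirty_segmap[DIRTY]) ==
        (test_bit(segno, dirty_segmap[DIRTY_HOT_DATA]) || ... ||
         test_bit(segno, dirty_segmap[DIRTY_COLD_NODE]))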
Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fe520d3448e0..7c67ec2b63c0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -49,9 +49,20 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (dirty_type == DIRTY) { struct seg_entry *sentry = get_seg_entry(sbi, segno); + enum dirty_type t = DIRTY_HOT_DATA; + dirty_type = sentry->type; + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) dirty_i->nr_dirty[dirty_type]++; + + /* Only one bitmap should be set */ + for (; t <= DIRTY_COLD_NODE; t++) { + if (t == dirty_type) + continue; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + } } } @@ -64,11 +75,13 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, dirty_i->nr_dirty[dirty_type]--; if (dirty_type == DIRTY) { - struct seg_entry *sentry = get_seg_entry(sbi, segno); - dirty_type = sentry->type; - if (test_and_clear_bit(segno, - dirty_i->dirty_segmap[dirty_type])) - dirty_i->nr_dirty[dirty_type]--; + enum dirty_type t = DIRTY_HOT_DATA; + + /* clear all the bitmaps */ + for (; t <= DIRTY_COLD_NODE; t++) + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) clear_bit(GET_SECNO(sbi, segno), dirty_i->victim_secmap); -- GitLab From cfb185a1488810fbae9256c7d52f66c558c6ea04 Mon Sep 17 00:00:00 2001 From: P J P Date: Wed, 3 Apr 2013 11:38:00 +0900 Subject: [PATCH 0401/3163] f2fs: add NULL pointer check Commit - fa9150a84c - replaces a call to generic_writepages() in f2fs_write_data_pages() with write_cache_pages(), with a function pointer argument pointing to routine: __f2fs_writepage. -> https://git.kernel.org/linus/fa9150a84ca333f68127097c4fa1eda4b3913a22 This patch adds a NULL pointer check in f2fs_write_data_pages() to avoid a possible NULL pointer dereference, in case if - mapping->a_ops->writepage - is NULL. Signed-off-by: P J P Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 47a2d7c87ea9..cf9ff5f76134 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -559,6 +559,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, int ret; long excess_nrtw = 0, desired_nrtw; + /* deal with chardevs and other special file */ + if (!mapping->a_ops->writepage) + return 0; + if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { desired_nrtw = MAX_DESIRED_PAGES_WP; excess_nrtw = desired_nrtw - wbc->nr_to_write; -- GitLab From 1571f84a1f65f6bf35d99d59ba52bdee6722e4b6 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Wed, 3 Apr 2013 15:26:49 +0900 Subject: [PATCH 0402/3163] f2fs: update f2fs.txt related with discard at mkfs o mkfs.f2fs supports no discard option. o fixed volume label size in 512 bytes. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index dcf338e62b71..bd3c56c67380 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -146,7 +146,7 @@ USAGE Format options -------------- --l [label] : Give a volume label, up to 256 unicode name. +-l [label] : Give a volume label, up to 512 unicode name. 
-a [0 or 1] : Split start location of each area for heap-based allocation. 1 is set by default, which performs this. -o [int] : Set overprovision ratio in percent over volume size. @@ -156,6 +156,8 @@ Format options -z [int] : Set the number of sections per zone. 1 is set by default. -e [str] : Set basic extension list. e.g. "mp3,gif,mov" +-t [0 or 1] : Disable discard command or not. + 1 is set by default, which conducts discard. ================================================================================ DESIGN -- GitLab From 49952fa182a2e9b3f40b974278c5b1144f0c918b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Apr 2013 22:19:03 +0900 Subject: [PATCH 0403/3163] f2fs: reduce redundant spin_lock operations This patch reduces redundant spin_lock operations in alloc_nid_failed(). alloc_nid_failed() does not need to delete the entry and add it back again, which triggers a redundant spin_lock/spin_unlock pair. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 95298ef68262..ad3adbee842a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1407,10 +1407,8 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - if (i) { - BUG_ON(i->state != NID_ALLOC); - __del_from_free_nid_list(i); - } + BUG_ON(!i || i->state != NID_ALLOC); + __del_from_free_nid_list(i); spin_unlock(&nm_i->free_nid_list_lock); } @@ -1419,8 +1417,15 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) */ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) { - alloc_nid_done(sbi, nid); - add_free_nid(NM_I(sbi), nid); + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + i->state = NID_NEW; + nm_i->fcnt++; + spin_unlock(&nm_i->free_nid_list_lock); } void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, -- GitLab From 814844871cb0ce6663579099ecb7fc80925df68c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 1 Apr 2013 21:19:17 -0700 Subject: [PATCH 0404/3163] ARM: shmobile: r8a7778: remove pointless PLATFORM_INFO() Remove the pointless PLATFORM_INFO() macro from setup-r8a7778 and use the plain platform_device_register_xxx() helpers instead. Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/setup-r8a7778.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c index 01c62bedf9cf..57d6b0eba445 100644 --- a/arch/arm/mach-shmobile/setup-r8a7778.c +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -78,21 +78,13 @@ static struct sh_timer_config sh_tmu1_platform_data = { .clocksource_rating = 200, }; -#define PLATFORM_INFO(n, i) \ -{ \ - .parent = &platform_bus, \ - .name = #n, \ - .id = i, \ - .res = n ## i ## _resources, \ - .num_res = ARRAY_SIZE(n ## i ##_resources), \ - .data = &n ## i ##_platform_data, \ - .size_data = sizeof(n ## i ## _platform_data), \ -} - -struct platform_device_info platform_devinfo[] = { - PLATFORM_INFO(sh_tmu, 0), - PLATFORM_INFO(sh_tmu, 1), -}; +#define r8a7778_register_tmu(idx) \ + platform_device_register_resndata( \ + &platform_bus, "sh_tmu", idx, \ + sh_tmu##idx##_resources, \ + ARRAY_SIZE(sh_tmu##idx##_resources), \ + &sh_tmu##idx##_platform_data, 
\ + sizeof(sh_tmu##idx##_platform_data)) void __init r8a7778_add_standard_devices(void) { @@ -114,8 +106,8 @@ void __init r8a7778_add_standard_devices(void) &scif_platform_data[i], sizeof(struct plat_sci_port)); - for (i = 0; i < ARRAY_SIZE(platform_devinfo); i++) - platform_device_register_full(&platform_devinfo[i]); + r8a7778_register_tmu(0); + r8a7778_register_tmu(1); } #define INT2SMSKCR0 0x82288 /* 0xfe782288 */ -- GitLab From 3a42fa20aba327857a5454aeecf54f38d4212a66 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 1 Apr 2013 21:19:37 -0700 Subject: [PATCH 0405/3163] ARM: shmobile: r8a7778: add r8a7778_init_irq_extpin() This patch adds r8a7778_init_irq_extpin() for IRQ0 - IRQ3. But this patch doesn't enable DT settings on r8a7778.dts, because R8A7778 chip external IRQ depends on IRQ0 - IRQ3 pin encoding which came from platform board implementation. Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/include/mach/r8a7778.h | 1 + arch/arm/mach-shmobile/setup-r8a7778.c | 44 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/arch/arm/mach-shmobile/include/mach/r8a7778.h b/arch/arm/mach-shmobile/include/mach/r8a7778.h index a755dcafef4d..e0c620571c33 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7778.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7778.h @@ -24,5 +24,6 @@ extern void r8a7778_init_delay(void); extern void r8a7778_init_irq(void); extern void r8a7778_init_irq_dt(void); extern void r8a7778_clock_init(void); +extern void r8a7778_init_irq_extpin(int irlm); #endif /* __ASM_R8A7778_H__ */ diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c index 57d6b0eba445..288230540d19 100644 --- a/arch/arm/mach-shmobile/setup-r8a7778.c +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,49 @@ void __init r8a7778_add_standard_devices(void) r8a7778_register_tmu(1); } +static struct renesas_intc_irqpin_config irqpin_platform_data = { + .irq_base = irq_pin(0), /* IRQ0 -> IRQ3 */ + .sense_bitfield_width = 2, +}; + +static struct resource irqpin_resources[] = { + DEFINE_RES_MEM(0xfe78001c, 4), /* ICR1 */ + DEFINE_RES_MEM(0xfe780010, 4), /* INTPRI */ + DEFINE_RES_MEM(0xfe780024, 4), /* INTREQ */ + DEFINE_RES_MEM(0xfe780044, 4), /* INTMSK0 */ + DEFINE_RES_MEM(0xfe780064, 4), /* INTMSKCLR0 */ + DEFINE_RES_IRQ(gic_iid(0x3b)), /* IRQ0 */ + DEFINE_RES_IRQ(gic_iid(0x3c)), /* IRQ1 */ + DEFINE_RES_IRQ(gic_iid(0x3d)), /* IRQ2 */ + DEFINE_RES_IRQ(gic_iid(0x3e)), /* IRQ3 */ +}; + +void __init r8a7778_init_irq_extpin(int irlm) +{ + void __iomem *icr0 = ioremap_nocache(0xfe780000, PAGE_SIZE); + unsigned long tmp; + + if (!icr0) { + pr_warn("r8a7778: unable to setup external irq pin mode\n"); + return; + } + + tmp = ioread32(icr0); + if (irlm) + tmp |= 1 << 23; /* IRQ0 -> IRQ3 as individual pins */ + else + tmp &= ~(1 << 23); /* IRL mode - not supported */ + tmp |= (1 << 21); /* LVLMODE = 1 */ + iowrite32(tmp, icr0); + iounmap(icr0); + + if (irlm) + platform_device_register_resndata( + &platform_bus, "renesas_intc_irqpin", -1, + irqpin_resources, ARRAY_SIZE(irqpin_resources), + &irqpin_platform_data, sizeof(irqpin_platform_data)); +} + #define INT2SMSKCR0 0x82288 /* 0xfe782288 */ #define INT2SMSKCR1 0x8228c /* 0xfe78228c */ -- GitLab From e2f77f6ff54865870870aaff0c07e2c35c4e0cfa Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 1 Apr 2013 21:20:49 -0700 Subject: [PATCH 0406/3163] ARM: 
shmobile: bockw: enable SMSC ethernet on defconfig This patch adds SMSC ethernet support on Bock-W defconfig Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Simon Horman --- arch/arm/configs/bockw_defconfig | 34 +++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/bockw_defconfig b/arch/arm/configs/bockw_defconfig index 6037705b7b67..6524cdf3b08d 100644 --- a/arch/arm/configs/bockw_defconfig +++ b/arch/arm/configs/bockw_defconfig @@ -27,16 +27,40 @@ CONFIG_HIGHMEM=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_ARM_APPENDED_DTB=y -CONFIG_CMDLINE="console=ttySC0,115200 ignore_loglevel" +CONFIG_CMDLINE="console=ttySC0,115200 ignore_loglevel root=/dev/nfs ip=dhcp" CONFIG_CMDLINE_FORCE=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_SUSPEND is not set +CONFIG_NET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set +CONFIG_NETDEVICES=y +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_CIRRUS is not set +# CONFIG_NET_VENDOR_FARADAY is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_SMSC911X=y +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -55,12 +79,16 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_INOTIFY_USER is not set CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_SWAP=y +CONFIG_NFS_V4_1=y +CONFIG_ROOT_NFS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_FTRACE is not set # CONFIG_ARM_UNWIND is not set -CONFIG_KEYS=y -CONFIG_CRYPTO=y CONFIG_AVERAGE=y -- GitLab From 732078c369f0b6ad9fe75c1faff721e91260bc5d Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 20 Mar 2013 11:11:43 +0100 Subject: [PATCH 0407/3163] arm: zynq: Load scu baseaddress at run time Use Cortex a9 cp15 to read scu baseaddress. 
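For reference, scu_a9_get_base() is a tiny helper that reads the CP15 Configuration Base Address Register, which on the Cortex-A9 holds the base of the SCU and the private peripherals; roughly:

    static inline unsigned long scu_a9_get_base(void)
    {
        unsigned long pa;

        asm("mrc p15, 4, %0, c15, c0, 0" : "=r" (pa));
        return pa;
    }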
Signed-off-by: Michal Simek --- arch/arm/mach-zynq/common.c | 34 ++++++++++++++++++++++------------ arch/arm/mach-zynq/common.h | 2 ++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 68e0907de5d0..f9e5f3ac8eec 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -33,10 +33,13 @@ #include #include #include +#include #include #include "common.h" +void __iomem *zynq_scu_base; + static struct of_device_id zynq_of_bus_ids[] __initdata = { { .compatible = "simple-bus", }, {} @@ -56,17 +59,6 @@ static void __init xilinx_init_machine(void) of_platform_bus_probe(NULL, zynq_of_bus_ids, NULL); } -#define SCU_PERIPH_PHYS 0xF8F00000 -#define SCU_PERIPH_SIZE SZ_8K -#define SCU_PERIPH_VIRT (VMALLOC_END - SCU_PERIPH_SIZE) - -static struct map_desc scu_desc __initdata = { - .virtual = SCU_PERIPH_VIRT, - .pfn = __phys_to_pfn(SCU_PERIPH_PHYS), - .length = SCU_PERIPH_SIZE, - .type = MT_DEVICE, -}; - static void __init xilinx_zynq_timer_init(void) { struct device_node *np; @@ -81,13 +73,31 @@ static void __init xilinx_zynq_timer_init(void) clocksource_of_init(); } +static struct map_desc zynq_cortex_a9_scu_map __initdata = { + .length = SZ_256, + .type = MT_DEVICE, +}; + +static void __init zynq_scu_map_io(void) +{ + unsigned long base; + + base = scu_a9_get_base(); + zynq_cortex_a9_scu_map.pfn = __phys_to_pfn(base); + /* Expected address is in vmalloc area that's why simple assign here */ + zynq_cortex_a9_scu_map.virtual = base; + iotable_init(&zynq_cortex_a9_scu_map, 1); + zynq_scu_base = (void __iomem *)base; + BUG_ON(!zynq_scu_base); +} + /** * xilinx_map_io() - Create memory mappings needed for early I/O. */ static void __init xilinx_map_io(void) { debug_ll_io_init(); - iotable_init(&scu_desc, 1); + zynq_scu_map_io(); } static const char *xilinx_dt_match[] = { diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index 5050bb10bb12..d7164d50cffe 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -17,4 +17,6 @@ #ifndef __MACH_ZYNQ_COMMON_H__ #define __MACH_ZYNQ_COMMON_H__ +extern void __iomem *zynq_scu_base; + #endif -- GitLab From 64b889b39e9958fdcfe5e9b7aa1ac0ffca3fc9a2 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 27 Mar 2013 12:37:53 +0100 Subject: [PATCH 0408/3163] arm: zynq: Move slcr initialization to separate file Create separate slcr driver instead of polluting common code. 
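With the base mapped once in zynq_slcr_init(), later SLCR helpers only need the exported zynq_slcr_base; e.g. a hypothetical accessor, shown for illustration only and not part of this patch:

    static u32 zynq_slcr_read(u32 offset)
    {
        return readl(zynq_slcr_base + offset);
    }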
Signed-off-by: Michal Simek --- arch/arm/mach-zynq/Makefile | 2 +- arch/arm/mach-zynq/common.c | 10 +----- arch/arm/mach-zynq/common.h | 3 ++ arch/arm/mach-zynq/slcr.c | 69 +++++++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 10 deletions(-) create mode 100644 arch/arm/mach-zynq/slcr.c diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile index 320faedeb484..13ee09b563ad 100644 --- a/arch/arm/mach-zynq/Makefile +++ b/arch/arm/mach-zynq/Makefile @@ -3,4 +3,4 @@ # # Common support -obj-y := common.o +obj-y := common.o slcr.o diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index f9e5f3ac8eec..cd3968c28904 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -61,15 +61,7 @@ static void __init xilinx_init_machine(void) static void __init xilinx_zynq_timer_init(void) { - struct device_node *np; - void __iomem *slcr; - - np = of_find_compatible_node(NULL, NULL, "xlnx,zynq-slcr"); - slcr = of_iomap(np, 0); - WARN_ON(!slcr); - - xilinx_zynq_clocks_init(slcr); - + zynq_slcr_init(); clocksource_of_init(); } diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index d7164d50cffe..dd594e672ed4 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -17,6 +17,9 @@ #ifndef __MACH_ZYNQ_COMMON_H__ #define __MACH_ZYNQ_COMMON_H__ +extern int zynq_slcr_init(void); + +extern void __iomem *zynq_slcr_base; extern void __iomem *zynq_scu_base; #endif diff --git a/arch/arm/mach-zynq/slcr.c b/arch/arm/mach-zynq/slcr.c new file mode 100644 index 000000000000..f9f33496cee9 --- /dev/null +++ b/arch/arm/mach-zynq/slcr.c @@ -0,0 +1,69 @@ +/* + * Xilinx SLCR driver + * + * Copyright (c) 2011-2013 Xilinx Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA + * 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define SLCR_UNLOCK_MAGIC 0xDF0D +#define SLCR_UNLOCK 0x8 /* SCLR unlock register */ + +void __iomem *zynq_slcr_base; + +/** + * zynq_slcr_init + * Returns 0 on success, negative errno otherwise. + * + * Called early during boot from platform code to remap SLCR area. + */ +int __init zynq_slcr_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "xlnx,zynq-slcr"); + if (!np) { + pr_err("%s: no slcr node found\n", __func__); + BUG(); + } + + zynq_slcr_base = of_iomap(np, 0); + if (!zynq_slcr_base) { + pr_err("%s: Unable to map I/O memory\n", __func__); + BUG(); + } + + /* unlock the SLCR so that registers can be changed */ + writel(SLCR_UNLOCK_MAGIC, zynq_slcr_base + SLCR_UNLOCK); + + pr_info("%s mapped to %p\n", np->name, zynq_slcr_base); + + xilinx_zynq_clocks_init(zynq_slcr_base); + + of_node_put(np); + + return 0; +} -- GitLab From 96790f0a283976bc59f68657237293fe97b02334 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 20 Mar 2013 11:42:15 +0100 Subject: [PATCH 0409/3163] arm: zynq: Add support for system reset Do system reset via slcr registers. 
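Condensed, the sequence implemented below is: unlock the SLCR, clear the upper reboot-status bits as an FSBL workaround, then assert the PS software reset:

    writel(SLCR_UNLOCK_MAGIC, zynq_slcr_base + SLCR_UNLOCK);
    reboot = readl(zynq_slcr_base + SLCR_REBOOT_STATUS);
    writel(reboot & 0xF0FFFFFF, zynq_slcr_base + SLCR_REBOOT_STATUS);
    writel(1, zynq_slcr_base + SLCR_PS_RST_CTRL_OFFSET);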
Signed-off-by: Michal Simek --- arch/arm/mach-zynq/common.c | 6 ++++++ arch/arm/mach-zynq/common.h | 1 + arch/arm/mach-zynq/slcr.c | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index cd3968c28904..f0a8533af1f9 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -92,6 +92,11 @@ static void __init xilinx_map_io(void) zynq_scu_map_io(); } +static void zynq_system_reset(char mode, const char *cmd) +{ + zynq_slcr_system_reset(); +} + static const char *xilinx_dt_match[] = { "xlnx,zynq-zc702", "xlnx,zynq-7000", @@ -104,4 +109,5 @@ MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform") .init_machine = xilinx_init_machine, .init_time = xilinx_zynq_timer_init, .dt_compat = xilinx_dt_match, + .restart = zynq_system_reset, MACHINE_END diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index dd594e672ed4..d7ec3caaa1d7 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -18,6 +18,7 @@ #define __MACH_ZYNQ_COMMON_H__ extern int zynq_slcr_init(void); +extern void zynq_slcr_system_reset(void); extern void __iomem *zynq_slcr_base; extern void __iomem *zynq_scu_base; diff --git a/arch/arm/mach-zynq/slcr.c b/arch/arm/mach-zynq/slcr.c index f9f33496cee9..d58c9964e883 100644 --- a/arch/arm/mach-zynq/slcr.c +++ b/arch/arm/mach-zynq/slcr.c @@ -32,8 +32,35 @@ #define SLCR_UNLOCK_MAGIC 0xDF0D #define SLCR_UNLOCK 0x8 /* SCLR unlock register */ +#define SLCR_PS_RST_CTRL_OFFSET 0x200 /* PS Software Reset Control */ +#define SLCR_REBOOT_STATUS 0x258 /* PS Reboot Status */ + void __iomem *zynq_slcr_base; +/** + * zynq_slcr_system_reset - Reset the entire system. + */ +void zynq_slcr_system_reset(void) +{ + u32 reboot; + + /* + * Unlock the SLCR then reset the system. + * Note that this seems to require raw i/o + * functions or there's a lockup? + */ + writel(SLCR_UNLOCK_MAGIC, zynq_slcr_base + SLCR_UNLOCK); + + /* + * Clear 0x0F000000 bits of reboot status register to workaround + * the FSBL not loading the bitstream after soft-reboot + * This is a temporary solution until we know more. + */ + reboot = readl(zynq_slcr_base + SLCR_REBOOT_STATUS); + writel(reboot & 0xF0FFFFFF, zynq_slcr_base + SLCR_REBOOT_STATUS); + writel(1, zynq_slcr_base + SLCR_PS_RST_CTRL_OFFSET); +} + /** * zynq_slcr_init * Returns 0 on success, negative errno otherwise. -- GitLab From 889faa88142801ee6bec2de2b8fb4c606076d52f Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 27 Mar 2013 13:07:00 +0100 Subject: [PATCH 0410/3163] arm: zynq: Get rid of xilinx function prefix Xilinx is vendor name not SoC name. Use zynq instead. Also remove one checkpatch warning: WARNING: static const char * array should probably be static const char * const +static const char *xilinx_dt_match[] = { Signed-off-by: Michal Simek --- arch/arm/mach-zynq/common.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index f0a8533af1f9..e1b61317b86b 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -46,10 +46,10 @@ static struct of_device_id zynq_of_bus_ids[] __initdata = { }; /** - * xilinx_init_machine() - System specific initialization, intended to be - * called from board specific initialization. + * zynq_init_machine - System specific initialization, intended to be + * called from board specific initialization. 
*/ -static void __init xilinx_init_machine(void) +static void __init zynq_init_machine(void) { /* * 64KB way size, 8-way associativity, parity disabled @@ -59,7 +59,7 @@ static void __init xilinx_init_machine(void) of_platform_bus_probe(NULL, zynq_of_bus_ids, NULL); } -static void __init xilinx_zynq_timer_init(void) +static void __init zynq_timer_init(void) { zynq_slcr_init(); clocksource_of_init(); @@ -84,9 +84,9 @@ static void __init zynq_scu_map_io(void) } /** - * xilinx_map_io() - Create memory mappings needed for early I/O. + * zynq_map_io - Create memory mappings needed for early I/O. */ -static void __init xilinx_map_io(void) +static void __init zynq_map_io(void) { debug_ll_io_init(); zynq_scu_map_io(); } @@ -97,17 +97,17 @@ static void zynq_system_reset(char mode, const char *cmd) zynq_slcr_system_reset(); } -static const char *xilinx_dt_match[] = { +static const char * const zynq_dt_match[] = { "xlnx,zynq-zc702", "xlnx,zynq-7000", NULL }; MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform") - .map_io = xilinx_map_io, + .map_io = zynq_map_io, .init_irq = irqchip_init, - .init_machine = xilinx_init_machine, - .init_time = xilinx_zynq_timer_init, - .dt_compat = xilinx_dt_match, + .init_machine = zynq_init_machine, + .init_time = zynq_timer_init, + .dt_compat = zynq_dt_match, .restart = zynq_system_reset, MACHINE_END -- GitLab From 2f34e0a58f34db094257e33b461d58b578004b67 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 27 Mar 2013 13:36:39 +0100 Subject: [PATCH 0411/3163] arm: zynq: Add smp_twd timer The zynq has a Cortex-A9 with the corresponding smp_twd timers. Use them. Signed-off-by: Steffen Trumtrar Signed-off-by: Michal Simek --- arch/arm/boot/dts/zynq-7000.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index 51243db2e9e4..2a1df1bc4b99 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -129,5 +129,12 @@ clock-names = "cpu_1x"; clock-ranges; }; + scutimer: scutimer@f8f00600 { + interrupt-parent = <&intc>; + interrupts = < 1 13 0x301 >; + compatible = "arm,cortex-a9-twd-timer"; + reg = < 0xf8f00600 0x20 >; + clocks = <&cpu_clk 1>; + } ; }; }; -- GitLab From aa7eb2bb4e4a22e41bbe4612ff46e5885b13c33e Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 20 Mar 2013 13:50:12 +0100 Subject: [PATCH 0412/3163] arm: zynq: Add smp support Zynq is a dual-core Cortex-A9 where a secondary CPU always starts executing at address zero. A simple trampoline ensures the long jump to the secondary_startup code.
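Physical address 0 is not necessarily covered by the kernel's linear mapping (hence the __pa(PAGE_OFFSET) check in zynq_cpun_start()), so the code ioremaps page zero when needed, copies the trampoline there, patches in the physical address of secondary_startup, flushes the caches, and only then releases CPU1 from reset via the SLCR. A sketch of what ends up at address 0 (see headsmp.S below):

	/*
	 * 0x0: ldr r0, [pc]	@ in ARM state pc reads as '.' + 8, so this
	 *			@ loads the word stored at offset 0x8
	 * 0x4: bx  r0		@ long jump to the address in r0
	 * 0x8: .word <addr>	@ physical address of secondary_startup,
	 *			@ written by zynq_cpun_start()
	 */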
Signed-off-by: Michal Simek Signed-off-by: Steffen Trumtrar --- arch/arm/mach-zynq/Kconfig | 1 + arch/arm/mach-zynq/Makefile | 1 + arch/arm/mach-zynq/common.c | 1 + arch/arm/mach-zynq/common.h | 11 +++ arch/arm/mach-zynq/headsmp.S | 24 ++++++ arch/arm/mach-zynq/platsmp.c | 149 +++++++++++++++++++++++++++++++++++ arch/arm/mach-zynq/slcr.c | 29 +++++++ 7 files changed, 216 insertions(+) create mode 100644 arch/arm/mach-zynq/headsmp.S create mode 100644 arch/arm/mach-zynq/platsmp.c diff --git a/arch/arm/mach-zynq/Kconfig b/arch/arm/mach-zynq/Kconfig index d70651e8b705..f4a7e630bde0 100644 --- a/arch/arm/mach-zynq/Kconfig +++ b/arch/arm/mach-zynq/Kconfig @@ -8,6 +8,7 @@ config ARCH_ZYNQ select ICST select MIGHT_HAVE_CACHE_L2X0 select USE_OF + select HAVE_SMP select SPARSE_IRQ select CADENCE_TTC_TIMER help diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile index 13ee09b563ad..b595d22134ec 100644 --- a/arch/arm/mach-zynq/Makefile +++ b/arch/arm/mach-zynq/Makefile @@ -4,3 +4,4 @@ # Common support obj-y := common.o slcr.o +obj-$(CONFIG_SMP) += headsmp.o platsmp.o diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index e1b61317b86b..5bfe7035b73d 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -104,6 +104,7 @@ static const char * const zynq_dt_match[] = { }; MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform") + .smp = smp_ops(zynq_smp_ops), .map_io = zynq_map_io, .init_irq = irqchip_init, .init_machine = zynq_init_machine, diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index d7ec3caaa1d7..fd308f8b18d9 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -19,6 +19,17 @@ extern int zynq_slcr_init(void); extern void zynq_slcr_system_reset(void); +extern void zynq_slcr_cpu_stop(int cpu); +extern void zynq_slcr_cpu_start(int cpu); + +#ifdef CONFIG_SMP +extern void secondary_startup(void); +extern char zynq_secondary_trampoline; +extern char zynq_secondary_trampoline_jump; +extern char zynq_secondary_trampoline_end; +extern int __cpuinit zynq_cpun_start(u32 address, int cpu); +extern struct smp_operations zynq_smp_ops __initdata; +#endif extern void __iomem *zynq_slcr_base; extern void __iomem *zynq_scu_base; diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S new file mode 100644 index 000000000000..d183cd234a9b --- /dev/null +++ b/arch/arm/mach-zynq/headsmp.S @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2013 Steffen Trumtrar + * Copyright (c) 2012-2013 Xilinx + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + __CPUINIT + +ENTRY(zynq_secondary_trampoline) + ldr r0, [pc] + bx r0 +.globl zynq_secondary_trampoline_jump +zynq_secondary_trampoline_jump: + /* Space for jumping address */ + .word /* cpu 1 */ +.globl zynq_secondary_trampoline_end +zynq_secondary_trampoline_end: + +ENDPROC(zynq_secondary_trampoline) diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c new file mode 100644 index 000000000000..cdfd888ca783 --- /dev/null +++ b/arch/arm/mach-zynq/platsmp.c @@ -0,0 +1,149 @@ +/* + * This file contains Xilinx specific SMP code, used to start up + * the second processor. + * + * Copyright (C) 2011-2013 Xilinx + * + * based on linux/arch/arm/mach-realview/platsmp.c + * + * Copyright (C) 2002 ARM Ltd. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* + * Store number of cores in the system + * Because of scu_get_core_count() must be in __init section and can't + * be called from zynq_cpun_start() because it is in __cpuinit section. + */ +static int ncores; + +/* Secondary CPU kernel startup is a 2 step process. The primary CPU + * starts the secondary CPU by giving it the address of the kernel and + * then sending it an event to wake it up. The secondary CPU then + * starts the kernel and tells the primary CPU it's up and running. + */ +static void __cpuinit zynq_secondary_init(unsigned int cpu) +{ + /* + * if any interrupts are already enabled for the primary + * core (e.g. timer irq), then they will not have been enabled + * for us: do so + */ + gic_secondary_init(0); +} + +int __cpuinit zynq_cpun_start(u32 address, int cpu) +{ + u32 trampoline_code_size = &zynq_secondary_trampoline_end - + &zynq_secondary_trampoline; + + if (cpu > ncores) { + pr_warn("CPU No. is not available in the system\n"); + return -1; + } + + /* MS: Expectation that SLCR are directly map and accessible */ + /* Not possible to jump to non aligned address */ + if (!(address & 3) && (!address || (address >= trampoline_code_size))) { + /* Store pointer to ioremap area which points to address 0x0 */ + static u8 __iomem *zero; + u32 trampoline_size = &zynq_secondary_trampoline_jump - + &zynq_secondary_trampoline; + + zynq_slcr_cpu_stop(cpu); + + if (__pa(PAGE_OFFSET)) { + zero = ioremap(0, trampoline_code_size); + if (!zero) { + pr_warn("BOOTUP jump vectors not accessible\n"); + return -1; + } + } else { + zero = (__force u8 __iomem *)PAGE_OFFSET; + } + + /* + * This is elegant way how to jump to any address + * 0x0: Load address at 0x8 to r0 + * 0x4: Jump by mov instruction + * 0x8: Jumping address + */ + memcpy((__force void *)zero, &zynq_secondary_trampoline, + trampoline_size); + writel(address, zero + trampoline_size); + + flush_cache_all(); + outer_flush_range(0, trampoline_code_size); + smp_wmb(); + + if (__pa(PAGE_OFFSET)) + iounmap(zero); + + zynq_slcr_cpu_start(cpu); + + return 0; + } + + pr_warn("Can't start CPU%d: Wrong starting address %x\n", cpu, address); + + return -1; +} +EXPORT_SYMBOL(zynq_cpun_start); + +static int __cpuinit zynq_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + return zynq_cpun_start(virt_to_phys(secondary_startup), cpu); +} + +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +static void __init zynq_smp_init_cpus(void) +{ + int i; + + ncores = scu_get_core_count(zynq_scu_base); + + for (i = 0; i < ncores && i < CONFIG_NR_CPUS; i++) + set_cpu_possible(i, true); +} + +static void __init zynq_smp_prepare_cpus(unsigned int max_cpus) +{ + int i; + + /* + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. 
+ */ + for (i = 0; i < max_cpus; i++) + set_cpu_present(i, true); + + scu_enable(zynq_scu_base); +} + +struct smp_operations zynq_smp_ops __initdata = { + .smp_init_cpus = zynq_smp_init_cpus, + .smp_prepare_cpus = zynq_smp_prepare_cpus, + .smp_secondary_init = zynq_secondary_init, + .smp_boot_secondary = zynq_boot_secondary, +}; diff --git a/arch/arm/mach-zynq/slcr.c b/arch/arm/mach-zynq/slcr.c index d58c9964e883..c70969b9c258 100644 --- a/arch/arm/mach-zynq/slcr.c +++ b/arch/arm/mach-zynq/slcr.c @@ -33,6 +33,11 @@ #define SLCR_UNLOCK 0x8 /* SCLR unlock register */ #define SLCR_PS_RST_CTRL_OFFSET 0x200 /* PS Software Reset Control */ + +#define SLCR_A9_CPU_CLKSTOP 0x10 +#define SLCR_A9_CPU_RST 0x1 + +#define SLCR_A9_CPU_RST_CTRL 0x244 /* CPU Software Reset Control */ #define SLCR_REBOOT_STATUS 0x258 /* PS Reboot Status */ void __iomem *zynq_slcr_base; @@ -61,6 +66,30 @@ void zynq_slcr_system_reset(void) writel(1, zynq_slcr_base + SLCR_PS_RST_CTRL_OFFSET); } +/** + * zynq_slcr_cpu_start - Start cpu + * @cpu: cpu number + */ +void zynq_slcr_cpu_start(int cpu) +{ + /* enable CPUn */ + writel(SLCR_A9_CPU_CLKSTOP << cpu, + zynq_slcr_base + SLCR_A9_CPU_RST_CTRL); + /* enable CLK for CPUn */ + writel(0x0 << cpu, zynq_slcr_base + SLCR_A9_CPU_RST_CTRL); +} + +/** + * zynq_slcr_cpu_stop - Stop cpu + * @cpu: cpu number + */ +void zynq_slcr_cpu_stop(int cpu) +{ + /* stop CLK and reset CPUn */ + writel((SLCR_A9_CPU_CLKSTOP | SLCR_A9_CPU_RST) << cpu, + zynq_slcr_base + SLCR_A9_CPU_RST_CTRL); +} + /** * zynq_slcr_init * Returns 0 on success, negative errno otherwise. -- GitLab From c7c28b0fdd06d8eb9414d21f8956b7c773ceea93 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 20 Mar 2013 13:56:15 +0100 Subject: [PATCH 0413/3163] arm: zynq: Add hotplug support Signed-off-by: Michal Simek --- arch/arm/mach-zynq/Makefile | 3 + arch/arm/mach-zynq/common.h | 3 + arch/arm/mach-zynq/hotplug.c | 104 +++++++++++++++++++++++++++++++++++ arch/arm/mach-zynq/platsmp.c | 3 + 4 files changed, 113 insertions(+) create mode 100644 arch/arm/mach-zynq/hotplug.c diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile index b595d22134ec..1b25d92ebf22 100644 --- a/arch/arm/mach-zynq/Makefile +++ b/arch/arm/mach-zynq/Makefile @@ -4,4 +4,7 @@ # Common support obj-y := common.o slcr.o +CFLAGS_REMOVE_hotplug.o =-march=armv6k +CFLAGS_hotplug.o =-Wa,-march=armv7-a -mcpu=cortex-a9 +obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_SMP) += headsmp.o platsmp.o diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index fd308f8b18d9..fbbd0e21c404 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -34,4 +34,7 @@ extern struct smp_operations zynq_smp_ops __initdata; extern void __iomem *zynq_slcr_base; extern void __iomem *zynq_scu_base; +/* Hotplug */ +extern void zynq_platform_cpu_die(unsigned int cpu); + #endif diff --git a/arch/arm/mach-zynq/hotplug.c b/arch/arm/mach-zynq/hotplug.c new file mode 100644 index 000000000000..c89672bd1de2 --- /dev/null +++ b/arch/arm/mach-zynq/hotplug.c @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2012-2013 Xilinx + * + * based on linux/arch/arm/mach-realview/hotplug.c + * + * Copyright (C) 2002 ARM Ltd. + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include + +#include +#include +#include "common.h" + +static inline void zynq_cpu_enter_lowpower(void) +{ + unsigned int v; + + flush_cache_all(); + asm volatile( + " mcr p15, 0, %1, c7, c5, 0\n" + " dsb\n" + /* + * Turn off coherency + */ + " mrc p15, 0, %0, c1, c0, 1\n" + " bic %0, %0, #0x40\n" + " mcr p15, 0, %0, c1, c0, 1\n" + " mrc p15, 0, %0, c1, c0, 0\n" + " bic %0, %0, %2\n" + " mcr p15, 0, %0, c1, c0, 0\n" + : "=&r" (v) + : "r" (0), "Ir" (CR_C) + : "cc"); +} + +static inline void zynq_cpu_leave_lowpower(void) +{ + unsigned int v; + + asm volatile( + " mrc p15, 0, %0, c1, c0, 0\n" + " orr %0, %0, %1\n" + " mcr p15, 0, %0, c1, c0, 0\n" + " mrc p15, 0, %0, c1, c0, 1\n" + " orr %0, %0, #0x40\n" + " mcr p15, 0, %0, c1, c0, 1\n" + : "=&r" (v) + : "Ir" (CR_C) + : "cc"); +} + +static inline void zynq_platform_do_lowpower(unsigned int cpu, int *spurious) +{ + /* + * there is no power-control hardware on this platform, so all + * we can do is put the core into WFI; this is safe as the calling + * code will have already disabled interrupts + */ + for (;;) { + dsb(); + wfi(); + + /* + * Getting here, means that we have come out of WFI without + * having been woken up - this shouldn't happen + * + * Just note it happening - when we're woken, we can report + * its occurrence. + */ + (*spurious)++; + } +} + +/* + * platform-specific code to shutdown a CPU + * + * Called with IRQs disabled + */ +void zynq_platform_cpu_die(unsigned int cpu) +{ + int spurious = 0; + + /* + * we're ready for shutdown now, so do it + */ + zynq_cpu_enter_lowpower(); + zynq_platform_do_lowpower(cpu, &spurious); + + /* + * bring this CPU back into the world of cache + * coherency, and then restore interrupts + */ + zynq_cpu_leave_lowpower(); + + if (spurious) + pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious); +} diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c index cdfd888ca783..3072cbd7ec6f 100644 --- a/arch/arm/mach-zynq/platsmp.c +++ b/arch/arm/mach-zynq/platsmp.c @@ -146,4 +146,7 @@ struct smp_operations zynq_smp_ops __initdata = { .smp_prepare_cpus = zynq_smp_prepare_cpus, .smp_secondary_init = zynq_secondary_init, .smp_boot_secondary = zynq_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_die = zynq_platform_cpu_die, +#endif }; -- GitLab From f0ff5a0a82b0c1be8e14584aa66a7890e08361d9 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Apr 2013 00:07:14 -0700 Subject: [PATCH 0414/3163] ARM: shmobile: r8a7779: add each clocks ratio on comment area Adding comment describing the r8a7779 clock frequencies depending on MD pin settings. 
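Reading the table below: with MD1 = 1 the PLLA runs at 1500 MHz, so for example clkz = 1500 * 2/3 = 1000 MHz and clkp = 1500 * 1/24 = 62.5 MHz; with MD1 = 0 (PLLA = 1600 MHz) the same clocks come out as 1600 * 1/2 = 800 MHz and 1600 * 1/32 = 50 MHz.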
Acked-by: Magnus Damm Signed-off-by: Kuninori Morimoto Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7779.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm/mach-shmobile/clock-r8a7779.c b/arch/arm/mach-shmobile/clock-r8a7779.c index 7d86bfbb5b06..5436b512da7a 100644 --- a/arch/arm/mach-shmobile/clock-r8a7779.c +++ b/arch/arm/mach-shmobile/clock-r8a7779.c @@ -26,6 +26,25 @@ #include #include +/* + * MD1 = 1 MD1 = 0 + * (PLLA = 1500) (PLLA = 1600) + * (MHz) (MHz) + *------------------------------------------------+-------------------- + * clkz 1000 (2/3) 800 (1/2) + * clkzs 250 (1/6) 200 (1/8) + * clki 750 (1/2) 800 (1/2) + * clks 250 (1/6) 200 (1/8) + * clks1 125 (1/12) 100 (1/16) + * clks3 187.5 (1/8) 200 (1/8) + * clks4 93.7 (1/16) 100 (1/16) + * clkp 62.5 (1/24) 50 (1/32) + * clkg 62.5 (1/24) 66.6 (1/24) + * clkb, CLKOUT + * (MD2 = 0) 62.5 (1/24) 66.6 (1/24) + * (MD2 = 1) 41.6 (1/36) 50 (1/32) +*/ + #define MD(nr) BIT(nr) #define FRQMR IOMEM(0xffc80014) -- GitLab From c1c88137764748d74f3d5eb4637d49648716279b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 11 Mar 2013 13:57:00 +0200 Subject: [PATCH 0415/3163] ARM: OMAP: zoom: Use pwm stack for lcd and keyboard backlight Use pwm_leds driver for the keyboard light and pwm-backlight for the lcd backlight control (instead of implementing the PWM driver part in the board file). Signed-off-by: Peter Ujfalusi Signed-off-by: Tomi Valkeinen --- arch/arm/mach-omap2/board-zoom-display.c | 56 -------------------- arch/arm/mach-omap2/board-zoom-peripherals.c | 53 +++++++++++++++++- 2 files changed, 52 insertions(+), 57 deletions(-) diff --git a/arch/arm/mach-omap2/board-zoom-display.c b/arch/arm/mach-omap2/board-zoom-display.c index 8cef477d6b00..9a7174faac51 100644 --- a/arch/arm/mach-omap2/board-zoom-display.c +++ b/arch/arm/mach-omap2/board-zoom-display.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include