From 14d0e347ea2db51144a8800d7c7576db96f69983 Mon Sep 17 00:00:00 2001 From: Zoran Markovic Date: Wed, 26 Jun 2013 16:09:13 -0700 Subject: [PATCH 00001/10742] rtc: Keep system awake until all expired RTC timers are handled Current implementation of RTC interface allows for system suspend to occur in the following cases: (a) if a timer is set in the past and rtc_timer_do_work() is scheduled to handle it, and (b) if rtc_timer_do_work() is called to handle expired timers whose handlers implement a preemption point. A pending suspend request may be honoured in the above cases causing timer handling to be delayed until after the next resume. This is undesirable since timer handlers may have time-critical code to execute. This patch makes sure that the system stays awake until all expired timers are handled. Note that all calls to pm_stay_awake() are eventually paired with the single pm_relax() call in rtc_timer_do_work(), which is launched using schedule_work(). Cc: Alessandro Zummo Cc: John Stultz Cc: Arve Hjonnevag Cc: Todd Poynor Signed-off-by: Zoran Markovic Signed-off-by: John Stultz --- drivers/rtc/interface.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 72c5cdbe0791..544be722937c 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -72,6 +72,7 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) } else err = -EINVAL; + pm_stay_awake(rtc->dev.parent); mutex_unlock(&rtc->ops_lock); /* A timer might have just expired */ schedule_work(&rtc->irqwork); @@ -113,6 +114,7 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs) err = -EINVAL; } + pm_stay_awake(rtc->dev.parent); mutex_unlock(&rtc->ops_lock); /* A timer might have just expired */ schedule_work(&rtc->irqwork); @@ -771,9 +773,10 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1; err = __rtc_set_alarm(rtc, &alarm); - if (err == -ETIME) + if (err == -ETIME) { + pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); - else if (err) { + } else if (err) { timerqueue_del(&rtc->timerqueue, &timer->node); timer->enabled = 0; return err; @@ -818,8 +821,10 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer) alarm.time = rtc_ktime_to_tm(next->expires); alarm.enabled = 1; err = __rtc_set_alarm(rtc, &alarm); - if (err == -ETIME) + if (err == -ETIME) { + pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); + } } } @@ -845,7 +850,6 @@ void rtc_timer_do_work(struct work_struct *work) mutex_lock(&rtc->ops_lock); again: - pm_relax(rtc->dev.parent); __rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); while ((next = timerqueue_getnext(&rtc->timerqueue))) { @@ -880,6 +884,7 @@ void rtc_timer_do_work(struct work_struct *work) } else rtc_alarm_disable(rtc); + pm_relax(rtc->dev.parent); mutex_unlock(&rtc->ops_lock); } -- GitLab From 397bbf6dee50bb1f07cbdb464c41b0f5b7a85493 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 22 Feb 2013 15:08:56 -0500 Subject: [PATCH 00002/10742] clocksource: Fix !CONFIG_CLOCKSOURCE_WATCHDOG compile If I explicitly disable the clocksource watchdog in the x86 Kconfig, the x86 kernel will not compile unless this is properly defined. 
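For illustration, here is a minimal stand-alone sketch of the pattern this fix relies on: when the watchdog is compiled out, the symbol still needs a no-op definition so that callers elsewhere in the kernel keep compiling and linking. The user-space harness and the static-inline form below are invented for the example (the actual patch adds a plain empty function next to the other !CONFIG_CLOCKSOURCE_WATCHDOG stubs, as shown in the diff that follows).

#include <stdio.h>

struct clocksource { const char *name; };

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
/* Real implementation lives with the watchdog code. */
void clocksource_mark_unstable(struct clocksource *cs);
#else
/* No-op stub so callers still build when the watchdog is disabled. */
static inline void clocksource_mark_unstable(struct clocksource *cs) { }
#endif

int main(void)
{
	struct clocksource cs = { "tsc" };

	clocksource_mark_unstable(&cs);	/* does nothing in the stub build */
	printf("marked %s unstable (stub configuration)\n", cs.name);
	return 0;
}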
Cc: John Stultz Cc: Thomas Gleixner Cc: x86@kernel.org Signed-off-by: Prarit Bhargava Signed-off-by: John Stultz --- kernel/time/clocksource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 50a8736757f3..a2e72b8d28d0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -479,6 +479,7 @@ static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } static inline void clocksource_resume_watchdog(void) { } static inline int __clocksource_watchdog_kthread(void) { return 0; } static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } +void clocksource_mark_unstable(struct clocksource *cs) { } #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ -- GitLab From 71a8986d7e4845b6fca1298fe6e3233ce6fde0b7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 18 Jul 2013 16:50:59 -0400 Subject: [PATCH 00003/10742] ARM: suspend: use hash of cpu_logical_map value to index into save array Currently we hash the MPIDR of the CPU being suspended to determine which entry in the sleep_save_sp array to use. In some situations, such as when we want to resume on another physical CPU, the MPIDR of another CPU should be used instead. So let's use the value of cpu_logical_map(smp_processor_id()) in place of the MPIDR in the suspend path. This will result in the same index being used as with the previous code unless the caller has modified cpu_logical_map() beforehand with the MPIDR of the physical CPU the suspending logical CPU will resume on. Consequently, if doing a physical CPU migration, cpu_logical_map() must be updated appropriately somewhere between cpu_pm_enter() and cpu_suspend(). The register allocation in __cpu_suspend is reworked in order to better accommodate the additional argument. Signed-off-by: Nicolas Pitre Acked-by: Lorenzo Pieralisi Reviewed-by: Dave Martin --- arch/arm/kernel/sleep.S | 26 +++++++++++--------------- arch/arm/kernel/suspend.c | 8 +++++--- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index db1536b8b30b..622460201911 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -55,6 +55,7 @@ * specific registers and some other data for resume. * r0 = suspend function arg0 * r1 = suspend function + * r2 = MPIDR value the resuming CPU will use */ ENTRY(__cpu_suspend) stmfd sp!, {r4 - r11, lr} @@ -67,23 +68,18 @@ ENTRY(__cpu_suspend) mov r5, sp @ current virtual SP add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn sub sp, sp, r4 @ allocate CPU state on stack - stmfd sp!, {r0, r1} @ save suspend func arg and pointer - add r0, sp, #8 @ save pointer to save block - mov r1, r4 @ size of save block - mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp + stmfd sp!, {r0, r1} @ save suspend func arg and pointer ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(mrc p15, 0, r9, c0, c0, 5) - ALT_UP_B(1f) - ldr r8, =mpidr_hash - /* - * This ldmia relies on the memory layout of the mpidr_hash - * struct mpidr_hash. 
- */ - ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts - compute_mpidr_hash lr, r5, r6, r7, r9, r4 - add r3, r3, lr, lsl #2 -1: + ALT_SMP(ldr r0, =mpidr_hash) + ALT_UP_B(1f) + /* This ldmia relies on the memory layout of the mpidr_hash struct */ + ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts + compute_mpidr_hash r0, r6, r7, r8, r2, r1 + add r3, r3, r0, lsl #2 +1: mov r2, r5 @ virtual SP + mov r1, r4 @ size of save block + add r0, sp, #8 @ pointer to save block bl __cpu_suspend_save adr lr, BSYM(cpu_suspend_abort) ldmfd sp!, {r0, pc} @ call suspend fn diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 41cf3cbf756d..2835d35234ca 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -10,7 +10,7 @@ #include #include -extern int __cpu_suspend(unsigned long, int (*)(unsigned long)); +extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid); extern void cpu_resume_mmu(void); #ifdef CONFIG_MMU @@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void); int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { struct mm_struct *mm = current->active_mm; + u32 __mpidr = cpu_logical_map(smp_processor_id()); int ret; if (!idmap_pgd) @@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) * resume (indicated by a zero return code), we need to switch * back to the correct page tables. */ - ret = __cpu_suspend(arg, fn); + ret = __cpu_suspend(arg, fn, __mpidr); if (ret == 0) { cpu_switch_mm(mm->pgd, mm); local_flush_bp_all(); @@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) #else int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - return __cpu_suspend(arg, fn); + u32 __mpidr = cpu_logical_map(smp_processor_id()); + return __cpu_suspend(arg, fn, __mpidr); } #define idmap_pgd NULL #endif -- GitLab From 1a6b69b6548cd0dd82549393f30dd982ceeb79d2 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 12 Apr 2012 01:40:31 -0400 Subject: [PATCH 00004/10742] ARM: gic: add CPU migration support This is required by the big.LITTLE switcher code. The gic_migrate_target() changes the CPU interface mapping for the current CPU to redirect SGIs to the specified interface, and it also updates the target CPU for each interrupts to that CPU interface if they were targeting the current interface. Finally, pending SGIs for the current CPU are forwarded to the new interface. Because Linux does not use it, the SGI source information for the forwarded SGIs is not preserved. Neither is the source information for the SGIs sent by the current CPU to other CPUs adjusted to match the new CPU interface mapping. The required registers are banked so only the target CPU could do it. 
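To make the retargeting step concrete, below is a small stand-alone illustration (plain user-space C with invented sample values, not kernel code) of how one GIC_DIST_TARGET word, which packs the target CPU mask of four interrupts as one byte each, is re-pointed from the current CPU interface to the new one with a single rotate; this is what gic_migrate_target() in the diff below does with ror32().

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t ror32(uint32_t word, unsigned int shift)
{
	return shift ? (word >> shift) | (word << (32 - shift)) : word;
}

int main(void)
{
	unsigned int cur_cpu_id = 0, new_cpu_id = 2;
	uint32_t val = 0x02010401;	/* four interrupts, mixed targets */
	uint32_t cur_target_mask = 0x01010101u << cur_cpu_id;
	unsigned int ror_val = (cur_cpu_id - new_cpu_id) & 31;
	uint32_t active_mask = val & cur_target_mask;	/* IRQs aimed at cur CPU */

	if (active_mask) {
		val &= ~active_mask;			/* clear the old target bits */
		val |= ror32(active_mask, ror_val);	/* set the new target bits */
	}
	/* Prints 0x02040404: the two lanes that targeted CPU0 now target CPU2,
	 * the lane targeting CPU1 and the lane targeting CPU2 are unchanged. */
	printf("retargeted word: 0x%08" PRIx32 "\n", val);
	return 0;
}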
Signed-off-by: Nicolas Pitre --- drivers/irqchip/irq-gic.c | 87 +++++++++++++++++++++++++++++++-- include/linux/irqchip/arm-gic.h | 4 ++ 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index ee7c50312066..268874ac75e6 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; + raw_spin_lock(&irq_controller_lock); mask = 0xff << shift; bit = gic_cpu_map[cpu] << shift; - - raw_spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; writel_relaxed(val | bit, reg); raw_spin_unlock(&irq_controller_lock); @@ -646,7 +645,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic) void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { int cpu; - unsigned long map = 0; + unsigned long flags, map = 0; + + raw_spin_lock_irqsave(&irq_controller_lock, flags); /* Convert our logical CPU mask into a physical one. */ for_each_cpu(cpu, mask) @@ -660,6 +661,86 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* this always happens on GIC0 */ writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); + + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); +} +#endif + +#ifdef CONFIG_BL_SWITCHER +/* + * gic_migrate_target - migrate IRQs to another CPU interface + * + * @new_cpu_id: the CPU target ID to migrate IRQs to + * + * Migrate all peripheral interrupts with a target matching the current CPU + * to the interface corresponding to @new_cpu_id. The CPU interface mapping + * is also updated. Targets to other CPU interfaces are unchanged. + * This must be called with IRQs locally disabled. + */ +void gic_migrate_target(unsigned int new_cpu_id) +{ + unsigned int cur_cpu_id, gic_irqs, gic_nr = 0; + void __iomem *dist_base; + int i, ror_val, cpu = smp_processor_id(); + u32 val, cur_target_mask, active_mask; + + if (gic_nr >= MAX_GIC_NR) + BUG(); + + dist_base = gic_data_dist_base(&gic_data[gic_nr]); + if (!dist_base) + return; + gic_irqs = gic_data[gic_nr].gic_irqs; + + cur_cpu_id = __ffs(gic_cpu_map[cpu]); + cur_target_mask = 0x01010101 << cur_cpu_id; + ror_val = (cur_cpu_id - new_cpu_id) & 31; + + raw_spin_lock(&irq_controller_lock); + + /* Update the target interface for this logical CPU */ + gic_cpu_map[cpu] = 1 << new_cpu_id; + + /* + * Find all the peripheral interrupts targetting the current + * CPU interface and migrate them to the new CPU interface. + * We skip DIST_TARGET 0 to 7 as they are read-only. + */ + for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) { + val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4); + active_mask = val & cur_target_mask; + if (active_mask) { + val &= ~active_mask; + val |= ror32(active_mask, ror_val); + writel_relaxed(val, dist_base + GIC_DIST_TARGET + i*4); + } + } + + raw_spin_unlock(&irq_controller_lock); + + /* + * Now let's migrate and clear any potential SGIs that might be + * pending for us (cur_cpu_id). Since GIC_DIST_SGI_PENDING_SET + * is a banked register, we can only forward the SGI using + * GIC_DIST_SOFTINT. The original SGI source is lost but Linux + * doesn't use that information anyway. + * + * For the same reason we do not adjust SGI source information + * for previously sent SGIs by us to other CPUs either. 
+ */ + for (i = 0; i < 16; i += 4) { + int j; + val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i); + if (!val) + continue; + writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i); + for (j = i; j < i + 4; j++) { + if (val & 0xff) + writel_relaxed((1 << (new_cpu_id + 16)) | j, + dist_base + GIC_DIST_SOFTINT); + val >>= 8; + } + } } #endif diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 3e203eb23cc7..40bfcac95940 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -31,6 +31,8 @@ #define GIC_DIST_TARGET 0x800 #define GIC_DIST_CONFIG 0xc00 #define GIC_DIST_SOFTINT 0xf00 +#define GIC_DIST_SGI_PENDING_CLEAR 0xf10 +#define GIC_DIST_SGI_PENDING_SET 0xf20 #define GICH_HCR 0x0 #define GICH_VTR 0x4 @@ -73,6 +75,8 @@ static inline void gic_init(unsigned int nr, int start, gic_init_bases(nr, start, dist, cpu, 0, NULL); } +void gic_migrate_target(unsigned int new_cpu_id); + #endif /* __ASSEMBLY */ #endif -- GitLab From 1c33be57496d927ce05b2513ff0c108f69db4345 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 12 Apr 2012 02:56:10 -0400 Subject: [PATCH 00005/10742] ARM: b.L: core switcher code This is the core code implementing big.LITTLE switcher functionality. Rationale for this code is available here: http://lwn.net/Articles/481055/ The main entry point for a switch request is: void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) If the calling CPU is not the wanted one, this wrapper takes care of sending the request to the appropriate CPU with schedule_work_on(). At the moment the core switch operation is handled by bL_switch_to() which must be called on the CPU for which a switch is requested. What this code does: * Return early if the current cluster is the wanted one. * Close the gate in the kernel entry vector for both the inbound and outbound CPUs. * Wake up the inbound CPU so it can perform its reset sequence in parallel up to the kernel entry vector gate. * Migrate all interrupts in the GIC targeting the outbound CPU interface to the inbound CPU interface, including SGIs. This is performed by gic_migrate_target() in drivers/irqchip/irq-gic.c. * Call cpu_pm_enter() which takes care of flushing the VFP state to RAM and save the CPU interface config from the GIC to RAM. * Modify the cpu_logical_map to refer to the inbound physical CPU. * Call cpu_suspend() which saves the CPU state (general purpose registers, page table address) onto the stack and store the resulting stack pointer in an array indexed by the updated cpu_logical_map, then call the provided shutdown function. This happens in arch/arm/kernel/sleep.S. At this point, the provided shutdown function executed by the outbound CPU ungates the inbound CPU. Therefore the inbound CPU: * Picks up the saved stack pointer in the array indexed by its MPIDR in arch/arm/kernel/sleep.S. * The MMU and caches are re-enabled using the saved state on the provided stack, just like if this was a resume operation from a suspended state. * Then cpu_suspend() returns, although this is on the inbound CPU rather than the outbound CPU which called it initially. * The function cpu_pm_exit() is called which effect is to restore the CPU interface state in the GIC using the state previously saved by the outbound CPU. * Exit of bL_switch_to() to resume normal kernel execution on the new CPU. 
However, the outbound CPU is potentially still running in parallel while the inbound CPU is resuming normal kernel execution, hence we need per CPU stack isolation to execute bL_do_switch(). After the outbound CPU has ungated the inbound CPU, it calls mcpm_cpu_power_down() to: * Clean its L1 cache. * If it is the last CPU still alive in its cluster (last man standing), it also cleans its L2 cache and disables cache snooping from the other cluster. * Power down the CPU (or whole cluster). Code called from bL_do_switch() might end up referencing 'current' for some reasons. However, 'current' is derived from the stack pointer. With any arbitrary stack, the returned value for 'current' and any dereferenced values through it are just random garbage which may lead to segmentation faults. The active page table during the execution of bL_do_switch() is also a problem. There is no guarantee that the inbound CPU won't destroy the corresponding task which would free the attached page table while the outbound CPU is still running and relying on it. To solve both issues, we borrow some of the task space belonging to the init/idle task which, by its nature, is lightly used and therefore is unlikely to clash with our usage. The init task is also never going away. Right now the logical CPU number is assumed to be equivalent to the physical CPU number within each cluster. The kernel should also be booted with only one cluster active. These limitations will be lifted eventually. Signed-off-by: Nicolas Pitre --- arch/arm/Kconfig | 18 +++ arch/arm/common/Makefile | 1 + arch/arm/common/bL_switcher.c | 241 +++++++++++++++++++++++++++++ arch/arm/include/asm/bL_switcher.h | 17 ++ 4 files changed, 277 insertions(+) create mode 100644 arch/arm/common/bL_switcher.c create mode 100644 arch/arm/include/asm/bL_switcher.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 37c0f4e978d4..ade70017fd65 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1538,6 +1538,24 @@ config MCPM for (multi-)cluster based systems, such as big.LITTLE based systems. +config BIG_LITTLE + bool "big.LITTLE support (Experimental)" + depends on CPU_V7 && SMP + select MCPM + help + This option enables support selections for the big.LITTLE + system architecture. + +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + select CPU_PM + select ARM_CPU_SUSPEND + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. 
+ choice prompt "Memory split" default VMSPLIT_3G diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 8c60f473e976..2586240d5a45 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a obj-$(CONFIG_TI_PRIV_EDMA) += edma.o +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 000000000000..f8f2e96b1466 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,241 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here as the value _does_ change along some code paths. + */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (id)); + return id & MPIDR_HWID_BITMASK; +} + +/* + * bL switcher core code. + */ + +static void bL_do_switch(void *_unused) +{ + unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster; + + /* + * We now have a piece of stack borrowed from the init task's. + * Let's also switch to init_mm right away to match it. + */ + cpu_switch_mm(init_mm.pgd, &init_mm); + + pr_debug("%s\n", __func__); + + mpidr = read_mpidr(); + cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + ob_cluster = clusterid; + ib_cluster = clusterid ^ 1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just borrow + * a slice of the init/idle task which should be fairly lightly used. + * The borrowed area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line. 
+ */ +#define STACK_SIZE 256 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER; + void *stack = &init_thread_info + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += cpu_index * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. + */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu; + int ret; + + mpidr = read_mpidr(); + cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + ob_cluster = clusterid; + ib_cluster = clusterid ^ 1; + + if (new_cluster_id == clusterid) + return 0; + + pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(cpuid, ob_cluster, NULL); + mcpm_set_entry_vector(cpuid, ib_cluster, NULL); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(cpuid, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * From this point we are entering the switch critical zone + * and can't take any interrupts anymore. + */ + local_irq_disable(); + local_fiq_disable(); + + this_cpu = smp_processor_id(); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(cpuid + ib_cluster*4); + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in mcpm_power_down(). + */ + arch_send_wakeup_ipi_mask(cpumask_of(this_cpu)); + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* Flip the cluster in the CPU logical map for this CPU. */ + cpu_logical_map(this_cpu) ^= (1 << 8); + + /* Let's do the actual CPU switch. */ + ret = cpu_suspend(0, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid); + BUG_ON(clusterid != ib_cluster); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + local_fiq_enable(); + local_irq_enable(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct switch_args { + unsigned int cluster; + struct work_struct work; +}; + +static void __bL_switch_to(struct work_struct *work) +{ + struct switch_args *args = container_of(work, struct switch_args, work); + bL_switch_to(args->cluster); +} + +/* + * bL_switch_request - Switch to a specific cluster for the given CPU + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function causes a cluster switch on the given CPU. 
If the given + * CPU is the same as the calling CPU then the switch happens right away. + * Otherwise the request is put on a work queue to be scheduled on the + * remote CPU. + */ +void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) +{ + unsigned int this_cpu = get_cpu(); + struct switch_args args; + + if (cpu == this_cpu) { + bL_switch_to(new_cluster_id); + put_cpu(); + return; + } + put_cpu(); + + args.cluster = new_cluster_id; + INIT_WORK_ONSTACK(&args.work, __bL_switch_to); + schedule_work_on(cpu, &args.work); + flush_work(&args.work); +} +EXPORT_SYMBOL_GPL(bL_switch_request); diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h new file mode 100644 index 000000000000..72efe3f349b9 --- /dev/null +++ b/arch/arm/include/asm/bL_switcher.h @@ -0,0 +1,17 @@ +/* + * arch/arm/include/asm/bL_switcher.h + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_BL_SWITCHER_H +#define ASM_BL_SWITCHER_H + +void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id); + +#endif -- GitLab From 3f09d4799ecc076cccc11ab2333a36ec849d24f5 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 16 May 2012 15:55:54 +0100 Subject: [PATCH 00006/10742] ARM: bL_switcher: add clockevent save/restore support Per-CPU timers that are shutdown when a CPU is switched over must be disabled upon switching and reprogrammed on the inbound CPU by relying on the clock events management API. save/restore sequence is executed with irqs disabled as mandated by the clock events API. The next_event is an absolute time, hence, when the inbound CPU resumes, if the timer has expired the min delta is forced into the tick device to fire after few cycles. This patch adds switching support for clock events that are per-CPU and have to be migrated when a switch takes place; the cpumask of the clock event device is checked against the cpumask of the current cpu, and if they match, the clockevent device mode is saved and it is put in shutdown mode. Resume code reprogrammes the tick device accordingly. Tested on A15/A7 fast models and architected timers. 
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index f8f2e96b1466..ca04b5384bb0 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -15,7 +15,11 @@ #include #include #include +#include #include +#include +#include +#include #include #include #include @@ -121,6 +125,8 @@ static int bL_switchpoint(unsigned long _arg) static int bL_switch_to(unsigned int new_cluster_id) { unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; int ret; mpidr = read_mpidr(); @@ -166,6 +172,14 @@ static int bL_switch_to(unsigned int new_cluster_id) */ arch_send_wakeup_ipi_mask(cpumask_of(this_cpu)); + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + ret = cpu_pm_enter(); /* we can not tolerate errors at this point */ @@ -191,6 +205,12 @@ static int bL_switch_to(unsigned int new_cluster_id) ret = cpu_pm_exit(); + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + local_fiq_enable(); local_irq_enable(); -- GitLab From 71ce1deeff8f9341ae3b21983e9bdde28e8c96fe Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 26 Oct 2012 02:36:17 -0400 Subject: [PATCH 00007/10742] ARM: bL_switcher: move to dedicated threads rather than workqueues The workqueues are problematic as they may be contended. They can't be scheduled with top priority either. Also the optimization in bL_switch_request() to skip the workqueue entirely when the target CPU and the calling CPU were the same didn't allow for bL_switch_request() to be called from atomic context, as might be the case for some cpufreq drivers. Let's move to dedicated kthreads instead. 
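As a rough user-space analogue of the model this patch moves to (pthreads and invented names; the real code below uses kthread_create_on_node()/kthread_bind(), a wait queue and xchg()): one dedicated worker per CPU sleeps until a switch request is posted, consumes the request, performs the switch, and goes back to sleep.

#include <pthread.h>
#include <stdio.h>

struct worker {
	pthread_t thread;
	pthread_mutex_t lock;
	pthread_cond_t wake;
	int wanted_cluster;	/* -1 means "no request pending" */
	int stop;
};

static void *worker_fn(void *arg)
{
	struct worker *w = arg;

	pthread_mutex_lock(&w->lock);
	while (!w->stop) {
		while (w->wanted_cluster == -1 && !w->stop)
			pthread_cond_wait(&w->wake, &w->lock);
		if (w->wanted_cluster != -1) {
			int cluster = w->wanted_cluster;	/* consume request */
			w->wanted_cluster = -1;
			printf("switching to cluster %d\n", cluster);
		}
	}
	pthread_mutex_unlock(&w->lock);
	return NULL;
}

static void post_request(struct worker *w, int cluster)
{
	pthread_mutex_lock(&w->lock);
	w->wanted_cluster = cluster;
	pthread_cond_signal(&w->wake);
	pthread_mutex_unlock(&w->lock);
}

int main(void)
{
	struct worker w = { .wanted_cluster = -1 };

	pthread_mutex_init(&w.lock, NULL);
	pthread_cond_init(&w.wake, NULL);
	pthread_create(&w.thread, NULL, worker_fn, &w);

	post_request(&w, 1);		/* ask the worker to "switch" */

	pthread_mutex_lock(&w.lock);
	w.stop = 1;
	pthread_cond_signal(&w.wake);
	pthread_mutex_unlock(&w.lock);
	pthread_join(w.thread, NULL);
	return 0;
}

Unlike the workqueue version, the request never runs in another work item's context, and the dedicated thread can be given real-time priority, which is what the kernel code does with sched_setscheduler_nocheck(SCHED_FIFO).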
Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 101 ++++++++++++++++++++++------- arch/arm/include/asm/bL_switcher.h | 2 +- 2 files changed, 79 insertions(+), 24 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index ca04b5384bb0..407c4cc64c0b 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -15,8 +15,10 @@ #include #include #include +#include #include -#include +#include +#include #include #include #include @@ -219,15 +221,48 @@ static int bL_switch_to(unsigned int new_cluster_id) return ret; } -struct switch_args { - unsigned int cluster; - struct work_struct work; +struct bL_thread { + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; }; -static void __bL_switch_to(struct work_struct *work) +static struct bL_thread bL_threads[NR_CPUS]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + cluster = xchg(&t->wanted_cluster, -1); + if (cluster != -1) + bL_switch_to(cluster); + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct * __init bL_switcher_thread_create(int cpu, void *arg) { - struct switch_args *args = container_of(work, struct switch_args, work); - bL_switch_to(args->cluster); + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; } /* @@ -236,26 +271,46 @@ static void __bL_switch_to(struct work_struct *work) * @cpu: the CPU to switch * @new_cluster_id: the ID of the cluster to switch to. * - * This function causes a cluster switch on the given CPU. If the given - * CPU is the same as the calling CPU then the switch happens right away. - * Otherwise the request is put on a work queue to be scheduled on the - * remote CPU. + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. 
*/ -void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) { - unsigned int this_cpu = get_cpu(); - struct switch_args args; + struct bL_thread *t; - if (cpu == this_cpu) { - bL_switch_to(new_cluster_id); - put_cpu(); - return; + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; } - put_cpu(); - args.cluster = new_cluster_id; - INIT_WORK_ONSTACK(&args.work, __bL_switch_to); - schedule_work_on(cpu, &args.work); - flush_work(&args.work); + t = &bL_threads[cpu]; + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; + + t->wanted_cluster = new_cluster_id; + wake_up(&t->wq); + return 0; } EXPORT_SYMBOL_GPL(bL_switch_request); + +static int __init bL_switcher_init(void) +{ + int cpu; + + pr_info("big.LITTLE switcher initializing\n"); + + for_each_online_cpu(cpu) { + struct bL_thread *t = &bL_threads[cpu]; + init_waitqueue_head(&t->wq); + t->wanted_cluster = -1; + t->task = bL_switcher_thread_create(cpu, t); + } + + pr_info("big.LITTLE switcher initialized\n"); + return 0; +} + +late_initcall(bL_switcher_init); diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h index 72efe3f349b9..e0c0bba70bbf 100644 --- a/arch/arm/include/asm/bL_switcher.h +++ b/arch/arm/include/asm/bL_switcher.h @@ -12,6 +12,6 @@ #ifndef ASM_BL_SWITCHER_H #define ASM_BL_SWITCHER_H -void bL_switch_request(unsigned int cpu, unsigned int new_cluster_id); +int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id); #endif -- GitLab From c052de2693498bee6ff2e1b5bcd32309c4f8780b Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 27 Nov 2012 15:55:33 -0500 Subject: [PATCH 00008/10742] ARM: bL_switcher: simplify stack isolation We now have a dedicated thread for each logical CPU. That's plenty of stack space for our needs. Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 407c4cc64c0b..4caca71c906f 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -53,12 +53,6 @@ static void bL_do_switch(void *_unused) { unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster; - /* - * We now have a piece of stack borrowed from the init task's. - * Let's also switch to init_mm right away to match it. - */ - cpu_switch_mm(init_mm.pgd, &init_mm); - pr_debug("%s\n", __func__); mpidr = read_mpidr(); @@ -93,22 +87,21 @@ static void bL_do_switch(void *_unused) } /* - * Stack isolation. To ensure 'current' remains valid, we just borrow - * a slice of the init/idle task which should be fairly lightly used. - * The borrowed area starts just above the thread_info structure located - * at the very bottom of the stack, aligned to a cache line. + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. 
*/ -#define STACK_SIZE 256 +#define STACK_SIZE 512 extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); static int bL_switchpoint(unsigned long _arg) { unsigned int mpidr = read_mpidr(); - unsigned int cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); - unsigned int cpu_index = cpuid + clusterid * MAX_CPUS_PER_CLUSTER; - void *stack = &init_thread_info + 1; + void *stack = current_thread_info() + 1; stack = PTR_ALIGN(stack, L1_CACHE_BYTES); - stack += cpu_index * STACK_SIZE + STACK_SIZE; + stack += clusterid * STACK_SIZE + STACK_SIZE; call_with_stack(bL_do_switch, (void *)_arg, stack); BUG(); } -- GitLab From 9797a0e95ead7bfe52260c369ee9fe6ba445afaf Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 21 Nov 2012 11:53:27 -0500 Subject: [PATCH 00009/10742] ARM: bL_switcher: hot-unplug half of the available CPUs In a regular kernel configuration, all the CPUs are initially available. But the switcher execution model uses half of them at any time. Instead of hacking the DTB to remove half of the CPUs, let's remove them at run time and make sure we still have a working switcher configuration. This way, the same DTB can be used whether or not the switcher is used. Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 78 ++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 4caca71c906f..50e95d894e35 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -289,18 +289,94 @@ int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) } EXPORT_SYMBOL_GPL(bL_switch_request); +/* + * Activation and configuration code. + */ + +static cpumask_t bL_switcher_removed_logical_cpus; + +static void __init bL_switcher_restore_cpus(void) +{ + int i; + + for_each_cpu(i, &bL_switcher_removed_logical_cpus) + cpu_up(i); +} + +static int __init bL_switcher_halve_cpus(void) +{ + int cpu, cluster, i, ret; + cpumask_t cluster_mask[2], common_mask; + + cpumask_clear(&bL_switcher_removed_logical_cpus); + cpumask_clear(&cluster_mask[0]); + cpumask_clear(&cluster_mask[1]); + + for_each_online_cpu(i) { + cpu = cpu_logical_map(i) & 0xff; + cluster = (cpu_logical_map(i) >> 8) & 0xff; + if (cluster >= 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + cpumask_set_cpu(cpu, &cluster_mask[cluster]); + } + + if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) { + pr_err("%s: no common set of CPUs\n", __func__); + return -EINVAL; + } + + for_each_online_cpu(i) { + cpu = cpu_logical_map(i) & 0xff; + cluster = (cpu_logical_map(i) >> 8) & 0xff; + + if (cpumask_test_cpu(cpu, &common_mask)) { + /* + * We keep only those logical CPUs which number + * is equal to their physical CPU number. This is + * not perfect but good enough for now. 
+ */ + if (cpu == i) + continue; + } + + ret = cpu_down(i); + if (ret) { + bL_switcher_restore_cpus(); + return ret; + } + cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus); + } + + return 0; +} + static int __init bL_switcher_init(void) { - int cpu; + int cpu, ret; pr_info("big.LITTLE switcher initializing\n"); + if (MAX_NR_CLUSTERS != 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + + cpu_hotplug_driver_lock(); + ret = bL_switcher_halve_cpus(); + if (ret) { + cpu_hotplug_driver_unlock(); + return ret; + } + for_each_online_cpu(cpu) { struct bL_thread *t = &bL_threads[cpu]; init_waitqueue_head(&t->wq); t->wanted_cluster = -1; t->task = bL_switcher_thread_create(cpu, t); } + cpu_hotplug_driver_unlock(); pr_info("big.LITTLE switcher initialized\n"); return 0; -- GitLab From ed96762e3241f57aa812977cf1920d3ee0363f4d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 5 Jul 2012 21:33:26 -0400 Subject: [PATCH 00010/10742] ARM: bL_switcher: do not hardcode GIC IDs in the code Currently, GIC IDs are hardcoded making the code dependent on the 4+4 b.L configuration. Let's allow for GIC IDs to be discovered upon switcher initialization to support other b.L configurations such as the 1+1 one, or 2+3 as on the VExpress TC2. Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 14 +++++++++++++- drivers/irqchip/irq-gic.c | 21 +++++++++++++++++++++ include/linux/irqchip/arm-gic.h | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 50e95d894e35..1c2e5bcfb1f7 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -110,6 +110,8 @@ static int bL_switchpoint(unsigned long _arg) * Generic switcher interface */ +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; + /* * bL_switch_to - Switch to a specific cluster for the current CPU * @new_cluster_id: the ID of the cluster to switch to. @@ -159,7 +161,7 @@ static int bL_switch_to(unsigned int new_cluster_id) this_cpu = smp_processor_id(); /* redirect GIC's SGIs to our counterpart */ - gic_migrate_target(cpuid + ib_cluster*4); + gic_migrate_target(bL_gic_id[cpuid][ib_cluster]); /* * Raise a SGI on the inbound CPU to make sure it doesn't stall @@ -332,6 +334,16 @@ static int __init bL_switcher_halve_cpus(void) cluster = (cpu_logical_map(i) >> 8) & 0xff; if (cpumask_test_cpu(cpu, &common_mask)) { + /* Let's take note of the GIC ID for this CPU */ + int gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + /* * We keep only those logical CPUs which number * is equal to their physical CPU number. This is diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 268874ac75e6..3862cb54c714 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -667,6 +667,27 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) #endif #ifdef CONFIG_BL_SWITCHER +/* + * gic_get_cpu_id - get the CPU interface ID for the specified CPU + * + * @cpu: the logical CPU number to get the GIC ID for. + * + * Return the CPU interface ID for the given logical CPU number, + * or -1 if the CPU number is too large or the interface ID is + * unknown (more than one bit set). 
+ */ +int gic_get_cpu_id(unsigned int cpu) +{ + unsigned int cpu_bit; + + if (cpu >= NR_GIC_CPU_IF) + return -1; + cpu_bit = gic_cpu_map[cpu]; + if (cpu_bit & (cpu_bit - 1)) + return -1; + return __ffs(cpu_bit); +} + /* * gic_migrate_target - migrate IRQs to another CPU interface * diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 40bfcac95940..2d7d47e8dfaf 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -75,6 +75,7 @@ static inline void gic_init(unsigned int nr, int start, gic_init_bases(nr, start, dist, cpu, 0, NULL); } +int gic_get_cpu_id(unsigned int cpu); void gic_migrate_target(unsigned int new_cpu_id); #endif /* __ASSEMBLY */ -- GitLab From 6b7437aed1568076cefa4d42747b1515dcb848db Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 22 Nov 2012 00:05:07 -0500 Subject: [PATCH 00011/10742] ARM: bL_switcher: ability to enable and disable the switcher via sysfs The /sys/kernel/bL_switcher/enable file allows to enable or disable the switcher by writing 1 or 0 to it respectively. It is still enabled by default on boot. Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 171 +++++++++++++++++++++++++++++++--- 1 file changed, 160 insertions(+), 11 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 1c2e5bcfb1f7..395f60f6292b 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -220,6 +221,7 @@ struct bL_thread { struct task_struct *task; wait_queue_head_t wq; int wanted_cluster; + struct completion started; }; static struct bL_thread bL_threads[NR_CPUS]; @@ -231,6 +233,7 @@ static int bL_switcher_thread(void *arg) int cluster; sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); do { if (signal_pending(current)) @@ -246,7 +249,7 @@ static int bL_switcher_thread(void *arg) return 0; } -static struct task_struct * __init bL_switcher_thread_create(int cpu, void *arg) +static struct task_struct *bL_switcher_thread_create(int cpu, void *arg) { struct task_struct *task; @@ -295,9 +298,11 @@ EXPORT_SYMBOL_GPL(bL_switch_request); * Activation and configuration code. */ +static unsigned int bL_switcher_active; +static unsigned int bL_switcher_cpu_original_cluster[MAX_CPUS_PER_CLUSTER]; static cpumask_t bL_switcher_removed_logical_cpus; -static void __init bL_switcher_restore_cpus(void) +static void bL_switcher_restore_cpus(void) { int i; @@ -305,7 +310,7 @@ static void __init bL_switcher_restore_cpus(void) cpu_up(i); } -static int __init bL_switcher_halve_cpus(void) +static int bL_switcher_halve_cpus(void) { int cpu, cluster, i, ret; cpumask_t cluster_mask[2], common_mask; @@ -349,8 +354,10 @@ static int __init bL_switcher_halve_cpus(void) * is equal to their physical CPU number. This is * not perfect but good enough for now. 
*/ - if (cpu == i) + if (cpu == i) { + bL_switcher_cpu_original_cluster[cpu] = cluster; continue; + } } ret = cpu_down(i); @@ -364,18 +371,18 @@ static int __init bL_switcher_halve_cpus(void) return 0; } -static int __init bL_switcher_init(void) +static int bL_switcher_enable(void) { int cpu, ret; - pr_info("big.LITTLE switcher initializing\n"); - - if (MAX_NR_CLUSTERS != 2) { - pr_err("%s: only dual cluster systems are supported\n", __func__); - return -EINVAL; + cpu_hotplug_driver_lock(); + if (bL_switcher_active) { + cpu_hotplug_driver_unlock(); + return 0; } - cpu_hotplug_driver_lock(); + pr_info("big.LITTLE switcher initializing\n"); + ret = bL_switcher_halve_cpus(); if (ret) { cpu_hotplug_driver_unlock(); @@ -385,13 +392,155 @@ static int __init bL_switcher_init(void) for_each_online_cpu(cpu) { struct bL_thread *t = &bL_threads[cpu]; init_waitqueue_head(&t->wq); + init_completion(&t->started); t->wanted_cluster = -1; t->task = bL_switcher_thread_create(cpu, t); } + + bL_switcher_active = 1; cpu_hotplug_driver_unlock(); pr_info("big.LITTLE switcher initialized\n"); return 0; } +#ifdef CONFIG_SYSFS + +static void bL_switcher_disable(void) +{ + unsigned int cpu, cluster, i; + struct bL_thread *t; + struct task_struct *task; + + cpu_hotplug_driver_lock(); + if (!bL_switcher_active) { + cpu_hotplug_driver_unlock(); + return; + } + bL_switcher_active = 0; + + /* + * To deactivate the switcher, we must shut down the switcher + * threads to prevent any other requests from being accepted. + * Then, if the final cluster for given logical CPU is not the + * same as the original one, we'll recreate a switcher thread + * just for the purpose of switching the CPU back without any + * possibility for interference from external requests. + */ + for_each_online_cpu(cpu) { + BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff)); + t = &bL_threads[cpu]; + task = t->task; + t->task = NULL; + if (!task || IS_ERR(task)) + continue; + kthread_stop(task); + /* no more switch may happen on this CPU at this point */ + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + init_completion(&t->started); + t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu]; + task = bL_switcher_thread_create(cpu, t); + if (!IS_ERR(task)) { + wait_for_completion(&t->started); + kthread_stop(task); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + } + /* If execution gets here, we're in trouble. */ + pr_crit("%s: unable to restore original cluster for CPU %d\n", + __func__, cpu); + for_each_cpu(i, &bL_switcher_removed_logical_cpus) { + if ((cpu_logical_map(i) & 0xff) != cpu) + continue; + pr_crit("%s: CPU %d can't be restored\n", + __func__, i); + cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus); + break; + } + } + + bL_switcher_restore_cpus(); + cpu_hotplug_driver_unlock(); +} + +static ssize_t bL_switcher_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", bL_switcher_active); +} + +static ssize_t bL_switcher_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + + switch (buf[0]) { + case '0': + bL_switcher_disable(); + ret = 0; + break; + case '1': + ret = bL_switcher_enable(); + break; + default: + ret = -EINVAL; + } + + return (ret >= 0) ? 
count : ret; +} + +static struct kobj_attribute bL_switcher_active_attr = + __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store); + +static struct attribute *bL_switcher_attrs[] = { + &bL_switcher_active_attr.attr, + NULL, +}; + +static struct attribute_group bL_switcher_attr_group = { + .attrs = bL_switcher_attrs, +}; + +static struct kobject *bL_switcher_kobj; + +static int __init bL_switcher_sysfs_init(void) +{ + int ret; + + bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj); + if (!bL_switcher_kobj) + return -ENOMEM; + ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group); + if (ret) + kobject_put(bL_switcher_kobj); + return ret; +} + +#endif /* CONFIG_SYSFS */ + +static int __init bL_switcher_init(void) +{ + int ret; + + if (MAX_NR_CLUSTERS != 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + + ret = bL_switcher_enable(); + if (ret) + return ret; + +#ifdef CONFIG_SYSFS + ret = bL_switcher_sysfs_init(); + if (ret) + pr_err("%s: unable to create sysfs entry\n", __func__); +#endif + + return 0; +} + late_initcall(bL_switcher_init); -- GitLab From c4821c0575a3b1bf26f100230dc2938297d7043b Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 22 Nov 2012 13:33:35 -0500 Subject: [PATCH 00012/10742] ARM: bL_switcher: add kernel cmdline param to disable the switcher on boot By adding no_bL_switcher to the kernel cmdline string, the switcher won't be activated automatically at boot time. It is still possible to activate it later with: echo 1 > /sys/kernel/bL_switcher/active Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 395f60f6292b..cec825ef392b 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -521,6 +522,9 @@ static int __init bL_switcher_sysfs_init(void) #endif /* CONFIG_SYSFS */ +static bool no_bL_switcher; +core_param(no_bL_switcher, no_bL_switcher, bool, 0644); + static int __init bL_switcher_init(void) { int ret; @@ -530,9 +534,11 @@ static int __init bL_switcher_init(void) return -EINVAL; } - ret = bL_switcher_enable(); - if (ret) - return ret; + if (!no_bL_switcher) { + ret = bL_switcher_enable(); + if (ret) + return ret; + } #ifdef CONFIG_SYSFS ret = bL_switcher_sysfs_init(); -- GitLab From 87d8b9eb7eb6669aad6435a51e9862362141ba76 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:14 -0700 Subject: [PATCH 00013/10742] clocksource: Extract max nsec calculation into separate function We need to calculate the same number in the clocksource code and the sched_clock code, so extract this code into its own function. We also drop the min_t and just use min() because the two types are the same. 
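A stand-alone recomputation (user-space C; the mult/shift/maxadj numbers are purely illustrative) of the logic being factored out into clocks_calc_max_nsecs(): cap the cycle count so that cycles * (mult + maxadj) cannot overflow a signed 64-bit result, clamp it to the counter mask, then convert with the reduced multiplier (mult - maxadj).

#include <stdint.h>
#include <stdio.h>

static uint64_t cyc2ns(uint64_t cyc, uint32_t mult, uint32_t shift)
{
	return (cyc * mult) >> shift;
}

static int ilog2_u32(uint32_t v)
{
	int l = -1;

	while (v) {
		v >>= 1;
		l++;
	}
	return l;
}

static uint64_t calc_max_nsecs(uint32_t mult, uint32_t shift,
			       uint32_t maxadj, uint64_t mask)
{
	/* +1 on the log2 absorbs rounding, exactly as in the kernel code. */
	uint64_t max_cycles = 1ULL << (63 - (ilog2_u32(mult + maxadj) + 1));

	if (max_cycles > mask)
		max_cycles = mask;
	return cyc2ns(max_cycles, mult - maxadj, shift);
}

int main(void)
{
	/* Example values only: mult/2^shift of ~52 ns per cycle (a ~19.2 MHz
	 * counter) and a maxadj of roughly 11% of mult. */
	uint32_t mult = 873813333, shift = 24, maxadj = mult / 9;
	uint64_t mask = 0xffffffffffffffffULL;

	printf("max deferment ~ %llu ns\n",
	       (unsigned long long)calc_max_nsecs(mult, shift, maxadj, mask));
	return 0;
}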
Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- include/linux/clocksource.h | 2 ++ kernel/time/clocksource.c | 45 ++++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index dbbf8aa7731b..67301a405712 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -292,6 +292,8 @@ extern void clocksource_resume(void); extern struct clocksource * __init __weak clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); +extern u64 +clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask); extern void clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 50a8736757f3..637a14af6c21 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -537,40 +537,55 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) } /** - * clocksource_max_deferment - Returns max time the clocksource can be deferred - * @cs: Pointer to clocksource - * + * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted + * @mult: cycle to nanosecond multiplier + * @shift: cycle to nanosecond divisor (power of two) + * @maxadj: maximum adjustment value to mult (~11%) + * @mask: bitmask for two's complement subtraction of non 64 bit counters */ -static u64 clocksource_max_deferment(struct clocksource *cs) +u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) { u64 max_nsecs, max_cycles; /* * Calculate the maximum number of cycles that we can pass to the * cyc2ns function without overflowing a 64-bit signed result. The - * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj) + * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj) * which is equivalent to the below. - * max_cycles < (2^63)/(cs->mult + cs->maxadj) - * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj))) - * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj)) - * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj)) - * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj)) + * max_cycles < (2^63)/(mult + maxadj) + * max_cycles < 2^(log2((2^63)/(mult + maxadj))) + * max_cycles < 2^(log2(2^63) - log2(mult + maxadj)) + * max_cycles < 2^(63 - log2(mult + maxadj)) + * max_cycles < 1 << (63 - log2(mult + maxadj)) * Please note that we add 1 to the result of the log2 to account for * any rounding errors, ensure the above inequality is satisfied and * no overflow will occur. */ - max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1)); + max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1)); /* * The actual maximum number of cycles we can defer the clocksource is - * determined by the minimum of max_cycles and cs->mask. + * determined by the minimum of max_cycles and mask. * Note: Here we subtract the maxadj to make sure we don't sleep for * too long if there's a large negative adjustment. 
*/ - max_cycles = min_t(u64, max_cycles, (u64) cs->mask); - max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj, - cs->shift); + max_cycles = min(max_cycles, mask); + max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); + + return max_nsecs; +} + +/** + * clocksource_max_deferment - Returns max time the clocksource can be deferred + * @cs: Pointer to clocksource + * + */ +static u64 clocksource_max_deferment(struct clocksource *cs) +{ + u64 max_nsecs; + max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, + cs->mask); /* * To ensure that the clocksource does not wrap whilst we are idle, * limit the time the clocksource can be deferred by 12.5%. Please -- GitLab From 85c3d2dd15be4d577a37ffb8bbbd019fc8e3280a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:15 -0700 Subject: [PATCH 00014/10742] sched_clock: Use seqcount instead of rolling our own We're going to increase the cyc value to 64 bits in the near future. Doing that is going to break the custom seqcount implementation in the sched_clock code because 64 bit numbers aren't guaranteed to be atomic. Replace the cyc_copy with a seqcount to avoid this problem. Cc: Russell King Acked-by: Will Deacon Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- kernel/time/sched_clock.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a326f27d7f09..396f7b9dccc9 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -14,11 +14,12 @@ #include #include #include +#include struct clock_data { u64 epoch_ns; u32 epoch_cyc; - u32 epoch_cyc_copy; + seqcount_t seq; unsigned long rate; u32 mult; u32 shift; @@ -54,23 +55,16 @@ static unsigned long long notrace sched_clock_32(void) u64 epoch_ns; u32 epoch_cyc; u32 cyc; + unsigned long seq; if (cd.suspended) return cd.epoch_ns; - /* - * Load the epoch_cyc and epoch_ns atomically. We do this by - * ensuring that we always write epoch_cyc, epoch_ns and - * epoch_cyc_copy in strict order, and read them in strict order. - * If epoch_cyc and epoch_cyc_copy are not equal, then we're in - * the middle of an update, and we should repeat the load. - */ do { + seq = read_seqcount_begin(&cd.seq); epoch_cyc = cd.epoch_cyc; - smp_rmb(); epoch_ns = cd.epoch_ns; - smp_rmb(); - } while (epoch_cyc != cd.epoch_cyc_copy); + } while (read_seqcount_retry(&cd.seq, seq)); cyc = read_sched_clock(); cyc = (cyc - epoch_cyc) & sched_clock_mask; @@ -90,16 +84,12 @@ static void notrace update_sched_clock(void) ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, cd.mult, cd.shift); - /* - * Write epoch_cyc and epoch_ns in a way that the update is - * detectable in cyc_to_fixed_sched_clock(). - */ + raw_local_irq_save(flags); - cd.epoch_cyc_copy = cyc; - smp_wmb(); + write_seqcount_begin(&cd.seq); cd.epoch_ns = ns; - smp_wmb(); cd.epoch_cyc = cyc; + write_seqcount_end(&cd.seq); raw_local_irq_restore(flags); } @@ -195,7 +185,6 @@ static int sched_clock_suspend(void) static void sched_clock_resume(void) { cd.epoch_cyc = read_sched_clock(); - cd.epoch_cyc_copy = cd.epoch_cyc; cd.suspended = false; } -- GitLab From a08ca5d1089da03724f96fa0870c64968e66765b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:16 -0700 Subject: [PATCH 00015/10742] sched_clock: Use an hrtimer instead of timer In the next patch we're going to increase the number of bits that the generic sched_clock can handle to be greater than 32. 
With more than 32 bits the wraparound time can be larger than what can fit into the units that msecs_to_jiffies takes (unsigned int). Luckily, the wraparound is initially calculated in nanoseconds which we can easily use with hrtimers, so switch to using an hrtimer. Cc: Russell King Signed-off-by: Stephen Boyd [jstultz: Fixup hrtimer intitialization order issue] Signed-off-by: John Stultz --- kernel/time/sched_clock.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 396f7b9dccc9..c018ffc59937 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -8,15 +8,17 @@ #include #include #include +#include #include #include #include #include -#include +#include #include #include struct clock_data { + ktime_t wrap_kt; u64 epoch_ns; u32 epoch_cyc; seqcount_t seq; @@ -26,8 +28,7 @@ struct clock_data { bool suspended; }; -static void sched_clock_poll(unsigned long wrap_ticks); -static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); +static struct hrtimer sched_clock_timer; static int irqtime = -1; core_param(irqtime, irqtime, int, 0400); @@ -93,15 +94,16 @@ static void notrace update_sched_clock(void) raw_local_irq_restore(flags); } -static void sched_clock_poll(unsigned long wrap_ticks) +static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) { - mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); update_sched_clock(); + hrtimer_forward_now(hrt, cd.wrap_kt); + return HRTIMER_RESTART; } void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) { - unsigned long r, w; + unsigned long r; u64 res, wrap; char r_unit; @@ -129,19 +131,13 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) /* calculate how many ns until we wrap */ wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); - do_div(wrap, NSEC_PER_MSEC); - w = wrap; + cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); /* calculate the ns resolution of this counter */ res = cyc_to_ns(1ULL, cd.mult, cd.shift); - pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", - bits, r, r_unit, res, w); + pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", + bits, r, r_unit, res, wrap); - /* - * Start the timer to keep sched_clock() properly updated and - * sets the initial epoch. - */ - sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); update_sched_clock(); /* @@ -172,12 +168,20 @@ void __init sched_clock_postinit(void) if (read_sched_clock == jiffy_sched_clock_read) setup_sched_clock(jiffy_sched_clock_read, 32, HZ); - sched_clock_poll(sched_clock_timer.data); + update_sched_clock(); + + /* + * Start the timer to keep sched_clock() properly updated and + * sets the initial epoch. + */ + hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + sched_clock_timer.function = sched_clock_poll; + hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); } static int sched_clock_suspend(void) { - sched_clock_poll(sched_clock_timer.data); + sched_clock_poll(&sched_clock_timer); cd.suspended = true; return 0; } -- GitLab From e7e3ff1bfe9c42ee31172e9afdc0383a9e595e29 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:17 -0700 Subject: [PATCH 00016/10742] sched_clock: Add support for >32 bit sched_clock The ARM architected system counter has at least 56 usable bits. 
Add support for counters with more than 32 bits to the generic sched_clock implementation so we can increase the time between wakeups due to dealing with wrap-around on these devices while benefiting from the irqtime accounting and suspend/resume handling that the generic sched_clock code already has. On my system using 56 bits over 32 bits changes the wraparound time from a few minutes to an hour. For faster running counters (GHz range) this is even more important because we may not be able to execute the timer in time to deal with the wraparound if only 32 bits are used. We choose a maxsec value of 3600 seconds because we assume no system will go idle for more than an hour. In the future we may need to increase this value. Note: All users should switch over to the 64-bit read function so we can remove setup_sched_clock() in favor of sched_clock_register(). Cc: Russell King Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- include/linux/sched_clock.h | 2 ++ kernel/time/sched_clock.c | 46 ++++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index fa7922c80a41..eca7abeb86fc 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -15,6 +15,8 @@ static inline void sched_clock_postinit(void) { } #endif extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); +extern void sched_clock_register(u64 (*read)(void), int bits, + unsigned long rate); extern unsigned long long (*sched_clock_func)(void); diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index c018ffc59937..f388baeaf2b6 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -16,11 +16,12 @@ #include #include #include +#include struct clock_data { ktime_t wrap_kt; u64 epoch_ns; - u32 epoch_cyc; + u64 epoch_cyc; seqcount_t seq; unsigned long rate; u32 mult; @@ -37,14 +38,25 @@ static struct clock_data cd = { .mult = NSEC_PER_SEC / HZ, }; -static u32 __read_mostly sched_clock_mask = 0xffffffff; +static u64 __read_mostly sched_clock_mask; -static u32 notrace jiffy_sched_clock_read(void) +static u64 notrace jiffy_sched_clock_read(void) { - return (u32)(jiffies - INITIAL_JIFFIES); + /* + * We don't need to use get_jiffies_64 on 32-bit arches here + * because we register with BITS_PER_LONG + */ + return (u64)(jiffies - INITIAL_JIFFIES); +} + +static u32 __read_mostly (*read_sched_clock_32)(void); + +static u64 notrace read_sched_clock_32_wrapper(void) +{ + return read_sched_clock_32(); } -static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; +static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { @@ -54,8 +66,8 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) static unsigned long long notrace sched_clock_32(void) { u64 epoch_ns; - u32 epoch_cyc; - u32 cyc; + u64 epoch_cyc; + u64 cyc; unsigned long seq; if (cd.suspended) @@ -78,7 +90,7 @@ static unsigned long long notrace sched_clock_32(void) static void notrace update_sched_clock(void) { unsigned long flags; - u32 cyc; + u64 cyc; u64 ns; cyc = read_sched_clock(); @@ -101,7 +113,8 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) return HRTIMER_RESTART; } -void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) +void __init sched_clock_register(u64 (*read)(void), int bits, + unsigned long rate) { unsigned long r; u64 res, wrap; @@ 
-110,14 +123,13 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) if (cd.rate > rate) return; - BUG_ON(bits > 32); WARN_ON(!irqs_disabled()); read_sched_clock = read; - sched_clock_mask = (1 << bits) - 1; + sched_clock_mask = CLOCKSOURCE_MASK(bits); cd.rate = rate; /* calculate the mult/shift to convert counter ticks to ns. */ - clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0); + clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 3600); r = rate; if (r >= 4000000) { @@ -130,7 +142,7 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) r_unit = ' '; /* calculate how many ns until we wrap */ - wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); + wrap = clocks_calc_max_nsecs(cd.mult, cd.shift, 0, sched_clock_mask); cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); /* calculate the ns resolution of this counter */ @@ -152,6 +164,12 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pF as sched_clock source\n", read); } +void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) +{ + read_sched_clock_32 = read; + sched_clock_register(read_sched_clock_32_wrapper, bits, rate); +} + unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; unsigned long long notrace sched_clock(void) @@ -166,7 +184,7 @@ void __init sched_clock_postinit(void) * make it the final one one. */ if (read_sched_clock == jiffy_sched_clock_read) - setup_sched_clock(jiffy_sched_clock_read, 32, HZ); + sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); -- GitLab From 18952f20fadef0a5e099f5c4cac34b97644ccc35 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:20 -0700 Subject: [PATCH 00017/10742] clocksource: bcm2835: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Cc: Stephen Warren Acked-by: Stephen Warren Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/bcm2835_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 07ea7ce900dc..26ed331b1aad 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -49,7 +49,7 @@ struct bcm2835_timer { static void __iomem *system_clock __read_mostly; -static u32 notrace bcm2835_sched_read(void) +static u64 notrace bcm2835_sched_read(void) { return readl_relaxed(system_clock); } @@ -110,7 +110,7 @@ static void __init bcm2835_timer_init(struct device_node *node) panic("Can't read clock-frequency"); system_clock = base + REG_COUNTER_LO; - setup_sched_clock(bcm2835_sched_read, 32, freq); + sched_clock_register(bcm2835_sched_read, 32, freq); clocksource_mmio_init(base + REG_COUNTER_LO, node->name, freq, 300, 32, clocksource_mmio_readl_up); -- GitLab From 5602d7c808aa99230ab1ef1598e2425cf2acedc5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:21 -0700 Subject: [PATCH 00018/10742] clocksource: dbx500-prcmu: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. 
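The mechanical pattern is the same for every driver converted in this series; as a rough sketch (the driver name and counter register below are invented for illustration, only the callback signature and the registration call reflect the new interface):

	static void __iomem *counter_base;	/* hypothetical MMIO counter */

	/* before: a u32-returning callback passed to setup_sched_clock() */
	static u64 notrace foo_sched_clock_read(void)
	{
		return readl_relaxed(counter_base);	/* value now returned as u64 */
	}

	static void __init foo_timer_init(unsigned long rate)
	{
		/* was: setup_sched_clock(foo_sched_clock_read, 32, rate); */
		sched_clock_register(foo_sched_clock_read, 32, rate);
	}
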
Cc: Srinidhi Kasagar Cc: Linus Walleij Acked-by: Linus Walleij Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/clksrc-dbx500-prcmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c index a9fd4ad25674..b375106844d8 100644 --- a/drivers/clocksource/clksrc-dbx500-prcmu.c +++ b/drivers/clocksource/clksrc-dbx500-prcmu.c @@ -53,7 +53,7 @@ static struct clocksource clocksource_dbx500_prcmu = { #ifdef CONFIG_CLKSRC_DBX500_PRCMU_SCHED_CLOCK -static u32 notrace dbx500_prcmu_sched_clock_read(void) +static u64 notrace dbx500_prcmu_sched_clock_read(void) { if (unlikely(!clksrc_dbx500_timer_base)) return 0; @@ -81,8 +81,7 @@ void __init clksrc_dbx500_prcmu_init(void __iomem *base) clksrc_dbx500_timer_base + PRCMU_TIMER_REF); } #ifdef CONFIG_CLKSRC_DBX500_PRCMU_SCHED_CLOCK - setup_sched_clock(dbx500_prcmu_sched_clock_read, - 32, RATE_32K); + sched_clock_register(dbx500_prcmu_sched_clock_read, 32, RATE_32K); #endif clocksource_register_hz(&clocksource_dbx500_prcmu, RATE_32K); } -- GitLab From fa8296ae62364d80bb82c4c011469ae3e423d509 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:22 -0700 Subject: [PATCH 00019/10742] clocksource: dw_apb_timer_of: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/dw_apb_timer_of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index 4cbae4f762b1..003b2309f463 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -106,7 +106,7 @@ static void add_clocksource(struct device_node *source_timer) sched_rate = rate; } -static u32 read_sched_clock(void) +static u64 read_sched_clock(void) { return __raw_readl(sched_io_base); } @@ -128,7 +128,7 @@ static void init_sched_clock(void) of_node_put(sched_timer); } - setup_sched_clock(read_sched_clock, 32, sched_rate); + sched_clock_register(read_sched_clock, 32, sched_rate); } static int num_called; -- GitLab From fcfca6ef6a35690648de6529b607674d4132b10e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:23 -0700 Subject: [PATCH 00020/10742] clocksource: mxs_timer: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. 
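Note that the bits argument stays 32 for this kind of conversion: the sched_clock core still masks the cycle delta with CLOCKSOURCE_MASK(bits), so wraparound of a 32-bit counter is handled exactly as before even though the read callback now returns u64. A small worked example with invented values:

	/* mask      = CLOCKSOURCE_MASK(32) = 0xffffffff                  */
	/* epoch_cyc = 0xffffff00  (sampled before the counter wrapped)   */
	/* cyc       = 0x00000100  (sampled after the wrap)               */
	/* (cyc - epoch_cyc) & mask = 0x00000200, i.e. 512 elapsed cycles */
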
Cc: Shawn Guo Acked-by: Shawn Guo Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/mxs_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c index 0f5e65f74dc3..445b68a01dc5 100644 --- a/drivers/clocksource/mxs_timer.c +++ b/drivers/clocksource/mxs_timer.c @@ -222,7 +222,7 @@ static struct clocksource clocksource_mxs = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static u32 notrace mxs_read_sched_clock_v2(void) +static u64 notrace mxs_read_sched_clock_v2(void) { return ~readl_relaxed(mxs_timrot_base + HW_TIMROT_RUNNING_COUNTn(1)); } @@ -236,7 +236,7 @@ static int __init mxs_clocksource_init(struct clk *timer_clk) else { clocksource_mmio_init(mxs_timrot_base + HW_TIMROT_RUNNING_COUNTn(1), "mxs_timer", c, 200, 32, clocksource_mmio_readl_down); - setup_sched_clock(mxs_read_sched_clock_v2, 32, c); + sched_clock_register(mxs_read_sched_clock_v2, 32, c); } return 0; -- GitLab From e25bc5f5ad192cf8782db64acb83962a0b27c4e0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:24 -0700 Subject: [PATCH 00021/10742] clocksource: nomadik: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Cc: Linus Walleij Acked-by: Linus Walleij Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/nomadik-mtu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index 7d2c2c56f73c..2242cd3d618d 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -76,7 +76,7 @@ static struct delay_timer mtu_delay_timer; * local implementation which uses the clocksource to get some * better resolution when scheduling the kernel. */ -static u32 notrace nomadik_read_sched_clock(void) +static u64 notrace nomadik_read_sched_clock(void) { if (unlikely(!mtu_base)) return 0; @@ -230,7 +230,7 @@ static void __init __nmdk_timer_init(void __iomem *base, int irq, "mtu_0"); #ifdef CONFIG_CLKSRC_NOMADIK_MTU_SCHED_CLOCK - setup_sched_clock(nomadik_read_sched_clock, 32, rate); + sched_clock_register(nomadik_read_sched_clock, 32, rate); #endif /* Timer 1 is used for events, register irq and clockevents */ -- GitLab From 2902b30e0bd78686e7d891519dbfe2e4e43825fd Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:25 -0700 Subject: [PATCH 00022/10742] clocksource: samsung_pwm_timer: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Cc: Tomasz Figa Cc: Kyungmin Park Cc: Kukjin Kim Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/samsung_pwm_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 584b5472eea3..09e8bc7bc92f 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -310,7 +310,7 @@ static void __iomem *samsung_timer_reg(void) * this wraps around for now, since it is just a relative time * stamp. (Inspired by U300 implementation.) 
*/ -static u32 notrace samsung_read_sched_clock(void) +static u64 notrace samsung_read_sched_clock(void) { void __iomem *reg = samsung_timer_reg(); @@ -337,7 +337,7 @@ static void __init samsung_clocksource_init(void) samsung_time_setup(pwm.source_id, pwm.tcnt_max); samsung_time_start(pwm.source_id, true); - setup_sched_clock(samsung_read_sched_clock, + sched_clock_register(samsung_read_sched_clock, pwm.variant.bits, clock_rate); ret = clocksource_mmio_init(reg, "samsung_clocksource_timer", -- GitLab From 35702999b1f366ed80fc4bd94bd8ebc8a1c46954 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:26 -0700 Subject: [PATCH 00023/10742] clocksource: tegra: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Cc: Stephen Warren Acked-by: Stephen Warren Tested-by: Stephen Warren Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/tegra20_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index 93961703b887..5cff61677b6c 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -98,7 +98,7 @@ static struct clock_event_device tegra_clockevent = { .set_mode = tegra_timer_set_mode, }; -static u32 notrace tegra_read_sched_clock(void) +static u64 notrace tegra_read_sched_clock(void) { return timer_readl(TIMERUS_CNTR_1US); } @@ -200,7 +200,7 @@ static void __init tegra20_init_timer(struct device_node *np) WARN(1, "Unknown clock rate"); } - setup_sched_clock(tegra_read_sched_clock, 32, 1000000); + sched_clock_register(tegra_read_sched_clock, 32, 1000000); if (clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US, "timer_us", 1000000, 300, 32, clocksource_mmio_readl_up)) { -- GitLab From d9dbcbe0ea29dac15a9ca70c274fec4ef100e187 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:27 -0700 Subject: [PATCH 00024/10742] clocksource: time-armada-370-xp: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Cc: Gregory CLEMENT Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/time-armada-370-xp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index efdca3263afe..2bec8dca74b6 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -71,7 +71,7 @@ static u32 ticks_per_jiffy; static struct clock_event_device __percpu **percpu_armada_370_xp_evt; -static u32 notrace armada_370_xp_read_sched_clock(void) +static u64 notrace armada_370_xp_read_sched_clock(void) { return ~readl(timer_base + TIMER0_VAL_OFF); } @@ -258,7 +258,7 @@ void __init armada_370_xp_timer_init(void) /* * Set scale and timer for sched_clock. */ - setup_sched_clock(armada_370_xp_read_sched_clock, 32, timer_clk); + sched_clock_register(armada_370_xp_read_sched_clock, 32, timer_clk); /* * Setup free-running clocksource timer (interrupts -- GitLab From 130e6b25a28ff5b2421d6cae5f2bac1f5afdcfb0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:28 -0700 Subject: [PATCH 00025/10742] clocksource: sirf: Switch to sched_clock_register() and use 64 bits The 32 bit sched_clock interface now supports 64 bits. 
Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface and use all 64 bits of this timer. Cc: Barry Song Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/timer-prima2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c index ef3cfb269d8b..8a492d34ff9f 100644 --- a/drivers/clocksource/timer-prima2.c +++ b/drivers/clocksource/timer-prima2.c @@ -165,9 +165,9 @@ static struct irqaction sirfsoc_timer_irq = { }; /* Overwrite weak default sched_clock with more precise one */ -static u32 notrace sirfsoc_read_sched_clock(void) +static u64 notrace sirfsoc_read_sched_clock(void) { - return (u32)(sirfsoc_timer_read(NULL) & 0xffffffff); + return sirfsoc_timer_read(NULL); } static void __init sirfsoc_clockevent_init(void) @@ -206,7 +206,7 @@ static void __init sirfsoc_prima2_timer_init(struct device_node *np) BUG_ON(clocksource_register_hz(&sirfsoc_clocksource, CLOCK_TICK_RATE)); - setup_sched_clock(sirfsoc_read_sched_clock, 32, CLOCK_TICK_RATE); + sched_clock_register(sirfsoc_read_sched_clock, 64, CLOCK_TICK_RATE); BUG_ON(setup_irq(sirfsoc_timer_irq.irq, &sirfsoc_timer_irq)); -- GitLab From f4e6e1ea19737077d958f2bc6c196eb579d97544 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Jul 2013 16:21:29 -0700 Subject: [PATCH 00026/10742] clocksource: vf_pit_timer: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Cc: Jingchang Lu Cc: Fabio Estevam Signed-off-by: Stephen Boyd Signed-off-by: John Stultz --- drivers/clocksource/vf_pit_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c index 587e0202a70b..02821b06a39e 100644 --- a/drivers/clocksource/vf_pit_timer.c +++ b/drivers/clocksource/vf_pit_timer.c @@ -52,7 +52,7 @@ static inline void pit_irq_acknowledge(void) __raw_writel(PITTFLG_TIF, clkevt_base + PITTFLG); } -static unsigned int pit_read_sched_clock(void) +static u64 pit_read_sched_clock(void) { return __raw_readl(clksrc_base + PITCVAL); } @@ -64,7 +64,7 @@ static int __init pit_clocksource_init(unsigned long rate) __raw_writel(~0UL, clksrc_base + PITLDVAL); __raw_writel(PITTCTRL_TEN, clksrc_base + PITTCTRL); - setup_sched_clock(pit_read_sched_clock, 32, rate); + sched_clock_register(pit_read_sched_clock, 32, rate); return clocksource_mmio_init(clksrc_base + PITCVAL, "vf-pit", rate, 300, 32, clocksource_mmio_readl_down); } -- GitLab From 38c35d4f2e408c369e3030f0717d35ad443d9223 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 13 Jun 2013 23:42:46 -0400 Subject: [PATCH 00027/10742] ARM: bL_switcher: remove assumptions between logical and physical CPUs Up to now, the logical CPU was somehow tied to the physical CPU number within a cluster. This causes problems when forcing the boot CPU to be different from the first enumerated CPU in the device tree creating a discrepancy between logical and physical CPU numbers. Let's make the pairing completely independent from physical CPU numbers. Let's keep only those logical CPUs with same initial CPU cluster to create a uniform scheduler profile without having to modify any of the probed topology and compute capacity data. This has the potential to create a non contiguous CPU numbering space when the switcher is active with potential impact on buggy user space tools. 
It is however better to fix those tools rather than making the switcher code more intrusive. Signed-off-by: Nicolas Pitre Reviewed-by: Lorenzo Pieralisi --- arch/arm/common/bL_switcher.c | 176 ++++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 72 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index cec825ef392b..0e4fb3e5e99c 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -53,21 +53,19 @@ static int read_mpidr(void) static void bL_do_switch(void *_unused) { - unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster; + unsigned ib_mpidr, ib_cpu, ib_cluster; pr_debug("%s\n", __func__); - mpidr = read_mpidr(); - cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); - clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); - ob_cluster = clusterid; - ib_cluster = clusterid ^ 1; + ib_mpidr = cpu_logical_map(smp_processor_id()); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); /* * Our state has been saved at this point. Let's release our * inbound CPU. */ - mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume); + mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume); sev(); /* @@ -113,6 +111,7 @@ static int bL_switchpoint(unsigned long _arg) */ static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; +static int bL_switcher_cpu_pairing[NR_CPUS]; /* * bL_switch_to - Switch to a specific cluster for the current CPU @@ -123,31 +122,38 @@ static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; */ static int bL_switch_to(unsigned int new_cluster_id) { - unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu; + unsigned int mpidr, this_cpu, that_cpu; + unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; struct tick_device *tdev; enum clock_event_mode tdev_mode; int ret; - mpidr = read_mpidr(); - cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); - clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); - ob_cluster = clusterid; - ib_cluster = clusterid ^ 1; + this_cpu = smp_processor_id(); + ob_mpidr = read_mpidr(); + ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0); + ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1); + BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr); - if (new_cluster_id == clusterid) + if (new_cluster_id == ob_cluster) return 0; - pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid); + that_cpu = bL_switcher_cpu_pairing[this_cpu]; + ib_mpidr = cpu_logical_map(that_cpu); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n", + this_cpu, ob_mpidr, ib_mpidr); /* Close the gate for our entry vectors */ - mcpm_set_entry_vector(cpuid, ob_cluster, NULL); - mcpm_set_entry_vector(cpuid, ib_cluster, NULL); + mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL); + mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL); /* * Let's wake up the inbound CPU now in case it requires some delay * to come online, but leave it gated in our entry vector code. 
*/ - ret = mcpm_cpu_power_up(cpuid, ib_cluster); + ret = mcpm_cpu_power_up(ib_cpu, ib_cluster); if (ret) { pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); return ret; @@ -160,10 +166,8 @@ static int bL_switch_to(unsigned int new_cluster_id) local_irq_disable(); local_fiq_disable(); - this_cpu = smp_processor_id(); - /* redirect GIC's SGIs to our counterpart */ - gic_migrate_target(bL_gic_id[cpuid][ib_cluster]); + gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); /* * Raise a SGI on the inbound CPU to make sure it doesn't stall @@ -185,8 +189,9 @@ static int bL_switch_to(unsigned int new_cluster_id) if (ret) panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); - /* Flip the cluster in the CPU logical map for this CPU. */ - cpu_logical_map(this_cpu) ^= (1 << 8); + /* Swap the physical CPUs in the logical map for this logical CPU. */ + cpu_logical_map(this_cpu) = ib_mpidr; + cpu_logical_map(that_cpu) = ob_mpidr; /* Let's do the actual CPU switch. */ ret = cpu_suspend(0, bL_switchpoint); @@ -195,10 +200,8 @@ static int bL_switch_to(unsigned int new_cluster_id) /* We are executing on the inbound CPU at this point */ mpidr = read_mpidr(); - cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); - clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); - pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid); - BUG_ON(clusterid != ib_cluster); + pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr); + BUG_ON(mpidr != ib_mpidr); mcpm_cpu_powered_up(); @@ -300,7 +303,7 @@ EXPORT_SYMBOL_GPL(bL_switch_request); */ static unsigned int bL_switcher_active; -static unsigned int bL_switcher_cpu_original_cluster[MAX_CPUS_PER_CLUSTER]; +static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS]; static cpumask_t bL_switcher_removed_logical_cpus; static void bL_switcher_restore_cpus(void) @@ -313,52 +316,86 @@ static void bL_switcher_restore_cpus(void) static int bL_switcher_halve_cpus(void) { - int cpu, cluster, i, ret; - cpumask_t cluster_mask[2], common_mask; - - cpumask_clear(&bL_switcher_removed_logical_cpus); - cpumask_clear(&cluster_mask[0]); - cpumask_clear(&cluster_mask[1]); + int i, j, cluster_0, gic_id, ret; + unsigned int cpu, cluster, mask; + cpumask_t available_cpus; + /* First pass to validate what we have */ + mask = 0; for_each_online_cpu(i) { - cpu = cpu_logical_map(i) & 0xff; - cluster = (cpu_logical_map(i) >> 8) & 0xff; + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); if (cluster >= 2) { pr_err("%s: only dual cluster systems are supported\n", __func__); return -EINVAL; } - cpumask_set_cpu(cpu, &cluster_mask[cluster]); + if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER)) + return -EINVAL; + mask |= (1 << cluster); } - - if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) { - pr_err("%s: no common set of CPUs\n", __func__); + if (mask != 3) { + pr_err("%s: no CPU pairing possible\n", __func__); return -EINVAL; } - for_each_online_cpu(i) { - cpu = cpu_logical_map(i) & 0xff; - cluster = (cpu_logical_map(i) >> 8) & 0xff; - - if (cpumask_test_cpu(cpu, &common_mask)) { - /* Let's take note of the GIC ID for this CPU */ - int gic_id = gic_get_cpu_id(i); - if (gic_id < 0) { - pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); - return -EINVAL; - } - bL_gic_id[cpu][cluster] = gic_id; - pr_info("GIC ID for CPU %u cluster %u is %u\n", - cpu, cluster, gic_id); - + /* + * Now let's do the pairing. We match each CPU with another CPU + * from a different cluster. 
To get a uniform scheduling behavior + * without fiddling with CPU topology and compute capacity data, + * we'll use logical CPUs initially belonging to the same cluster. + */ + memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing)); + cpumask_copy(&available_cpus, cpu_online_mask); + cluster_0 = -1; + for_each_cpu(i, &available_cpus) { + int match = -1; + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster_0 == -1) + cluster_0 = cluster; + if (cluster != cluster_0) + continue; + cpumask_clear_cpu(i, &available_cpus); + for_each_cpu(j, &available_cpus) { + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1); /* - * We keep only those logical CPUs which number - * is equal to their physical CPU number. This is - * not perfect but good enough for now. + * Let's remember the last match to create "odd" + * pairings on purpose in order for other code not + * to assume any relation between physical and + * logical CPU numbers. */ - if (cpu == i) { - bL_switcher_cpu_original_cluster[cpu] = cluster; - continue; - } + if (cluster != cluster_0) + match = j; + } + if (match != -1) { + bL_switcher_cpu_pairing[i] = match; + cpumask_clear_cpu(match, &available_cpus); + pr_info("CPU%d paired with CPU%d\n", i, match); + } + } + + /* + * Now we disable the unwanted CPUs i.e. everything that has no + * pairing information (that includes the pairing counterparts). + */ + cpumask_clear(&bL_switcher_removed_logical_cpus); + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + + /* Let's take note of the GIC ID for this CPU */ + gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + bL_switcher_restore_cpus(); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + + if (bL_switcher_cpu_pairing[i] != -1) { + bL_switcher_cpu_original_cluster[i] = cluster; + continue; } ret = cpu_down(i); @@ -409,7 +446,7 @@ static int bL_switcher_enable(void) static void bL_switcher_disable(void) { - unsigned int cpu, cluster, i; + unsigned int cpu, cluster; struct bL_thread *t; struct task_struct *task; @@ -429,7 +466,6 @@ static void bL_switcher_disable(void) * possibility for interference from external requests. */ for_each_online_cpu(cpu) { - BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff)); t = &bL_threads[cpu]; task = t->task; t->task = NULL; @@ -453,14 +489,10 @@ static void bL_switcher_disable(void) /* If execution gets here, we're in trouble. */ pr_crit("%s: unable to restore original cluster for CPU %d\n", __func__, cpu); - for_each_cpu(i, &bL_switcher_removed_logical_cpus) { - if ((cpu_logical_map(i) & 0xff) != cpu) - continue; - pr_crit("%s: CPU %d can't be restored\n", - __func__, i); - cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus); - break; - } + pr_crit("%s: CPU %d can't be restored\n", + __func__, bL_switcher_cpu_pairing[cpu]); + cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu], + &bL_switcher_removed_logical_cpus); } bL_switcher_restore_cpus(); -- GitLab From 272614351423ce8c37ff730efc130e5b73fe64f5 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 26 Nov 2012 22:48:55 -0500 Subject: [PATCH 00028/10742] ARM: bL_switcher: filter CPU hotplug requests when the switcher is active Trying to support both the switcher and CPU hotplug at the same time is tricky due to ambiguous semantics. 
So let's at least prevent users from messing around with those logical CPUs the switcher has removed and those which were not active when the switcher was activated. Signed-off-by: Nicolas Pitre --- arch/arm/common/bL_switcher.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 0e4fb3e5e99c..335ff76d4c5a 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -554,6 +554,26 @@ static int __init bL_switcher_sysfs_init(void) #endif /* CONFIG_SYSFS */ +/* + * Veto any CPU hotplug operation on those CPUs we've removed + * while the switcher is active. + * We're just not ready to deal with that given the trickery involved. + */ +static int bL_switcher_hotplug_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + if (bL_switcher_active) { + int pairing = bL_switcher_cpu_pairing[(unsigned long)hcpu]; + switch (action & 0xf) { + case CPU_UP_PREPARE: + case CPU_DOWN_PREPARE: + if (pairing == -1) + return NOTIFY_BAD; + } + } + return NOTIFY_DONE; +} + static bool no_bL_switcher; core_param(no_bL_switcher, no_bL_switcher, bool, 0644); @@ -566,6 +586,8 @@ static int __init bL_switcher_init(void) return -EINVAL; } + cpu_notifier(bL_switcher_hotplug_callback, 0); + if (!no_bL_switcher) { ret = bL_switcher_enable(); if (ret) -- GitLab From b22537c682671de97c932d5addb6b7d087352aa1 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 12 Apr 2012 03:04:28 -0400 Subject: [PATCH 00029/10742] ARM: bL_switcher: add a simple /dev user interface for debugging purposes Only the basic call to aid debugging. *** NOT FOR PRODUCTION *** Usage: echo , > /dev/b.L_switcher where is the logical CPU number, and is 0 for the first cluster and 1 for the second cluster. Signed-off-by: nicolas Pitre --- arch/arm/Kconfig | 8 +++ arch/arm/common/Makefile | 1 + arch/arm/common/bL_switcher_dummy_if.c | 71 ++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 arch/arm/common/bL_switcher_dummy_if.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ade70017fd65..cfcb0d8d6f85 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1556,6 +1556,14 @@ config BL_SWITCHER transparently handle transition between a cluster of A15's and a cluster of A7's in a big.LITTLE system. +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + choice prompt "Memory split" default VMSPLIT_3G diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 2586240d5a45..5c8584c4944d 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -18,3 +18,4 @@ AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a obj-$(CONFIG_TI_PRIV_EDMA) += edma.o obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c new file mode 100644 index 000000000000..3f47f1203c6b --- /dev/null +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -0,0 +1,71 @@ +/* + * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * Dummy interface to user space for debugging purpose only. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +static ssize_t bL_switcher_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + unsigned char val[3]; + unsigned int cpu, cluster; + int ret; + + pr_debug("%s\n", __func__); + + if (len < 3) + return -EINVAL; + + if (copy_from_user(val, buf, 3)) + return -EFAULT; + + /* format: <cpu>,<cluster> */ + if (val[0] < '0' || val[0] > '9' || + val[1] != ',' || + val[2] < '0' || val[2] > '1') + return -EINVAL; + + cpu = val[0] - '0'; + cluster = val[2] - '0'; + ret = bL_switch_request(cpu, cluster); + + return ret ? : len; +} + +static const struct file_operations bL_switcher_fops = { + .write = bL_switcher_write, + .owner = THIS_MODULE, +}; + +static struct miscdevice bL_switcher_device = { + MISC_DYNAMIC_MINOR, + "b.L_switcher", + &bL_switcher_fops +}; + +static int __init bL_switcher_dummy_if_init(void) +{ + return misc_register(&bL_switcher_device); +} + +static void __exit bL_switcher_dummy_if_exit(void) +{ + misc_deregister(&bL_switcher_device); +} + +module_init(bL_switcher_dummy_if_init); +module_exit(bL_switcher_dummy_if_exit); -- GitLab From a97ad0c4b447a132a322cedc3a5f7fa4cab4b304 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Thu, 1 Aug 2013 19:31:35 +0200 Subject: [PATCH 00030/10742] ntp: Make periodic RTC update more reliable The current code requires that the scheduled update of the RTC happens in the closest tick to the half of the second. This seems to be difficult to achieve reliably. The scheduled work may miss the target time by a tick or two and be constantly rescheduled every second. Relax the limit to 10 ticks. (With HZ=100, for example, this widens the window around the 500 ms target from +/-5 ms to +/-50 ms.) As a typical RTC drifts in the 11-minute update interval by several milliseconds, this shouldn't affect the overall accuracy of the RTC much. Signed-off-by: Miroslav Lichvar Signed-off-by: John Stultz --- kernel/time/ntp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8f5b3b98577b..ab1fa7cb8912 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -475,6 +475,7 @@ static void sync_cmos_clock(struct work_struct *work) * called as close as possible to 500 ms before the new second starts. * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... + * We want the clock to be within a couple of ticks from the target. */ if (!ntp_synced()) { /* @@ -485,7 +486,7 @@ static void sync_cmos_clock(struct work_struct *work) } getnstimeofday(&now); - if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) { + if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { struct timespec adjust = now; fail = -ENODEV; -- GitLab From 0806ae4cc8722b2d2822fe3fa3f516f2da6b9459 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 23 Aug 2013 15:46:08 +0200 Subject: [PATCH 00031/10742] xfrm: announce deletion of temporary SA Creation of temporary SAs is announced via netlink, but there is no notification for their deletion. This patch fixes this asymmetric situation.
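With this change user space sees an XFRM_MSG_EXPIRE notification (with the hard flag set) when a temporary SA is removed, just as it already did for SAs carrying an SPI. As a rough user-space sketch of how such notifications can be observed (illustration only, not part of this patch; error handling omitted):

	#include <stdio.h>
	#include <arpa/inet.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/xfrm.h>

	int main(void)
	{
		char buf[8192];
		struct sockaddr_nl addr = {
			.nl_family = AF_NETLINK,
			.nl_groups = 1 << (XFRMNLGRP_EXPIRE - 1),
		};
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);

		bind(fd, (struct sockaddr *)&addr, sizeof(addr));
		for (;;) {
			int len = recv(fd, buf, sizeof(buf), 0);
			struct nlmsghdr *nlh;

			for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
			     nlh = NLMSG_NEXT(nlh, len)) {
				struct xfrm_user_expire *exp;

				if (nlh->nlmsg_type != XFRM_MSG_EXPIRE)
					continue;
				exp = NLMSG_DATA(nlh);
				printf("expire: spi 0x%08x %s\n",
				       ntohl(exp->state.id.spi),
				       exp->hard ? "hard (state deleted)" : "soft");
			}
		}
	}
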
Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4f8ace855864..3fd65b73df7e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -471,7 +471,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) } err = __xfrm_state_delete(x); - if (!err && x->id.spi) + if (!err) km_state_expired(x, 1, 0); xfrm_audit_state_delete(x, err ? 0 : 1, -- GitLab From aba8269588301f7778bea811d6f7ec74c2e37279 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Wed, 28 Aug 2013 15:09:40 +0800 Subject: [PATCH 00032/10742] {ipv4,xfrm}: Introduce xfrm_tunnel_notifier for xfrm tunnel mode callback Some thoughts on IPv4 VTI implementation: The connection between VTI receiving part and xfrm tunnel mode input process is hardly a "xfrm_tunnel", xfrm_tunnel is used in places where, e.g ipip/sit and xfrm4_tunnel, acts like a true "tunnel" device. In addition, IMHO, VTI doesn't need vti_err to do something meaningful, as all VTI needs is just a notifier to be called whenever xfrm_input ingress a packet to update statistics. A IPsec protected packet is first handled by protocol handlers, e.g AH/ESP, to check packet authentication or encryption rightness. PMTU update is taken care of in this stage by protocol error handler. Then the packet is rearranged properly depending on whether it's transport mode or tunnel mode packed by mode "input" handler. The VTI handler code takes effects in this stage in tunnel mode only. So it neither need propagate PMTU, as it has already been done if necessary, nor the VTI handler is qualified as a xfrm_tunnel. So this patch introduces xfrm_tunnel_notifier and meanwhile wipe out vti_err code. Signed-off-by: Fan Du Cc: Steffen Klassert Cc: David S. 
Miller Reviewed-by: Saurabh Mohan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 10 ++++-- net/ipv4/ip_vti.c | 67 +----------------------------------- net/ipv4/xfrm4_mode_tunnel.c | 16 ++++----- 3 files changed, 17 insertions(+), 76 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 89d3d8ae204e..c7afa6e476c8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1352,6 +1352,12 @@ struct xfrm_tunnel { int priority; }; +struct xfrm_tunnel_notifier { + int (*handler)(struct sk_buff *skb); + struct xfrm_tunnel_notifier __rcu *next; + int priority; +}; + struct xfrm6_tunnel { int (*handler)(struct sk_buff *skb); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -1495,8 +1501,8 @@ extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); -extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler); -extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler); +extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler); +extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler); extern int xfrm6_extract_header(struct sk_buff *skb); extern int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index e805e7b3030e..91f69bc883fe 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -49,70 +49,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly; static int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); -static int vti_err(struct sk_buff *skb, u32 info) -{ - - /* All the routers (except for Linux) return only - * 8 bytes of packet payload. It means, that precise relaying of - * ICMP in the real Internet is absolutely infeasible. - */ - struct net *net = dev_net(skb->dev); - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - struct iphdr *iph = (struct iphdr *)skb->data; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct ip_tunnel *t; - int err; - - switch (type) { - default: - case ICMP_PARAMETERPROB: - return 0; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - default: - /* All others are translated to HOST_UNREACH. 
*/ - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - err = -ENOENT; - - t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->daddr, iph->saddr, 0); - if (t == NULL) - goto out; - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPIP, 0); - err = 0; - goto out; - } - - err = 0; - if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - goto out; - - if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) - t->err_count++; - else - t->err_count = 1; - t->err_time = jiffies; -out: - return err; -} - /* We dont digest the packet therefore let the packet pass */ static int vti_rcv(struct sk_buff *skb) { @@ -296,9 +232,8 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; } -static struct xfrm_tunnel vti_handler __read_mostly = { +static struct xfrm_tunnel_notifier vti_handler __read_mostly = { .handler = vti_rcv, - .err_handler = vti_err, .priority = 1, }; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index eb1dd4d643f2..b82cde1ea1b6 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -16,13 +16,13 @@ #include /* Informational hook. The decap is still done here. */ -static struct xfrm_tunnel __rcu *rcv_notify_handlers __read_mostly; +static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex); -int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler) +int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) { - struct xfrm_tunnel __rcu **pprev; - struct xfrm_tunnel *t; + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; int ret = -EEXIST; int priority = handler->priority; @@ -50,10 +50,10 @@ int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler) } EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register); -int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler) +int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) { - struct xfrm_tunnel __rcu **pprev; - struct xfrm_tunnel *t; + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; int ret = -ENOENT; mutex_lock(&xfrm4_mode_tunnel_input_mutex); @@ -134,7 +134,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_tunnel *handler; + struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) -- GitLab From 6898eb89655a8ac7d098b2fada95c1c91870365c Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Wed, 21 Aug 2013 00:15:00 +0100 Subject: [PATCH 00033/10742] iio: Remove trailing ; from function definitions Signed-off-by: Peter Meerwald Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 2103cc32a5fb..8e7a8132b109 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -457,7 +457,7 @@ static inline void iio_device_put(struct iio_dev *indio_dev) { if (indio_dev) put_device(&indio_dev->dev); -}; +} /** * dev_to_iio_dev() - Get IIO device struct from a device struct @@ -593,7 +593,7 @@ static inline bool iio_buffer_enabled(struct iio_dev *indio_dev) { return indio_dev->currentmode & 
(INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE); -}; +} /** * iio_get_debugfs_dentry() - helper function to get the debugfs_dentry @@ -603,12 +603,12 @@ static inline bool iio_buffer_enabled(struct iio_dev *indio_dev) static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) { return indio_dev->debugfs_dentry; -}; +} #else static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) { return NULL; -}; +} #endif int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, -- GitLab From 7752572f18f98ee796e173334b088a1d621d2da4 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Wed, 4 Sep 2013 09:25:24 +0800 Subject: [PATCH 00034/10742] x86/irq: Correct comment about i8259 initialization 0x30-0x3f have also been used for ISA interrupts on i386 for the last 5 years, but old comments about i8259 initialization still referred to the previous i386 usage of this vector range. Signed-off-by: Yanchuan Nian Cc: yinghai@kernel.org Cc: pavel@suse.cz Link: http://lkml.kernel.org/r/1378257924-29446-1-git-send-email-ycnian@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 9a5c460404dc..2e977b5d61dd 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -312,8 +312,7 @@ static void init_8259A(int auto_eoi) */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, - to 0x20-0x27 on i386 */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ -- GitLab From 0f45aa84b33b7a9bfeb41845c723dc51f82f1cf0 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 4 Sep 2013 15:20:06 +0530 Subject: [PATCH 00035/10742] regulator: palmas: fix the n_voltages for smps to 122 With the following change

---------
commit ad02e846878ca35e9d3fa584be8ee770e9e14fce
Author: Axel Lin

    regulator: palmas: Return raw register values as the selectors in
    [get|set]_voltage_sel

    Don't adjust the selector in [get|set]_voltage_sel, fix it in
    list_voltage() instead.

    For smps* (except smps10), the vsel reg-value and voltage mapping
    is as below:
----------

list_voltage() takes the true value of the selector which is programmed in the register. As per the smps voltage table (assume RANGE is x1):

reg-value    volt (uV)
    0                0
    1           500000
    2           500000
    3           500000
    4           500000
    5           500000
    6           500000    (0.49V +   1 * 0.01V) * RANGE
    7           510000    (0.49V +   2 * 0.01V) * RANGE
    8           520000    (0.49V +   3 * 0.01V) * RANGE
    9           530000    (0.49V +   4 * 0.01V) * RANGE
 ....
  121          1650000    (0.49V + 116 * 0.01V) * RANGE

Hence n_voltages for smps is set to 122.
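Put differently, list_voltage() for these SMPS regulators ends up computing something like the helper sketched below (the function name is made up; the real driver also applies the per-SMPS RANGE multiplier and its usual bounds checking):

	static int palmas_smps_selector_to_uV(unsigned int selector)
	{
		if (selector == 0)
			return 0;		/* selector 0 -> 0 uV */
		if (selector < 6)
			return 500000;		/* selectors 1..5 all map to 0.5 V */
		/* selectors 6..121: 0.49 V + (selector - 5) * 0.01 V */
		return 490000 + (selector - 5) * 10000;
	}

With 122 selectors (0..121) the highest value works out to 490000 + 116 * 10000 = 1650000 uV, matching the table above.
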
Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/palmas-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 488dfe7ce9a6..cb65c6f6b0b3 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -207,7 +207,7 @@ static unsigned int palmas_smps_ramp_delay[4] = {0, 10000, 5000, 2500}; * * So they are basically (maxV-minV)/stepV */ -#define PALMAS_SMPS_NUM_VOLTAGES 117 +#define PALMAS_SMPS_NUM_VOLTAGES 122 #define PALMAS_SMPS10_NUM_VOLTAGES 2 #define PALMAS_LDO_NUM_VOLTAGES 50 -- GitLab From 27173f1f95db5e74ceb35fe9a2f2f348ea11bac9 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 14 Aug 2013 11:38:36 +0200 Subject: [PATCH 00036/10742] drm/i915: Convert execbuf code to use vmas In order to transition more of our code over to using a VMA instead of an pair - we must have the vma accessible at execbuf time. Up until now, we've only had a VMA when actually binding an object. The previous patch helped handle the distinction on bound vs. unbound. This patch will help us catch leaks, and other issues before we actually shuffle a bunch of stuff around. This attempts to convert all the execbuf code to speak in vmas. Since the execbuf code is very self contained it was a nice isolated conversion. The meat of the code is about turning eb_objects into eb_vma, and then wiring up the rest of the code to use vmas instead of obj, vm pairs. Unfortunately, to do this, we must move the exec_list link from the obj structure. This list is reused in the eviction code, so we must also modify the eviction code to make this work. WARNING: This patch makes an already hotly profiled path slower. The cost is unavoidable. In reply to this mail, I will attach the extra data. v2: Release table lock early, and two a 2 phase vma lookup to avoid having to use a GFP_ATOMIC. (Chris) v3: s/obj_exec_list/obj_exec_link/ Updates to address commit 6d2b888569d366beb4be72cacfde41adee2c25e1 Author: Chris Wilson Date: Wed Aug 7 18:30:54 2013 +0100 drm/i915: List objects allocated from stolen memory in debugfs v4: Use obj = vma->obj for neatness in some places (Chris) need_reloc_mappable() should return false if ppgtt (Chris) Signed-off-by: Ben Widawsky [danvet: Split out prep patches. Also remove a FIXME comment which is now taken care of.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 16 +- drivers/gpu/drm/i915/i915_gem.c | 1 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 320 +++++++++++---------- 3 files changed, 183 insertions(+), 154 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 35874b3a86dc..b6494b24098d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -568,6 +568,13 @@ struct i915_vma { /** This vma's place in the batchbuffer or on the eviction list */ struct list_head exec_list; + /** + * Used for performing relocations during execbuffer insertion. 
+ */ + struct hlist_node exec_node; + unsigned long exec_handle; + struct drm_i915_gem_exec_object2 *exec_entry; + }; struct i915_ctx_hang_stats { @@ -1398,8 +1405,6 @@ struct drm_i915_gem_object { struct list_head ring_list; /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; - /** This object's place in the batchbuffer or on the eviction list */ - struct list_head exec_list; /** * This is set if the object is on the active lists (has pending @@ -1485,13 +1490,6 @@ struct drm_i915_gem_object { void *dma_buf_vmapping; int vmapping_count; - /** - * Used for performing relocations during execbuffer insertion. - */ - struct hlist_node exec_node; - unsigned long exec_handle; - struct drm_i915_gem_exec_object2 *exec_entry; - struct intel_ring_buffer *ring; /** Breadcrumb of last rendering to the buffer. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f21a0c36a40b..c4c56f98d6e6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3978,7 +3978,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); - INIT_LIST_HEAD(&obj->exec_list); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index bf345777ae9f..5dcfd4f59f87 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -33,24 +33,24 @@ #include "intel_drv.h" #include -struct eb_objects { - struct list_head objects; +struct eb_vmas { + struct list_head vmas; int and; union { - struct drm_i915_gem_object *lut[0]; + struct i915_vma *lut[0]; struct hlist_head buckets[0]; }; }; -static struct eb_objects * -eb_create(struct drm_i915_gem_execbuffer2 *args) +static struct eb_vmas * +eb_create(struct drm_i915_gem_execbuffer2 *args, struct i915_address_space *vm) { - struct eb_objects *eb = NULL; + struct eb_vmas *eb = NULL; if (args->flags & I915_EXEC_HANDLE_LUT) { int size = args->buffer_count; - size *= sizeof(struct drm_i915_gem_object *); - size += sizeof(struct eb_objects); + size *= sizeof(struct i915_vma *); + size += sizeof(struct eb_vmas); eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); } @@ -61,7 +61,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args) while (count > 2*size) count >>= 1; eb = kzalloc(count*sizeof(struct hlist_head) + - sizeof(struct eb_objects), + sizeof(struct eb_vmas), GFP_TEMPORARY); if (eb == NULL) return eb; @@ -70,64 +70,97 @@ eb_create(struct drm_i915_gem_execbuffer2 *args) } else eb->and = -args->buffer_count; - INIT_LIST_HEAD(&eb->objects); + INIT_LIST_HEAD(&eb->vmas); return eb; } static void -eb_reset(struct eb_objects *eb) +eb_reset(struct eb_vmas *eb) { if (eb->and >= 0) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } static int -eb_lookup_objects(struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *exec, - const struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) +eb_lookup_vmas(struct eb_vmas *eb, + struct drm_i915_gem_exec_object2 *exec, + const struct drm_i915_gem_execbuffer2 *args, + struct i915_address_space *vm, + struct drm_file *file) { - int i; + struct drm_i915_gem_object *obj; + struct list_head objects; + int i, ret = 0; + INIT_LIST_HEAD(&objects); spin_lock(&file->table_lock); + /* Grab a reference to the object and release the lock so we can lookup + * or create the VMA without using GFP_ATOMIC */ for 
(i = 0; i < args->buffer_count; i++) { - struct drm_i915_gem_object *obj; - obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); if (obj == NULL) { spin_unlock(&file->table_lock); DRM_DEBUG("Invalid object handle %d at index %d\n", exec[i].handle, i); - return -ENOENT; + ret = -ENOENT; + goto out; } - if (!list_empty(&obj->exec_list)) { + if (!list_empty(&obj->obj_exec_link)) { spin_unlock(&file->table_lock); DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", obj, exec[i].handle, i); - return -EINVAL; + ret = -EINVAL; + goto out; } drm_gem_object_reference(&obj->base); - list_add_tail(&obj->exec_list, &eb->objects); + list_add_tail(&obj->obj_exec_link, &objects); + } + spin_unlock(&file->table_lock); - obj->exec_entry = &exec[i]; + i = 0; + list_for_each_entry(obj, &objects, obj_exec_link) { + struct i915_vma *vma; + + vma = i915_gem_obj_lookup_or_create_vma(obj, vm); + if (IS_ERR(vma)) { + /* XXX: We don't need an error path fro vma because if + * the vma was created just for this execbuf, object + * unreference should kill it off.*/ + DRM_DEBUG("Failed to lookup VMA\n"); + ret = PTR_ERR(vma); + goto out; + } + + list_add_tail(&vma->exec_list, &eb->vmas); + + vma->exec_entry = &exec[i]; if (eb->and < 0) { - eb->lut[i] = obj; + eb->lut[i] = vma; } else { uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; - obj->exec_handle = handle; - hlist_add_head(&obj->exec_node, + vma->exec_handle = handle; + hlist_add_head(&vma->exec_node, &eb->buckets[handle & eb->and]); } + ++i; } - spin_unlock(&file->table_lock); - return 0; + +out: + while (!list_empty(&objects)) { + obj = list_first_entry(&objects, + struct drm_i915_gem_object, + obj_exec_link); + list_del_init(&obj->obj_exec_link); + if (ret) + drm_gem_object_unreference(&obj->base); + } + return ret; } -static struct drm_i915_gem_object * -eb_get_object(struct eb_objects *eb, unsigned long handle) +static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) { if (eb->and < 0) { if (handle >= -eb->and) @@ -139,27 +172,25 @@ eb_get_object(struct eb_objects *eb, unsigned long handle) head = &eb->buckets[handle & eb->and]; hlist_for_each(node, head) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); - if (obj->exec_handle == handle) - return obj; + vma = hlist_entry(node, struct i915_vma, exec_node); + if (vma->exec_handle == handle) + return vma; } return NULL; } } -static void -eb_destroy(struct eb_objects *eb) -{ - while (!list_empty(&eb->objects)) { - struct drm_i915_gem_object *obj; +static void eb_destroy(struct eb_vmas *eb) { + while (!list_empty(&eb->vmas)) { + struct i915_vma *vma; - obj = list_first_entry(&eb->objects, - struct drm_i915_gem_object, + vma = list_first_entry(&eb->vmas, + struct i915_vma, exec_list); - list_del_init(&obj->exec_list); - drm_gem_object_unreference(&obj->base); + list_del_init(&vma->exec_list); + drm_gem_object_unreference(&vma->obj->base); } kfree(eb); } @@ -223,22 +254,24 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, - struct eb_objects *eb, + struct eb_vmas *eb, struct drm_i915_gem_relocation_entry *reloc, struct i915_address_space *vm) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_i915_obj; + struct i915_vma *target_vma; uint32_t target_offset; int ret = -EINVAL; /* we've already hold a reference 
to all valid objects */ - target_obj = &eb_get_object(eb, reloc->target_handle)->base; - if (unlikely(target_obj == NULL)) + target_vma = eb_get_vma(eb, reloc->target_handle); + if (unlikely(target_vma == NULL)) return -ENOENT; + target_i915_obj = target_vma->obj; + target_obj = &target_vma->obj->base; - target_i915_obj = to_intel_bo(target_obj); target_offset = i915_gem_obj_ggtt_offset(target_i915_obj); /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and @@ -320,14 +353,13 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } static int -i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, - struct eb_objects *eb, - struct i915_address_space *vm) +i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, + struct eb_vmas *eb) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; - struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; int remain, ret; user_relocs = to_user_ptr(entry->relocs_ptr); @@ -346,8 +378,8 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(obj, eb, r, - vm); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, + vma->vm); if (ret) return ret; @@ -368,17 +400,16 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, } static int -i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, - struct eb_objects *eb, - struct drm_i915_gem_relocation_entry *relocs, - struct i915_address_space *vm) +i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, + struct eb_vmas *eb, + struct drm_i915_gem_relocation_entry *relocs) { - const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; int i, ret; for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i], - vm); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], + vma->vm); if (ret) return ret; } @@ -387,10 +418,10 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, } static int -i915_gem_execbuffer_relocate(struct eb_objects *eb, +i915_gem_execbuffer_relocate(struct eb_vmas *eb, struct i915_address_space *vm) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; int ret = 0; /* This is the fast path and we cannot handle a pagefault whilst @@ -401,8 +432,8 @@ i915_gem_execbuffer_relocate(struct eb_objects *eb, * lockdep complains vehemently. 
*/ pagefault_disable(); - list_for_each_entry(obj, &eb->objects, exec_list) { - ret = i915_gem_execbuffer_relocate_object(obj, eb, vm); + list_for_each_entry(vma, &eb->vmas, exec_list) { + ret = i915_gem_execbuffer_relocate_vma(vma, eb); if (ret) break; } @@ -415,31 +446,32 @@ i915_gem_execbuffer_relocate(struct eb_objects *eb, #define __EXEC_OBJECT_HAS_FENCE (1<<30) static int -need_reloc_mappable(struct drm_i915_gem_object *obj) +need_reloc_mappable(struct i915_vma *vma) { - struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - return entry->relocation_count && !use_cpu_reloc(obj); + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + return entry->relocation_count && !use_cpu_reloc(vma->obj) && + i915_is_ggtt(vma->vm); } static int -i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *ring, - struct i915_address_space *vm, - bool *need_reloc) +i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, + struct intel_ring_buffer *ring, + bool *need_reloc) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; bool need_fence, need_mappable; + struct drm_i915_gem_object *obj = vma->obj; int ret; need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); + need_mappable = need_fence || need_reloc_mappable(vma); - ret = i915_gem_object_pin(obj, vm, entry->alignment, need_mappable, + ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable, false); if (ret) return ret; @@ -467,8 +499,8 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, obj->has_aliasing_ppgtt_mapping = 1; } - if (entry->offset != i915_gem_obj_offset(obj, vm)) { - entry->offset = i915_gem_obj_offset(obj, vm); + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start; *need_reloc = true; } @@ -485,14 +517,15 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, } static void -i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) +i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry; + struct drm_i915_gem_object *obj = vma->obj; - if (!i915_gem_obj_bound_any(obj)) + if (!drm_mm_node_allocated(&vma->node)) return; - entry = obj->exec_entry; + entry = vma->exec_entry; if (entry->flags & __EXEC_OBJECT_HAS_FENCE) i915_gem_object_unpin_fence(obj); @@ -505,41 +538,40 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, - struct list_head *objects, - struct i915_address_space *vm, + struct list_head *vmas, bool *need_relocs) { struct drm_i915_gem_object *obj; - struct list_head ordered_objects; + struct i915_vma *vma; + struct list_head ordered_vmas; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; - INIT_LIST_HEAD(&ordered_objects); - while (!list_empty(objects)) { + INIT_LIST_HEAD(&ordered_vmas); + while (!list_empty(vmas)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; - obj = list_first_entry(objects, - struct drm_i915_gem_object, - exec_list); - entry = obj->exec_entry; + vma = list_first_entry(vmas, struct i915_vma, exec_list); + obj = 
vma->obj; + entry = vma->exec_entry; need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); + need_mappable = need_fence || need_reloc_mappable(vma); if (need_mappable) - list_move(&obj->exec_list, &ordered_objects); + list_move(&vma->exec_list, &ordered_vmas); else - list_move_tail(&obj->exec_list, &ordered_objects); + list_move_tail(&vma->exec_list, &ordered_vmas); obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; obj->pending_fenced_gpu_access = false; } - list_splice(&ordered_objects, objects); + list_splice(&ordered_vmas, vmas); /* Attempt to pin all of the buffers into the GTT. * This is done in 3 phases: @@ -558,47 +590,47 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, int ret = 0; /* Unbind any ill-fitting objects or pin. */ - list_for_each_entry(obj, objects, exec_list) { - struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool need_fence, need_mappable; - u32 obj_offset; - if (!i915_gem_obj_bound(obj, vm)) + obj = vma->obj; + + if (!drm_mm_node_allocated(&vma->node)) continue; - obj_offset = i915_gem_obj_offset(obj, vm); need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); + need_mappable = need_fence || need_reloc_mappable(vma); WARN_ON((need_mappable || need_fence) && - !i915_is_ggtt(vm)); + !i915_is_ggtt(vma->vm)); if ((entry->alignment && - obj_offset & (entry->alignment - 1)) || + vma->node.start & (entry->alignment - 1)) || (need_mappable && !obj->map_and_fenceable)) - ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm)); + ret = i915_vma_unbind(vma); else - ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs); + ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); if (ret) goto err; } /* Bind fresh objects */ - list_for_each_entry(obj, objects, exec_list) { - if (i915_gem_obj_bound(obj, vm)) + list_for_each_entry(vma, vmas, exec_list) { + if (drm_mm_node_allocated(&vma->node)) continue; - ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs); + ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); if (ret) goto err; } err: /* Decrement pin count for bound objects */ - list_for_each_entry(obj, objects, exec_list) - i915_gem_execbuffer_unreserve_object(obj); + list_for_each_entry(vma, vmas, exec_list) + i915_gem_execbuffer_unreserve_vma(vma); if (ret != -ENOSPC || retry++) return ret; @@ -614,24 +646,27 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, struct drm_i915_gem_execbuffer2 *args, struct drm_file *file, struct intel_ring_buffer *ring, - struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *exec, - struct i915_address_space *vm) + struct eb_vmas *eb, + struct drm_i915_gem_exec_object2 *exec) { struct drm_i915_gem_relocation_entry *reloc; - struct drm_i915_gem_object *obj; + struct i915_address_space *vm; + struct i915_vma *vma; bool need_relocs; int *reloc_offset; int i, total, ret; int count = args->buffer_count; + if (WARN_ON(list_empty(&eb->vmas))) + return 0; + + vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; + /* We may process another execbuffer during the unlock... 
*/ - while (!list_empty(&eb->objects)) { - obj = list_first_entry(&eb->objects, - struct drm_i915_gem_object, - exec_list); - list_del_init(&obj->exec_list); - drm_gem_object_unreference(&obj->base); + while (!list_empty(&eb->vmas)) { + vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); + list_del_init(&vma->exec_list); + drm_gem_object_unreference(&vma->obj->base); } mutex_unlock(&dev->struct_mutex); @@ -695,20 +730,19 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, /* reacquire the objects */ eb_reset(eb); - ret = eb_lookup_objects(eb, exec, args, file); + ret = eb_lookup_vmas(eb, exec, args, vm, file); if (ret) goto err; need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs); + ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs); if (ret) goto err; - list_for_each_entry(obj, &eb->objects, exec_list) { - int offset = obj->exec_entry - exec; - ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, - reloc + reloc_offset[offset], - vm); + list_for_each_entry(vma, &eb->vmas, exec_list) { + int offset = vma->exec_entry - exec; + ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb, + reloc + reloc_offset[offset]); if (ret) goto err; } @@ -727,14 +761,15 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, static int i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, - struct list_head *objects) + struct list_head *vmas) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; int ret; - list_for_each_entry(obj, objects, exec_list) { + list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_object *obj = vma->obj; ret = i915_gem_object_sync(obj, ring); if (ret) return ret; @@ -809,13 +844,13 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, } static void -i915_gem_execbuffer_move_to_active(struct list_head *objects, - struct i915_address_space *vm, +i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct intel_ring_buffer *ring) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; - list_for_each_entry(obj, objects, exec_list) { + list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; @@ -825,8 +860,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, obj->base.read_domains = obj->base.pending_read_domains; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; - /* FIXME: This lookup gets fixed later <-- danvet */ - list_move_tail(&i915_gem_obj_to_vma(obj, vm)->mm_list, &vm->active_list); + list_move_tail(&vma->mm_list, &vma->vm->active_list); i915_gem_object_move_to_active(obj, ring); if (obj->base.write_domain) { obj->dirty = 1; @@ -885,7 +919,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_address_space *vm) { drm_i915_private_t *dev_priv = dev->dev_private; - struct eb_objects *eb; + struct eb_vmas *eb; struct drm_i915_gem_object *batch_obj; struct drm_clip_rect *cliprects = NULL; struct intel_ring_buffer *ring; @@ -1025,7 +1059,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - eb = eb_create(args); + eb = eb_create(args, vm); if (eb == NULL) { mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; @@ -1033,18 +1067,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Look up object handles */ - ret = eb_lookup_objects(eb, exec, args, file); + ret = 
eb_lookup_vmas(eb, exec, args, vm, file); if (ret) goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = list_entry(eb->objects.prev, - struct drm_i915_gem_object, - exec_list); + batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj; /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs); + ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs); if (ret) goto err; @@ -1054,7 +1086,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) { if (ret == -EFAULT) { ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring, - eb, exec, vm); + eb, exec); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } if (ret) @@ -1076,7 +1108,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); - ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects); + ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas); if (ret) goto err; @@ -1131,7 +1163,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); - i915_gem_execbuffer_move_to_active(&eb->objects, vm, ring); + i915_gem_execbuffer_move_to_active(&eb->vmas, ring); i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); err: -- GitLab From e656a6cba0febf12a9838882b891e1c5917c7a8b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Aug 2013 14:14:04 +0200 Subject: [PATCH 00037/10742] drm/i915: inline vma_create into lookup_or_create_vma In the execbuf code we don't clean up any vmas which ended up not getting bound for code simplicity. To make sure that we don't end up creating multiple vma for the same vm kill the somewhat dangerous vma_create function and inline it into lookup_or_create. This is just a safety measure to prevent surprises in the future. Also update the somewhat confused comment in the execbuf code and clarify what kind of magic is going on with a new one. v2: Keep the function separate as requested by Chris. But give it a __ prefix for paranoia and move it tighter together with the other vma stuff. 
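As a reading aid, since the diff below splits the move across several hunks, the lookup path ends up looking roughly like the sketch here. All names are taken from the hunks that follow; only the layout is condensed. Because __i915_gem_vma_create() becomes static, going through the lookup is the only way left for outside code to obtain a vma, which is what rules out duplicate vmas per (obj, vm):

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	/* Reuse an existing vma for this (obj, vm) pair if there is one... */
	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		/* ...and only allocate and link a fresh one on a miss. */
		vma = __i915_gem_vma_create(obj, vm);

	return vma;
}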
Cc: Chris Wilson Cc: Ben Widawsky Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/i915_gem.c | 50 +++++++++++----------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b6494b24098d..d5cff9718103 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1826,8 +1826,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); void i915_gem_free_object(struct drm_gem_object *obj); -struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm); void i915_gem_vma_destroy(struct i915_vma *vma); int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c4c56f98d6e6..6820d0a1231f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4109,8 +4109,19 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) i915_gem_object_free(obj); } -struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, +struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) +{ + struct i915_vma *vma; + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm) + return vma; + + return NULL; +} + +static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) { struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); if (vma == NULL) @@ -4131,6 +4142,19 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = __i915_gem_vma_create(obj, vm); + + return vma; +} + void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); @@ -4857,27 +4881,3 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, return 0; } - -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, vma_link) - if (vma->vm == vm) - return vma; - - return NULL; -} - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = i915_gem_vma_create(obj, vm); - - return vma; -} diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5dcfd4f59f87..93c8b28ff2e1 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -123,11 +123,16 @@ eb_lookup_vmas(struct eb_vmas *eb, list_for_each_entry(obj, &objects, obj_exec_link) { struct i915_vma *vma; + /* + * NOTE: We can leak any vmas created here when something fails + * later on. But that's no issue since vma_unbind can deal with + * vmas which are not actually bound. And since only + * lookup_or_create exists as an interface to get at the vma + * from the (obj, vm) we don't run the risk of creating + * duplicated vmas for the same vm. 
+ */ vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { - /* XXX: We don't need an error path fro vma because if - * the vma was created just for this execbuf, object - * unreference should kill it off.*/ DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); goto out; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9969d10b80f5..b902f2afc8e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -392,7 +392,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, if (gtt_offset == I915_GTT_OFFSET_NONE) return obj; - vma = i915_gem_vma_create(obj, ggtt); + vma = i915_gem_obj_lookup_or_create_vma(obj, ggtt); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_out; -- GitLab From aaa0566792dc7ae68deb1959663581ea8c75d311 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Aug 2013 12:56:40 +0100 Subject: [PATCH 00038/10742] drm/i915: Don't destroy the vma placeholder during execbuffer reservation The execbuffer handle and exec_link were moved from the object into the vma. As the vma may be unbound and destroyed whilst attempting to reserve the execbuffer objects (either through a forced unbind to fix up a misalignment or through an evict-everything call) we need to prevent the free of the i915_vma itself. Otherwise not only is the list of objects to reserve corrupt, but we continue to reference stale vma entries. Fixes kernel crash with i-g-t/gem_evict_everything This regression has been introduced in commit 04038a515d6eda6dd0857c0ade0b3950d372f4c0 Author: Ben Widawsky AuthorDate: Wed Aug 14 11:38:36 2013 +0200 drm/i915: Convert execbuf code to use vmas Reported-by: Dan Carpenter References: http://www.spinics.net/lists/intel-gfx/msg32038.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68298 Signed-off-by: Chris Wilson Cc: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6820d0a1231f..29b7e1c32b84 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4159,6 +4159,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); list_del(&vma->vma_link); + + /* Keep the vma as a placeholder in the execbuffer reservation lists */ + if (!list_empty(&vma->exec_list)) + return; + kfree(vma); } -- GitLab From b93dab6e9d286ec2cd95b28078afdfa6dd515205 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 26 Aug 2013 11:23:47 +0200 Subject: [PATCH 00039/10742] drm/i915: More vma fixups around unbind/destroy The important bugfix here is that we must not unlink the vma when we keep it around as a placeholder for the execbuf code. Since then we won't find it again when execbuf gets interrupt and restarted and create a 2nd vma. And since the code as-is isn't fit yet to deal with more than one vma, hilarity ensues. 
Specifically the dma map/unmap of the sg table isn't adjusted for multiple vmas yet and will blow up like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] i915_gem_gtt_finish_object+0x73/0xc8 [i915] PGD 56bb5067 PUD ad3dd067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: tcp_lp ppdev parport_pc lp parport ipv6 dm_mod dcdbas snd_hda_codec_hdmi pcspkr snd_hda_codec_realtek serio_raw i2c_i801 iTCO_wdt iTCO_vendor_support snd_hda_intel snd_hda_codec lpc_ich snd_hwdep mfd_core snd_pcm snd_page_alloc snd_timer snd soundcore acpi_cpufreq i915 video button drm_kms_helper drm mperf freq_table CPU: 1 PID: 16650 Comm: fbo-maxsize Not tainted 3.11.0-rc4_nightlytop_d93f59_debug_20130814_+ #6957 Hardware name: Dell Inc. OptiPlex 9010/03JR84, BIOS A01 05/04/2012 task: ffff8800563b3f00 ti: ffff88004bdf4000 task.ti: ffff88004bdf4000 RIP: 0010:[] [] i915_gem_gtt_finish_object+0x73/0xc8 [i915] RSP: 0018:ffff88004bdf5958 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8801135e0000 RCX: ffff8800ad3bf8e0 RDX: ffff8800ad3bf8e0 RSI: 0000000000000000 RDI: ffff8801007ee780 RBP: ffff88004bdf5978 R08: ffff8800ad3bf8e0 R09: 0000000000000000 R10: ffffffff86ca1810 R11: ffff880036a17101 R12: ffff8801007ee780 R13: 0000000000018001 R14: ffff880118c4e000 R15: ffff8801007ee780 FS: 00007f401a0ce740(0000) GS:ffff88011e280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000005635c000 CR4: 00000000001407e0 Stack: ffff8801007ee780 ffff88005c253180 0000000000018000 ffff8801135e0000 ffff88004bdf59a8 ffffffffa0088e55 0000000000000011 ffff8801007eec00 0000000000018000 ffff880036a17101 ffff88004bdf5a08 ffffffffa0089026 Call Trace: [] i915_vma_unbind+0xdf/0x1ab [i915] [] __i915_gem_shrink+0x105/0x177 [i915] [] i915_gem_object_get_pages_gtt+0x108/0x309 [i915] [] i915_gem_object_get_pages+0x61/0x90 [i915] [] ? gen6_ppgtt_insert_entries+0x103/0x125 [i915] [] i915_gem_object_pin+0x1fa/0x5df [i915] [] i915_gem_execbuffer_reserve_object.isra.6+0x8d/0x1bc [i915] [] i915_gem_execbuffer_reserve+0x229/0x367 [i915] [] i915_gem_do_execbuffer.isra.12+0x4dc/0xf3a [i915] [] ? might_fault+0x40/0x90 [] i915_gem_execbuffer2+0x187/0x222 [i915] [] drm_ioctl+0x308/0x442 [drm] [] ? i915_gem_execbuffer+0x3ae/0x3ae [i915] [] ? __do_page_fault+0x3dd/0x481 [] vfs_ioctl+0x26/0x39 [] do_vfs_ioctl+0x40e/0x451 [] ? sysret_check+0x1b/0x56 [] SyS_ioctl+0x57/0x87 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: 48 c7 c6 84 30 0e a0 31 c0 e8 d0 e9 f7 ff bf c6 a7 00 00 e8 07 af 2c e1 41 f6 84 24 03 01 00 00 10 75 44 49 8b 84 24 08 01 00 00 <8b> 50 08 48 8b 30 49 8b 86 b0 04 00 00 48 89 c7 48 81 c7 98 00 RIP [] i915_gem_gtt_finish_object+0x73/0xc8 [i915] RSP CR2: 0000000000000008 As a consequence we need to change the "only one vma for now" check in vma_unbind - since vma_destroy isn't always called the obj->vma_list might not be empty. Instead check that the vma list is singular at the beginning of vma_unbind. This is also more symmetric with bind_to_vm. This fixes the igt/gem_evict_everything|alignment testcases. v2: - Add a paranoid WARN to mark_free in the eviction code to make sure we never try to evict a vma used by the execbuf code right now. - Move the check for a temporary execbuf vma into vma_destroy - otherwise the failure path cleanup in bind_to_vm will blow up. 
Our first attempting at fixing this was commit 1be81a2f2cfd8789a627401d470423358fba2d76 Author: Chris Wilson Date: Tue Aug 20 12:56:40 2013 +0100 drm/i915: Don't destroy the vma placeholder during execbuffer reservation Squash with this when merging! v3: Improvements suggested in Chris' review: - Move the WARN_ON in vma_destroy that checks for vmas with an drm_mm allocation before the early return. - Bail out if we hit the WARN in mark_free to hopefully make the kernel survive for long enough to capture it. Cc: Chris Wilson Cc: Ben Widawsky Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68298 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68171 Tested-by: lu hua (v2) Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++---- drivers/gpu/drm/i915/i915_gem_evict.c | 3 +++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 29b7e1c32b84..d57368d5fc1d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2604,6 +2604,9 @@ int i915_vma_unbind(struct i915_vma *vma) drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret; + /* For now we only ever use 1 vma per object */ + WARN_ON(!list_is_singular(&obj->vma_list)); + if (list_empty(&vma->vma_link)) return 0; @@ -2652,9 +2655,7 @@ int i915_vma_unbind(struct i915_vma *vma) i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if - * no more VMAs exist. - * NB: Until we have real VMAs there will only ever be one */ - WARN_ON(!list_empty(&obj->vma_list)); + * no more VMAs exist. */ if (list_empty(&obj->vma_list)) list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); @@ -4158,12 +4159,13 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); - list_del(&vma->vma_link); /* Keep the vma as a placeholder in the execbuffer reservation lists */ if (!list_empty(&vma->exec_list)) return; + list_del(&vma->vma_link); + kfree(vma); } diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 91b700155850..cc8974fbcf31 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -37,6 +37,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) if (vma->obj->pin_count) return false; + if (WARN_ON(!list_empty(&vma->exec_list))) + return false; + list_add(&vma->exec_list, unwind); return drm_mm_scan_add_block(&vma->node); } -- GitLab From 2cc86b826070cf312d7b0571e383c56d31a1fe5c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 26 Aug 2013 19:51:00 -0300 Subject: [PATCH 00040/10742] drm/i915: Always prefer CPU relocations with LLC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A follow-on to the update of the LLC coherency logic is that we can rely on the LLC being coherent with the CS for rewriting batchbuffers irrespective of their cache domain. (This should have no effect currently as all the batch buffers are expected to be I915_CACHE_LLC and so using the cpu relocation path anyway.) 
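For reference, the predicate reads as below after this change. The code matches the hunk that follows; the per-clause comments are an interpretation added here for clarity rather than anything present in the patch:

static inline int
use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (HAS_LLC(obj->base.dev) ||			 /* LLC keeps CPU writes coherent with the CS */
		obj->base.write_domain == I915_GEM_DOMAIN_CPU || /* already being written through the CPU domain */
		!obj->map_and_fenceable ||			 /* no cheap mappable GTT window to relocate through */
		obj->cache_level != I915_CACHE_NONE);		 /* cacheable bo: CPU path avoids uncached GTT writes */
}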
Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 93c8b28ff2e1..e519f9f6e5cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -202,7 +202,8 @@ static void eb_destroy(struct eb_vmas *eb) { static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) { - return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || + return (HAS_LLC(obj->base.dev) || + obj->base.write_domain == I915_GEM_DOMAIN_CPU || !obj->map_and_fenceable || obj->cache_level != I915_CACHE_NONE); } @@ -215,7 +216,7 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj, char *vaddr; int ret = -EINVAL; - ret = i915_gem_object_set_to_cpu_domain(obj, 1); + ret = i915_gem_object_set_to_cpu_domain(obj, true); if (ret) return ret; -- GitLab From b89232732f642bfa24f9e252dd241ddfb40d3817 Mon Sep 17 00:00:00 2001 From: Shobhit Kumar Date: Tue, 27 Aug 2013 15:12:13 +0300 Subject: [PATCH 00041/10742] drm: add MIPI DSI encoder and connector types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shobhit Kumar Signed-off-by: Jani Nikula Acked-by: Dave Airlie Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 2 ++ include/uapi/drm/drm_mode.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index bff2fa941f60..e79577cb4665 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -202,6 +202,7 @@ static struct drm_conn_prop_enum_list drm_connector_enum_list[] = { DRM_MODE_CONNECTOR_TV, "TV" }, { DRM_MODE_CONNECTOR_eDP, "eDP" }, { DRM_MODE_CONNECTOR_VIRTUAL, "Virtual" }, + { DRM_MODE_CONNECTOR_DSI, "DSI" }, }; static const struct drm_prop_enum_list drm_encoder_enum_list[] = @@ -211,6 +212,7 @@ static const struct drm_prop_enum_list drm_encoder_enum_list[] = { DRM_MODE_ENCODER_LVDS, "LVDS" }, { DRM_MODE_ENCODER_TVDAC, "TV" }, { DRM_MODE_ENCODER_VIRTUAL, "Virtual" }, + { DRM_MODE_ENCODER_DSI, "DSI" }, }; void drm_connector_ida_init(void) diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 550811712f78..113d32457fa4 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -165,6 +165,7 @@ struct drm_mode_get_plane_res { #define DRM_MODE_ENCODER_LVDS 3 #define DRM_MODE_ENCODER_TVDAC 4 #define DRM_MODE_ENCODER_VIRTUAL 5 +#define DRM_MODE_ENCODER_DSI 6 struct drm_mode_get_encoder { __u32 encoder_id; @@ -203,6 +204,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_CONNECTOR_TV 13 #define DRM_MODE_CONNECTOR_eDP 14 #define DRM_MODE_CONNECTOR_VIRTUAL 15 +#define DRM_MODE_CONNECTOR_DSI 16 struct drm_mode_get_connector { -- GitLab From e9f882a3f1e4ffabe0730af16d5f6d51737515a5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 27 Aug 2013 15:12:14 +0300 Subject: [PATCH 00042/10742] drm/i915: add more VLV IOSF sideband ports accessors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For GPIO NC, CCK, CCU, and GPS CORE. 
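No users of the new accessors are added in this patch. Purely as a hypothetical illustration of the intended call shape, a read-modify-write through the CCK port might look like the sketch below; CCK_FOO and CCK_FOO_ENABLE are invented names, and any locking required around the underlying vlv_sideband_rw() transaction is assumed to be handled the same way as in the existing callers:

/* Hypothetical example only: register name, bit and locking are assumptions. */
static void vlv_cck_set_foo(struct drm_i915_private *dev_priv)
{
	u32 val;

	val = vlv_cck_read(dev_priv, CCK_FOO);	/* transaction goes out over IOSF_PORT_CCK */
	val |= CCK_FOO_ENABLE;			/* flip the hypothetical bit */
	vlv_cck_write(dev_priv, CCK_FOO, val);	/* write the result back */
}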
Signed-off-by: Jani Nikula Signed-off-by: Shobhit Kumar Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++ drivers/gpu/drm/i915/i915_reg.h | 4 ++ drivers/gpu/drm/i915/intel_sideband.c | 56 +++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d5cff9718103..e2628579dcbe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2248,6 +2248,14 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val) u32 vlv_punit_read(struct drm_i915_private *dev_priv, u8 addr); void vlv_punit_write(struct drm_i915_private *dev_priv, u8 addr, u32 val); u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); +u32 vlv_gpio_nc_read(struct drm_i915_private *dev_priv, u32 reg); +void vlv_gpio_nc_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); +u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg); +void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); +u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg); +void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); +u32 vlv_gps_core_read(struct drm_i915_private *dev_priv, u32 reg); +void vlv_gps_core_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); u32 vlv_dpio_read(struct drm_i915_private *dev_priv, int reg); void vlv_dpio_write(struct drm_i915_private *dev_priv, int reg, u32 val); u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c159e1a6810f..8f4c8b69ebc1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -346,6 +346,10 @@ #define IOSF_PORT_PUNIT 0x4 #define IOSF_PORT_NC 0x11 #define IOSF_PORT_DPIO 0x12 +#define IOSF_PORT_GPIO_NC 0x13 +#define IOSF_PORT_CCK 0x14 +#define IOSF_PORT_CCU 0xA9 +#define IOSF_PORT_GPS_CORE 0x48 #define VLV_IOSF_DATA (VLV_DISPLAY_BASE + 0x2104) #define VLV_IOSF_ADDR (VLV_DISPLAY_BASE + 0x2108) diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 9a0e6c5ea540..0a4167019769 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -101,6 +101,62 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) return val; } +u32 vlv_gpio_nc_read(struct drm_i915_private *dev_priv, u32 reg) +{ + u32 val = 0; + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPIO_NC, + PUNIT_OPCODE_REG_READ, reg, &val); + return val; +} + +void vlv_gpio_nc_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +{ + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPIO_NC, + PUNIT_OPCODE_REG_WRITE, reg, &val); +} + +u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg) +{ + u32 val = 0; + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCK, + PUNIT_OPCODE_REG_READ, reg, &val); + return val; +} + +void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +{ + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCK, + PUNIT_OPCODE_REG_WRITE, reg, &val); +} + +u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg) +{ + u32 val = 0; + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCU, + PUNIT_OPCODE_REG_READ, reg, &val); + return val; +} + +void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +{ + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCU, + PUNIT_OPCODE_REG_WRITE, reg, &val); +} + +u32 
vlv_gps_core_read(struct drm_i915_private *dev_priv, u32 reg) +{ + u32 val = 0; + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPS_CORE, + PUNIT_OPCODE_REG_READ, reg, &val); + return val; +} + +void vlv_gps_core_write(struct drm_i915_private *dev_priv, u32 reg, u32 val) +{ + vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPS_CORE, + PUNIT_OPCODE_REG_WRITE, reg, &val); +} + u32 vlv_dpio_read(struct drm_i915_private *dev_priv, int reg) { u32 val = 0; -- GitLab From b6ec10b36566c3eb330f4c03c1b00a02c974fd21 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 27 Aug 2013 15:12:15 +0300 Subject: [PATCH 00043/10742] drm/i915: add VLV pipeconf bit definition for DSI PLL lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Add comment this is pipe A only (Ville) Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8f4c8b69ebc1..7a7b9c2f84cb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2986,6 +2986,7 @@ #define PIPECONF_DISABLE 0 #define PIPECONF_DOUBLE_WIDE (1<<30) #define I965_PIPECONF_ACTIVE (1<<30) +#define PIPECONF_DSI_PLL_LOCKED (1<<29) /* vlv & pipe A only */ #define PIPECONF_FRAME_START_DELAY_MASK (3<<27) #define PIPECONF_SINGLE_WIDE 0 #define PIPECONF_PIPE_UNLOCKED 0 -- GitLab From 3230bf14c14e04d49525d172db53a55e153447a7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 27 Aug 2013 15:12:16 +0300 Subject: [PATCH 00044/10742] drm/i915: add MIPI DSI register definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add definitions for VLV MIPI DSI registers. v2: Small fixes per Ville's review comments. 
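The patch below only adds register definitions; nothing programs the DSI block yet. As a hypothetical sketch of how the per-pipe macros are meant to be used: MIPI_PORT_CTRL() and DPI_ENABLE come from the hunk below, I915_READ/I915_WRITE/POSTING_READ are the driver's usual MMIO helpers, and the sequence itself is illustrative rather than the real DSI enable path, which arrives in later patches:

/* Illustrative only: not the actual DSI enable sequence. */
static void dsi_port_ctrl_sketch(struct drm_i915_private *dev_priv,
				 enum pipe pipe)
{
	u32 val;

	/* MIPI_PORT_CTRL(pipe) resolves to _MIPIA_PORT_CTRL or _MIPIB_PORT_CTRL via _PIPE() */
	val = I915_READ(MIPI_PORT_CTRL(pipe));
	val |= DPI_ENABLE;			/* bit 31 in the definitions below */
	I915_WRITE(MIPI_PORT_CTRL(pipe), val);
	POSTING_READ(MIPI_PORT_CTRL(pipe));	/* flush the write */
}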
Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 410 ++++++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7a7b9c2f84cb..b26cf9b5b1e4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5121,4 +5121,414 @@ #define PIPE_CSC_POSTOFF_ME(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_ME, _PIPE_B_CSC_POSTOFF_ME) #define PIPE_CSC_POSTOFF_LO(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_LO, _PIPE_B_CSC_POSTOFF_LO) +/* VLV MIPI registers */ + +#define _MIPIA_PORT_CTRL (VLV_DISPLAY_BASE + 0x61190) +#define _MIPIB_PORT_CTRL (VLV_DISPLAY_BASE + 0x61700) +#define MIPI_PORT_CTRL(pipe) _PIPE(pipe, _MIPIA_PORT_CTRL, _MIPIB_PORT_CTRL) +#define DPI_ENABLE (1 << 31) /* A + B */ +#define MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT 27 +#define MIPIA_MIPI4DPHY_DELAY_COUNT_MASK (0xf << 27) +#define DUAL_LINK_MODE_MASK (1 << 26) +#define DUAL_LINK_MODE_FRONT_BACK (0 << 26) +#define DUAL_LINK_MODE_PIXEL_ALTERNATIVE (1 << 26) +#define DITHERING_ENABLE (1 << 25) /* A + B */ +#define FLOPPED_HSTX (1 << 23) +#define DE_INVERT (1 << 19) /* XXX */ +#define MIPIA_FLISDSI_DELAY_COUNT_SHIFT 18 +#define MIPIA_FLISDSI_DELAY_COUNT_MASK (0xf << 18) +#define AFE_LATCHOUT (1 << 17) +#define LP_OUTPUT_HOLD (1 << 16) +#define MIPIB_FLISDSI_DELAY_COUNT_HIGH_SHIFT 15 +#define MIPIB_FLISDSI_DELAY_COUNT_HIGH_MASK (1 << 15) +#define MIPIB_MIPI4DPHY_DELAY_COUNT_SHIFT 11 +#define MIPIB_MIPI4DPHY_DELAY_COUNT_MASK (0xf << 11) +#define CSB_SHIFT 9 +#define CSB_MASK (3 << 9) +#define CSB_20MHZ (0 << 9) +#define CSB_10MHZ (1 << 9) +#define CSB_40MHZ (2 << 9) +#define BANDGAP_MASK (1 << 8) +#define BANDGAP_PNW_CIRCUIT (0 << 8) +#define BANDGAP_LNC_CIRCUIT (1 << 8) +#define MIPIB_FLISDSI_DELAY_COUNT_LOW_SHIFT 5 +#define MIPIB_FLISDSI_DELAY_COUNT_LOW_MASK (7 << 5) +#define TEARING_EFFECT_DELAY (1 << 4) /* A + B */ +#define TEARING_EFFECT_SHIFT 2 /* A + B */ +#define TEARING_EFFECT_MASK (3 << 2) +#define TEARING_EFFECT_OFF (0 << 2) +#define TEARING_EFFECT_DSI (1 << 2) +#define TEARING_EFFECT_GPIO (2 << 2) +#define LANE_CONFIGURATION_SHIFT 0 +#define LANE_CONFIGURATION_MASK (3 << 0) +#define LANE_CONFIGURATION_4LANE (0 << 0) +#define LANE_CONFIGURATION_DUAL_LINK_A (1 << 0) +#define LANE_CONFIGURATION_DUAL_LINK_B (2 << 0) + +#define _MIPIA_TEARING_CTRL (VLV_DISPLAY_BASE + 0x61194) +#define _MIPIB_TEARING_CTRL (VLV_DISPLAY_BASE + 0x61704) +#define MIPI_TEARING_CTRL(pipe) _PIPE(pipe, _MIPIA_TEARING_CTRL, _MIPIB_TEARING_CTRL) +#define TEARING_EFFECT_DELAY_SHIFT 0 +#define TEARING_EFFECT_DELAY_MASK (0xffff << 0) + +/* XXX: all bits reserved */ +#define _MIPIA_AUTOPWG (VLV_DISPLAY_BASE + 0x611a0) + +/* MIPI DSI Controller and D-PHY registers */ + +#define _MIPIA_DEVICE_READY (VLV_DISPLAY_BASE + 0xb000) +#define _MIPIB_DEVICE_READY (VLV_DISPLAY_BASE + 0xb800) +#define MIPI_DEVICE_READY(pipe) _PIPE(pipe, _MIPIA_DEVICE_READY, _MIPIB_DEVICE_READY) +#define BUS_POSSESSION (1 << 3) /* set to give bus to receiver */ +#define ULPS_STATE_MASK (3 << 1) +#define ULPS_STATE_ENTER (2 << 1) +#define ULPS_STATE_EXIT (1 << 1) +#define ULPS_STATE_NORMAL_OPERATION (0 << 1) +#define DEVICE_READY (1 << 0) + +#define _MIPIA_INTR_STAT (VLV_DISPLAY_BASE + 0xb004) +#define _MIPIB_INTR_STAT (VLV_DISPLAY_BASE + 0xb804) +#define MIPI_INTR_STAT(pipe) _PIPE(pipe, _MIPIA_INTR_STAT, _MIPIB_INTR_STAT) +#define _MIPIA_INTR_EN (VLV_DISPLAY_BASE + 0xb008) +#define _MIPIB_INTR_EN (VLV_DISPLAY_BASE + 0xb808) 
+#define MIPI_INTR_EN(pipe) _PIPE(pipe, _MIPIA_INTR_EN, _MIPIB_INTR_EN) +#define TEARING_EFFECT (1 << 31) +#define SPL_PKT_SENT_INTERRUPT (1 << 30) +#define GEN_READ_DATA_AVAIL (1 << 29) +#define LP_GENERIC_WR_FIFO_FULL (1 << 28) +#define HS_GENERIC_WR_FIFO_FULL (1 << 27) +#define RX_PROT_VIOLATION (1 << 26) +#define RX_INVALID_TX_LENGTH (1 << 25) +#define ACK_WITH_NO_ERROR (1 << 24) +#define TURN_AROUND_ACK_TIMEOUT (1 << 23) +#define LP_RX_TIMEOUT (1 << 22) +#define HS_TX_TIMEOUT (1 << 21) +#define DPI_FIFO_UNDERRUN (1 << 20) +#define LOW_CONTENTION (1 << 19) +#define HIGH_CONTENTION (1 << 18) +#define TXDSI_VC_ID_INVALID (1 << 17) +#define TXDSI_DATA_TYPE_NOT_RECOGNISED (1 << 16) +#define TXCHECKSUM_ERROR (1 << 15) +#define TXECC_MULTIBIT_ERROR (1 << 14) +#define TXECC_SINGLE_BIT_ERROR (1 << 13) +#define TXFALSE_CONTROL_ERROR (1 << 12) +#define RXDSI_VC_ID_INVALID (1 << 11) +#define RXDSI_DATA_TYPE_NOT_REGOGNISED (1 << 10) +#define RXCHECKSUM_ERROR (1 << 9) +#define RXECC_MULTIBIT_ERROR (1 << 8) +#define RXECC_SINGLE_BIT_ERROR (1 << 7) +#define RXFALSE_CONTROL_ERROR (1 << 6) +#define RXHS_RECEIVE_TIMEOUT_ERROR (1 << 5) +#define RX_LP_TX_SYNC_ERROR (1 << 4) +#define RXEXCAPE_MODE_ENTRY_ERROR (1 << 3) +#define RXEOT_SYNC_ERROR (1 << 2) +#define RXSOT_SYNC_ERROR (1 << 1) +#define RXSOT_ERROR (1 << 0) + +#define _MIPIA_DSI_FUNC_PRG (VLV_DISPLAY_BASE + 0xb00c) +#define _MIPIB_DSI_FUNC_PRG (VLV_DISPLAY_BASE + 0xb80c) +#define MIPI_DSI_FUNC_PRG(pipe) _PIPE(pipe, _MIPIA_DSI_FUNC_PRG, _MIPIB_DSI_FUNC_PRG) +#define CMD_MODE_DATA_WIDTH_MASK (7 << 13) +#define CMD_MODE_NOT_SUPPORTED (0 << 13) +#define CMD_MODE_DATA_WIDTH_16_BIT (1 << 13) +#define CMD_MODE_DATA_WIDTH_9_BIT (2 << 13) +#define CMD_MODE_DATA_WIDTH_8_BIT (3 << 13) +#define CMD_MODE_DATA_WIDTH_OPTION1 (4 << 13) +#define CMD_MODE_DATA_WIDTH_OPTION2 (5 << 13) +#define VID_MODE_FORMAT_MASK (0xf << 7) +#define VID_MODE_NOT_SUPPORTED (0 << 7) +#define VID_MODE_FORMAT_RGB565 (1 << 7) +#define VID_MODE_FORMAT_RGB666 (2 << 7) +#define VID_MODE_FORMAT_RGB666_LOOSE (3 << 7) +#define VID_MODE_FORMAT_RGB888 (4 << 7) +#define CMD_MODE_CHANNEL_NUMBER_SHIFT 5 +#define CMD_MODE_CHANNEL_NUMBER_MASK (3 << 5) +#define VID_MODE_CHANNEL_NUMBER_SHIFT 3 +#define VID_MODE_CHANNEL_NUMBER_MASK (3 << 3) +#define DATA_LANES_PRG_REG_SHIFT 0 +#define DATA_LANES_PRG_REG_MASK (7 << 0) + +#define _MIPIA_HS_TX_TIMEOUT (VLV_DISPLAY_BASE + 0xb010) +#define _MIPIB_HS_TX_TIMEOUT (VLV_DISPLAY_BASE + 0xb810) +#define MIPI_HS_TX_TIMEOUT(pipe) _PIPE(pipe, _MIPIA_HS_TX_TIMEOUT, _MIPIB_HS_TX_TIMEOUT) +#define HIGH_SPEED_TX_TIMEOUT_COUNTER_MASK 0xffffff + +#define _MIPIA_LP_RX_TIMEOUT (VLV_DISPLAY_BASE + 0xb014) +#define _MIPIB_LP_RX_TIMEOUT (VLV_DISPLAY_BASE + 0xb814) +#define MIPI_LP_RX_TIMEOUT(pipe) _PIPE(pipe, _MIPIA_LP_RX_TIMEOUT, _MIPIB_LP_RX_TIMEOUT) +#define LOW_POWER_RX_TIMEOUT_COUNTER_MASK 0xffffff + +#define _MIPIA_TURN_AROUND_TIMEOUT (VLV_DISPLAY_BASE + 0xb018) +#define _MIPIB_TURN_AROUND_TIMEOUT (VLV_DISPLAY_BASE + 0xb818) +#define MIPI_TURN_AROUND_TIMEOUT(pipe) _PIPE(pipe, _MIPIA_TURN_AROUND_TIMEOUT, _MIPIB_TURN_AROUND_TIMEOUT) +#define TURN_AROUND_TIMEOUT_MASK 0x3f + +#define _MIPIA_DEVICE_RESET_TIMER (VLV_DISPLAY_BASE + 0xb01c) +#define _MIPIB_DEVICE_RESET_TIMER (VLV_DISPLAY_BASE + 0xb81c) +#define MIPI_DEVICE_RESET_TIMER(pipe) _PIPE(pipe, _MIPIA_DEVICE_RESET_TIMER, _MIPIB_DEVICE_RESET_TIMER) +#define DEVICE_RESET_TIMER_MASK 0xffff + +#define _MIPIA_DPI_RESOLUTION (VLV_DISPLAY_BASE + 0xb020) +#define _MIPIB_DPI_RESOLUTION (VLV_DISPLAY_BASE + 0xb820) +#define 
MIPI_DPI_RESOLUTION(pipe) _PIPE(pipe, _MIPIA_DPI_RESOLUTION, _MIPIB_DPI_RESOLUTION) +#define VERTICAL_ADDRESS_SHIFT 16 +#define VERTICAL_ADDRESS_MASK (0xffff << 16) +#define HORIZONTAL_ADDRESS_SHIFT 0 +#define HORIZONTAL_ADDRESS_MASK 0xffff + +#define _MIPIA_DBI_FIFO_THROTTLE (VLV_DISPLAY_BASE + 0xb024) +#define _MIPIB_DBI_FIFO_THROTTLE (VLV_DISPLAY_BASE + 0xb824) +#define MIPI_DBI_FIFO_THROTTLE(pipe) _PIPE(pipe, _MIPIA_DBI_FIFO_THROTTLE, _MIPIB_DBI_FIFO_THROTTLE) +#define DBI_FIFO_EMPTY_HALF (0 << 0) +#define DBI_FIFO_EMPTY_QUARTER (1 << 0) +#define DBI_FIFO_EMPTY_7_LOCATIONS (2 << 0) + +/* regs below are bits 15:0 */ +#define _MIPIA_HSYNC_PADDING_COUNT (VLV_DISPLAY_BASE + 0xb028) +#define _MIPIB_HSYNC_PADDING_COUNT (VLV_DISPLAY_BASE + 0xb828) +#define MIPI_HSYNC_PADDING_COUNT(pipe) _PIPE(pipe, _MIPIA_HSYNC_PADDING_COUNT, _MIPIB_HSYNC_PADDING_COUNT) + +#define _MIPIA_HBP_COUNT (VLV_DISPLAY_BASE + 0xb02c) +#define _MIPIB_HBP_COUNT (VLV_DISPLAY_BASE + 0xb82c) +#define MIPI_HBP_COUNT(pipe) _PIPE(pipe, _MIPIA_HBP_COUNT, _MIPIB_HBP_COUNT) + +#define _MIPIA_HFP_COUNT (VLV_DISPLAY_BASE + 0xb030) +#define _MIPIB_HFP_COUNT (VLV_DISPLAY_BASE + 0xb830) +#define MIPI_HFP_COUNT(pipe) _PIPE(pipe, _MIPIA_HFP_COUNT, _MIPIB_HFP_COUNT) + +#define _MIPIA_HACTIVE_AREA_COUNT (VLV_DISPLAY_BASE + 0xb034) +#define _MIPIB_HACTIVE_AREA_COUNT (VLV_DISPLAY_BASE + 0xb834) +#define MIPI_HACTIVE_AREA_COUNT(pipe) _PIPE(pipe, _MIPIA_HACTIVE_AREA_COUNT, _MIPIB_HACTIVE_AREA_COUNT) + +#define _MIPIA_VSYNC_PADDING_COUNT (VLV_DISPLAY_BASE + 0xb038) +#define _MIPIB_VSYNC_PADDING_COUNT (VLV_DISPLAY_BASE + 0xb838) +#define MIPI_VSYNC_PADDING_COUNT(pipe) _PIPE(pipe, _MIPIA_VSYNC_PADDING_COUNT, _MIPIB_VSYNC_PADDING_COUNT) + +#define _MIPIA_VBP_COUNT (VLV_DISPLAY_BASE + 0xb03c) +#define _MIPIB_VBP_COUNT (VLV_DISPLAY_BASE + 0xb83c) +#define MIPI_VBP_COUNT(pipe) _PIPE(pipe, _MIPIA_VBP_COUNT, _MIPIB_VBP_COUNT) + +#define _MIPIA_VFP_COUNT (VLV_DISPLAY_BASE + 0xb040) +#define _MIPIB_VFP_COUNT (VLV_DISPLAY_BASE + 0xb840) +#define MIPI_VFP_COUNT(pipe) _PIPE(pipe, _MIPIA_VFP_COUNT, _MIPIB_VFP_COUNT) + +#define _MIPIA_HIGH_LOW_SWITCH_COUNT (VLV_DISPLAY_BASE + 0xb044) +#define _MIPIB_HIGH_LOW_SWITCH_COUNT (VLV_DISPLAY_BASE + 0xb844) +#define MIPI_HIGH_LOW_SWITCH_COUNT(pipe) _PIPE(pipe, _MIPIA_HIGH_LOW_SWITCH_COUNT, _MIPIB_HIGH_LOW_SWITCH_COUNT) +/* regs above are bits 15:0 */ + +#define _MIPIA_DPI_CONTROL (VLV_DISPLAY_BASE + 0xb048) +#define _MIPIB_DPI_CONTROL (VLV_DISPLAY_BASE + 0xb848) +#define MIPI_DPI_CONTROL(pipe) _PIPE(pipe, _MIPIA_DPI_CONTROL, _MIPIB_DPI_CONTROL) +#define DPI_LP_MODE (1 << 6) +#define BACKLIGHT_OFF (1 << 5) +#define BACKLIGHT_ON (1 << 4) +#define COLOR_MODE_OFF (1 << 3) +#define COLOR_MODE_ON (1 << 2) +#define TURN_ON (1 << 1) +#define SHUTDOWN (1 << 0) + +#define _MIPIA_DPI_DATA (VLV_DISPLAY_BASE + 0xb04c) +#define _MIPIB_DPI_DATA (VLV_DISPLAY_BASE + 0xb84c) +#define MIPI_DPI_DATA(pipe) _PIPE(pipe, _MIPIA_DPI_DATA, _MIPIB_DPI_DATA) +#define COMMAND_BYTE_SHIFT 0 +#define COMMAND_BYTE_MASK (0x3f << 0) + +#define _MIPIA_INIT_COUNT (VLV_DISPLAY_BASE + 0xb050) +#define _MIPIB_INIT_COUNT (VLV_DISPLAY_BASE + 0xb850) +#define MIPI_INIT_COUNT(pipe) _PIPE(pipe, _MIPIA_INIT_COUNT, _MIPIB_INIT_COUNT) +#define MASTER_INIT_TIMER_SHIFT 0 +#define MASTER_INIT_TIMER_MASK (0xffff << 0) + +#define _MIPIA_MAX_RETURN_PKT_SIZE (VLV_DISPLAY_BASE + 0xb054) +#define _MIPIB_MAX_RETURN_PKT_SIZE (VLV_DISPLAY_BASE + 0xb854) +#define MIPI_MAX_RETURN_PKT_SIZE(pipe) _PIPE(pipe, _MIPIA_MAX_RETURN_PKT_SIZE, _MIPIB_MAX_RETURN_PKT_SIZE) +#define 
MAX_RETURN_PKT_SIZE_SHIFT 0 +#define MAX_RETURN_PKT_SIZE_MASK (0x3ff << 0) + +#define _MIPIA_VIDEO_MODE_FORMAT (VLV_DISPLAY_BASE + 0xb058) +#define _MIPIB_VIDEO_MODE_FORMAT (VLV_DISPLAY_BASE + 0xb858) +#define MIPI_VIDEO_MODE_FORMAT(pipe) _PIPE(pipe, _MIPIA_VIDEO_MODE_FORMAT, _MIPIB_VIDEO_MODE_FORMAT) +#define RANDOM_DPI_DISPLAY_RESOLUTION (1 << 4) +#define DISABLE_VIDEO_BTA (1 << 3) +#define IP_TG_CONFIG (1 << 2) +#define VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE (1 << 0) +#define VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS (2 << 0) +#define VIDEO_MODE_BURST (3 << 0) + +#define _MIPIA_EOT_DISABLE (VLV_DISPLAY_BASE + 0xb05c) +#define _MIPIB_EOT_DISABLE (VLV_DISPLAY_BASE + 0xb85c) +#define MIPI_EOT_DISABLE(pipe) _PIPE(pipe, _MIPIA_EOT_DISABLE, _MIPIB_EOT_DISABLE) +#define LP_RX_TIMEOUT_ERROR_RECOVERY_DISABLE (1 << 7) +#define HS_RX_TIMEOUT_ERROR_RECOVERY_DISABLE (1 << 6) +#define LOW_CONTENTION_RECOVERY_DISABLE (1 << 5) +#define HIGH_CONTENTION_RECOVERY_DISABLE (1 << 4) +#define TXDSI_TYPE_NOT_RECOGNISED_ERROR_RECOVERY_DISABLE (1 << 3) +#define TXECC_MULTIBIT_ERROR_RECOVERY_DISABLE (1 << 2) +#define CLOCKSTOP (1 << 1) +#define EOT_DISABLE (1 << 0) + +#define _MIPIA_LP_BYTECLK (VLV_DISPLAY_BASE + 0xb060) +#define _MIPIB_LP_BYTECLK (VLV_DISPLAY_BASE + 0xb860) +#define MIPI_LP_BYTECLK(pipe) _PIPE(pipe, _MIPIA_LP_BYTECLK, _MIPIB_LP_BYTECLK) +#define LP_BYTECLK_SHIFT 0 +#define LP_BYTECLK_MASK (0xffff << 0) + +/* bits 31:0 */ +#define _MIPIA_LP_GEN_DATA (VLV_DISPLAY_BASE + 0xb064) +#define _MIPIB_LP_GEN_DATA (VLV_DISPLAY_BASE + 0xb864) +#define MIPI_LP_GEN_DATA(pipe) _PIPE(pipe, _MIPIA_LP_GEN_DATA, _MIPIB_LP_GEN_DATA) + +/* bits 31:0 */ +#define _MIPIA_HS_GEN_DATA (VLV_DISPLAY_BASE + 0xb068) +#define _MIPIB_HS_GEN_DATA (VLV_DISPLAY_BASE + 0xb868) +#define MIPI_HS_GEN_DATA(pipe) _PIPE(pipe, _MIPIA_HS_GEN_DATA, _MIPIB_HS_GEN_DATA) + +#define _MIPIA_LP_GEN_CTRL (VLV_DISPLAY_BASE + 0xb06c) +#define _MIPIB_LP_GEN_CTRL (VLV_DISPLAY_BASE + 0xb86c) +#define MIPI_LP_GEN_CTRL(pipe) _PIPE(pipe, _MIPIA_LP_GEN_CTRL, _MIPIB_LP_GEN_CTRL) +#define _MIPIA_HS_GEN_CTRL (VLV_DISPLAY_BASE + 0xb070) +#define _MIPIB_HS_GEN_CTRL (VLV_DISPLAY_BASE + 0xb870) +#define MIPI_HS_GEN_CTRL(pipe) _PIPE(pipe, _MIPIA_HS_GEN_CTRL, _MIPIB_HS_GEN_CTRL) +#define LONG_PACKET_WORD_COUNT_SHIFT 8 +#define LONG_PACKET_WORD_COUNT_MASK (0xffff << 8) +#define SHORT_PACKET_PARAM_SHIFT 8 +#define SHORT_PACKET_PARAM_MASK (0xffff << 8) +#define VIRTUAL_CHANNEL_SHIFT 6 +#define VIRTUAL_CHANNEL_MASK (3 << 6) +#define DATA_TYPE_SHIFT 0 +#define DATA_TYPE_MASK (3f << 0) +/* data type values, see include/video/mipi_display.h */ + +#define _MIPIA_GEN_FIFO_STAT (VLV_DISPLAY_BASE + 0xb074) +#define _MIPIB_GEN_FIFO_STAT (VLV_DISPLAY_BASE + 0xb874) +#define MIPI_GEN_FIFO_STAT(pipe) _PIPE(pipe, _MIPIA_GEN_FIFO_STAT, _MIPIB_GEN_FIFO_STAT) +#define DPI_FIFO_EMPTY (1 << 28) +#define DBI_FIFO_EMPTY (1 << 27) +#define LP_CTRL_FIFO_EMPTY (1 << 26) +#define LP_CTRL_FIFO_HALF_EMPTY (1 << 25) +#define LP_CTRL_FIFO_FULL (1 << 24) +#define HS_CTRL_FIFO_EMPTY (1 << 18) +#define HS_CTRL_FIFO_HALF_EMPTY (1 << 17) +#define HS_CTRL_FIFO_FULL (1 << 16) +#define LP_DATA_FIFO_EMPTY (1 << 10) +#define LP_DATA_FIFO_HALF_EMPTY (1 << 9) +#define LP_DATA_FIFO_FULL (1 << 8) +#define HS_DATA_FIFO_EMPTY (1 << 2) +#define HS_DATA_FIFO_HALF_EMPTY (1 << 1) +#define HS_DATA_FIFO_FULL (1 << 0) + +#define _MIPIA_HS_LS_DBI_ENABLE (VLV_DISPLAY_BASE + 0xb078) +#define _MIPIB_HS_LS_DBI_ENABLE (VLV_DISPLAY_BASE + 0xb878) +#define MIPI_HS_LP_DBI_ENABLE(pipe) _PIPE(pipe, _MIPIA_HS_LS_DBI_ENABLE, 
_MIPIB_HS_LS_DBI_ENABLE) +#define DBI_HS_LP_MODE_MASK (1 << 0) +#define DBI_LP_MODE (1 << 0) +#define DBI_HS_MODE (0 << 0) + +#define _MIPIA_DPHY_PARAM (VLV_DISPLAY_BASE + 0xb080) +#define _MIPIB_DPHY_PARAM (VLV_DISPLAY_BASE + 0xb880) +#define MIPI_DPHY_PARAM(pipe) _PIPE(pipe, _MIPIA_DPHY_PARAM, _MIPIB_DPHY_PARAM) +#define EXIT_ZERO_COUNT_SHIFT 24 +#define EXIT_ZERO_COUNT_MASK (0x3f << 24) +#define TRAIL_COUNT_SHIFT 16 +#define TRAIL_COUNT_MASK (0x1f << 16) +#define CLK_ZERO_COUNT_SHIFT 8 +#define CLK_ZERO_COUNT_MASK (0xff << 8) +#define PREPARE_COUNT_SHIFT 0 +#define PREPARE_COUNT_MASK (0x3f << 0) + +/* bits 31:0 */ +#define _MIPIA_DBI_BW_CTRL (VLV_DISPLAY_BASE + 0xb084) +#define _MIPIB_DBI_BW_CTRL (VLV_DISPLAY_BASE + 0xb884) +#define MIPI_DBI_BW_CTRL(pipe) _PIPE(pipe, _MIPIA_DBI_BW_CTRL, _MIPIB_DBI_BW_CTRL) + +#define _MIPIA_CLK_LANE_SWITCH_TIME_CNT (VLV_DISPLAY_BASE + 0xb088) +#define _MIPIB_CLK_LANE_SWITCH_TIME_CNT (VLV_DISPLAY_BASE + 0xb888) +#define MIPI_CLK_LANE_SWITCH_TIME_CNT(pipe) _PIPE(pipe, _MIPIA_CLK_LANE_SWITCH_TIME_CNT, _MIPIB_CLK_LANE_SWITCH_TIME_CNT) +#define LP_HS_SSW_CNT_SHIFT 16 +#define LP_HS_SSW_CNT_MASK (0xffff << 16) +#define HS_LP_PWR_SW_CNT_SHIFT 0 +#define HS_LP_PWR_SW_CNT_MASK (0xffff << 0) + +#define _MIPIA_STOP_STATE_STALL (VLV_DISPLAY_BASE + 0xb08c) +#define _MIPIB_STOP_STATE_STALL (VLV_DISPLAY_BASE + 0xb88c) +#define MIPI_STOP_STATE_STALL(pipe) _PIPE(pipe, _MIPIA_STOP_STATE_STALL, _MIPIB_STOP_STATE_STALL) +#define STOP_STATE_STALL_COUNTER_SHIFT 0 +#define STOP_STATE_STALL_COUNTER_MASK (0xff << 0) + +#define _MIPIA_INTR_STAT_REG_1 (VLV_DISPLAY_BASE + 0xb090) +#define _MIPIB_INTR_STAT_REG_1 (VLV_DISPLAY_BASE + 0xb890) +#define MIPI_INTR_STAT_REG_1(pipe) _PIPE(pipe, _MIPIA_INTR_STAT_REG_1, _MIPIB_INTR_STAT_REG_1) +#define _MIPIA_INTR_EN_REG_1 (VLV_DISPLAY_BASE + 0xb094) +#define _MIPIB_INTR_EN_REG_1 (VLV_DISPLAY_BASE + 0xb894) +#define MIPI_INTR_EN_REG_1(pipe) _PIPE(pipe, _MIPIA_INTR_EN_REG_1, _MIPIB_INTR_EN_REG_1) +#define RX_CONTENTION_DETECTED (1 << 0) + +/* XXX: only pipe A ?!? 
+#define MIPIA_DBI_TYPEC_CTRL (VLV_DISPLAY_BASE + 0xb100)
+#define DBI_TYPEC_ENABLE (1 << 31)
+#define DBI_TYPEC_WIP (1 << 30)
+#define DBI_TYPEC_OPTION_SHIFT 28
+#define DBI_TYPEC_OPTION_MASK (3 << 28)
+#define DBI_TYPEC_FREQ_SHIFT 24
+#define DBI_TYPEC_FREQ_MASK (0xf << 24)
+#define DBI_TYPEC_OVERRIDE (1 << 8)
+#define DBI_TYPEC_OVERRIDE_COUNTER_SHIFT 0
+#define DBI_TYPEC_OVERRIDE_COUNTER_MASK (0xff << 0)
+
+
+/* MIPI adapter registers */
+
+#define _MIPIA_CTRL (VLV_DISPLAY_BASE + 0xb104)
+#define _MIPIB_CTRL (VLV_DISPLAY_BASE + 0xb904)
+#define MIPI_CTRL(pipe) _PIPE(pipe, _MIPIA_CTRL, _MIPIB_CTRL)
+#define ESCAPE_CLOCK_DIVIDER_SHIFT 5 /* A only */
+#define ESCAPE_CLOCK_DIVIDER_MASK (3 << 5)
+#define ESCAPE_CLOCK_DIVIDER_1 (0 << 5)
+#define ESCAPE_CLOCK_DIVIDER_2 (1 << 5)
+#define ESCAPE_CLOCK_DIVIDER_4 (2 << 5)
+#define READ_REQUEST_PRIORITY_SHIFT 3
+#define READ_REQUEST_PRIORITY_MASK (3 << 3)
+#define READ_REQUEST_PRIORITY_LOW (0 << 3)
+#define READ_REQUEST_PRIORITY_HIGH (3 << 3)
+#define RGB_FLIP_TO_BGR (1 << 2)
+
+#define _MIPIA_DATA_ADDRESS (VLV_DISPLAY_BASE + 0xb108)
+#define _MIPIB_DATA_ADDRESS (VLV_DISPLAY_BASE + 0xb908)
+#define MIPI_DATA_ADDRESS(pipe) _PIPE(pipe, _MIPIA_DATA_ADDRESS, _MIPIB_DATA_ADDRESS)
+#define DATA_MEM_ADDRESS_SHIFT 5
+#define DATA_MEM_ADDRESS_MASK (0x7ffffff << 5)
+#define DATA_VALID (1 << 0)
+
+#define _MIPIA_DATA_LENGTH (VLV_DISPLAY_BASE + 0xb10c)
+#define _MIPIB_DATA_LENGTH (VLV_DISPLAY_BASE + 0xb90c)
+#define MIPI_DATA_LENGTH(pipe) _PIPE(pipe, _MIPIA_DATA_LENGTH, _MIPIB_DATA_LENGTH)
+#define DATA_LENGTH_SHIFT 0
+#define DATA_LENGTH_MASK (0xfffff << 0)
+
+#define _MIPIA_COMMAND_ADDRESS (VLV_DISPLAY_BASE + 0xb110)
+#define _MIPIB_COMMAND_ADDRESS (VLV_DISPLAY_BASE + 0xb910)
+#define MIPI_COMMAND_ADDRESS(pipe) _PIPE(pipe, _MIPIA_COMMAND_ADDRESS, _MIPIB_COMMAND_ADDRESS)
+#define COMMAND_MEM_ADDRESS_SHIFT 5
+#define COMMAND_MEM_ADDRESS_MASK (0x7ffffff << 5)
+#define AUTO_PWG_ENABLE (1 << 2)
+#define MEMORY_WRITE_DATA_FROM_PIPE_RENDERING (1 << 1)
+#define COMMAND_VALID (1 << 0)
+
+#define _MIPIA_COMMAND_LENGTH (VLV_DISPLAY_BASE + 0xb114)
+#define _MIPIB_COMMAND_LENGTH (VLV_DISPLAY_BASE + 0xb914)
+#define MIPI_COMMAND_LENGTH(pipe) _PIPE(pipe, _MIPIA_COMMAND_LENGTH, _MIPIB_COMMAND_LENGTH)
+#define COMMAND_LENGTH_SHIFT(n) (8 * (n)) /* n: 0...3 */
+#define COMMAND_LENGTH_MASK(n) (0xff << (8 * (n)))
+
+#define _MIPIA_READ_DATA_RETURN0 (VLV_DISPLAY_BASE + 0xb118)
+#define _MIPIB_READ_DATA_RETURN0 (VLV_DISPLAY_BASE + 0xb918)
+#define MIPI_READ_DATA_RETURN(pipe, n) \
+	(_PIPE(pipe, _MIPIA_READ_DATA_RETURN0, _MIPIB_READ_DATA_RETURN0) + 4 * (n)) /* n: 0...7 */
+
+#define _MIPIA_READ_DATA_VALID (VLV_DISPLAY_BASE + 0xb138)
+#define _MIPIB_READ_DATA_VALID (VLV_DISPLAY_BASE + 0xb938)
+#define MIPI_READ_DATA_VALID(pipe) _PIPE(pipe, _MIPIA_READ_DATA_VALID, _MIPIB_READ_DATA_VALID)
+#define READ_DATA_VALID(n) (1 << (n))
+
 #endif /* _I915_REG_H_ */
--
GitLab
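The generic command registers above split a DSI packet header across three fields of MIPI_{LP,HS}_GEN_CTRL: the data type in bits 5:0, the virtual channel in bits 7:6, and the short-packet parameter bytes (or the long-packet word count) in bits 23:8. A minimal, self-contained sketch of assembling such a short-packet header word follows; the helper name, the assumed byte order of the two parameters, and the standalone main() are illustrative only and are not code from this series, and the local defines simply mirror the values above.

#include <stdint.h>
#include <stdio.h>

/* Field layout mirroring MIPI_{LP,HS}_GEN_CTRL as defined above. */
#define DATA_TYPE_SHIFT           0
#define DATA_TYPE_MASK            (0x3fu << 0)
#define VIRTUAL_CHANNEL_SHIFT     6
#define VIRTUAL_CHANNEL_MASK      (0x3u << 6)
#define SHORT_PACKET_PARAM_SHIFT  8
#define SHORT_PACKET_PARAM_MASK   (0xffffu << 8)

/* DSI data type per the MIPI spec / include/video/mipi_display.h:
 * DCS short write with no parameter. */
#define MIPI_DSI_DCS_SHORT_WRITE  0x05

/* Hypothetical helper: pack channel, data type and the two parameter bytes
 * into one GEN_CTRL-style header word (param1 assumed in the low byte). */
static uint32_t dsi_pack_short_header(uint8_t channel, uint8_t data_type,
				      uint8_t param1, uint8_t param2)
{
	uint32_t ctrl = 0;

	ctrl |= (((uint32_t)param2 << 8 | param1) << SHORT_PACKET_PARAM_SHIFT) &
		SHORT_PACKET_PARAM_MASK;
	ctrl |= ((uint32_t)channel << VIRTUAL_CHANNEL_SHIFT) & VIRTUAL_CHANNEL_MASK;
	ctrl |= ((uint32_t)data_type << DATA_TYPE_SHIFT) & DATA_TYPE_MASK;

	return ctrl;
}

int main(void)
{
	/* DCS set_display_on (0x29) as a short write on virtual channel 0. */
	uint32_t ctrl = dsi_pack_short_header(0, MIPI_DSI_DCS_SHORT_WRITE, 0x29, 0);

	printf("GEN_CTRL header word: 0x%08x\n", ctrl);
	return 0;
}

For a long packet, per the field definitions above, the payload byte count would go into bits 23:8 (LONG_PACKET_WORD_COUNT) and the payload itself would be streamed through the corresponding GEN_DATA register.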
From 72ffa333418426b46648ac84ad4f015963b025e1 Mon Sep 17 00:00:00 2001
From: Jani Nikula
Date: Tue, 27 Aug 2013 15:12:17 +0300
Subject: [PATCH 00045/10742] drm/i915: add MIPI DSI output type and subtypes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jani Nikula
Signed-off-by: Shobhit Kumar
Reviewed-by: Ville Syrjälä
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/intel_drv.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index dbfe5f7bb3de..8e0d54d895be 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -93,13 +93,17 @@
 #define INTEL_OUTPUT_HDMI 6
 #define INTEL_OUTPUT_DISPLAYPORT 7
 #define INTEL_OUTPUT_EDP 8
-#define INTEL_OUTPUT_UNKNOWN 9
+#define INTEL_OUTPUT_DSI 9
+#define INTEL_OUTPUT_UNKNOWN 10
 
 #define INTEL_DVO_CHIP_NONE 0
 #define INTEL_DVO_CHIP_LVDS 1
 #define INTEL_DVO_CHIP_TMDS 2
 #define INTEL_DVO_CHIP_TVOUT 4
 
+#define INTEL_DSI_COMMAND_MODE 0
+#define INTEL_DSI_VIDEO_MODE 1
+
 struct intel_framebuffer {
 	struct drm_framebuffer base;
 	struct drm_i915_gem_object *obj;
--
GitLab
From f5e11b06eb8a5aa6d4918aca85f88268e131a88e Mon Sep 17 00:00:00 2001
From: Jani Nikula
Date: Tue, 27 Aug 2013 15:12:18 +0300
Subject: [PATCH 00046/10742] drm/i915: add structs for MIPI DSI output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sub-encoder model is copied from DVO.

v2: Add attached_connector to struct intel_dsi.

Signed-off-by: Jani Nikula
Reviewed-by: Ville Syrjälä
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/intel_dsi.h | 99 ++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/intel_dsi.h

diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h
new file mode 100644
index 000000000000..f308269cd87c
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_dsi.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_DSI_H
+#define _INTEL_DSI_H
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include "intel_drv.h"
+
+struct intel_dsi_device {
+	unsigned int panel_id;
+	const char *name;
+	int type;
+	const struct intel_dsi_dev_ops *dev_ops;
+	void *dev_priv;
+};
+
+struct intel_dsi_dev_ops {
+	bool (*init)(struct intel_dsi_device *dsi);
+
+	/* This callback must be able to assume DSI commands can be sent */
+	void (*enable)(struct intel_dsi_device *dsi);
+
+	/* This callback must be able to assume DSI commands can be sent */
+	void (*disable)(struct intel_dsi_device *dsi);
+
+	int (*mode_valid)(struct intel_dsi_device *dsi,
+			  struct drm_display_mode *mode);
+
+	bool (*mode_fixup)(struct intel_dsi_device *dsi,
+			   const struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+
+	void (*mode_set)(struct intel_dsi_device *dsi,
+			 struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode);
+
+	enum drm_connector_status (*detect)(struct intel_dsi_device *dsi);
+
+	bool (*get_hw_state)(struct intel_dsi_device *dev);
+
+	struct drm_display_mode *(*get_modes)(struct intel_dsi_device *dsi);
+
+	void (*destroy) (struct intel_dsi_device *dsi);
+};
+
+struct intel_dsi {
+	struct intel_encoder base;
+
+	struct intel_dsi_device dev;
+
+	struct intel_connector *attached_connector;
+
+	/* if true, use HS mode, otherwise LP */
+	bool hs;
+
+	/* virtual channel */
+	int channel;
+
+	/* number of DSI lanes */
+	unsigned int lane_count;
+
+	/* video mode pixel format for MIPI_DSI_FUNC_PRG register */
+	u32 pixel_format;
+
+	/* video mode format for MIPI_VIDEO_MODE_FORMAT register */
+	u32 video_mode_format;
+
+	/* eot for MIPI_EOT_DISABLE register */
+	u32 eot_disable;
+};
+
+static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder)
+{
+	return container_of(encoder, struct intel_dsi, base.base);
+}
+
+#endif /* _INTEL_DSI_H */
--
GitLab
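With the sub-encoder split above, a panel-specific sub-driver only has to provide an intel_dsi_dev_ops table and the generic encoder code calls into it. A rough sketch of such a panel sub-driver follows, assuming the intel_dsi.h added above; the panel name and every function in it are hypothetical illustrations, not part of this series, and several optional callbacks are deliberately left out.

/* Hypothetical panel sub-driver sketch built against the intel_dsi.h above. */
#include "intel_dsi.h"

static bool fancy_panel_init(struct intel_dsi_device *dsi)
{
	/* Probe and validate the panel here; returning false rejects it. */
	return true;
}

static void fancy_panel_enable(struct intel_dsi_device *dsi)
{
	/* DSI commands may be sent here, e.g. exit_sleep_mode, set_display_on. */
}

static void fancy_panel_disable(struct intel_dsi_device *dsi)
{
	/* Mirror of enable: set_display_off, enter_sleep_mode. */
}

static enum drm_connector_status fancy_panel_detect(struct intel_dsi_device *dsi)
{
	/* A fixed panel is simply always connected. */
	return connector_status_connected;
}

static const struct intel_dsi_dev_ops fancy_panel_ops = {
	.init = fancy_panel_init,
	.enable = fancy_panel_enable,
	.disable = fancy_panel_disable,
	.detect = fancy_panel_detect,
	/* mode_valid/mode_fixup/mode_set/get_hw_state/get_modes/destroy
	 * omitted in this sketch; a real sub-driver would fill them in too. */
};

struct intel_dsi then carries a pointer to such a table in its embedded intel_dsi_device, so the encoder's enable and disable paths stay panel-agnostic.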
From 69c05eb2d8421cbe96c2c66ea6e3223d25928d92 Mon Sep 17 00:00:00 2001
From: Jani Nikula
Date: Tue, 27 Aug 2013 15:12:19 +0300
Subject: [PATCH 00047/10742] drm/i915: add MIPI DSI command sending routines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: Rebase due to register bit definition change.

v3: Mostly based on Ville's review comments.
- Use size_t for length all around.
- Reuse dsi_vc_send_short in dsi_vc_send_long.
- Remove stale/incorrect comments.
- Reverse special packet sent interrupt check.
- Use DSI controller regs for reading, not adapter.

Signed-off-by: Jani Nikula
Reviewed-by: Ville Syrjälä
Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/Makefile | 1 +
 drivers/gpu/drm/i915/intel_dsi_cmd.c | 427 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_dsi_cmd.h | 109 +++++++
 3 files changed, 537 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/intel_dsi_cmd.c
 create mode 100644 drivers/gpu/drm/i915/intel_dsi_cmd.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b8449a84a0dc..8bffd292d1dc 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -21,6 +21,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  intel_display.o \
 	  intel_crt.o \
 	  intel_lvds.o \
+	  intel_dsi_cmd.o \
 	  intel_bios.o \
 	  intel_ddi.o \
 	  intel_dp.o \
diff --git a/drivers/gpu/drm/i915/intel_dsi_cmd.c b/drivers/gpu/drm/i915/intel_dsi_cmd.c
new file mode 100644
index 000000000000..86577a0fc620
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_dsi_cmd.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Jani Nikula
+ */
+
+#include
+#include
+#include
+#include