Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b5ed7639 authored by Jeff Garzik's avatar Jeff Garzik
Browse files

Merge branch 'master' into upstream

parents 0638dec0 eb35cf60
Loading
Loading
Loading
Loading
+270 −78
Original line number Original line Diff line number Diff line
@@ -19,6 +19,7 @@ Contents:
     - Control dependencies.
     - Control dependencies.
     - SMP barrier pairing.
     - SMP barrier pairing.
     - Examples of memory barrier sequences.
     - Examples of memory barrier sequences.
     - Read memory barriers vs load speculation.


 (*) Explicit kernel barriers.
 (*) Explicit kernel barriers.


@@ -248,7 +249,7 @@ And there are a number of things that _must_ or _must_not_ be assumed:
     we may get either of:
     we may get either of:


	STORE *A = X; Y = LOAD *A;
	STORE *A = X; Y = LOAD *A;
	STORE *A = Y;
	STORE *A = Y = X;




=========================
=========================
@@ -344,9 +345,12 @@ Memory barriers come in four basic varieties:


 (4) General memory barriers.
 (4) General memory barriers.


     A general memory barrier is a combination of both a read memory barrier
     A general memory barrier gives a guarantee that all the LOAD and STORE
     and a write memory barrier.  It is a partial ordering over both loads and
     operations specified before the barrier will appear to happen before all
     stores.
     the LOAD and STORE operations specified after the barrier with respect to
     the other components of the system.

     A general memory barrier is a partial ordering over both loads and stores.


     General memory barriers imply both read and write memory barriers, and so
     General memory barriers imply both read and write memory barriers, and so
     can substitute for either.
     can substitute for either.
@@ -546,9 +550,9 @@ write barrier, though, again, a general barrier is viable:
	===============	===============
	===============	===============
	a = 1;
	a = 1;
	<write barrier>
	<write barrier>
	b = 2;		x = a;
	b = 2;		x = b;
			<read barrier>
			<read barrier>
			y = b;
			y = a;


Or:
Or:


@@ -563,6 +567,18 @@ Or:
Basically, the read barrier always has to be there, even though it can be of
Basically, the read barrier always has to be there, even though it can be of
the "weaker" type.
the "weaker" type.


[!] Note that the stores before the write barrier would normally be expected to
match the loads after the read barrier or data dependency barrier, and vice
versa:

	CPU 1                           CPU 2
	===============                 ===============
	a = 1;           }----   --->{  v = c
	b = 2;           }    \ /    {  w = d
	<write barrier>        \        <read barrier>
	c = 3;           }    / \    {  x = a;
	d = 4;           }----   --->{  y = b;



EXAMPLES OF MEMORY BARRIER SEQUENCES
EXAMPLES OF MEMORY BARRIER SEQUENCES
------------------------------------
------------------------------------
@@ -600,8 +616,8 @@ STORE B, STORE C } all occuring before the unordered set of { STORE D, STORE E
	|       |       +------+
	|       |       +------+
	+-------+       :      :
	+-------+       :      :
	                   |
	                   |
	                   | Sequence in which stores committed to memory system
	                   | Sequence in which stores are committed to the
	                   | by CPU 1
	                   | memory system by CPU 1
	                   V
	                   V




@@ -683,14 +699,12 @@ then the following will occur:
	                               |        :       :       |       |
	                               |        :       :       |       |
	                               |        :       :       | CPU 2 |
	                               |        :       :       | CPU 2 |
	                               |        +-------+       |       |
	                               |        +-------+       |       |
	                                \       | X->9  |------>|       |
	                               |        | X->9  |------>|       |
	                                 \      +-------+       |       |
	                               |        +-------+       |       |
	                                  ----->| B->2  |       |       |
	  Makes sure all effects --->   \   ddddddddddddddddd   |       |
	                                        +-------+       |       |
	  prior to the store of C        \      +-------+       |       |
	     Makes sure all effects --->    ddddddddddddddddd   |       |
	  are perceptible to              ----->| B->2  |------>|       |
	     prior to the store of C            +-------+       |       |
	  subsequent loads                      +-------+       |       |
	     are perceptible to                 | B->2  |------>|       |
	     successive loads                   +-------+       |       |
	                                        :       :       +-------+
	                                        :       :       +-------+




@@ -699,75 +713,241 @@ following sequence of events:


	CPU 1			CPU 2
	CPU 1			CPU 2
	=======================	=======================
	=======================	=======================
		{ A = 0, B = 9 }
	STORE A=1
	STORE A=1
	STORE B=2
	STORE C=3
	<write barrier>
	<write barrier>
	STORE D=4
	STORE B=2
	STORE E=5
				LOAD A
				LOAD B
				LOAD B
				LOAD C
				LOAD A
				LOAD D
				LOAD E


Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
some effectively random order, despite the write barrier issued by CPU 1:
some effectively random order, despite the write barrier issued by CPU 1:


	+-------+       :      :
	+-------+       :      :                :       :
	|       |       +------+
	|       |       +------+                +-------+
	|       |------>| C=3  | }
	|       |------>| A=1  |------      --->| A->0  |
	|       |  :    +------+ }
	|       |       +------+      \         +-------+
	|       |  :    | A=1  | }
	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
	|       |  :    +------+ }
	|       |       +------+        |       +-------+
	| CPU 1 |  :    | B=2  | }---
	|       |------>| B=2  |---     |       :       :
	|       |       +------+ }   \
	|       |       +------+   \    |       :       :       +-------+
	|       |   wwwwwwwwwwwww}    \
	+-------+       :      :    \   |       +-------+       |       |
	|       |       +------+ }     \          :       :       +-------+
	                             ---------->| B->2  |------>|       |
	|       |  :    | E=5  | }      \         +-------+       |       |
	                                |       +-------+       | CPU 2 |
	|       |  :    +------+ }       \      { | C->3  |------>|       |
	                                |       | A->0  |------>|       |
	|       |------>| D=4  | }        \     { +-------+    :  |       |
	                                |       +-------+       |       |
	|       |       +------+           \    { | E->5  |    :  |       |
	                                |       :       :       +-------+
	+-------+       :      :            \   { +-------+    :  |       |
	                                 \      :       :
	                           Transfer  -->{ | A->1  |    :  | CPU 2 |
	                                  \     +-------+
	                          from CPU 1    { +-------+    :  |       |
	                                   ---->| A->1  |
	                           to CPU 2     { | D->4  |    :  |       |
	                                        +-------+
	                                        { +-------+    :  |       |
	                                        :       :
	                                        { | B->2  |------>|       |

	                                          +-------+       |       |

If, however, a read barrier were to be placed between the load of B and the
load of A on CPU 2:

	CPU 1			CPU 2
	=======================	=======================
		{ A = 0, B = 9 }
	STORE A=1
	<write barrier>
	STORE B=2
				LOAD B
				<read barrier>
				LOAD A

then the partial ordering imposed by CPU 1 will be perceived correctly by CPU
2:

	+-------+       :      :                :       :
	|       |       +------+                +-------+
	|       |------>| A=1  |------      --->| A->0  |
	|       |       +------+      \         +-------+
	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
	|       |       +------+        |       +-------+
	|       |------>| B=2  |---     |       :       :
	|       |       +------+   \    |       :       :       +-------+
	+-------+       :      :    \   |       +-------+       |       |
	                             ---------->| B->2  |------>|       |
	                                |       +-------+       | CPU 2 |
	                                |       :       :       |       |
	                                |       :       :       |       |
	  At this point the read ---->   \  rrrrrrrrrrrrrrrrr   |       |
	  barrier causes all effects      \     +-------+       |       |
	  prior to the storage of B        ---->| A->1  |------>|       |
	  to be perceptible to CPU 2            +-------+       |       |
	                                        :       :       +-------+
	                                        :       :       +-------+




If, however, a read barrier were to be placed between the load of C and the
To illustrate this more completely, consider what could happen if the code
load of D on CPU 2, then the partial ordering imposed by CPU 1 will be
contained a load of A either side of the read barrier:
perceived correctly by CPU 2.


	+-------+       :      :
	CPU 1			CPU 2
	|       |       +------+
	=======================	=======================
	|       |------>| C=3  | }
		{ A = 0, B = 9 }
	|       |  :    +------+ }
	STORE A=1
	|       |  :    | A=1  | }---
	<write barrier>
	|       |  :    +------+ }   \
	STORE B=2
	| CPU 1 |  :    | B=2  | }    \
				LOAD B
	|       |       +------+       \
				LOAD A [first load of A]
	|       |   wwwwwwwwwwwwwwww    \
				<read barrier>
	|       |       +------+         \        :       :       +-------+
				LOAD A [second load of A]
	|       |  :    | E=5  | }        \       +-------+       |       |

	|       |  :    +------+ }---      \    { | C->3  |------>|       |
Even though the two loads of A both occur after the load of B, they may both
	|       |------>| D=4  | }   \      \   { +-------+    :  |       |
come up with different values:
	|       |       +------+      \      -->{ | B->2  |    :  |       |

	+-------+       :      :       \        { +-------+    :  |       |
	+-------+       :      :                :       :
	                                \       { | A->1  |    :  | CPU 2 |
	|       |       +------+                +-------+
	                                 \        +-------+       |       |
	|       |------>| A=1  |------      --->| A->0  |
	|       |       +------+      \         +-------+
	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
	|       |       +------+        |       +-------+
	|       |------>| B=2  |---     |       :       :
	|       |       +------+   \    |       :       :       +-------+
	+-------+       :      :    \   |       +-------+       |       |
	                             ---------->| B->2  |------>|       |
	                                |       +-------+       | CPU 2 |
	                                |       :       :       |       |
	                                |       :       :       |       |
	                                |       +-------+       |       |
	                                |       | A->0  |------>| 1st   |
	                                |       +-------+       |       |
	  At this point the read ---->   \  rrrrrrrrrrrrrrrrr   |       |
	  At this point the read ---->   \  rrrrrrrrrrrrrrrrr   |       |
	  barrier causes all effects      \     +-------+       |       |
	  barrier causes all effects      \     +-------+       |       |
	   prior to the storage of C        \   { | E->5  |    :  |       |
	  prior to the storage of B        ---->| A->1  |------>| 2nd   |
	   to be perceptible to CPU 2        -->{ +-------+    :  |       |
	  to be perceptible to CPU 2            +-------+       |       |
	                                        { | D->4  |------>|       |
	                                        :       :       +-------+


But it may be that the update to A from CPU 1 becomes perceptible to CPU 2
before the read barrier completes anyway:

	+-------+       :      :                :       :
	|       |       +------+                +-------+
	|       |------>| A=1  |------      --->| A->0  |
	|       |       +------+      \         +-------+
	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
	|       |       +------+        |       +-------+
	|       |------>| B=2  |---     |       :       :
	|       |       +------+   \    |       :       :       +-------+
	+-------+       :      :    \   |       +-------+       |       |
	                             ---------->| B->2  |------>|       |
	                                |       +-------+       | CPU 2 |
	                                |       :       :       |       |
	                                 \      :       :       |       |
	                                  \     +-------+       |       |
	                                   ---->| A->1  |------>| 1st   |
	                                        +-------+       |       |
	                                    rrrrrrrrrrrrrrrrr   |       |
	                                        +-------+       |       |
	                                        | A->1  |------>| 2nd   |
	                                        +-------+       |       |
	                                        :       :       +-------+


The guarantee is that the second load will always come up with A == 1 if the
load of B came up with B == 2.  No such guarantee exists for the first load of
A; that may come up with either A == 0 or A == 1.


READ MEMORY BARRIERS VS LOAD SPECULATION
----------------------------------------

Many CPUs speculate with loads: that is, they see that they will need to load
an item from memory, and they find a time where they're not using the bus for
any other loads, and so do the load in advance - even though they haven't
actually got to that point in the instruction execution flow yet.  This permits
the actual load instruction to potentially complete immediately because the CPU
already has the value to hand.

It may turn out that the CPU didn't actually need the value - perhaps because a
branch circumvented the load - in which case it can discard the value or just
cache it for later use.

Consider:

	CPU 1	   		CPU 2
	=======================	=======================
	 	   		LOAD B
	 	   		DIVIDE		} Divide instructions generally
	 	   		DIVIDE		} take a long time to perform
	 	   		LOAD A

Which might appear as this:

	                                        :       :       +-------+
	                                        +-------+       |       |
	                                    --->| B->2  |------>|       |
	                                        +-------+       | CPU 2 |
	                                        :       :DIVIDE |       |
	                                        +-------+       |       |
	The CPU being busy doing a --->     --->| A->0  |~~~~   |       |
	division speculates on the              +-------+   ~   |       |
	LOAD of A                               :       :   ~   |       |
	                                        :       :DIVIDE |       |
	                                        :       :   ~   |       |
	Once the divisions are complete -->     :       :   ~-->|       |
	the CPU can then perform the            :       :       |       |
	LOAD with immediate effect              :       :       +-------+


Placing a read barrier or a data dependency barrier just before the second
load:

	CPU 1	   		CPU 2
	=======================	=======================
	 	   		LOAD B
	 	   		DIVIDE
	 	   		DIVIDE
				<read barrier>
	 	   		LOAD A

will force any value speculatively obtained to be reconsidered to an extent
dependent on the type of barrier used.  If there was no change made to the
speculated memory location, then the speculated value will just be used:

	                                        :       :       +-------+
	                                        +-------+       |       |
	                                        +-------+       |       |
	                                    --->| B->2  |------>|       |
	                                        +-------+       | CPU 2 |
	                                        :       :DIVIDE |       |
	                                        +-------+       |       |
	The CPU being busy doing a --->     --->| A->0  |~~~~   |       |
	division speculates on the              +-------+   ~   |       |
	LOAD of A                               :       :   ~   |       |
	                                        :       :DIVIDE |       |
	                                        :       :   ~   |       |
	                                        :       :   ~   |       |
	                                    rrrrrrrrrrrrrrrr~   |       |
	                                        :       :   ~   |       |
	                                        :       :   ~-->|       |
	                                        :       :       |       |
	                                        :       :       +-------+
	                                        :       :       +-------+




but if there was an update or an invalidation from another CPU pending, then
the speculation will be cancelled and the value reloaded:

	                                        :       :       +-------+
	                                        +-------+       |       |
	                                    --->| B->2  |------>|       |
	                                        +-------+       | CPU 2 |
	                                        :       :DIVIDE |       |
	                                        +-------+       |       |
	The CPU being busy doing a --->     --->| A->0  |~~~~   |       |
	division speculates on the              +-------+   ~   |       |
	LOAD of A                               :       :   ~   |       |
	                                        :       :DIVIDE |       |
	                                        :       :   ~   |       |
	                                        :       :   ~   |       |
	                                    rrrrrrrrrrrrrrrrr   |       |
	                                        +-------+       |       |
	The speculation is discarded --->   --->| A->1  |------>|       |
	and an updated value is                 +-------+       |       |
	retrieved                               :       :       +-------+


========================
========================
EXPLICIT KERNEL BARRIERS
EXPLICIT KERNEL BARRIERS
========================
========================
@@ -901,7 +1081,7 @@ IMPLICIT KERNEL MEMORY BARRIERS
===============================
===============================


Some of the other functions in the linux kernel imply memory barriers, amongst
Some of the other functions in the linux kernel imply memory barriers, amongst
which are locking, scheduling and memory allocation functions.
which are locking and scheduling functions.


This specification is a _minimum_ guarantee; any particular architecture may
This specification is a _minimum_ guarantee; any particular architecture may
provide more substantial guarantees, but these may not be relied upon outside
provide more substantial guarantees, but these may not be relied upon outside
@@ -966,6 +1146,20 @@ equivalent to a full barrier, but a LOCK followed by an UNLOCK is not.
    barriers is that the effects of instructions outside of a critical section may
    barriers is that the effects of instructions outside of a critical section may
    seep into the inside of the critical section.
    seep into the inside of the critical section.


A LOCK followed by an UNLOCK may not be assumed to be a full memory barrier
because it is possible for an access preceding the LOCK to happen after the
LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the
two accesses can themselves then cross:

	*A = a;
	LOCK
	UNLOCK
	*B = b;

may occur as:

	LOCK, STORE *B, STORE *A, UNLOCK

Locks and semaphores may not provide any guarantee of ordering on UP compiled
Locks and semaphores may not provide any guarantee of ordering on UP compiled
systems, and so cannot be counted on in such a situation to actually achieve
systems, and so cannot be counted on in such a situation to actually achieve
anything at all - especially with respect to I/O accesses - unless combined
anything at all - especially with respect to I/O accesses - unless combined
@@ -1016,8 +1210,6 @@ Other functions that imply barriers:


 (*) schedule() and similar imply full memory barriers.
 (*) schedule() and similar imply full memory barriers.


 (*) Memory allocation and release functions imply full memory barriers.



=================================
=================================
INTER-CPU LOCKING BARRIER EFFECTS
INTER-CPU LOCKING BARRIER EFFECTS
+1 −1
Original line number Original line Diff line number Diff line
@@ -453,7 +453,7 @@ config ALPHA_IRONGATE


config GENERIC_HWEIGHT
config GENERIC_HWEIGHT
	bool
	bool
	default y if !ALPHA_EV6 && !ALPHA_EV67
	default y if !ALPHA_EV67


config ALPHA_AVANTI
config ALPHA_AVANTI
	bool
	bool
+4 −4
Original line number Original line Diff line number Diff line
@@ -111,21 +111,21 @@ static void __init ts72xx_map_io(void)
	}
	}
}
}


static unsigned char ts72xx_rtc_readb(unsigned long addr)
static unsigned char ts72xx_rtc_readbyte(unsigned long addr)
{
{
	__raw_writeb(addr, TS72XX_RTC_INDEX_VIRT_BASE);
	__raw_writeb(addr, TS72XX_RTC_INDEX_VIRT_BASE);
	return __raw_readb(TS72XX_RTC_DATA_VIRT_BASE);
	return __raw_readb(TS72XX_RTC_DATA_VIRT_BASE);
}
}


static void ts72xx_rtc_writeb(unsigned char value, unsigned long addr)
static void ts72xx_rtc_writebyte(unsigned char value, unsigned long addr)
{
{
	__raw_writeb(addr, TS72XX_RTC_INDEX_VIRT_BASE);
	__raw_writeb(addr, TS72XX_RTC_INDEX_VIRT_BASE);
	__raw_writeb(value, TS72XX_RTC_DATA_VIRT_BASE);
	__raw_writeb(value, TS72XX_RTC_DATA_VIRT_BASE);
}
}


static struct m48t86_ops ts72xx_rtc_ops = {
static struct m48t86_ops ts72xx_rtc_ops = {
	.readb			= ts72xx_rtc_readb,
	.readbyte		= ts72xx_rtc_readbyte,
	.writeb			= ts72xx_rtc_writeb,
	.writebyte		= ts72xx_rtc_writebyte,
};
};


static struct platform_device ts72xx_rtc_device = {
static struct platform_device ts72xx_rtc_device = {
+1 −1
Original line number Original line Diff line number Diff line
@@ -127,7 +127,7 @@ static void
imx_gpio_ack_irq(unsigned int irq)
imx_gpio_ack_irq(unsigned int irq)
{
{
	DEBUG_IRQ("%s: irq %d\n", __FUNCTION__, irq);
	DEBUG_IRQ("%s: irq %d\n", __FUNCTION__, irq);
	ISR(IRQ_TO_REG(irq)) |= 1 << ((irq - IRQ_GPIOA(0)) % 32);
	ISR(IRQ_TO_REG(irq)) = 1 << ((irq - IRQ_GPIOA(0)) % 32);
}
}


static void
static void
+1 −4
Original line number Original line Diff line number Diff line
@@ -232,8 +232,6 @@ static void __init intcp_init_irq(void)
	for (i = IRQ_PIC_START; i <= IRQ_PIC_END; i++) {
	for (i = IRQ_PIC_START; i <= IRQ_PIC_END; i++) {
		if (i == 11)
		if (i == 11)
			i = 22;
			i = 22;
		if (i == IRQ_CP_CPPLDINT)
			i++;
		if (i == 29)
		if (i == 29)
			break;
			break;
		set_irq_chip(i, &pic_chip);
		set_irq_chip(i, &pic_chip);
@@ -259,8 +257,7 @@ static void __init intcp_init_irq(void)
		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
	}
	}


	set_irq_handler(IRQ_CP_CPPLDINT, sic_handle_irq);
	set_irq_chained_handler(IRQ_CP_CPPLDINT, sic_handle_irq);
	pic_unmask_irq(IRQ_CP_CPPLDINT);
}
}


/*
/*
Loading