|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] MCE: Fix race condition in mctelem_reserve
Thanks!
Reviewed-by: Liu Jinsong <jinsong.liu@xxxxxxxxx>
Frediano Ziglio wrote:
> Yes,
> all these patches came from some customer reports. We manage with
> xen-mceinj to reproduce the problem and got these types of races.
> These patches (the other one is already applied) fix the race issues
> we found.
>
> I tested with a CentOS 6.4 version with mcelog.
>
> Frediano
>
> On Tue, 2014-02-18 at 08:42 +0000, Liu, Jinsong wrote:
>> This logic (mctelem) is related to dom0 mcelog logic. Have you
>> tested if mcelog works fine with your patch?
>>
>> Thanks,
>> Jinsong
>>
>> Frediano Ziglio wrote:
>>> These lines (in mctelem_reserve)
>>>
>>>
>>> newhead = oldhead->mcte_next;
>>> if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
>>>
>>> are racy. After you read the newhead pointer it can happen that
>>> another flow (thread or recursive invocation) change all the list
>>> but set head with same value. So oldhead is the same as *freelp but
>>> you are setting a new head that could point to whatever element
>>> (even already used).
>>>
>>> This patch use instead a bit array and atomic bit operations.
>>>
>>> Actually it use unsigned long instead of bitmap type as testing for
>>> all zeroes is easier.
>>>
>>> Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx> ---
>>> xen/arch/x86/cpu/mcheck/mctelem.c | 52
>>> ++++++++++++++++++++++--------------- 1 file changed, 31
>>> insertions(+), 21 deletions(-)
>>>
>>> diff --git a/xen/arch/x86/cpu/mcheck/mctelem.c
>>> b/xen/arch/x86/cpu/mcheck/mctelem.c index 895ce1a..e56b6fb 100644
>>> --- a/xen/arch/x86/cpu/mcheck/mctelem.c
>>> +++ b/xen/arch/x86/cpu/mcheck/mctelem.c
>>> @@ -69,6 +69,11 @@ struct mctelem_ent {
>>> #define MC_URGENT_NENT 10
>>> #define MC_NONURGENT_NENT 20
>>>
>>> +/* Check if we can fit enough bits in the free bit array */
>>> +#if MC_URGENT_NENT + MC_NONURGENT_NENT > BITS_PER_LONG +#error Too
>>> much elements +#endif
>>> +
>>> #define MC_NCLASSES (MC_NONURGENT + 1)
>>>
>>> #define COOKIE2MCTE(c) ((struct mctelem_ent *)(c))
>>> @@ -77,11 +82,9 @@ struct mctelem_ent {
>>> static struct mc_telem_ctl {
>>> /* Linked lists that thread the array members together. *
>>> - * The free lists are singly-linked via mcte_next, and we allocate
>>> - * from them by atomically unlinking an element from the head.
>>> - * Consumed entries are returned to the head of the free list.
>>> - * When an entry is reserved off the free list it is not linked
>>> - * on any list until it is committed or dismissed.
>>> + * The free lists is a bit array where bit 1 means free.
>>> + * This as element number is quite small and is easy to
>>> + * atomically allocate that way.
>>> *
>>> * The committed list grows at the head and we do not maintain a
>>> * tail pointer; insertions are performed atomically. The head
>>> @@ -101,7 +104,7 @@ static struct mc_telem_ctl {
>>> * we can lock it for updates. The head of the processing list
>>> * always has the oldest telemetry, and we append (as above)
>>> * at the tail of the processing list. */
>>> - struct mctelem_ent *mctc_free[MC_NCLASSES];
>>> + unsigned long mctc_free[MC_NCLASSES];
>>> struct mctelem_ent *mctc_committed[MC_NCLASSES];
>>> struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
>>> struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
>>> @@ -214,7 +217,10 @@ static void mctelem_free(struct mctelem_ent
>>> *tep) BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
>>>
>>> tep->mcte_prev = NULL;
>>> - mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next,
>>> tep); + tep->mcte_next = NULL; +
>>> + /* set free in array */
>>> + set_bit(tep - mctctl.mctc_elems, &mctctl.mctc_free[target]); }
>>>
>>> /* Increment the reference count of an entry that is not linked on
>>> to @@ -284,7 +290,7 @@ void mctelem_init(int reqdatasz) }
>>>
>>> for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
>>> - struct mctelem_ent *tep, **tepp;
>>> + struct mctelem_ent *tep;
>>>
>>> tep = mctctl.mctc_elems + i;
>>> tep->mcte_flags = MCTE_F_STATE_FREE;
>>> @@ -292,16 +298,15 @@ void mctelem_init(int reqdatasz)
>>> tep->mcte_data = datarr + i * datasz;
>>>
>>> if (i < MC_URGENT_NENT) {
>>> - tepp = &mctctl.mctc_free[MC_URGENT];
>>> - tep->mcte_flags |= MCTE_F_HOME_URGENT;
>>> + __set_bit(i, &mctctl.mctc_free[MC_URGENT]);
>>> + tep->mcte_flags = MCTE_F_HOME_URGENT;
>>> } else {
>>> - tepp = &mctctl.mctc_free[MC_NONURGENT];
>>> - tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
>>> + __set_bit(i, &mctctl.mctc_free[MC_NONURGENT]);
>>> + tep->mcte_flags = MCTE_F_HOME_NONURGENT;
>>> }
>>>
>>> - tep->mcte_next = *tepp;
>>> + tep->mcte_next = NULL;
>>> tep->mcte_prev = NULL;
>>> - *tepp = tep;
>>> }
>>> }
>>>
>>> @@ -310,18 +315,21 @@ static int mctelem_drop_count;
>>>
>>> /* Reserve a telemetry entry, or return NULL if none available.
>>> * If we return an entry then the caller must subsequently call
>>> exactly one of - * mctelem_unreserve or mctelem_commit for that
>>> entry. + * mctelem_dismiss or mctelem_commit for that entry. */
>>> mctelem_cookie_t mctelem_reserve(mctelem_class_t which) {
>>> - struct mctelem_ent **freelp;
>>> - struct mctelem_ent *oldhead, *newhead;
>>> + unsigned long *freelp;
>>> + unsigned long oldfree;
>>> + unsigned bit;
>>> mctelem_class_t target = (which == MC_URGENT) ?
>>> MC_URGENT : MC_NONURGENT;
>>>
>>> freelp = &mctctl.mctc_free[target];
>>> for (;;) {
>>> - if ((oldhead = *freelp) == NULL) {
>>> + oldfree = *freelp;
>>> +
>>> + if (oldfree == 0) {
>>> if (which == MC_URGENT && target == MC_URGENT) {
>>> /* raid the non-urgent freelist */
>>> target = MC_NONURGENT;
>>> @@ -333,9 +341,11 @@ mctelem_cookie_t
>>> mctelem_reserve(mctelem_class_t which)
>>> } }
>>>
>>> - newhead = oldhead->mcte_next;
>>> - if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
>>> - struct mctelem_ent *tep = oldhead;
>>> + /* try to allocate, atomically clear free bit */
>>> + bit = find_first_set_bit(oldfree);
>>> + if (test_and_clear_bit(bit, freelp)) {
>>> + /* return element we got */
>>> + struct mctelem_ent *tep = mctctl.mctc_elems + bit;
>>>
>>> mctelem_hold(tep);
>>> MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |