Published on

Attempting to Simulate Race Conditions in Python Asyncio

Authors

I have been working with an async library in Python recently and kept running into race conditions.

To debug this, I first tried to replicate the issue by simulating I/O with multiple concurrent jobs, as in the code below:

import asyncio
from random import randrange

async def simulate_io(caller: str, sleep_delay):
    """Pretend to perform I/O on behalf of *caller*.

    Logs who is "doing I/O" and for how long, then yields control to the
    event loop for ``sleep_delay`` seconds via ``asyncio.sleep``.
    """
    message = f"caller {caller}, sleep_delay {sleep_delay}"
    print(message)
    await asyncio.sleep(sleep_delay)

async def critical_block(caller: str, condition=None):
    """Run the unprotected "critical" section for *caller*.

    Prints an entry marker, awaits four simulated I/O operations with a
    random delay of 1-5 seconds each, then prints an exit marker. Nothing
    guards the section, so several callers can be inside it at once.

    Args:
        caller: Label printed in the log lines to identify the job.
        condition: Unused. Kept optional so existing call sites that pass
            only ``caller`` keep working; no synchronization is performed
            in this version.
    """
    print(f"++++++++++++++++++++++ {caller} entered critical block")
    # Every await below hands control back to the event loop, which is what
    # lets another job enter this block before this one has finished.
    for _ in range(4):
        await simulate_io(caller, randrange(1, 6))
    print(f"---------------------- {caller} exited critical block")

async def job_1():
    """Repeatedly run the critical block under the label "Job_1", forever."""
    name = "Job_1"
    while True:
        await critical_block(name)

async def job_2():
    """Repeatedly run the critical block under the label "Job_2", forever."""
    name = "Job_2"
    while True:
        await critical_block(name)

async def job_3():
    """Repeatedly run the critical block under the label "Job_3", forever."""
    name = "Job_3"
    while True:
        await critical_block(name)

async def main():
    """Schedule job_1 and job_2 concurrently and wait on them.

    No lock is created here: this version intentionally runs the jobs
    without any synchronization so that their critical blocks can overlap.
    Both jobs loop forever, so the ``gather`` call does not return on its own.
    """
    tasks = [asyncio.create_task(job_1()), asyncio.create_task(job_2())]

    await asyncio.gather(*tasks)

if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() on it and closes
    # the loop afterwards. Calling asyncio.get_event_loop() when no loop is
    # running is deprecated.
    asyncio.run(main())

It is clear from the output below that we are hitting the race condition: multiple jobs are inside the critical block at the same time.

++++++++++++++++++++++ Job_1 entered critical block
caller Job_1, sleep_delay 2
++++++++++++++++++++++ Job_2 entered critical block
caller Job_2, sleep_delay 3
caller Job_1, sleep_delay 5
caller Job_2, sleep_delay 2
caller Job_2, sleep_delay 4
caller Job_1, sleep_delay 4
caller Job_2, sleep_delay 5
caller Job_1, sleep_delay 1
---------------------- Job_1 exited critical block
++++++++++++++++++++++ Job_1 entered critical block
caller Job_1, sleep_delay 2
caller Job_1, sleep_delay 1
---------------------- Job_2 exited critical block
++++++++++++++++++++++ Job_2 entered critical block
caller Job_2, sleep_delay 1
caller Job_1, sleep_delay 4
caller Job_2, sleep_delay 3
caller Job_2, sleep_delay 1
caller Job_1, sleep_delay 5
caller Job_2, sleep_delay 1

Let's change the code to use a lock so that only one job at a time can be inside the critical block:

import asyncio
from asyncio import Lock
from random import randrange


async def simulate_io(caller: str, sleep_delay):
    """Stand in for a real I/O call made by *caller*.

    Prints the caller and the delay, then suspends the coroutine for
    ``sleep_delay`` seconds with ``asyncio.sleep``.
    """
    log_line = f"caller {caller}, sleep_delay {sleep_delay}"
    print(log_line)
    await asyncio.sleep(sleep_delay)


async def critical_block(caller: str, lock):
    """Run the critical section for *caller* while holding *lock*.

    The lock is acquired with ``async with`` before the entry marker is
    printed and released after the exit marker, so the four simulated I/O
    calls (random delay of 1-5 seconds each) all happen under the lock.
    """
    async with lock:
        print(f"++++++++++++++++++++++ {caller} entered critical block")
        for _ in range(4):
            delay = randrange(1, 6)
            await simulate_io(caller, delay)
        print(f"---------------------- {caller} exited critical block")

async def job_1(lock):
    """Loop forever, running the critical block as "Job_1" with *lock*."""
    caller = "Job_1"
    while True:
        await critical_block(caller, lock)


async def job_2(lock):
    """Loop forever, running the critical block as "Job_2" with *lock*."""
    caller = "Job_2"
    while True:
        await critical_block(caller, lock)


async def job_3(lock):
    """Loop forever, running the critical block as "Job_3" with *lock*."""
    caller = "Job_3"
    while True:
        await critical_block(caller, lock)


async def main():
    """Create one shared lock and run job_1, job_2 and job_3 concurrently.

    Every job receives the same ``Lock`` instance. The tasks are created in
    the order job_1, job_2, job_3 and then awaited together with ``gather``.
    """
    shared_lock = Lock()
    workers = (job_1, job_2, job_3)
    tasks = [asyncio.create_task(worker(shared_lock)) for worker in workers]

    await asyncio.gather(*tasks)


if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() on it and closes
    # the loop afterwards. Calling asyncio.get_event_loop() when no loop is
    # running is deprecated.
    asyncio.run(main())

Looking at the output below, this seems to work perfectly.

The problem is that, despite applying this approach to my production code, I still run into the race condition, although less often. Simulating the issue with a simpler example works, but not perfectly, especially as production code is often more complex.

More asyncio synchronization primitives can be found here.

++++++++++++++++++++++ Job_1 entered critical block
caller Job_1, sleep_delay 4
caller Job_1, sleep_delay 3
caller Job_1, sleep_delay 3
caller Job_1, sleep_delay 3
---------------------- Job_1 exited critical block
++++++++++++++++++++++ Job_2 entered critical block
caller Job_2, sleep_delay 4
caller Job_2, sleep_delay 1
caller Job_2, sleep_delay 3
caller Job_2, sleep_delay 3
---------------------- Job_2 exited critical block
++++++++++++++++++++++ Job_3 entered critical block
caller Job_3, sleep_delay 3
caller Job_3, sleep_delay 1
caller Job_3, sleep_delay 1
caller Job_3, sleep_delay 5
---------------------- Job_3 exited critical block
++++++++++++++++++++++ Job_1 entered critical block
caller Job_1, sleep_delay 5
caller Job_1, sleep_delay 3
caller Job_1, sleep_delay 1
caller Job_1, sleep_delay 4
---------------------- Job_1 exited critical block
++++++++++++++++++++++ Job_2 entered critical block
caller Job_2, sleep_delay 2
caller Job_2, sleep_delay 4
caller Job_2, sleep_delay 4
caller Job_2, sleep_delay 1
---------------------- Job_2 exited critical block
++++++++++++++++++++++ Job_3 entered critical block
caller Job_3, sleep_delay 2
caller Job_3, sleep_delay 2
caller Job_3, sleep_delay 3
caller Job_3, sleep_delay 3
---------------------- Job_3 exited critical block
++++++++++++++++++++++ Job_1 entered critical block
caller Job_1, sleep_delay 1
caller Job_1, sleep_delay 1
caller Job_1, sleep_delay 1
caller Job_1, sleep_delay 1
---------------------- Job_1 exited critical block
++++++++++++++++++++++ Job_2 entered critical block
caller Job_2, sleep_delay 1
caller Job_2, sleep_delay 5
caller Job_2, sleep_delay 3
caller Job_2, sleep_delay 1
---------------------- Job_2 exited critical block