From 834a64c28287f115aa26407a5462f5d3e2048fc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Mr=C3=A1zek?= Date: Fri, 12 Mar 2010 18:29:11 +0100 Subject: [PATCH] Fix horrible race conditions in suspend and resume --- library/DFProcess-linux-SHM.cpp | 27 +++++++++++++++++++-------- shmserver/mod-core.cpp | 5 +++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/library/DFProcess-linux-SHM.cpp b/library/DFProcess-linux-SHM.cpp index c1f8b954a..3f9b0e6d0 100644 --- a/library/DFProcess-linux-SHM.cpp +++ b/library/DFProcess-linux-SHM.cpp @@ -100,7 +100,7 @@ class SHMProcess::Private bool SHMProcess::Private::SetAndWait (uint32_t state) { uint32_t cnt = 0; - if(!locked) return false; + if(!attached) return false; SHMCMD = state; while (SHMCMD == state) { @@ -471,18 +471,26 @@ bool SHMProcess::suspend() return true; } + // FIXME: this should be controlled on the server side + // FIXME: IF server got CORE_RUN in this frame, interpret CORE_SUSPEND as CORE_STEP // did we just resume a moment ago? if(D_SHMCMD == CORE_RUN) { //fprintf(stderr,"%d invokes step\n",d->attachmentIdx); // wait for the next window - D_SHMCMD = CORE_STEP; + if(!d->SetAndWait(CORE_STEP)) + { + throw Error::SHMLockingError("if(!d->SetAndWait(CORE_STEP))"); + } } else { //fprintf(stderr,"%d invokes suspend\n",d->attachmentIdx); // lock now - D_SHMCMD = CORE_SUSPEND; + if(!d->SetAndWait(CORE_SUSPEND)) + { + throw Error::SHMLockingError("if(!d->SetAndWait(CORE_SUSPEND))"); + } } //fprintf(stderr,"waiting for lock\n"); // we wait for the server to give up our suspend lock (held by default) @@ -495,6 +503,7 @@ bool SHMProcess::suspend() return false; } +// FIXME: needs a good think-through bool SHMProcess::asyncSuspend() { if(!d->attached) @@ -552,16 +561,18 @@ bool SHMProcess::resume() return false; if(!d->suspended) return true; - // set core to run - D_SHMCMD = CORE_RUN; - d->suspended = false; // unlock the suspend lock if(lockf(d->suspend_lock,F_ULOCK,0) == 0) { + d->suspended = false; d->locked = false; - return true; + if(d->SetAndWait(CORE_RUN)) // we have to make sure the server responds! + { + return true; + } + throw Error::SHMLockingError("if(d->SetAndWait(CORE_RUN))"); } - throw Error::SHMLockingError("bool SHMProcess::resume()"); + throw Error::SHMLockingError("if(lockf(d->suspend_lock,F_ULOCK,0) == 0)"); return false; } diff --git a/shmserver/mod-core.cpp b/shmserver/mod-core.cpp index 3bc27b12e..76fd9dfe9 100644 --- a/shmserver/mod-core.cpp +++ b/shmserver/mod-core.cpp @@ -299,12 +299,17 @@ void SHM_Act (void) if(cmd.nextState != -1) { + /* fprintf(stderr, "Client %d invoked %d:%d = %x = ", currentClient,((shm_cmd)atomic).parts.module,((shm_cmd)atomic).parts.command, cmd._function); fprintf(stderr, "%s\n",cmd.name.c_str()); + */ // FIXME: WHAT HAPPENS WHEN A 'NEXTSTATE' IS FROM A DIFFERENT MODULE THAN 'CORE'? Yeah. It doesn't work. SHMCMD = cmd.nextState; + /* fprintf(stderr, "Server set %d\n",cmd.nextState); + fflush(stderr); // make sure this finds its way to the terminal! + */ } full_barrier