Fix horrible race conditions in suspend and resume

develop
Petr Mrázek 2010-03-12 18:29:11 +01:00
parent 0dd14bb9c8
commit 834a64c282
2 changed files with 24 additions and 8 deletions

@ -100,7 +100,7 @@ class SHMProcess::Private
bool SHMProcess::Private::SetAndWait (uint32_t state) bool SHMProcess::Private::SetAndWait (uint32_t state)
{ {
uint32_t cnt = 0; uint32_t cnt = 0;
if(!locked) return false; if(!attached) return false;
SHMCMD = state; SHMCMD = state;
while (SHMCMD == state) while (SHMCMD == state)
{ {
@ -471,18 +471,26 @@ bool SHMProcess::suspend()
return true; return true;
} }
// FIXME: this should be controlled on the server side
// FIXME: IF server got CORE_RUN in this frame, interpret CORE_SUSPEND as CORE_STEP
// did we just resume a moment ago? // did we just resume a moment ago?
if(D_SHMCMD == CORE_RUN) if(D_SHMCMD == CORE_RUN)
{ {
//fprintf(stderr,"%d invokes step\n",d->attachmentIdx); //fprintf(stderr,"%d invokes step\n",d->attachmentIdx);
// wait for the next window // wait for the next window
D_SHMCMD = CORE_STEP; if(!d->SetAndWait(CORE_STEP))
{
throw Error::SHMLockingError("if(!d->SetAndWait(CORE_STEP))");
}
} }
else else
{ {
//fprintf(stderr,"%d invokes suspend\n",d->attachmentIdx); //fprintf(stderr,"%d invokes suspend\n",d->attachmentIdx);
// lock now // lock now
D_SHMCMD = CORE_SUSPEND; if(!d->SetAndWait(CORE_SUSPEND))
{
throw Error::SHMLockingError("if(!d->SetAndWait(CORE_SUSPEND))");
}
} }
//fprintf(stderr,"waiting for lock\n"); //fprintf(stderr,"waiting for lock\n");
// we wait for the server to give up our suspend lock (held by default) // we wait for the server to give up our suspend lock (held by default)
@ -495,6 +503,7 @@ bool SHMProcess::suspend()
return false; return false;
} }
// FIXME: needs a good think-through
bool SHMProcess::asyncSuspend() bool SHMProcess::asyncSuspend()
{ {
if(!d->attached) if(!d->attached)
@ -552,16 +561,18 @@ bool SHMProcess::resume()
return false; return false;
if(!d->suspended) if(!d->suspended)
return true; return true;
// set core to run
D_SHMCMD = CORE_RUN;
d->suspended = false;
// unlock the suspend lock // unlock the suspend lock
if(lockf(d->suspend_lock,F_ULOCK,0) == 0) if(lockf(d->suspend_lock,F_ULOCK,0) == 0)
{ {
d->suspended = false;
d->locked = false; d->locked = false;
if(d->SetAndWait(CORE_RUN)) // we have to make sure the server responds!
{
return true; return true;
} }
throw Error::SHMLockingError("bool SHMProcess::resume()"); throw Error::SHMLockingError("if(d->SetAndWait(CORE_RUN))");
}
throw Error::SHMLockingError("if(lockf(d->suspend_lock,F_ULOCK,0) == 0)");
return false; return false;
} }

@ -299,12 +299,17 @@ void SHM_Act (void)
if(cmd.nextState != -1) if(cmd.nextState != -1)
{ {
/*
fprintf(stderr, "Client %d invoked %d:%d = %x = ", fprintf(stderr, "Client %d invoked %d:%d = %x = ",
currentClient,((shm_cmd)atomic).parts.module,((shm_cmd)atomic).parts.command, cmd._function); currentClient,((shm_cmd)atomic).parts.module,((shm_cmd)atomic).parts.command, cmd._function);
fprintf(stderr, "%s\n",cmd.name.c_str()); fprintf(stderr, "%s\n",cmd.name.c_str());
*/
// FIXME: WHAT HAPPENS WHEN A 'NEXTSTATE' IS FROM A DIFFERENT MODULE THAN 'CORE'? Yeah. It doesn't work. // FIXME: WHAT HAPPENS WHEN A 'NEXTSTATE' IS FROM A DIFFERENT MODULE THAN 'CORE'? Yeah. It doesn't work.
SHMCMD = cmd.nextState; SHMCMD = cmd.nextState;
/*
fprintf(stderr, "Server set %d\n",cmd.nextState); fprintf(stderr, "Server set %d\n",cmd.nextState);
fflush(stderr); // make sure this finds its way to the terminal!
*/
} }
full_barrier full_barrier