gem5のサイクル精度モデルについて理解したいので、O3CPUのドキュメントを読んでみることにする。
次はストア命令について。ソースコードを追いかけていく。
Store命令
Store命令の挙動は、コミット後から見て行こうと思う。
IEW::tick()->IEW::executeInsts()
  ->LSQUnit::executeStore()
  ->StaticInst::initiateAcc()
  ->LSQ::pushRequest()
  ->LSQUnit::write()
  ->LSQUnit::checkViolation()
Commit::tick()->Commit::commitInsts()->Commit::commitHead()
IEW::tick()->LSQUnit::commitStores()
IEW::tick()->LSQUnit::writebackStores()
  ->LSQRequest::buildPackets()
  ->LSQRequest::sendPacketToCache()
  ->LSQUnit::storePostSend()
DcachePort::recvTimingResp()->LSQRequest::recvTimingResp()
  ->LSQUnit::completeDataAccess()
  ->LSQUnit::completeStore()
commitStores()
は、IEW::tick()
から、LSQUnit
のcommitStores()
が呼ばれる形になる。
// src/cpu/o3/iew.cc void IEW::tick() { /* ... 途中省略 ... */ DPRINTF(IEW,"Processing [tid:%i]\n",tid); // Update structures based on instructions committed. if (fromCommit->commitInfo[tid].doneSeqNum != 0 && !fromCommit->commitInfo[tid].squash && !fromCommit->commitInfo[tid].robSquashing) { ldstQueue.commitStores(fromCommit->commitInfo[tid].doneSeqNum,tid); ldstQueue.commitLoads(fromCommit->commitInfo[tid].doneSeqNum,tid); updateLSQNextCycle = true; instQueue.commit(fromCommit->commitInfo[tid].doneSeqNum,tid); }
// src/cpu/o3/lsq.cc void LSQ::commitStores(InstSeqNum &youngest_inst, ThreadID tid) { thread.at(tid).commitStores(youngest_inst); }
これは要するに、canWB
をtrue
に設定し、storesToWBをインクリメントしているだけだ。
// src/cpu/o3/lsq_unit.cc void LSQUnit::commitStores(InstSeqNum &youngest_inst) { assert(storeQueue.size() == 0 || storeQueue.front().valid()); /* Forward iterate the store queue (age order). */ for (auto& x : storeQueue) { assert(x.valid()); // Mark any stores that are now committed and have not yet // been marked as able to write back. if (!x.canWB()) { if (x.instruction()->seqNum > youngest_inst) { break; } DPRINTF(LSQUnit, "Marking store as able to write back, PC " "%s [sn:%lli]\n", x.instruction()->pcState(), x.instruction()->seqNum); x.canWB() = true; ++storesToWB; DPRINTF(LSQUnit, "xpz: commitStores [sn:%llu] storesToWB %d\n", youngest_inst, storesToWB); } } }
さらに、writebackStores()
によって実際の書き込みが行われる。
void IEW::tick() { /* ... 途中省略 ... */ // Writeback any stores using any leftover bandwidth. ldstQueue.writebackStores(); /* ... 途中省略 ... */
// src/cpu/o3/lsq.cc void LSQ::writebackStores() { std::list<ThreadID>::iterator threads = activeThreads->begin(); std::list<ThreadID>::iterator end = activeThreads->end(); while (threads != end) { ThreadID tid = *threads++; if (numStoresToWB(tid) > 0) { DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores " "available for Writeback.\n", tid, numStoresToWB(tid)); } thread[tid].writebackStores(); } }
LSQUnit::writebackStores()
によって、パケットが作成されてリクエストが発行される。
// src/cpu/o3/lsq_unit.cc void LSQUnit::writebackStores() { if (isStoreBlocked) { DPRINTF(LSQUnit, "Writing back blocked store\n"); writebackBlockedStore(); } // try completing store from store queue and issue request_repeat StoreQueue::iterator sqIt = storeQueue.begin(); while (!storeQueue.empty() && sqIt->completed()) { if(sqIt->request_repeat() == nullptr) { completeStore(sqIt); } else { if (!sqIt->reissued()) { sqIt->request_repeat()->buildPackets(); sqIt->request_repeat()->sendPacket(); sqIt->completed() = false; if (sqIt->request_repeat()->isSent()){ sqIt->reissued() = true; } break; } else { completeStore(sqIt); // head has completed reissue, complete } } sqIt = storeQueue.begin(); } /* ... 途中省略 ... */ /* Send to cache */ request->sendPacket(); /* If successful, do the post send */ if (request->isSent()) { storePostSend(); } else { DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], " "will retry later\n", inst->seqNum); } } assert(storesToWB >= 0); }
最終的に、DCacheからのレスポンスによって書き込みが完了する。
// src/cpu/o3/lsq_unit.cc void LSQUnit::completeDataAccess(PacketPtr pkt) { LSQRequest *request = dynamic_cast<LSQRequest *>(pkt->senderState); DynInstPtr inst = request->instruction(); /* ... 途中省略 ... */ } else if (inst->isStore()) { // This is a regular store (i.e., not store conditionals and // atomics), so it can complete without writing back // If store is at the top of STQ, reissue it to dcache to mimic merak fifo store StoreQueue::iterator sqIt = request->instruction()->sqIt; bool oldest = (sqIt == storeQueue.begin()); if (oldest && sqIt->reissued()) { completeStore(sqIt); } else if (oldest) { sqIt->request_repeat()->buildPackets(); sqIt->request_repeat()->sendPacket(); sqIt->completed() = false; if (sqIt->request_repeat()->isSent()){ sqIt->reissued() = true; } } else { // flag as completed, but not reissued since it has not become the oldest sqIt->completed() = true; } }