FPGA開発日記

カテゴリ別記事インデックス https://msyksphinz.github.io/github_pages , English Version https://fpgadevdiary.hatenadiary.com/

gem5のドキュメントを読む (3. O3CPUのサイクル計算の挙動をソースコード上で追いかける)

gem5のサイクル精度モデルについて理解したいので、O3CPUのドキュメントを読んでみることにする。

www.gem5.org

次はストア命令について。ソースコードを追いかけていく。

Store命令

Store命令の挙動は、コミット後から見て行こうと思う。

IEW::tick()->IEW::executeInsts()
  ->LSQUnit::executeStore()
    ->StaticInst::initiateAcc()
      ->LSQ::pushRequest()
        ->LSQUnit::write()
    ->LSQUnit::checkViolation()
Commit::tick()->Commit::commitInsts()->Commit::commitHead()
IEW::tick()->LSQUnit::commitStores()
IEW::tick()->LSQUnit::writebackStores()
  ->LSQRequest::buildPackets()
  ->LSQRequest::sendPacketToCache()
  ->LSQUnit::storePostSend()
DcachePort::recvTimingResp()->LSQRequest::recvTimingResp()
  ->LSQUnit::completeDataAccess()
    ->LSQUnit::completeStore()

commitStores()は、IEW::tick()からLSQ::commitStores()を経由して、LSQUnit::commitStores()が呼ばれる形になる。

// src/cpu/o3/iew.cc
void
IEW::tick()
{
/* ... 途中省略 ... */
                DPRINTF(IEW,"Processing [tid:%i]\n",tid);

        // Update structures based on instructions committed.
        if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
            !fromCommit->commitInfo[tid].squash &&
            !fromCommit->commitInfo[tid].robSquashing) {

            ldstQueue.commitStores(fromCommit->commitInfo[tid].doneSeqNum,tid);

            ldstQueue.commitLoads(fromCommit->commitInfo[tid].doneSeqNum,tid);

            updateLSQNextCycle = true;
            instQueue.commit(fromCommit->commitInfo[tid].doneSeqNum,tid);
        }
// src/cpu/o3/lsq.cc

/**
 * Forward a store-commit notification to the LSQ unit of a single thread.
 *
 * @param youngest_inst Sequence number handed through unchanged to the
 *                      per-thread unit's commitStores().
 * @param tid           Index of the thread whose unit is selected via
 *                      thread.at(tid).
 */
void
LSQ::commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{
    auto &unit = thread.at(tid);
    unit.commitStores(youngest_inst);
}

これは要するに、対象となるストアのcanWBをtrueに設定しているだけだ。

// src/cpu/o3/lsq_unit.cc

/**
 * Flag store-queue entries as eligible for writeback.
 *
 * Walks the store queue from the front. Every entry that is not yet marked
 * as able to write back and whose sequence number does not exceed
 * @p youngest_inst gets canWB set and bumps storesToWB. The walk stops at
 * the first unmarked entry whose sequence number is greater than
 * @p youngest_inst.
 *
 * @param youngest_inst Upper bound (inclusive) on the sequence numbers of
 *                      the stores to mark.
 */
void
LSQUnit::commitStores(InstSeqNum &youngest_inst)
{
    // Either the queue is empty or its head entry must be valid.
    assert(storeQueue.empty() || storeQueue.front().valid());

    /* Iterate front to back (age order). */
    for (auto &entry : storeQueue) {
        assert(entry.valid());

        // Entries already flagged for writeback need no further work.
        if (entry.canWB()) {
            continue;
        }

        const auto &inst = entry.instruction();

        // First unmarked store beyond the given bound ends the walk.
        if (inst->seqNum > youngest_inst) {
            break;
        }

        DPRINTF(LSQUnit, "Marking store as able to write back, PC "
                "%s [sn:%lli]\n",
                inst->pcState(),
                inst->seqNum);

        entry.canWB() = true;
        ++storesToWB;

        DPRINTF(LSQUnit, "xpz: commitStores [sn:%llu] storesToWB %d\n", youngest_inst, storesToWB);
    }
}

さらに、writebackStores()によって実際の書き込みが行われる。

void
IEW::tick()
{
/* ... 途中省略 ... */
        // Writeback any stores using any leftover bandwidth.
    ldstQueue.writebackStores();
/* ... 途中省略 ... */
// src/cpu/o3/lsq.cc

void
LSQ::writebackStores()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
                "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        thread[tid].writebackStores();
    }
}

LSQUnit::writebackStores()によって、パケットが作成されてリクエストが発行される。

// src/cpu/o3/lsq_unit.cc

// Excerpt of LSQUnit::writebackStores(); part of the body is elided below.
//
// Visible steps:
//   1. If isStoreBlocked is set, call writebackBlockedStore().
//   2. While the head of the store queue is marked completed, either
//      complete it or issue its request_repeat once.
//   3. (after the elided section) send a request and, if it was sent,
//      call storePostSend().
void
LSQUnit::writebackStores()
{
    if (isStoreBlocked) {
        DPRINTF(LSQUnit, "Writing back  blocked store\n");
        writebackBlockedStore();
    }

    // try completing store from store queue and issue request_repeat
    StoreQueue::iterator sqIt = storeQueue.begin();
    while (!storeQueue.empty() && sqIt->completed()) {
        if(sqIt->request_repeat() == nullptr) {
            // No repeat request attached: the head store is finished as is.
            completeStore(sqIt);
        } else {
            if (!sqIt->reissued()) {
                // First visit of a completed head that carries a repeat
                // request: build and send it, and clear the completed flag.
                sqIt->request_repeat()->buildPackets();
                sqIt->request_repeat()->sendPacket();
                sqIt->completed() = false;
                // Only record the reissue when the send went through.
                if (sqIt->request_repeat()->isSent()){
                    sqIt->reissued() = true;
                }
                // Stop draining the queue after attempting a reissue.
                break;
            } else {
                completeStore(sqIt); // head has completed reissue, complete
            }
        }
        // Re-read the head for the next iteration.
        // NOTE(review): presumably completeStore() pops the head entry -- confirm.
        sqIt = storeQueue.begin();
    }
/* ... (intermediate code omitted) ... */
                /* Send to cache */
        request->sendPacket();

        /* If successful, do the post send */
        if (request->isSent()) {
            storePostSend();
        } else {
            DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
                    "will retry later\n",
                    inst->seqNum);
        }
    }
    // Sanity check on the pending-writeback counter.
    assert(storesToWB >= 0);
}

最終的に、DCacheからのレスポンスによって書き込みが完了する。

// src/cpu/o3/lsq_unit.cc

void
LSQUnit::completeDataAccess(PacketPtr pkt)
{
    LSQRequest *request = dynamic_cast<LSQRequest *>(pkt->senderState);
    DynInstPtr inst = request->instruction();
/* ... 途中省略 ... */
                } else if (inst->isStore()) {
            // This is a regular store (i.e., not store conditionals and
            // atomics), so it can complete without writing back

            // If store is at the top of STQ, reissue it to dcache to mimic merak fifo store
            StoreQueue::iterator sqIt = request->instruction()->sqIt;
            bool oldest = (sqIt == storeQueue.begin());
            if (oldest && sqIt->reissued()) {
                completeStore(sqIt);
            } else if (oldest) {
                sqIt->request_repeat()->buildPackets();
                sqIt->request_repeat()->sendPacket();
                sqIt->completed() = false;
                if (sqIt->request_repeat()->isSent()){
                    sqIt->reissued() = true;
                }
            } else {
                // flag as completed, but not reissued since it has not become the oldest
                sqIt->completed() = true;
            }
        }

msyksphinz.hatenablog.com