From 520b6fb008d4087505b9eba04c769c97eebe0243 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Sat, 10 Jan 2026 01:22:57 +0800 Subject: [PATCH 01/23] fix(MainPipe, Directory): fix the incorrect handling of valid registers related to multi-cycle-path --- src/main/scala/coupledL2/Directory.scala | 3 +++ src/main/scala/coupledL2/tl2chi/MainPipe.scala | 13 +++---------- src/main/scala/coupledL2/tl2tl/MainPipe.scala | 11 +++-------- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index e0762dcf2..d58abc476 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -98,6 +98,7 @@ class ReplacerResult(implicit p: Parameters) extends L2Bundle { val meta = new MetaEntry() val mshrId = UInt(mshrBits.W) val retry = Bool() + val validHold = Bool() } class MetaWrite(implicit p: Parameters) extends L2Bundle { @@ -210,6 +211,7 @@ class Directory(implicit p: Parameters) extends L2Module { val refillReqValid_s2 = RegNext(io.read.fire && io.read.bits.refill, false.B) val refillReqValid_s3 = RegNext(refillReqValid_s2, false.B) + val refillReqValid_hold_s3 = RegEnable(refillReqValid_s2, false.B, !RegNext(refillReqValid_s2)) // Tag(ECC) R/W val tagWrite = if (enableTagECC) { @@ -344,6 +346,7 @@ class Directory(implicit p: Parameters) extends L2Module { io.replResp.bits.meta := metaAll_s3(finalWay) io.replResp.bits.mshrId := req_s3.mshrId io.replResp.bits.retry := refillRetry + io.replResp.bits.validHold := refillReqValid_hold_s3 /* ====== Update ====== */ // PLRU: update replacer only when A hit or refill, at stage 3 diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 75b0252b6..fe9544d4c 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -219,9 +219,7 @@ class MainPipe(implicit p: Parameters) extends 
TL2CHIL2Module with HasCHIOpcodes val l2Error_s3 = io.dirResp_s3.error val mshr_refill_s3 = mshr_accessackdata_s3 || mshr_hintack_s3 || mshr_grant_s3 // needs refill to L2 DS - val replResp_valid_s3 = io.replResp.valid - val replResp_valid_s4 = RegNext(io.replResp.valid, init = false.B) - val replResp_valid_hold = replResp_valid_s3 || replResp_valid_s4 + val replResp_valid_hold = io.replResp.bits.validHold val retry = replResp_valid_hold && io.replResp.bits.retry val need_repl = replResp_valid_hold && io.replResp.bits.meta.state =/= INVALID && req_s3.replTask @@ -482,15 +480,10 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val wen = wen_c || wen_mshr // This is to let io.toDS.req_s3.valid hold for 2 cycles (see DataStorage for details) - val task_s3_valid_hold2 = RegInit(0.U(2.W)) - when(task_s2.valid) { - task_s3_valid_hold2 := "b11".U - }.otherwise { - task_s3_valid_hold2 := task_s3_valid_hold2 >> 1.U - } + val task_s3_valid_hold2 = RegEnable(task_s2.valid, false.B, !RegNext(task_s2.valid)) io.toDS.en_s3 := task_s3.valid && (ren || wen) - io.toDS.req_s3.valid := task_s3_valid_hold2(0) && (ren || wen) + io.toDS.req_s3.valid := task_s3_valid_hold2 && (ren || wen) io.toDS.req_s3.bits.way := Mux( mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index 3f96031ce..3b618ac17 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -304,15 +304,10 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { val wen = wen_c || wen_mshr // This is to let io.toDS.req_s3.valid hold for 2 cycles (see DataStorage for details) - val task_s3_valid_hold2 = RegInit(0.U(2.W)) - when(task_s2.valid) { - task_s3_valid_hold2 := "b11".U - }.otherwise { - task_s3_valid_hold2 := task_s3_valid_hold2 >> 1.U - } + val task_s3_valid_hold2 = RegEnable(task_s2.valid, 
false.B, !RegNext(task_s2.valid)) - io.toDS.en_s3 := task_s3.valid && (ren || wen) - io.toDS.req_s3.valid := task_s3_valid_hold2(0) && (ren || wen) + io.toDS.en_s3 := task_s3.valid && (ren || wen) + io.toDS.req_s3.valid := task_s3_valid_hold2 && (ren || wen) io.toDS.req_s3.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, Mux(mshr_req_s3, req_s3.way, dirResult_s3.way)) io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) From 0fe90069e5835ddd7bffd225e91e502c2a7c40c7 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 26 Mar 2026 16:18:54 +0800 Subject: [PATCH 02/23] fix(Common, CoupledL2): redundant width in l2Hint sourceId --- src/main/scala/coupledL2/Common.scala | 4 ++-- src/main/scala/coupledL2/CoupledL2.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index 5fdb5fe63..cf69bb6a5 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -399,8 +399,8 @@ class PrefetchRecv extends Bundle { } // custom l2 - l1 interface -class L2ToL1Hint(implicit p: Parameters) extends Bundle { - val sourceId = UInt(32.W) // tilelink sourceID +class L2ToL1Hint(implicit p: Parameters) extends L2Bundle { + val sourceId = UInt(sourceIdBits.W) // tilelink sourceID val isKeyword = Bool() // miss entry keyword } diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index f6e3e8ac9..6a8571967 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -327,7 +327,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has val hartId = Input(UInt(hartIdLen.W)) val pfCtrlFromCore = Input(new PrefetchCtrlFromCore) // val l2_hint = Valid(UInt(32.W)) - val l2_hint = ValidIO(new L2ToL1Hint()) + val l2_hint = ValidIO(new L2ToL1Hint()(l2ECCParams)) val l2_tlb_req = new 
L2ToL1TlbIO(nRespDups = 1)(l2TlbParams) val debugTopDown = new Bundle { val robTrueCommit = Input(UInt(64.W)) @@ -526,7 +526,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has if (enableHintGuidedGrant) { // for timing consideration, hint should latch one cycle before sending to L1 // instead of adding a Pipeline/Queue to latch here, we just set hintQueue in GrantBuf & CustomL1Hint "flow=false" - val l1HintArb = Module(new Arbiter(new L2ToL1Hint(), slices.size)) + val l1HintArb = Module(new Arbiter(new L2ToL1Hint()(l2ECCParams), slices.size)) val slices_l1Hint = slices.zipWithIndex.map { case (s, i) => s.io.l1Hint } From 6464c77ad73a81d7e3c756c0d81ca30b6984479c Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 16 Apr 2026 15:14:15 +0800 Subject: [PATCH 03/23] fix(Directory): optimize replace way selection When a way is occupied by an MSHR, it must either have been hit or have been chosen as the refill way in MainPipe stage 3, which means it is definitely not invalid. Conversely, an invalid way selected for replacement must be a free way. So don't use invalidWay to look up freeWayMask when choosing the replacement way.
--- src/main/scala/coupledL2/Directory.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index d58abc476..780cad81b 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -137,7 +137,7 @@ class Directory(implicit p: Parameters) extends L2Module { val msInfo = Vec(mshrsAll, Flipped(ValidIO(new MSHRInfo))) }) - def invalid_way_sel(metaVec: Seq[MetaEntry], repl: UInt) = { + def invalid_way_sel(metaVec: Seq[MetaEntry]) = { val invalid_vec = metaVec.map(_.state === MetaData.INVALID) val has_invalid_way = Cat(invalid_vec).orR val way = ParallelPriorityMux(invalid_vec.zipWithIndex.map(x => x._1 -> x._2.U(wayBits.W))) @@ -277,11 +277,11 @@ class Directory(implicit p: Parameters) extends L2Module { )).reduceTree(_ | _) val freeWayMask_s3 = RegEnable(~occWayMask_s2, refillReqValid_s2) - val refillRetry = !(freeWayMask_s3.orR) + val refillRetry = RegEnable(occWayMask_s2.andR, refillReqValid_s2) val hitWay = OHToUInt(hitVec) val replaceWay = WireInit(UInt(wayBits.W), 0.U) - val (inv, invalidWay) = invalid_way_sel(metaAll_s3, replaceWay) + val (inv, invalidWay) = invalid_way_sel(metaAll_s3) val chosenWay = Mux(inv, invalidWay, replaceWay) // if chosenWay not in wayMask, then choose a way in wayMask // for retry bug fixing: if the chosenway cause retry last time, choose another way From f2b9435ff6968d5633a5c77ae27b81c1c42593c0 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 26 Mar 2026 17:56:04 +0800 Subject: [PATCH 04/23] fix(Directory): remove the reset init of some sram After reset, the first access to a certain way will inevitably be selected as invalid way, and the read value from the replacer SRAM and origin bit SRAM won't be used at this time. At the same time, these SRAMs are written after selection, which is equivalent to initializing them. 
Therefore, the initialization of these SRAMs can be removed. --- src/main/scala/coupledL2/Directory.scala | 37 +++--------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index 780cad81b..75c18f96f 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -189,9 +189,6 @@ class Directory(implicit p: Parameters) extends L2Module { val metaRead = Wire(Vec(ways, new MetaEntry())) val errorRead = Wire(Vec(ways, Bool())) - val resetFinish = RegInit(false.B) - val resetIdx = RegInit((sets - 1).U) - // Replacer val repl = ReplacementPolicy.fromString(cacheParams.replacement, ways) val random_repl = cacheParams.replacement == "random" @@ -370,12 +367,7 @@ class Directory(implicit p: Parameters) extends L2Module { val origin_bits_r = origin_bit_opt.get.io.r(io.read.fire, io.read.bits.set).resp.data val origin_bits_hold = Wire(Vec(ways, Bool())) origin_bits_hold := HoldUnless(origin_bits_r, RegNext(io.read.fire, false.B)) - origin_bit_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, hit_s3, false.B), - Mux(resetFinish, req_s3.set, resetIdx), - UIntToOH(way_s3) - ) + origin_bit_opt.get.io.w(replacerWen, hit_s3, req_s3.set, UIntToOH(way_s3)) val rrip_req_type = WireInit(0.U(4.W)) // [3]: 0-firstuse, 1-reuse; // [2]: 0-acquire, 1-release; @@ -391,12 +383,7 @@ class Directory(implicit p: Parameters) extends L2Module { val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3, hit_s3, inv, rrip_req_type) val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) - replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, repl_init.asUInt), - Mux(resetFinish, set_s3, resetIdx), - 1.U - ) + replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) } else if(cacheParams.replacement == "drrip"){ // Set Dueling @@ -425,29 +412,13 @@ class 
Directory(implicit p: Parameters) extends L2Module { val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) - replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, repl_init.asUInt), - Mux(resetFinish, set_s3, resetIdx), - 1.U - ) + replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) } else { val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3) - replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, 0.U), - Mux(resetFinish, set_s3, resetIdx), - 1.U - ) + replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) } /* ====== Reset ====== */ - when(resetIdx === 0.U) { - resetFinish := true.B - } - when(!resetFinish) { - resetIdx := resetIdx - 1.U - } XSPerfAccumulate("dirRead_cnt", io.read.fire) XSPerfAccumulate("choose_busy_way", reqValid_s3 && !req_s3.wayMask(chosenWay)) From dd6cb1cad20e20506abdd7fb7dabe7d57a3be90e Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Sun, 29 Mar 2026 00:42:54 +0800 Subject: [PATCH 05/23] fix(DataStorage): refactor io of DataStorage --- src/main/scala/coupledL2/DataStorage.scala | 11 ++++++----- src/main/scala/coupledL2/tl2chi/MainPipe.scala | 8 +++++--- src/main/scala/coupledL2/tl2tl/MainPipe.scala | 8 +++++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/main/scala/coupledL2/DataStorage.scala b/src/main/scala/coupledL2/DataStorage.scala index de7015f8f..9fe793430 100644 --- a/src/main/scala/coupledL2/DataStorage.scala +++ b/src/main/scala/coupledL2/DataStorage.scala @@ -27,6 +27,7 @@ class DSRequest(implicit p: Parameters) extends L2Bundle { val way = UInt(wayBits.W) val set = UInt(setBits.W) val wen = Bool() + val ren = Bool() } // mask not used @@ -78,12 +79,12 @@ class DataStorage(implicit p: Parameters) extends L2Module { extraHold = true, withClockGate = true )) - array.io_en := io.en + array.io_en := io.en && (io.req.bits.ren || io.req.bits.wen) private val 
mbistPl = MbistPipeline.PlaceMbistPipeline(1, "L2DataStorage", p(L2ParamKey).hasMbist) val arrayIdx = Cat(io.req.bits.way, io.req.bits.set) val wen = io.req.valid && io.req.bits.wen - val ren = io.req.valid && !io.req.bits.wen + val ren = io.req.valid && !io.req.bits.wen && io.req.bits.ren val arrayWrite = Wire(new DSECCBankBlock) val arrayWriteData = if (enableDataECC) { @@ -121,12 +122,12 @@ class DataStorage(implicit p: Parameters) extends L2Module { io.rdata := dataRead io.error := error - assert(!io.en || !RegNext(io.en, false.B), + assert(!array.io_en || !RegNext(array.io_en, false.B), "Continuous SRAM req prohibited under MCP2!") - assert(!(RegNext(io.en) && (io.req.asUInt =/= RegNext(io.req.asUInt))), + assert(!(RegNext(array.io_en) && (io.req.asUInt =/= RegNext(io.req.asUInt))), s"DataStorage req fails to hold for 2 cycles!") - assert(!(RegNext(io.en && io.req.bits.wen) && (io.wdata.asUInt =/= RegNext(io.wdata.asUInt))), + assert(!(RegNext(array.io_en && io.req.bits.wen) && (io.wdata.asUInt =/= RegNext(io.wdata.asUInt))), s"DataStorage wdata fails to hold for 2 cycles!") } diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index fe9544d4c..744628b32 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -482,15 +482,17 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // This is to let io.toDS.req_s3.valid hold for 2 cycles (see DataStorage for details) val task_s3_valid_hold2 = RegEnable(task_s2.valid, false.B, !RegNext(task_s2.valid)) - io.toDS.en_s3 := task_s3.valid && (ren || wen) - io.toDS.req_s3.valid := task_s3_valid_hold2 && (ren || wen) + io.toDS.en_s3 := task_s3.valid + io.toDS.req_s3.valid := task_s3_valid_hold2 io.toDS.req_s3.bits.way := Mux( mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, Mux(mshr_req_s3, req_s3.way, dirResult_s3.way) ) - io.toDS.req_s3.bits.set := Mux(mshr_req_s3, 
req_s3.set, dirResult_s3.set) + // io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) + io.toDS.req_s3.bits.set := req_s3.set io.toDS.req_s3.bits.wen := wen + io.toDS.req_s3.bits.ren := ren io.toDS.wdata_s3.data := Mux( !mshr_req_s3, c_releaseData_s3, diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index 3b618ac17..3ca8ec546 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -306,12 +306,14 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { // This is to let io.toDS.req_s3.valid hold for 2 cycles (see DataStorage for details) val task_s3_valid_hold2 = RegEnable(task_s2.valid, false.B, !RegNext(task_s2.valid)) - io.toDS.en_s3 := task_s3.valid && (ren || wen) - io.toDS.req_s3.valid := task_s3_valid_hold2 && (ren || wen) + io.toDS.en_s3 := task_s3.valid + io.toDS.req_s3.valid := task_s3_valid_hold2 io.toDS.req_s3.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, Mux(mshr_req_s3, req_s3.way, dirResult_s3.way)) - io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) + // io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) + io.toDS.req_s3.bits.set := req_s3.set io.toDS.req_s3.bits.wen := wen + io.toDS.req_s3.bits.ren := ren io.toDS.wdata_s3.data := Mux( !mshr_req_s3, c_releaseData_s3, // Among all sinkTasks, only C-Release writes DS From ac37adc98badfd493ccb88ecbb31c7da6ddafa70 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 16 Apr 2026 10:42:24 +0800 Subject: [PATCH 06/23] fix(MainPipe, Directory): decouple meta choose logic of hit and refill Hit-related logic never uses the meta chosen by the replacement logic, and vice versa: replacement-related logic never uses the meta of the hit way. So decouple the two.
--- src/main/scala/coupledL2/Directory.scala | 11 +++ .../scala/coupledL2/tl2chi/MainPipe.scala | 69 ++++++++++--------- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index 75c18f96f..44fb1224f 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -87,7 +87,9 @@ class DirResult(implicit p: Parameters) extends L2Bundle { val set = UInt(setBits.W) val way = UInt(wayBits.W) // hit way or victim way val meta = new MetaEntry() + val metaOnHit = new MetaEntry() val error = Bool() + val errOnSnp = Bool() val replacerInfo = new ReplacerInfo() // for TopDown usage } @@ -297,9 +299,16 @@ class Directory(implicit p: Parameters) extends L2Module { val hit_s3 = Cat(hitVec).orR || req_s3.cmoAll val way_s3 = Mux(req_s3.cmoAll, req_s3.cmoWay, Mux(hit_s3, hitWay, finalWay)) val meta_s3 = metaAll_s3(way_s3) + val metaOnHit_s3 = metaAll_s3(hitWay) val tag_s3 = tagAll_s3(way_s3) val set_s3 = req_s3.set val replacerInfo_s3 = req_s3.replacerInfo + val errorOnSNP_s3 = if (enableTagECC) { + errorAll_s3(hitWay) + } else { + false.B + } + val error_s3 = if (enableTagECC) { errorAll_s3(way_s3) && reqValid_s3 && !req_s3.cmoAll && meta_s3.state =/= MetaData.INVALID } else { @@ -310,9 +319,11 @@ class Directory(implicit p: Parameters) extends L2Module { io.resp.bits.hit := hit_s3 io.resp.bits.way := way_s3 io.resp.bits.meta := meta_s3 + io.resp.bits.metaOnHit := metaOnHit_s3 io.resp.bits.tag := tag_s3 io.resp.bits.set := set_s3 io.resp.bits.error := error_s3 // depends on ECC + io.resp.bits.errOnSnp := errorOnSNP_s3 io.resp.bits.replacerInfo := replacerInfo_s3 dontTouch(io) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 744628b32..7042be349 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -148,6 +148,7 @@ class 
MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* ======== Enchantment ======== */ val dirResult_s3 = io.dirResp_s3 val meta_s3 = dirResult_s3.meta + val metaOnHit_s3 = dirResult_s3.metaOnHit val req_s3 = task_s3.bits val cmoHitInvalid = io.cmoAllBlock.getOrElse(false.B) && (meta_s3.state === INVALID) @@ -191,13 +192,14 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val mshr_cbWrData_s3 = mshr_req_s3 && req_s3.toTXDAT && req_s3.chiOpcode.get === CopyBackWrData val meta_has_clients_s3 = meta_s3.clients.orR + val metaOnHit_has_clients_s3 = metaOnHit_s3.clients.orR val req_needT_s3 = needT(req_s3.opcode, req_s3.param) val cmo_cbo_retention_s3 = req_cbo_clean_s3 || req_cbo_flush_s3 val cmo_cbo_s3 = req_cbo_clean_s3 || req_cbo_flush_s3 || req_cbo_inval_s3 - val cache_alias = req_acquire_s3 && dirResult_s3.hit && meta_s3.clients(0) && - meta_s3.alias.getOrElse(0.U) =/= req_s3.alias.getOrElse(0.U) + val cache_alias = req_acquire_s3 && dirResult_s3.hit && metaOnHit_s3.clients(0) && + metaOnHit_s3.alias.getOrElse(0.U) =/= req_s3.alias.getOrElse(0.U) // *NOTICE: 'nestable_*' must not be used in A Channel related logics. val nestable_dirResult_s3 = Wire(chiselTypeOf(dirResult_s3)) @@ -210,6 +212,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // was always non-hit on cache replacement subsequent release. nestable_dirResult_s3.hit := req_s3.snpHitReleaseMeta.state =/= INVALID nestable_dirResult_s3.meta := req_s3.snpHitReleaseMeta + nestable_dirResult_s3.metaOnHit := req_s3.snpHitReleaseMeta nestable_dirResult_s3.set := req_s3.set nestable_dirResult_s3.tag := req_s3.tag } @@ -227,22 +230,22 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // *NOTICE: A Channel requests should be blocked by RequestBuffer when MSHR nestable, // 'nestable_*' must not be used here. 
val acquire_on_miss_s3 = req_acquire_s3 || req_prefetch_s3 || req_get_s3 - val acquire_on_hit_s3 = meta_s3.state === BRANCH && req_needT_s3 && !req_prefetch_s3 + val acquire_on_hit_s3 = metaOnHit_s3.state === BRANCH && req_needT_s3 && !req_prefetch_s3 val need_acquire_s3_a = req_s3.fromA && (Mux( dirResult_s3.hit, acquire_on_hit_s3, acquire_on_miss_s3 ) || cmo_cbo_s3) - val need_probe_s3_a = dirResult_s3.hit && meta_has_clients_s3 && ( - req_get_s3 && (meta_s3.state === TRUNK) || - req_cbo_clean_s3 && (meta_s3.state === TRUNK) || + val need_probe_s3_a = dirResult_s3.hit && metaOnHit_has_clients_s3 && ( + req_get_s3 && (metaOnHit_s3.state === TRUNK) || + req_cbo_clean_s3 && (metaOnHit_s3.state === TRUNK) || req_cbo_flush_s3 || req_cbo_inval_s3 ) val need_release_s3_a = dirResult_s3.hit && ( - req_cbo_clean_s3 && (!need_probe_s3_a && meta_s3.dirty) || - req_cbo_flush_s3 && (isValid(meta_s3.state)) || - req_cbo_inval_s3 && (isValid(meta_s3.state)) + req_cbo_clean_s3 && (!need_probe_s3_a && metaOnHit_s3.dirty) || + req_cbo_flush_s3 && (isValid(metaOnHit_s3.state)) || + req_cbo_inval_s3 && (isValid(metaOnHit_s3.state)) ) val need_cmoresp_s3_a = cmo_cbo_s3 val need_compack_s3_a = !cmo_cbo_s3 @@ -265,21 +268,21 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes */ // whether L2 should do forwarding or not val expectFwd = isSnpXFwd(req_s3.chiOpcode.get) - val canFwd = nestable_dirResult_s3.hit && !(nestable_dirResult_s3.meta.tagErr || nestable_dirResult_s3.error) + val canFwd = nestable_dirResult_s3.hit && !(nestable_dirResult_s3.metaOnHit.tagErr || nestable_dirResult_s3.errOnSnp) val doFwd = expectFwd && canFwd val need_pprobe_s3_b_snpStable = req_s3.fromB && ( isSnpOnceX(req_s3.chiOpcode.get) || isSnpQuery(req_s3.chiOpcode.get) || isSnpStashX(req_s3.chiOpcode.get) - ) && dirResult_s3.hit && meta_s3.state === TRUNK && meta_has_clients_s3 + ) && dirResult_s3.hit && metaOnHit_s3.state === TRUNK && metaOnHit_has_clients_s3 val 
need_pprobe_s3_b_snpToB = req_s3.fromB && ( isSnpToB(req_s3.chiOpcode.get) || req_s3.chiOpcode.get === SnpCleanShared - ) && dirResult_s3.hit && meta_s3.state === TRUNK && meta_has_clients_s3 + ) && dirResult_s3.hit && metaOnHit_s3.state === TRUNK && metaOnHit_has_clients_s3 val need_pprobe_s3_b_snpToN = req_s3.fromB && ( isSnpUniqueX(req_s3.chiOpcode.get) || req_s3.chiOpcode.get === SnpCleanInvalid || isSnpMakeInvalidX(req_s3.chiOpcode.get) - ) && dirResult_s3.hit && meta_has_clients_s3 - val need_pprobe_s3_b_snpNDERR = req_s3.fromB && tagError_s3 && dirResult_s3.hit + ) && dirResult_s3.hit && metaOnHit_has_clients_s3 + val need_pprobe_s3_b_snpNDERR = req_s3.fromB && (io.dirResp_s3.errOnSnp || io.dirResp_s3.metaOnHit.tagErr) && dirResult_s3.hit val need_pprobe_s3_b = need_pprobe_s3_b_snpStable || need_pprobe_s3_b_snpToB || need_pprobe_s3_b_snpToN || need_pprobe_s3_b_snpNDERR val need_dct_s3_b = doFwd // DCT val need_mshr_s3_b = need_pprobe_s3_b || need_dct_s3_b @@ -303,7 +306,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* ======== Resps to SinkA/B/C Reqs ======== */ val sink_resp_s3 = WireInit(0.U.asTypeOf(Valid(new TaskBundle))) - val sink_resp_s3_a_promoteT = dirResult_s3.hit && isT(meta_s3.state) + val sink_resp_s3_a_promoteT = dirResult_s3.hit && isT(metaOnHit_s3.state) // whether L2 should respond data to HN or not val retToSrc = req_s3.retToSrc.getOrElse(false.B) @@ -526,7 +529,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val metaW_valid_s3_b = sinkB_req_s3 && !need_mshr_s3_b && dirResult_s3.hit && (!isSnpOnce(req_s3.chiOpcode.get) || (req_s3.snpHitReleaseToClean && req_s3.snpHitReleaseMeta.dirty)) && !isSnpStashX(req_s3.chiOpcode.get) && !isSnpQuery(req_s3.chiOpcode.get) && ( - meta_s3.state === TIP || meta_s3.state === BRANCH && isSnpToN(req_s3.chiOpcode.get) + metaOnHit_s3.state === TIP || metaOnHit_s3.state === BRANCH && isSnpToN(req_s3.chiOpcode.get) ) val 
metaW_valid_s3_c = sinkC_req_s3 && dirResult_s3.hit val metaW_valid_s3_mshr = mshr_req_s3 && req_s3.metaWen && !(mshr_refill_s3 && retry) @@ -535,37 +538,37 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val metaW_s3_a_alias = Mux( req_get_s3 || req_prefetch_s3, - meta_s3.alias.getOrElse(0.U), + metaOnHit_s3.alias.getOrElse(0.U), req_s3.alias.getOrElse(0.U) ) val metaW_s3_a = MetaEntry( - dirty = meta_s3.dirty, - state = Mux(req_needT_s3 || sink_resp_s3_a_promoteT, TRUNK, meta_s3.state), + dirty = metaOnHit_s3.dirty, + state = Mux(req_needT_s3 || sink_resp_s3_a_promoteT, TRUNK, metaOnHit_s3.state), clients = Fill(clientBits, Mux(l2Error_s3, false.B, true.B)), alias = Some(metaW_s3_a_alias), accessed = true.B, - tagErr = meta_s3.tagErr, - dataErr = meta_s3.dataErr + tagErr = metaOnHit_s3.tagErr, + dataErr = metaOnHit_s3.dataErr ) val metaW_s3_b = Mux(isSnpToN(req_s3.chiOpcode.get), MetaEntry(), MetaEntry( dirty = false.B, - state = Mux(req_s3.chiOpcode.get === SnpCleanShared, meta_s3.state, BRANCH), - clients = meta_s3.clients, - alias = meta_s3.alias, - accessed = meta_s3.accessed, - tagErr = meta_s3.tagErr, - dataErr = meta_s3.dataErr + state = Mux(req_s3.chiOpcode.get === SnpCleanShared, metaOnHit_s3.state, BRANCH), + clients = metaOnHit_s3.clients, + alias = metaOnHit_s3.alias, + accessed = metaOnHit_s3.accessed, + tagErr = metaOnHit_s3.tagErr, + dataErr = metaOnHit_s3.dataErr ) ) val metaW_s3_c = MetaEntry( - dirty = meta_s3.dirty || wen_c, - state = Mux(isParamFromT(req_s3.param), TIP, meta_s3.state), + dirty = metaOnHit_s3.dirty || wen_c, + state = Mux(isParamFromT(req_s3.param), TIP, metaOnHit_s3.state), clients = Fill(clientBits, !isToN(req_s3.param)), - alias = meta_s3.alias, - accessed = meta_s3.accessed, - tagErr = Mux(wen_c, req_s3.denied, meta_s3.tagErr), - dataErr = Mux(wen_c, req_s3.corrupt, meta_s3.dataErr) // update error when write DS + alias = metaOnHit_s3.alias, + accessed = metaOnHit_s3.accessed, + tagErr = 
Mux(wen_c, req_s3.denied, metaOnHit_s3.tagErr), + dataErr = Mux(wen_c, req_s3.corrupt, metaOnHit_s3.dataErr) // update error when write DS ) // use merge_meta if mergeA val metaW_s3_mshr = WireInit(Mux(req_s3.mergeA, req_s3.aMergeTask.meta, req_s3.meta)) From 75d4937aacea960c3fbd20b119b41edb4a996010 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Wed, 29 Apr 2026 11:21:47 +0800 Subject: [PATCH 07/23] fix: use wayOH instead of way for meta choose and Dir write --- src/main/scala/coupledL2/Directory.scala | 89 ++++++++++--------- .../scala/coupledL2/tl2chi/MainPipe.scala | 23 +++-- src/main/scala/coupledL2/tl2chi/Slice.scala | 4 + src/main/scala/coupledL2/tl2tl/MainPipe.scala | 2 +- src/main/scala/coupledL2/utils/Replacer.scala | 31 +++---- src/main/scala/coupledL2/utils/maskToOH.scala | 21 +++++ 6 files changed, 104 insertions(+), 66 deletions(-) create mode 100644 src/main/scala/coupledL2/utils/maskToOH.scala diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index 44fb1224f..fc3232b19 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -26,6 +26,7 @@ import utility.sram.SRAMTemplate import org.chipsalliance.cde.config.Parameters import coupledL2.prefetch.PfSource import freechips.rocketchip.tilelink.TLMessages._ +import freechips.rocketchip.util.SeqToAugmentedSeq class MetaEntry(implicit p: Parameters) extends L2Bundle { val dirty = Bool() @@ -87,9 +88,7 @@ class DirResult(implicit p: Parameters) extends L2Bundle { val set = UInt(setBits.W) val way = UInt(wayBits.W) // hit way or victim way val meta = new MetaEntry() - val metaOnHit = new MetaEntry() val error = Bool() - val errOnSnp = Bool() val replacerInfo = new ReplacerInfo() // for TopDown usage } @@ -111,7 +110,7 @@ class MetaWrite(implicit p: Parameters) extends L2Bundle { class TagWrite(implicit p: Parameters) extends L2Bundle { val set = UInt(setBits.W) - val way = UInt(wayBits.W) + val 
wayOH = UInt(cacheParams.ways.W) val wtag = UInt(tagBits.W) } @@ -137,13 +136,18 @@ class Directory(implicit p: Parameters) extends L2Module { val replResp = ValidIO(new ReplacerResult) // used to count occWays for Grant to retry val msInfo = Vec(mshrsAll, Flipped(ValidIO(new MSHRInfo))) + val metaOnHit = new MetaEntry() + val errOnSnp = Bool() + val wayOH = Output(UInt(cacheParams.ways.W)) + val replWayOH = Output(UInt(cacheParams.ways.W)) }) def invalid_way_sel(metaVec: Seq[MetaEntry]) = { val invalid_vec = metaVec.map(_.state === MetaData.INVALID) val has_invalid_way = Cat(invalid_vec).orR - val way = ParallelPriorityMux(invalid_vec.zipWithIndex.map(x => x._1 -> x._2.U(wayBits.W))) - (has_invalid_way, way) + val invalid_oh = MaskToOH(invalid_vec.asUInt) + val invalid_way = OHToUInt(invalid_oh) + (has_invalid_way, invalid_way, invalid_oh) // one-hot of invalid ways } val sets = cacheParams.sets @@ -228,11 +232,14 @@ class Directory(implicit p: Parameters) extends L2Module { tagRead } tagRead_s3 := bankTagRead + when (io.tagWReq.valid) { + assert(PopCount(io.tagWReq.bits.wayOH) === 1.U, "Tag write should be one-hot") + } tagArray.io.w( tagWen, tagWrite, io.tagWReq.bits.set, - UIntToOH(io.tagWReq.bits.way) + io.tagWReq.bits.wayOH ) val bankTagError = if (enableTagECC) { @@ -278,39 +285,35 @@ class Directory(implicit p: Parameters) extends L2Module { val freeWayMask_s3 = RegEnable(~occWayMask_s2, refillReqValid_s2) val refillRetry = RegEnable(occWayMask_s2.andR, refillReqValid_s2) - val hitWay = OHToUInt(hitVec) + // val hitWay = OHToUInt(hitVec) + val hitOH = hitVec.asUInt + assert(PopCount(hitVec) <= 1.U, "Set should not have more than one hit") val replaceWay = WireInit(UInt(wayBits.W), 0.U) - val (inv, invalidWay) = invalid_way_sel(metaAll_s3) - val chosenWay = Mux(inv, invalidWay, replaceWay) - // if chosenWay not in wayMask, then choose a way in wayMask - // for retry bug fixing: if the chosenway cause retry last time, choose another way - /*val finalWay = 
Mux( - req_s3.wayMask(chosenWay), - chosenWay, - PriorityEncoder(req_s3.wayMask) - )*/ + val replaceOH = WireInit(UInt(ways.W), 0.U) + val (inv, invalidWay, invOH) = invalid_way_sel(metaAll_s3) + val chosenOH = Mux(inv, invOH, replaceOH) // for retry bug fixing: if the chosenway not in freewaymask, choose another way - // TODO: req_s3.wayMask not take into consideration - val finalWay = Mux( - freeWayMask_s3(chosenWay), - chosenWay, - PriorityEncoder(freeWayMask_s3) + val finalReplOH = Mux( + Mux1H(chosenOH, freeWayMask_s3), + chosenOH, + MaskToOH(freeWayMask_s3) ) val hit_s3 = Cat(hitVec).orR || req_s3.cmoAll - val way_s3 = Mux(req_s3.cmoAll, req_s3.cmoWay, Mux(hit_s3, hitWay, finalWay)) - val meta_s3 = metaAll_s3(way_s3) - val metaOnHit_s3 = metaAll_s3(hitWay) - val tag_s3 = tagAll_s3(way_s3) + val wayOH_s3 = Mux(req_s3.cmoAll, UIntToOH(req_s3.cmoWay), Mux(hit_s3, hitOH, finalReplOH)) + val way_s3 = OHToUInt(wayOH_s3) + val meta_s3 = Mux1H(wayOH_s3, metaAll_s3) + val metaOnHit_s3 = Mux1H(hitOH, metaAll_s3) // only valid when hit + val tag_s3 = Mux1H(wayOH_s3, tagAll_s3) val set_s3 = req_s3.set val replacerInfo_s3 = req_s3.replacerInfo val errorOnSNP_s3 = if (enableTagECC) { - errorAll_s3(hitWay) + Mux1H(hitOH, errorAll_s3) } else { false.B } val error_s3 = if (enableTagECC) { - errorAll_s3(way_s3) && reqValid_s3 && !req_s3.cmoAll && meta_s3.state =/= MetaData.INVALID + Mux1H(wayOH_s3, errorAll_s3) && reqValid_s3 && !req_s3.cmoAll && meta_s3.state =/= MetaData.INVALID } else { false.B } @@ -319,12 +322,13 @@ class Directory(implicit p: Parameters) extends L2Module { io.resp.bits.hit := hit_s3 io.resp.bits.way := way_s3 io.resp.bits.meta := meta_s3 - io.resp.bits.metaOnHit := metaOnHit_s3 + io.metaOnHit := metaOnHit_s3 io.resp.bits.tag := tag_s3 io.resp.bits.set := set_s3 io.resp.bits.error := error_s3 // depends on ECC - io.resp.bits.errOnSnp := errorOnSNP_s3 + io.errOnSnp := errorOnSNP_s3 io.resp.bits.replacerInfo := replacerInfo_s3 + io.wayOH := wayOH_s3 
dontTouch(io) dontTouch(metaArray.io) @@ -345,16 +349,19 @@ class Directory(implicit p: Parameters) extends L2Module { repl_state } - replaceWay := repl.get_replace_way(repl_state_s3) + replaceOH := repl.get_replace_way(repl_state_s3) + assert(PopCount(replaceOH) === 1.U, "Replacement way should be one-hot") + replaceWay := OHToUInt(replaceOH) io.replResp.valid := refillReqValid_s3 - io.replResp.bits.tag := tagAll_s3(finalWay) + io.replResp.bits.tag := Mux1H(finalReplOH, tagAll_s3) io.replResp.bits.set := req_s3.set - io.replResp.bits.way := finalWay - io.replResp.bits.meta := metaAll_s3(finalWay) + io.replResp.bits.way := OHToUInt(finalReplOH) + io.replResp.bits.meta := Mux1H(finalReplOH, metaAll_s3) io.replResp.bits.mshrId := req_s3.mshrId io.replResp.bits.retry := refillRetry io.replResp.bits.validHold := refillReqValid_hold_s3 + io.replWayOH := finalReplOH /* ====== Update ====== */ // PLRU: update replacer only when A hit or refill, at stage 3 @@ -378,20 +385,20 @@ class Directory(implicit p: Parameters) extends L2Module { val origin_bits_r = origin_bit_opt.get.io.r(io.read.fire, io.read.bits.set).resp.data val origin_bits_hold = Wire(Vec(ways, Bool())) origin_bits_hold := HoldUnless(origin_bits_r, RegNext(io.read.fire, false.B)) - origin_bit_opt.get.io.w(replacerWen, hit_s3, req_s3.set, UIntToOH(way_s3)) + origin_bit_opt.get.io.w(replacerWen, hit_s3, req_s3.set, wayOH_s3) val rrip_req_type = WireInit(0.U(4.W)) // [3]: 0-firstuse, 1-reuse; // [2]: 0-acquire, 1-release; // [1]: 0-non-prefetch, 1-prefetch; // [0]: 0-not-refill, 1-refill - rrip_req_type := Cat(origin_bits_hold(way_s3), + rrip_req_type := Cat(Mux1H(hitOH, origin_bits_hold), req_s3.replacerInfo.channel(2), - (!refillReqValid_s3 && req_s3.replacerInfo.channel(0) && req_s3.replacerInfo.opcode === Hint) || (req_s3.replacerInfo.channel(2) && metaAll_s3(way_s3).prefetch.getOrElse(false.B)) || (refillReqValid_s3 && req_s3.replacerInfo.refill_prefetch), + (!refillReqValid_s3 && 
req_s3.replacerInfo.channel(0) && req_s3.replacerInfo.opcode === Hint) || (req_s3.replacerInfo.channel(2) && Mux1H(wayOH_s3, metaAll_s3).prefetch.getOrElse(false.B)) || (refillReqValid_s3 && req_s3.replacerInfo.refill_prefetch), req_s3.refill ) private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "L2Directory", mbist) if(cacheParams.replacement == "srrip"){ - val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3, hit_s3, inv, rrip_req_type) + val next_state_s3 = repl.get_next_state(repl_state_s3, wayOH_s3, hit_s3, inv, rrip_req_type) val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) @@ -419,7 +426,7 @@ class Directory(implicit p: Parameters) extends L2Module { Mux(match_b, true.B, Mux(PSEL(9)===0.U, false.B, true.B))) // false.B - srrip, true.B - brrip - val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3, hit_s3, inv, repl_type, rrip_req_type) + val next_state_s3 = repl.get_next_state(repl_state_s3, wayOH_s3, hit_s3, inv, repl_type, rrip_req_type) val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) @@ -432,7 +439,7 @@ class Directory(implicit p: Parameters) extends L2Module { /* ====== Reset ====== */ XSPerfAccumulate("dirRead_cnt", io.read.fire) - XSPerfAccumulate("choose_busy_way", reqValid_s3 && !req_s3.wayMask(chosenWay)) + XSPerfAccumulate("choose_busy_way", reqValid_s3 && !Mux1H(chosenOH, req_s3.wayMask)) /* ====== ChiselDB logging for prefetcher lifecycle ====== */ if (cacheParams.enableMonitor && !cacheParams.FPGAPlatform) { @@ -447,7 +454,7 @@ class Directory(implicit p: Parameters) extends L2Module { val pfReqWriteEn = io.metaWReq.valid && wmeta.prefetch.getOrElse(false.B) val pfReqWrite = Wire(new PrefetchDbEntry) val writeHasTag = io.tagWReq.valid && (io.tagWReq.bits.set === io.metaWReq.bits.set) && - (OHToUInt(io.metaWReq.bits.wayOH) === io.tagWReq.bits.way) // try to attach tag when tagWReq coincides with 
metaWReq + (io.metaWReq.bits.wayOH === io.tagWReq.bits.wayOH) // try to attach tag when tagWReq coincides with metaWReq pfReqWrite.isHit := false.B //useless for write req, just set it to false.B pfReqWrite.setIdx := io.metaWReq.bits.set // when meta write, the set idx to be written @@ -472,7 +479,7 @@ class Directory(implicit p: Parameters) extends L2Module { // Eviction: when Directory issues a replacement for a prefetched block val evictBlockEn = io.replResp.valid && !io.replResp.bits.retry - val evictBlockMeta = metaAll_s3(finalWay) // meta of the block to be evicted + val evictBlockMeta = Mux1H(finalReplOH, metaAll_s3) // meta of the block to be evicted val pfReqEvictEn = evictBlockEn && evictBlockMeta.prefetch.getOrElse(false.B) val pfReqEvict = Wire(new PrefetchDbEntry) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 7042be349..dc9bb1279 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -53,6 +53,10 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* get dir result at stage 3 */ val dirResp_s3 = Input(new DirResult()) + val metaOnHit_s3 = Input(new MetaEntry()) + val errOnSnp_s3 = Input(Bool()) + val dirWayOH_s3 = Input(UInt(cacheParams.ways.W)) + val dirReplWayOH_s3 = Input(UInt(cacheParams.ways.W)) val replResp = Flipped(ValidIO(new ReplacerResult())) /* send task to MSHRCtl at stage 3 */ @@ -137,6 +141,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* ======== Stage 2 ======== */ val task_s2 = io.taskFromArb_s2 + val reqWayOH_s2 = UIntToOH(task_s2.bits.way) /* ======== Stage 3 ======== */ val task_s3 = RegInit(0.U.asTypeOf(Valid(new TaskBundle))) @@ -144,11 +149,12 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes when (task_s2.valid) { task_s3.bits := task_s2.bits } + val reqWayOH_s3 = RegEnable(reqWayOH_s2, 
task_s2.valid) /* ======== Enchantment ======== */ val dirResult_s3 = io.dirResp_s3 val meta_s3 = dirResult_s3.meta - val metaOnHit_s3 = dirResult_s3.metaOnHit + val metaOnHit_s3 = io.metaOnHit_s3 val req_s3 = task_s3.bits val cmoHitInvalid = io.cmoAllBlock.getOrElse(false.B) && (meta_s3.state === INVALID) @@ -212,7 +218,6 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // was always non-hit on cache replacement subsequent release. nestable_dirResult_s3.hit := req_s3.snpHitReleaseMeta.state =/= INVALID nestable_dirResult_s3.meta := req_s3.snpHitReleaseMeta - nestable_dirResult_s3.metaOnHit := req_s3.snpHitReleaseMeta nestable_dirResult_s3.set := req_s3.set nestable_dirResult_s3.tag := req_s3.tag } @@ -268,7 +273,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes */ // whether L2 should do forwarding or not val expectFwd = isSnpXFwd(req_s3.chiOpcode.get) - val canFwd = nestable_dirResult_s3.hit && !(nestable_dirResult_s3.metaOnHit.tagErr || nestable_dirResult_s3.errOnSnp) + val canFwd = nestable_dirResult_s3.hit && !(Mux(req_s3.snpHitRelease, req_s3.snpHitReleaseMeta.tagErr, io.metaOnHit_s3.tagErr) || io.errOnSnp_s3) val doFwd = expectFwd && canFwd val need_pprobe_s3_b_snpStable = req_s3.fromB && ( isSnpOnceX(req_s3.chiOpcode.get) || isSnpQuery(req_s3.chiOpcode.get) || isSnpStashX(req_s3.chiOpcode.get) @@ -282,7 +287,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes req_s3.chiOpcode.get === SnpCleanInvalid || isSnpMakeInvalidX(req_s3.chiOpcode.get) ) && dirResult_s3.hit && metaOnHit_has_clients_s3 - val need_pprobe_s3_b_snpNDERR = req_s3.fromB && (io.dirResp_s3.errOnSnp || io.dirResp_s3.metaOnHit.tagErr) && dirResult_s3.hit + val need_pprobe_s3_b_snpNDERR = req_s3.fromB && (io.errOnSnp_s3 || io.metaOnHit_s3.tagErr) && dirResult_s3.hit val need_pprobe_s3_b = need_pprobe_s3_b_snpStable || need_pprobe_s3_b_snpToB || need_pprobe_s3_b_snpToN || 
need_pprobe_s3_b_snpNDERR val need_dct_s3_b = doFwd // DCT val need_mshr_s3_b = need_pprobe_s3_b || need_dct_s3_b @@ -576,17 +581,17 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes metaW_s3_mshr.dataErr := req_s3.corrupt val metaW_s3_cmo = MetaEntry() // invalid the block - val metaW_way = Mux( + val metaW_wayOH = Mux( mshr_refill_s3 && req_s3.replTask, - io.replResp.bits.way, // grant always use replResp way - Mux(mshr_req_s3, req_s3.way, dirResult_s3.way) + io.dirReplWayOH_s3, // grant always use replResp way + Mux(mshr_req_s3, reqWayOH_s3, io.dirWayOH_s3) ) io.metaWReq.valid := !resetFinish || task_s3.valid && ( metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr || metaW_valid_s3_cmo ) io.metaWReq.bits.set := Mux(resetFinish, req_s3.set, resetIdx) - io.metaWReq.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) + io.metaWReq.bits.wayOH := Mux(resetFinish, metaW_wayOH, Fill(cacheParams.ways, true.B)) io.metaWReq.bits.wmeta := Mux( resetFinish, ParallelPriorityMux( @@ -598,7 +603,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.tagWReq.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry io.tagWReq.bits.set := req_s3.set - io.tagWReq.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) + io.tagWReq.bits.wayOH := Mux(mshr_refill_s3 && req_s3.replTask, io.dirReplWayOH_s3, reqWayOH_s3) io.tagWReq.bits.wtag := req_s3.tag sink_resp_s3_b_metaWen := metaW_valid_s3_b diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index d3d23fb44..4f59ca3db 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -116,6 +116,10 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] mainPipe.io.bufResp := sinkC.io.bufResp mainPipe.io.dirResp_s3 := directory.io.resp.bits 
mainPipe.io.replResp := directory.io.replResp + mainPipe.io.metaOnHit_s3 := directory.io.metaOnHit + mainPipe.io.errOnSnp_s3 := directory.io.errOnSnp + mainPipe.io.dirWayOH_s3 := directory.io.wayOH + mainPipe.io.dirReplWayOH_s3 := directory.io.replWayOH mainPipe.io.fromMSHRCtl <> mshrCtl.io.toMainPipe mainPipe.io.bufResp := sinkC.io.bufResp mainPipe.io.refillBufResp_s3.valid := RegNext(refillBuf.io.r.valid, false.B) diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index 3ca8ec546..c1988309b 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -401,7 +401,7 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { io.tagWReq.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry io.tagWReq.bits.set := req_s3.set - io.tagWReq.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) + io.tagWReq.bits.wayOH := UIntToOH(Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way)) io.tagWReq.bits.wtag := req_s3.tag /* ======== Interact with Channels (C & D) ======== */ diff --git a/src/main/scala/coupledL2/utils/Replacer.scala b/src/main/scala/coupledL2/utils/Replacer.scala index 9ca7c623a..a17102aae 100644 --- a/src/main/scala/coupledL2/utils/Replacer.scala +++ b/src/main/scala/coupledL2/utils/Replacer.scala @@ -330,28 +330,28 @@ class StaticRRIP(n_ways: Int) extends ReplacementPolicy { def access(touch_ways: Seq[Valid[UInt]]) = {} def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare - override def get_next_state(state: UInt, touch_way: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { + override def get_next_state(state: UInt, touch_wayOH: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { val State = Wire(Vec(n_ways, UInt(2.W))) val nextState = Wire(Vec(n_ways, UInt(2.W))) State.zipWithIndex.map { case (e, i) => e := state(2*i+1,2*i) } // 
hit-Promotion, miss-Insertion & Aging - val increcement = 3.U(2.W) - State(touch_way) + val increcement = 3.U(2.W) - Mux1H(touch_wayOH, State) // req_type[3]: 0-firstuse, 1-reuse; req_type[2]: 0-acquire, 1-release; // req_type[1]: 0-non-prefetch, 1-prefetch; req_type[0]: 0-not-refill, 1-refill // rrpv: non-pref_hit/non-pref_refill(miss)/non-pref_release_reuse = 0; // pref_hit do nothing; pref_refill = 1; non-pref_release_firstuse/pref_release = 2; - nextState.zipWithIndex.map { case (e, i) => - e := Mux(i.U === touch_way, + nextState.zip(State).zip(touch_wayOH.asBools).map { case ((e, s), w) => + e := Mux(w, // for touch_way - MuxCase(State(i), Seq( + MuxCase(s, Seq( ((req_type(2,0) === 0.U && hit) || req_type(2,0) === 1.U || req_type === 12.U) -> 0.U, (req_type(2,0) === 3.U) -> 1.U, (req_type === 4.U || req_type(2,0) === 6.U) -> 2.U )), // for other ways - Mux(hit || invalid, State(i), State(i)+increcement) + Mux(hit || invalid, s, s+increcement) ) } Cat(nextState.map(x=>x).reverse) @@ -392,7 +392,7 @@ class BRRIP(n_ways: Int) extends ReplacementPolicy { def access(touch_ways: Seq[Valid[UInt]]) = {} def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare - override def get_next_state(state: UInt, touch_way: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { + override def get_next_state(state: UInt, touch_wayOH: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { val State = Wire(Vec(n_ways, UInt(2.W))) val nextState = Wire(Vec(n_ways, UInt(2.W))) State.zipWithIndex.map { case (e, i) => @@ -400,21 +400,21 @@ class BRRIP(n_ways: Int) extends ReplacementPolicy { } // hit-Promotion, miss-Insertion & Aging - val increcement = 3.U(2.W) - State(touch_way) + val increcement = 3.U(2.W) - Mux1H(touch_wayOH, State) // req_type[3]: 0-firstuse, 1-reuse; req_type[2]: 0-acquire, 1-release; // req_type[1]: 0-non-prefetch, 1-prefetch; req_type[0]: 0-not-refill, 1-refill // rrpv: non-pref_hit/non-pref_refill(miss)/non-pref_release_reuse = 0; // pref_hit do 
nothing; pref_refill = 1; non-pref_release_firstuse/pref_release = 3; - nextState.zipWithIndex.map { case (e, i) => - e := Mux(i.U === touch_way, + nextState.zip(State).zip(touch_wayOH.asBools).map { case ((e, s), w) => + e := Mux(w, // for touch_way - MuxCase(State(i), Seq( + MuxCase(s, Seq( ((req_type(2,0) === 0.U && hit) || req_type(2,0) === 1.U || req_type === 12.U) -> 0.U, (req_type(2,0) === 3.U) -> 1.U, (req_type === 4.U || req_type(2,0) === 6.U) -> 3.U )), // for other ways - Mux(hit || invalid, State(i), State(i)+increcement) + Mux(hit || invalid, s, s+increcement) ) } /* val random = (rand.nextInt(32)).U @@ -465,8 +465,8 @@ class DRRIP(n_ways: Int) extends ReplacementPolicy { def hit = {} def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare - override def get_next_state(state: UInt, touch_way: UInt, hit: Bool, invalid: Bool, chosen_type: Bool, req_type: UInt): UInt = { - Mux(chosen_type, repl_BRRIP.get_next_state(state, touch_way, hit, invalid, req_type), repl_SRRIP.get_next_state(state, touch_way, hit, invalid, req_type)) + override def get_next_state(state: UInt, touch_wayOH: UInt, hit: Bool, invalid: Bool, chosen_type: Bool, req_type: UInt): UInt = { + Mux(chosen_type, repl_BRRIP.get_next_state(state, touch_wayOH, hit, invalid, req_type), repl_SRRIP.get_next_state(state, touch_wayOH, hit, invalid, req_type)) } def get_replace_way(state: UInt): UInt = { val RRPVVec = Wire(Vec(n_ways, UInt(2.W))) @@ -482,7 +482,8 @@ class DRRIP(n_ways: Int) extends ReplacementPolicy { } e := !(isLarger.contains(true.B)) } - PriorityEncoder(lrrWayVec) + // PriorityEncoder(lrrWayVec) + MaskToOH(lrrWayVec.asUInt) } } \ No newline at end of file diff --git a/src/main/scala/coupledL2/utils/maskToOH.scala b/src/main/scala/coupledL2/utils/maskToOH.scala new file mode 100644 index 000000000..a4adef0c1 --- /dev/null +++ b/src/main/scala/coupledL2/utils/maskToOH.scala @@ -0,0 +1,21 @@ +package coupledL2.utils + +import chisel3._ +import chisel3.util._ +import 
utility.ParallelOR + +object MaskToOH { + def apply(mask: UInt): UInt = { + val width = mask.getWidth + val oh = Wire(Vec(width, Bool())) + for (i <- 0 until width) { + if (i == 0) { + oh(i) := mask(i) + } else { + oh(i) := mask(i) && !mask(i-1, 0).orR + } + } + assert(oh.asUInt === PriorityEncoderOH(mask), "MaskToOH should give the same result as PriorityEncoderOH") + oh.asUInt + } +} \ No newline at end of file From e77c0165eb67f8cdedeb41634902d39a72e24d7e Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Sat, 9 May 2026 11:05:05 +0800 Subject: [PATCH 08/23] timing(Directory): move tag ecc to s3 --- src/main/scala/coupledL2/Directory.scala | 42 +++++++++---------- .../scala/coupledL2/tl2chi/MainPipe.scala | 6 ++- .../{maskToOH.scala => OHOperation.scala} | 2 +- 3 files changed, 24 insertions(+), 26 deletions(-) rename src/main/scala/coupledL2/utils/{maskToOH.scala => OHOperation.scala} (87%) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index fc3232b19..fa0e4a4a5 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -191,9 +191,7 @@ class Directory(implicit p: Parameters) extends L2Module { val metaArray = Module(new SRAMTemplate(new MetaEntry, sets, ways, singlePort = true, hasMbist = mbist, hasSramCtl = hasSramCtl)) - val tagRead_s3 = Wire(Vec(ways, UInt(tagBits.W))) val metaRead = Wire(Vec(ways, new MetaEntry())) - val errorRead = Wire(Vec(ways, Bool())) // Replacer val repl = ReplacementPolicy.fromString(cacheParams.replacement, ways) @@ -224,14 +222,6 @@ class Directory(implicit p: Parameters) extends L2Module { io.tagWReq.bits.wtag } val tagRead = tagArray.io.r(io.read.fire, io.read.bits.set).resp.data - val bankTagRead = if (enableTagECC) { - tagRead.map(x => - Cat(VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i)(tagBankBits - 1, 0)))) - ) - } else { - tagRead - } - tagRead_s3 := bankTagRead when 
(io.tagWReq.valid) { assert(PopCount(io.tagWReq.bits.wayOH) === 1.U, "Tag write should be one-hot") } @@ -242,16 +232,6 @@ class Directory(implicit p: Parameters) extends L2Module { io.tagWReq.bits.wayOH ) - val bankTagError = if (enableTagECC) { - tagRead.map(x => - VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i))). - map(tag => cacheParams.dataCode.decode(tag).error).reduce(_ | _) - ) - } else { - VecInit(Seq.fill(ways)(false.B)) - } - errorRead := bankTagError - // Meta R/W metaRead := metaArray.io.r(io.read.fire, io.read.bits.set).resp.data metaArray.io.w( @@ -262,8 +242,22 @@ class Directory(implicit p: Parameters) extends L2Module { ) val metaAll_s3 = RegEnable(metaRead, 0.U.asTypeOf(metaRead), reqValid_s2) - val tagAll_s3 = RegEnable(tagRead_s3, 0.U.asTypeOf(tagRead_s3), reqValid_s2) - val errorAll_s3 = RegEnable(errorRead, 0.U.asTypeOf(errorRead), reqValid_s2) + val tagRead_s3 = RegEnable(tagRead, reqValid_s2) + val tagAll_s3 = if (enableTagECC) { + tagRead_s3.map(x => + Cat(VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i)(tagBankBits - 1, 0)))) + ) + } else { + tagRead_s3 + } + val errorAll_s3 = if (enableTagECC) { + tagRead_s3.map(x => + VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i))). 
+ map(tag => cacheParams.dataCode.decode(tag).error).reduce(_ | _) + ) + } else { + VecInit(Seq.fill(ways)(false.B)) + } val tagMatchVec = tagAll_s3.map(_ (tagBits - 1, 0) === req_s3.tag) val metaValidVec = metaAll_s3.map(_.state =/= MetaData.INVALID) @@ -393,7 +387,9 @@ class Directory(implicit p: Parameters) extends L2Module { // [0]: 0-not-refill, 1-refill rrip_req_type := Cat(Mux1H(hitOH, origin_bits_hold), req_s3.replacerInfo.channel(2), - (!refillReqValid_s3 && req_s3.replacerInfo.channel(0) && req_s3.replacerInfo.opcode === Hint) || (req_s3.replacerInfo.channel(2) && Mux1H(wayOH_s3, metaAll_s3).prefetch.getOrElse(false.B)) || (refillReqValid_s3 && req_s3.replacerInfo.refill_prefetch), + (!refillReqValid_s3 && req_s3.replacerInfo.channel(0) && req_s3.replacerInfo.opcode === Hint) || + (req_s3.replacerInfo.channel(2) && Mux1H(wayOH_s3, metaAll_s3).prefetch.getOrElse(false.B)) || + (refillReqValid_s3 && req_s3.replacerInfo.refill_prefetch), req_s3.refill ) private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "L2Directory", mbist) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index dc9bb1279..2d4e268cc 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -273,7 +273,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes */ // whether L2 should do forwarding or not val expectFwd = isSnpXFwd(req_s3.chiOpcode.get) - val canFwd = nestable_dirResult_s3.hit && !(Mux(req_s3.snpHitRelease, req_s3.snpHitReleaseMeta.tagErr, io.metaOnHit_s3.tagErr) || io.errOnSnp_s3) + val canFwd = nestable_dirResult_s3.hit && + !(Mux(req_s3.snpHitRelease, req_s3.snpHitReleaseMeta.tagErr, io.metaOnHit_s3.tagErr) || io.errOnSnp_s3) val doFwd = expectFwd && canFwd val need_pprobe_s3_b_snpStable = req_s3.fromB && ( isSnpOnceX(req_s3.chiOpcode.get) || isSnpQuery(req_s3.chiOpcode.get) || isSnpStashX(req_s3.chiOpcode.get) @@ -288,7 
+289,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes isSnpMakeInvalidX(req_s3.chiOpcode.get) ) && dirResult_s3.hit && metaOnHit_has_clients_s3 val need_pprobe_s3_b_snpNDERR = req_s3.fromB && (io.errOnSnp_s3 || io.metaOnHit_s3.tagErr) && dirResult_s3.hit - val need_pprobe_s3_b = need_pprobe_s3_b_snpStable || need_pprobe_s3_b_snpToB || need_pprobe_s3_b_snpToN || need_pprobe_s3_b_snpNDERR + val need_pprobe_s3_b = need_pprobe_s3_b_snpStable || need_pprobe_s3_b_snpToB || + need_pprobe_s3_b_snpToN || need_pprobe_s3_b_snpNDERR val need_dct_s3_b = doFwd // DCT val need_mshr_s3_b = need_pprobe_s3_b || need_dct_s3_b diff --git a/src/main/scala/coupledL2/utils/maskToOH.scala b/src/main/scala/coupledL2/utils/OHOperation.scala similarity index 87% rename from src/main/scala/coupledL2/utils/maskToOH.scala rename to src/main/scala/coupledL2/utils/OHOperation.scala index a4adef0c1..88c3e72a9 100644 --- a/src/main/scala/coupledL2/utils/maskToOH.scala +++ b/src/main/scala/coupledL2/utils/OHOperation.scala @@ -12,7 +12,7 @@ object MaskToOH { if (i == 0) { oh(i) := mask(i) } else { - oh(i) := mask(i) && !mask(i-1, 0).orR + oh(i) := mask(i) && !ParallelOR(mask(i-1, 0).asBools) } } assert(oh.asUInt === PriorityEncoderOH(mask), "MaskToOH should give the same result as PriorityEncoderOH") From 54e559c1ab743f7070308b966bacc93d3cc12637 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Wed, 1 Apr 2026 16:14:56 +0800 Subject: [PATCH 09/23] timing(MainPipe, TXxxx): decouple source_req_s3 and status_vec_toTX Connecting source_req_s3 to status_vec_toTX creates a path from the replacement logic to the TX modules, which becomes a critical path. So just use task_s3 as status_vec_toTX[0]. This may decrease snoop performance.
--- src/main/scala/coupledL2/tl2chi/MainPipe.scala | 2 +- src/main/scala/coupledL2/tl2chi/TXDAT.scala | 10 +++++----- src/main/scala/coupledL2/tl2chi/TXRSP.scala | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 2d4e268cc..03098a843 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -920,7 +920,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // capacity control of TX channels val tx_task_s3 = Wire(Valid(new TaskBundle)) tx_task_s3.valid := task_s3.valid // TODO: review this - tx_task_s3.bits := source_req_s3 + tx_task_s3.bits := req_s3 val tasks = Seq(tx_task_s3, task_s4, task_s5) io.status_vec_toTX.zip(tasks).foreach { case (status, task) => status.valid := task.valid diff --git a/src/main/scala/coupledL2/tl2chi/TXDAT.scala b/src/main/scala/coupledL2/tl2chi/TXDAT.scala index 98e37a11b..1b71a9da1 100644 --- a/src/main/scala/coupledL2/tl2chi/TXDAT.scala +++ b/src/main/scala/coupledL2/tl2chi/TXDAT.scala @@ -62,13 +62,13 @@ class TXDAT(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { val queueCnt = queue.io.count // TODO: this may be imprecise, review this later val pipeStatus_s1_s5 = io.pipeStatusVec - val pipeStatus_s1_s2 = pipeStatus_s1_s5.take(2) - val pipeStatus_s2 = pipeStatus_s1_s2.tail - val pipeStatus_s3_s5 = pipeStatus_s1_s5.drop(2) + val pipeStatus_s1_s3 = pipeStatus_s1_s5.take(3) + val pipeStatus_s2_s3 = pipeStatus_s1_s3.tail + val pipeStatus_s4_s5 = pipeStatus_s1_s5.drop(3) // inflightCnt equals the number of reqs on s2~s5 that may flow into TXDAT soon, plus queueCnt. // The calculation of inflightCnt might be imprecise and leads to false positive back pressue. 
- val inflightCnt = PopCount(Cat(pipeStatus_s3_s5.map(s => s.valid && s.bits.toTXDAT && (s.bits.fromB || s.bits.mshrTask)))) + - PopCount(Cat(pipeStatus_s2.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXDAT, s.bits.fromB)))) + + val inflightCnt = PopCount(Cat(pipeStatus_s4_s5.map(s => s.valid && s.bits.toTXDAT && (s.bits.fromB || s.bits.mshrTask)))) + + PopCount(Cat(pipeStatus_s2_s3.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXDAT, s.bits.fromB)))) + queueCnt assert(inflightCnt <= mshrsAll.U, "in-flight overflow at TXDAT") diff --git a/src/main/scala/coupledL2/tl2chi/TXRSP.scala b/src/main/scala/coupledL2/tl2chi/TXRSP.scala index 53079f4f6..1a136b9b8 100644 --- a/src/main/scala/coupledL2/tl2chi/TXRSP.scala +++ b/src/main/scala/coupledL2/tl2chi/TXRSP.scala @@ -51,13 +51,13 @@ class TXRSP(implicit p: Parameters) extends TL2CHIL2Module { val queueCnt = queue.io.count // TODO: this may be imprecise, review this later val pipeStatus_s1_s5 = io.pipeStatusVec - val pipeStatus_s1_s2 = pipeStatus_s1_s5.take(2) - val pipeStatus_s2 = pipeStatus_s1_s2.tail - val pipeStatus_s3_s5 = pipeStatus_s1_s5.drop(2) + val pipeStatus_s1_s3 = pipeStatus_s1_s5.take(3) + val pipeStatus_s2_s3 = pipeStatus_s1_s3.tail + val pipeStatus_s4_s5 = pipeStatus_s1_s5.drop(3) // inflightCnt equals the number of reqs on s2~s5 that may flow into TXRSP soon, plus queueCnt. // The calculation of inflightCnt might be imprecise and leads to false positive back pressue. 
- val inflightCnt = PopCount(Cat(pipeStatus_s3_s5.map(s => s.valid && s.bits.toTXRSP && (s.bits.fromB || s.bits.mshrTask)))) + - PopCount(Cat(pipeStatus_s2.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXRSP, s.bits.fromB)))) + + val inflightCnt = PopCount(Cat(pipeStatus_s4_s5.map(s => s.valid && s.bits.toTXRSP && (s.bits.fromB || s.bits.mshrTask)))) + + PopCount(Cat(pipeStatus_s2_s3.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXRSP, s.bits.fromB)))) + queueCnt assert(inflightCnt <= mshrsAll.U, "in-flight overflow at TXRSP") From a616c52156c1b0923d913233d6864c02c9f38713 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 16 Apr 2026 10:43:47 +0800 Subject: [PATCH 10/23] fix(Directory): pass origin bit read using RegEnable Passing the origin bit SRAM read data through HoldUnless creates a critical path from the origin bit SRAM read response port to the origin bit SRAM write port. So use RegEnable to pass it instead. --- src/main/scala/coupledL2/Directory.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index fa0e4a4a5..85aac89d8 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -378,8 +378,8 @@ class Directory(implicit p: Parameters) extends L2Module { Some(Module(new SRAMTemplate(Bool(), sets, ways, singlePort = true, shouldReset = true, hasMbist = mbist, hasSramCtl = hasSramCtl))) val origin_bits_r = origin_bit_opt.get.io.r(io.read.fire, io.read.bits.set).resp.data val origin_bits_hold = Wire(Vec(ways, Bool())) - origin_bits_hold := HoldUnless(origin_bits_r, RegNext(io.read.fire, false.B)) - origin_bit_opt.get.io.w(replacerWen, hit_s3, req_s3.set, wayOH_s3) + origin_bits_hold := RegEnable(origin_bits_r, reqValid_s2) + origin_bit_opt.get.io.w(replacerWen, hit_s3, set_s3, wayOH_s3) val rrip_req_type = WireInit(0.U(4.W)) // [3]: 0-firstuse, 1-reuse; // [2]: 0-acquire, 1-release; From
3ccc4210abb513d2897574d9bff74ddb53f91c09 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 2 Apr 2026 16:55:45 +0800 Subject: [PATCH 11/23] fix(RequestBuffer): use ParallelOR to merge compare set result --- src/main/scala/coupledL2/RequestBuffer.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/scala/coupledL2/RequestBuffer.scala b/src/main/scala/coupledL2/RequestBuffer.scala index 2e5eb941c..2c1dd7f0b 100644 --- a/src/main/scala/coupledL2/RequestBuffer.scala +++ b/src/main/scala/coupledL2/RequestBuffer.scala @@ -115,6 +115,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete def conflictMask(a: TaskBundle): UInt = VecInit(io.mshrInfo.map(s => s.valid && addrConflict(a, s.bits) && !s.bits.willFree)).asUInt def conflict(a: TaskBundle): Bool = conflictMask(a).orR + def parallelConflict(a: TaskBundle): Bool = ParallelOR(conflictMask(a).asBools) def conflictMaskFromA(a: TaskBundle): UInt = conflictMask(a) & VecInit(io.mshrInfo.map(_.bits.fromA)).asUInt @@ -144,11 +145,11 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete val in = io.in.bits val full = Cat(buffer.map(_.valid)).andR - // - val mshrConflictMask = conflictMask(in) - val mshrConflictMaskFromA = conflictMaskFromA(in) - dontTouch(mshrConflictMask) - dontTouch(mshrConflictMaskFromA) + + // val mshrConflictMask = conflictMask(in) + // val mshrConflictMaskFromA = conflictMaskFromA(in) + // dontTouch(mshrConflictMask) + // dontTouch(mshrConflictMaskFromA) // incoming Acquire can be merged with late_pf MSHR block val mergeAMask = VecInit(io.mshrInfo.map(s => @@ -177,7 +178,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete def noFreeWay(task: TaskBundle): Bool = noFreeWayForSet(task.set) // flow not allowed when full, or entries might starve - val canFlow = flow.B && !full && !conflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR && 
!noFreeWay(in) + val canFlow = flow.B && !full && !parallelConflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR && !noFreeWay(in) val doFlow = canFlow && io.out.ready // val depMask = buffer.map(e => e.valid && sameAddr(io.in.bits, e.task)) From fb8aac105599a056fe5b785560d96a292a5ffb04 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 2 Apr 2026 17:24:05 +0800 Subject: [PATCH 12/23] Fix(aMergeTask): Fix aMergeTask timing When an aMergeTask is received while the mshr task is firing, the mshr has to pass the aMergeTask to mshr_task_arb immediately, which creates a direct connection from TLBuffer --> SinkA --> reqBuffer --> mshr --> mshrCtl/arb. So don't merge aMergeTask when mshr_task is going to fire. --- src/main/scala/coupledL2/Common.scala | 1 + src/main/scala/coupledL2/RequestBuffer.scala | 2 +- src/main/scala/coupledL2/tl2chi/MSHR.scala | 5 +++-- src/main/scala/coupledL2/tl2tl/MSHR.scala | 5 +++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index cf69bb6a5..8025d1fe8 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -262,6 +262,7 @@ class MSHRInfo(implicit p: Parameters) extends L2Bundle with HasTLChannelBits { val mergeA = Bool() // whether the mshr already merge an acquire(avoid alias merge) val w_grantfirst = Bool() + val w_grantlast = Bool() val s_release = Bool() val s_refill = Bool() val s_cmoresp = Bool() diff --git a/src/main/scala/coupledL2/RequestBuffer.scala b/src/main/scala/coupledL2/RequestBuffer.scala index 2c1dd7f0b..4674924ea 100644 --- a/src/main/scala/coupledL2/RequestBuffer.scala +++ b/src/main/scala/coupledL2/RequestBuffer.scala @@ -153,7 +153,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete // incoming Acquire can be merged with late_pf MSHR block val mergeAMask = VecInit(io.mshrInfo.map(s => - s.valid && s.bits.isPrefetch && sameAddr(in, s.bits) &&
!s.bits.willFree && !s.bits.dirHit && !s.bits.s_refill && + s.valid && s.bits.isPrefetch && sameAddr(in, s.bits) && !s.bits.w_grantlast && in.fromA && (in.opcode === AcquireBlock || in.opcode === AcquirePerm) && !s.bits.mergeA && !(in.param === NtoT && s.bits.param === NtoB) )).asUInt val mergeA = mergeAMask.orR diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 7773465e9..b671e6459 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -808,8 +808,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { // Add merge grant task for Acquire and late Prefetch mp_grant.mergeA := mergeA || io.aMergeTask.valid val merge_task_r = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) - val merge_task = Mux(io.aMergeTask.valid, io.aMergeTask.bits, merge_task_r) - val merge_task_isKeyword = Mux(io.aMergeTask.valid, io.aMergeTask.bits.isKeyword.getOrElse(false.B), merge_task_r.isKeyword.getOrElse(false.B) ) + val merge_task = merge_task_r + val merge_task_isKeyword = merge_task_r.isKeyword.getOrElse(false.B) mp_grant.aMergeTask.off := merge_task.off mp_grant.aMergeTask.alias.foreach(_ := merge_task.alias.getOrElse(0.U)) @@ -1340,6 +1340,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { io.msInfo.bits.param := req.param io.msInfo.bits.mergeA := mergeA io.msInfo.bits.w_grantfirst := state.w_grantfirst + io.msInfo.bits.w_grantlast := state.w_grantlast io.msInfo.bits.s_release := state.s_release io.msInfo.bits.s_refill := state.s_refill io.msInfo.bits.s_cmoresp := state.s_cmoresp diff --git a/src/main/scala/coupledL2/tl2tl/MSHR.scala b/src/main/scala/coupledL2/tl2tl/MSHR.scala index 05c5611bb..179faca9e 100644 --- a/src/main/scala/coupledL2/tl2tl/MSHR.scala +++ b/src/main/scala/coupledL2/tl2tl/MSHR.scala @@ -374,8 +374,8 @@ class MSHR(implicit p: Parameters) extends L2Module { // Add 
merge grant task for Acquire and late Prefetch mp_grant.mergeA := mergeA || io.aMergeTask.valid val merge_task_r = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) - val merge_task = Mux(io.aMergeTask.valid, io.aMergeTask.bits, merge_task_r) - val merge_task_isKeyword = Mux(io.aMergeTask.valid, io.aMergeTask.bits.isKeyword.getOrElse(false.B), merge_task_r.isKeyword.getOrElse(false.B) ) + val merge_task = merge_task_r + val merge_task_isKeyword = merge_task_r.isKeyword.getOrElse(false.B) mp_grant.aMergeTask.off := merge_task.off mp_grant.aMergeTask.alias.foreach(_ := merge_task.alias.getOrElse(0.U)) @@ -571,6 +571,7 @@ class MSHR(implicit p: Parameters) extends L2Module { io.msInfo.bits.param := req.param io.msInfo.bits.mergeA := mergeA io.msInfo.bits.w_grantfirst := state.w_grantfirst + io.msInfo.bits.w_grantlast := state.w_grantlast io.msInfo.bits.s_refill := state.s_refill io.msInfo.bits.s_release := state.s_release io.msInfo.bits.s_cmoresp := true.B From c48509c9c74927f16f865e84b03f8603138a3b4a Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 2 Apr 2026 18:23:12 +0800 Subject: [PATCH 13/23] fix(CustomL1Hint): refactor CustomL1Hint for better timing --- src/main/scala/coupledL2/CustomL1Hint.scala | 47 ++++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/src/main/scala/coupledL2/CustomL1Hint.scala b/src/main/scala/coupledL2/CustomL1Hint.scala index 3cd350f31..d19200319 100644 --- a/src/main/scala/coupledL2/CustomL1Hint.scala +++ b/src/main/scala/coupledL2/CustomL1Hint.scala @@ -67,13 +67,13 @@ class CustomL1Hint(implicit p: Parameters) extends L2Module { val mshr_Grant_s1 = task_s1.valid && mshrReq_s1 && (isGrant(task_s1.bits) || isMergeGrant(task_s1.bits)) val chn_Release_s1 = task_s1.valid && !mshrReq_s1 && isRelease(task_s1.bits) - val enqValid_s1 = mshr_GrantData_s1 || mshr_Grant_s1 || chn_Release_s1 - val enqSource_s1 = Mux(task_s1.bits.mergeA, 
task_s1.bits.aMergeTask.sourceId, task_s1.bits.sourceId) - val enqKeyWord_s1 = Mux(task_s1.bits.mergeA, + val enqBits_s1 = Wire(new HintQueueEntry) + enqBits_s1.source := Mux(task_s1.bits.mergeA, task_s1.bits.aMergeTask.sourceId, task_s1.bits.sourceId) + enqBits_s1.isKeyword := Mux(task_s1.bits.mergeA, task_s1.bits.aMergeTask.isKeyword.getOrElse(false.B), task_s1.bits.isKeyword.getOrElse(false.B) ) - val enqOpcode_s1 = ParallelPriorityMux( + enqBits_s1.opcode := ParallelPriorityMux( Seq( mshr_Grant_s1 -> Grant, mshr_GrantData_s1 -> GrantData, @@ -84,10 +84,11 @@ class CustomL1Hint(implicit p: Parameters) extends L2Module { // Hint for "chnTask Hit" will fire@s3 val chn_Grant_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && isGrant(task_s3.bits) val chn_GrantData_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && isGrantData(task_s3.bits) + val enqBits_s3 = Wire(new HintQueueEntry) val enqValid_s3 = chn_Grant_s3 || chn_GrantData_s3 - val enqSource_s3 = task_s3.bits.sourceId - val enqKeyWord_s3 = task_s3.bits.isKeyword.getOrElse(false.B) - val enqOpcode_s3 = ParallelPriorityMux( + enqBits_s3.source := task_s3.bits.sourceId + enqBits_s3.isKeyword := task_s3.bits.isKeyword.getOrElse(false.B) + enqBits_s3.opcode := ParallelPriorityMux( Seq( chn_Grant_s3 -> Grant, chn_GrantData_s3 -> GrantData @@ -98,22 +99,26 @@ class CustomL1Hint(implicit p: Parameters) extends L2Module { val hintEntries = mshrsAll val hintEntriesWidth = log2Ceil(hintEntries) val hintQueue = Module(new Queue(new HintQueueEntry, hintEntries)) + val canFlow_s1 = !hintQueue.io.deq.valid || hintQueue.io.count === 1.U && hintQueue.io.deq.fire + val valid_s1 = mshr_GrantData_s1 || mshr_Grant_s1 || chn_Release_s1 + val flow_s1, arbOut_s1, enq_s3 = Wire(Decoupled(new HintQueueEntry)) + // noSpaceForSinkReq in GrantBuffer may ensure that these queues will not overflow + assert(enq_s3.ready || !enq_s3.valid) // this will have at most 2 entries - val hint_s1Queue = Module(new Queue(new HintQueueEntry, 4, 
flow = true)) - hint_s1Queue.io.enq.valid := enqValid_s1 - hint_s1Queue.io.enq.bits.opcode := enqOpcode_s1 - hint_s1Queue.io.enq.bits.source := enqSource_s1 - hint_s1Queue.io.enq.bits.isKeyword := enqKeyWord_s1 - hint_s1Queue.io.deq.ready := hintQueue.io.enq.ready && !enqValid_s3 - // WARNING:TODO: ensure queue will never overflow - assert(hint_s1Queue.io.enq.ready, "hint_s1Queue should never be full") - assert(hintQueue.io.enq.ready, "hintQueue should never be full") - - hintQueue.io.enq.valid := enqValid_s3 || hint_s1Queue.io.deq.valid - hintQueue.io.enq.bits.opcode := Mux(enqValid_s3, enqOpcode_s3, hint_s1Queue.io.deq.bits.opcode) - hintQueue.io.enq.bits.source := Mux(enqValid_s3, enqSource_s3, hint_s1Queue.io.deq.bits.source) - hintQueue.io.enq.bits.isKeyword := Mux(enqValid_s3, enqKeyWord_s3, hint_s1Queue.io.deq.bits.isKeyword) + val hint_s1Queue = Module(new Pipeline(new HintQueueEntry)) + hint_s1Queue.io.in.valid := valid_s1 && (!canFlow_s1 || !flow_s1.ready) + hint_s1Queue.io.in.bits := enqBits_s1 + assert(!valid_s1 || hint_s1Queue.io.in.ready || flow_s1.ready) + + flow_s1.valid := valid_s1 && canFlow_s1 + flow_s1.bits := enqBits_s1 + + arb(Seq(hint_s1Queue.io.out, flow_s1), arbOut_s1, Some("s1")) + + enq_s3.valid := enqValid_s3 + enq_s3.bits := enqBits_s3 + arb(Seq(enq_s3, arbOut_s1), hintQueue.io.enq, Some("Hint")) hintQueue.io.deq.ready := io.l1Hint.ready io.l1Hint.valid := hintQueue.io.deq.valid && hintQueue.io.deq.bits.opcode === GrantData From d8cc9e8e55c0b27fa75a974073dd520f59a1a535 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 16 Apr 2026 16:43:08 +0800 Subject: [PATCH 14/23] fix(ReqBuf, MSHR): fix timing of ReqBuf output and aMerge --- src/main/scala/coupledL2/Common.scala | 1 + src/main/scala/coupledL2/RequestBuffer.scala | 17 ++++++++-------- .../scala/coupledL2/tl2chi/MMIOBridge.scala | 20 +++++++++---------- src/main/scala/coupledL2/tl2chi/MSHR.scala | 10 +++++----- src/main/scala/coupledL2/tl2tl/MSHR.scala | 10 
+++++----- 5 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index 8025d1fe8..d86e61ff0 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -263,6 +263,7 @@ class MSHRInfo(implicit p: Parameters) extends L2Bundle with HasTLChannelBits { val w_grantfirst = Bool() val w_grantlast = Bool() + val w_grant = Bool() val s_release = Bool() val s_refill = Bool() val s_cmoresp = Bool() diff --git a/src/main/scala/coupledL2/RequestBuffer.scala b/src/main/scala/coupledL2/RequestBuffer.scala index 4674924ea..8e5d46dfc 100644 --- a/src/main/scala/coupledL2/RequestBuffer.scala +++ b/src/main/scala/coupledL2/RequestBuffer.scala @@ -152,10 +152,11 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete // dontTouch(mshrConflictMaskFromA) // incoming Acquire can be merged with late_pf MSHR block - val mergeAMask = VecInit(io.mshrInfo.map(s => - s.valid && s.bits.isPrefetch && sameAddr(in, s.bits) && !s.bits.w_grantlast && + val mergeAMask = VecInit(io.mshrInfo.map { case s => + val mshrInflight = !(s.bits.w_grantlast && s.bits.w_grant) + s.valid && s.bits.isPrefetch && sameAddr(in, s.bits) && !s.bits.dirHit && mshrInflight && in.fromA && (in.opcode === AcquireBlock || in.opcode === AcquirePerm) && !s.bits.mergeA && !(in.param === NtoT && s.bits.param === NtoB) - )).asUInt + }).asUInt val mergeA = mergeAMask.orR val mergeAId = OHToUInt(mergeAMask) io.aMergeTask.valid := io.in.valid && mergeA @@ -233,10 +234,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete /* ======== Issue ======== */ issueArb.io.in zip buffer foreach { case(in, e) => - // when io.out.valid, we temporarily stall all entries of the same set - val pipeBlockOut = io.out.valid && sameSet(e.task, io.out.bits) - - in.valid := e.valid && e.rdy && !pipeBlockOut + in.valid := e.valid && e.rdy in.bits := e } @@ -299,7 +297,10 
@@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete chosenQ.io.deq.ready := io.out.ready || cancel io.out.valid := chosenQValid && !cancel || io.in.valid && canFlow - io.out.bits := Mux(canFlow, io.in.bits, chosenQ.io.deq.bits.bits.task) + io.out.bits := { + if (!flow) chosenQ.io.deq.bits.bits.task + else Mux(chosenQValid, chosenQ.io.deq.bits.bits.task, io.in.bits) + } when(chosenQ.io.deq.fire && !cancel) { buffer(chosenQ.io.deq.bits.id).valid := false.B diff --git a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala index 410d000cc..f965d08d6 100644 --- a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala @@ -89,6 +89,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo val id = Input(UInt()) val pCrd = new PCrdQueryBundle val waitOnReadReceipt = Option.when(needRR)(Output(Bool())) + val isRead = Output(Bool()) // only for better timing }) val s_txreq = RegInit(true.B) @@ -115,7 +116,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo val denied = Reg(Bool()) val corrupt = Reg(Bool()) val traceTag = Reg(Bool()) - val isRead = req.opcode === Get + val isRead = RegEnable(io.req.bits.opcode === Get, false.B, io.req.fire) val isBackTypeMM = req.user.lift(MemBackTypeMM).getOrElse(false.B) val isPageTypeNC = req.user.lift(MemPageTypeNC).getOrElse(false.B) @@ -228,11 +229,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo txreq.bits.qos := Fill(QOS_WIDTH, 1.U(1.W)) - 1.U txreq.bits.tgtID := SAM(sam).lookup(txreq.bits.addr) txreq.bits.txnID := io.id - txreq.bits.opcode := ParallelLookUp(req.opcode, Seq( - Get -> ReadNoSnp, - PutFullData -> WriteNoSnpPtl, - PutPartialData -> WriteNoSnpPtl - )) + txreq.bits.opcode := Mux(isRead, ReadNoSnp, WriteNoSnpPtl) txreq.bits.size := req.size txreq.bits.addr := req.address txreq.bits.ns := enableNS.B @@ 
-325,6 +322,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo io.pCrd.query.bits.srcID := srcID io.waitOnReadReceipt.foreach(_ := !w_readreceipt.get && s_txreq) + io.isRead := isRead /** * performance counters @@ -384,12 +382,14 @@ class MMIOBridgeImp(outer: MMIOBridge) extends LazyModuleImp(outer) } val txreqArb = Module(new RRArbiterInit(chiselTypeOf(io.tx.req.bits), mmioBridgeSize)) - for ((a, req) <- txreqArb.io.in.zip(entries.map(_.io.chi.tx.req))) { - a <> req - val isReadNoSnp = req.bits.opcode === ReadNoSnp - val block = isReadNoSnp && waitOnReadReceipt + for ((a, entry) <- txreqArb.io.in.zip(entries)) { + val req = entry.io.chi.tx.req + val isRead = entry.io.isRead + val block = isRead && waitOnReadReceipt + assert(!req.valid || !isRead || req.bits.opcode === ReadNoSnp) req.ready := a.ready && !block a.valid := req.valid && !block + a.bits := req.bits } io.tx.req <> txreqArb.io.out // arb(entries.map(_.io.chi.tx.req), io.tx.req, Some("mmio_txreq")) diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index b671e6459..b536c1564 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -806,15 +806,14 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { mp_grant.reqSource := 0.U(MemReqSource.reqSourceBits.W) // Add merge grant task for Acquire and late Prefetch - mp_grant.mergeA := mergeA || io.aMergeTask.valid - val merge_task_r = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) - val merge_task = merge_task_r - val merge_task_isKeyword = merge_task_r.isKeyword.getOrElse(false.B) + mp_grant.mergeA := mergeA + + val merge_task = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) mp_grant.aMergeTask.off := merge_task.off mp_grant.aMergeTask.alias.foreach(_ := merge_task.alias.getOrElse(0.U)) mp_grant.aMergeTask.vaddr.foreach(_ := 
merge_task.vaddr.getOrElse(0.U)) - mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task_isKeyword) + mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task.isKeyword.getOrElse(false.B)) mp_grant.aMergeTask.opcode := odOpGen(merge_task.opcode) mp_grant.aMergeTask.param := MuxLookup( // Acquire -> Grant merge_task.param, @@ -1341,6 +1340,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { io.msInfo.bits.mergeA := mergeA io.msInfo.bits.w_grantfirst := state.w_grantfirst io.msInfo.bits.w_grantlast := state.w_grantlast + io.msInfo.bits.w_grant := state.w_grant io.msInfo.bits.s_release := state.s_release io.msInfo.bits.s_refill := state.s_refill io.msInfo.bits.s_cmoresp := state.s_cmoresp diff --git a/src/main/scala/coupledL2/tl2tl/MSHR.scala b/src/main/scala/coupledL2/tl2tl/MSHR.scala index 179faca9e..902d1f403 100644 --- a/src/main/scala/coupledL2/tl2tl/MSHR.scala +++ b/src/main/scala/coupledL2/tl2tl/MSHR.scala @@ -372,15 +372,14 @@ class MSHR(implicit p: Parameters) extends L2Module { mp_grant.corrupt := corrupt // Add merge grant task for Acquire and late Prefetch - mp_grant.mergeA := mergeA || io.aMergeTask.valid - val merge_task_r = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) - val merge_task = merge_task_r - val merge_task_isKeyword = merge_task_r.isKeyword.getOrElse(false.B) + mp_grant.mergeA := mergeA + + val merge_task = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) mp_grant.aMergeTask.off := merge_task.off mp_grant.aMergeTask.alias.foreach(_ := merge_task.alias.getOrElse(0.U)) mp_grant.aMergeTask.vaddr.foreach(_ := merge_task.vaddr.getOrElse(0.U)) - mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task_isKeyword) + mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task.isKeyword.getOrElse(false.B)) mp_grant.aMergeTask.opcode := odOpGen(merge_task.opcode) mp_grant.aMergeTask.param := MuxLookup( // Acquire -> Grant merge_task.param, @@ -572,6 +571,7 @@ class 
MSHR(implicit p: Parameters) extends L2Module { io.msInfo.bits.mergeA := mergeA io.msInfo.bits.w_grantfirst := state.w_grantfirst io.msInfo.bits.w_grantlast := state.w_grantlast + io.msInfo.bits.w_grant := state.w_grant io.msInfo.bits.s_refill := state.s_refill io.msInfo.bits.s_release := state.s_release io.msInfo.bits.s_cmoresp := true.B From 33d5b15f11d283f5f0638077b001838314013be2 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 16 Apr 2026 12:14:02 +0800 Subject: [PATCH 15/23] fix(CustomL1Hint): refactor CustomL1Hint again --- src/main/scala/coupledL2/CustomL1Hint.scala | 69 ++++++++----------- src/main/scala/coupledL2/RequestArb.scala | 9 ++- .../scala/coupledL2/tl2chi/MainPipe.scala | 7 +- src/main/scala/coupledL2/tl2chi/Slice.scala | 3 +- src/main/scala/coupledL2/tl2tl/MainPipe.scala | 8 ++- src/main/scala/coupledL2/tl2tl/Slice.scala | 3 +- 6 files changed, 48 insertions(+), 51 deletions(-) diff --git a/src/main/scala/coupledL2/CustomL1Hint.scala b/src/main/scala/coupledL2/CustomL1Hint.scala index d19200319..11d27e812 100644 --- a/src/main/scala/coupledL2/CustomL1Hint.scala +++ b/src/main/scala/coupledL2/CustomL1Hint.scala @@ -26,13 +26,14 @@ import coupledL2.utils._ class HintQueueEntry(implicit p: Parameters) extends L2Bundle { val source = UInt(sourceIdBits.W) - val opcode = UInt(3.W) + val isGrantData = Bool() val isKeyword = Bool() } class CustomL1HintIOBundle(implicit p: Parameters) extends L2Bundle { // input information - val s1 = Flipped(ValidIO(new TaskBundle())) + val mshrHintQInfo = Flipped(ValidIO(new TaskBundle())) + val sinkCHintQInfo = Flipped(ValidIO(new TaskBundle())) val s3 = new L2Bundle { val task = Flipped(ValidIO(new TaskBundle())) val need_mshr = Input(Bool()) @@ -47,53 +48,43 @@ class CustomL1HintIOBundle(implicit p: Parameters) extends L2Bundle { class CustomL1Hint(implicit p: Parameters) extends L2Module { val io = IO(new CustomL1HintIOBundle) - val task_s1 = io.s1 + val mshr_s1 = io.mshrHintQInfo.bits + val 
mshrMerge_s1 = mshr_s1.aMergeTask + val sinkC_s1 = io.sinkCHintQInfo.bits val task_s3 = io.s3.task - val mshrReq_s1 = task_s1.bits.mshrTask val mshrReq_s3 = task_s3.bits.mshrTask - val mergeA_s1 = task_s1.bits.mergeA val need_mshr_s3 = io.s3.need_mshr - def isGrantData(t: TaskBundle): Bool = t.fromA && t.opcode === GrantData - def isGrant(t: TaskBundle): Bool = t.fromA && t.opcode === Grant - def isHintAck(t: TaskBundle): Bool = t.fromA && t.opcode === HintAck // HintAck has no effect on Hint - def isRelease(t: TaskBundle): Bool = t.fromC && (t.opcode === Release || t.opcode === ReleaseData) - def isMergeGrantData(t: TaskBundle): Bool = t.fromA && t.mergeA && t.aMergeTask.opcode === GrantData - def isMergeGrant(t: TaskBundle): Bool = t.fromA && t.mergeA && t.aMergeTask.opcode === Grant - // ==================== Hint Generation ==================== // Hint for "MSHRTask and ReleaseAck" will fire@s1 - val mshr_GrantData_s1 = task_s1.valid && mshrReq_s1 && (isGrantData(task_s1.bits) || isMergeGrantData(task_s1.bits)) - val mshr_Grant_s1 = task_s1.valid && mshrReq_s1 && (isGrant(task_s1.bits) || isMergeGrant(task_s1.bits)) - val chn_Release_s1 = task_s1.valid && !mshrReq_s1 && isRelease(task_s1.bits) + // val mshr_GrantData_s1 = task_s1.valid && mshrReq_s1 && (isGrantData(task_s1.bits) || isMergeGrantData(task_s1.bits)) + // val mshr_Grant_s1 = task_s1.valid && mshrReq_s1 && (isGrant(task_s1.bits) || isMergeGrant(task_s1.bits)) + // val chn_Release_s1 = task_s1.valid && !mshrReq_s1 && isRelease(task_s1.bits) + val mshr_GrantData_s1 = io.mshrHintQInfo.valid && (mshr_s1.fromA && (mshr_s1.opcode === GrantData || (mshr_s1.mergeA && mshrMerge_s1.opcode === GrantData))) + val mshr_Grant_s1 = io.mshrHintQInfo.valid && (mshr_s1.fromA && (mshr_s1.opcode === Grant || (mshr_s1.mergeA && mshrMerge_s1.opcode === Grant))) + val chn_Release_s1 = io.sinkCHintQInfo.valid + assert(Mux(chn_Release_s1, sinkC_s1.fromC, true.B)) + assert(Mux(chn_Release_s1, sinkC_s1.opcode === Release || 
sinkC_s1.opcode === ReleaseData, true.B)) val enqBits_s1 = Wire(new HintQueueEntry) - enqBits_s1.source := Mux(task_s1.bits.mergeA, task_s1.bits.aMergeTask.sourceId, task_s1.bits.sourceId) - enqBits_s1.isKeyword := Mux(task_s1.bits.mergeA, - task_s1.bits.aMergeTask.isKeyword.getOrElse(false.B), - task_s1.bits.isKeyword.getOrElse(false.B) - ) - enqBits_s1.opcode := ParallelPriorityMux( - Seq( - mshr_Grant_s1 -> Grant, - mshr_GrantData_s1 -> GrantData, - chn_Release_s1 -> ReleaseAck - ) - ) + // enqBits_s1.source := Mux(task_s1.bits.mergeA, task_s1.bits.aMergeTask.sourceId, task_s1.bits.sourceId) + enqBits_s1.source := Mux1H(Seq( + (io.mshrHintQInfo.valid && mshr_s1.mergeA) -> mshrMerge_s1.sourceId, + (io.mshrHintQInfo.valid && !mshr_s1.mergeA) -> mshr_s1.sourceId, + io.sinkCHintQInfo.valid -> sinkC_s1.sourceId + )) + OneHot.checkOneHot(Cat(io.mshrHintQInfo.valid && mshr_s1.mergeA, io.mshrHintQInfo.valid && !mshr_s1.mergeA, io.sinkCHintQInfo.valid)) + enqBits_s1.isKeyword := Mux(mshr_s1.mergeA, mshrMerge_s1.isKeyword.getOrElse(false.B), mshr_s1.isKeyword.getOrElse(false.B)) + enqBits_s1.isGrantData := mshr_GrantData_s1 // Hint for "chnTask Hit" will fire@s3 - val chn_Grant_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && isGrant(task_s3.bits) - val chn_GrantData_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && isGrantData(task_s3.bits) + val chn_Grant_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && task_s3.bits.fromA && task_s3.bits.opcode === Grant + val chn_GrantData_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && task_s3.bits.fromA && task_s3.bits.opcode === GrantData val enqBits_s3 = Wire(new HintQueueEntry) val enqValid_s3 = chn_Grant_s3 || chn_GrantData_s3 enqBits_s3.source := task_s3.bits.sourceId enqBits_s3.isKeyword := task_s3.bits.isKeyword.getOrElse(false.B) - enqBits_s3.opcode := ParallelPriorityMux( - Seq( - chn_Grant_s3 -> Grant, - chn_GrantData_s3 -> GrantData - ) - ) + enqBits_s3.isGrantData := chn_GrantData_s3 // 
==================== Hint Queue ==================== val hintEntries = mshrsAll @@ -101,7 +92,7 @@ class CustomL1Hint(implicit p: Parameters) extends L2Module { val hintQueue = Module(new Queue(new HintQueueEntry, hintEntries)) val canFlow_s1 = !hintQueue.io.deq.valid || hintQueue.io.count === 1.U && hintQueue.io.deq.fire val valid_s1 = mshr_GrantData_s1 || mshr_Grant_s1 || chn_Release_s1 - val flow_s1, arbOut_s1, enq_s3 = Wire(Decoupled(new HintQueueEntry)) + val flow_s1, enq_s3 = Wire(Decoupled(new HintQueueEntry)) // noSpaceForSinkReq in GrantBuffer may ensure that these queues will not overflow assert(enq_s3.ready || !enq_s3.valid) @@ -114,14 +105,12 @@ class CustomL1Hint(implicit p: Parameters) extends L2Module { flow_s1.valid := valid_s1 && canFlow_s1 flow_s1.bits := enqBits_s1 - arb(Seq(hint_s1Queue.io.out, flow_s1), arbOut_s1, Some("s1")) - enq_s3.valid := enqValid_s3 enq_s3.bits := enqBits_s3 - arb(Seq(enq_s3, arbOut_s1), hintQueue.io.enq, Some("Hint")) + fastArb(Seq(enq_s3, hint_s1Queue.io.out, flow_s1), hintQueue.io.enq, Some("Hint")) hintQueue.io.deq.ready := io.l1Hint.ready - io.l1Hint.valid := hintQueue.io.deq.valid && hintQueue.io.deq.bits.opcode === GrantData + io.l1Hint.valid := hintQueue.io.deq.valid && hintQueue.io.deq.bits.isGrantData io.l1Hint.bits.sourceId := hintQueue.io.deq.bits.source io.l1Hint.bits.isKeyword := hintQueue.io.deq.bits.isKeyword } \ No newline at end of file diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 6e7c05628..9156a52c0 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -48,7 +48,8 @@ class RequestArb(implicit p: Parameters) extends L2Module /* send task to mainpipe */ val taskToPipe_s2 = ValidIO(new TaskBundle()) /* send s1 task info to mainpipe to help hint */ - val taskInfo_s1 = ValidIO(new TaskBundle()) + val mshrHintQInfo = ValidIO(new TaskBundle) + val sinkCHintQInfo = ValidIO(new TaskBundle) /* send 
mshrBuf read request */ val refillBufRead_s2 = ValidIO(new MSHRBufRead) @@ -168,8 +169,10 @@ class RequestArb(implicit p: Parameters) extends L2Module s1_cango := task_s1.valid && !mshr_replRead_stall s1_fire := s1_cango && s2_ready - io.taskInfo_s1.valid := s1_fire - io.taskInfo_s1.bits := task_s1.bits + io.mshrHintQInfo.valid := mshr_task_s1.valid && !mshr_replRead_stall && s2_ready + io.mshrHintQInfo.bits := mshr_task_s1.bits + io.sinkCHintQInfo.valid := io.sinkC.fire + io.sinkCHintQInfo.bits := io.sinkC.bits /* Meta read request */ // ^ only sinkA/B/C tasks need to read directory diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 03098a843..20bdc3b4a 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -34,7 +34,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* receive task from arbiter at stage 2 */ val taskFromArb_s2 = Flipped(ValidIO(new TaskBundle())) /* status from arbiter at stage1 */ - val taskInfo_s1 = Flipped(ValidIO(new TaskBundle())) + val mshrHintQInfo = Flipped(ValidIO(new TaskBundle)) + val sinkCHintQInfo = Flipped(ValidIO(new TaskBundle)) /* handle set conflict in req arb */ val fromReqArb = Input(new Bundle() { @@ -820,8 +821,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // TODO: check this val customL1Hint = Module(new CustomL1Hint) - customL1Hint.io.s1 := io.taskInfo_s1 - // customL1Hint.io.s2 := task_s2 + customL1Hint.io.mshrHintQInfo := io.mshrHintQInfo + customL1Hint.io.sinkCHintQInfo := io.sinkCHintQInfo customL1Hint.io.s3.task := task_s3 // overwrite opcode: if sinkReq can respond, use sink_resp_s3.bits.opcode = Grant/GrantData diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index 4f59ca3db..d006e71bf 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ 
b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -111,7 +111,8 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] reqBuf.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2 mainPipe.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2 - mainPipe.io.taskInfo_s1 := reqArb.io.taskInfo_s1 + mainPipe.io.mshrHintQInfo := reqArb.io.mshrHintQInfo + mainPipe.io.sinkCHintQInfo := reqArb.io.sinkCHintQInfo mainPipe.io.fromReqArb.status_s1 := reqArb.io.status_s1 mainPipe.io.bufResp := sinkC.io.bufResp mainPipe.io.dirResp_s3 := directory.io.resp.bits diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index c1988309b..caaea8b20 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -34,6 +34,9 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { val io = IO(new Bundle() { /* receive task from arbiter at stage 2 */ val taskFromArb_s2 = Flipped(ValidIO(new TaskBundle())) + /* receive s1 info for Hint */ + val mshrHintQInfo = Flipped(ValidIO(new TaskBundle)) + val sinkCHintQInfo = Flipped(ValidIO(new TaskBundle)) /* handle set conflict in req arb */ val fromReqArb = Input(new Bundle() { @@ -97,8 +100,6 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { /* send Hint to L1 */ val l1Hint = DecoupledIO(new L2ToL1Hint()) - /* receive s1 info for Hint */ - val taskInfo_s1 = Flipped(ValidIO(new TaskBundle())) /* send prefetchTrain to Prefetch to trigger a prefetch req */ val prefetchTrain = prefetchOpt.map(_ => DecoupledIO(new PrefetchTrain)) @@ -544,7 +545,8 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { val customL1Hint = Module(new CustomL1Hint) - customL1Hint.io.s1 := io.taskInfo_s1 + customL1Hint.io.mshrHintQInfo := io.mshrHintQInfo + customL1Hint.io.sinkCHintQInfo := io.sinkCHintQInfo customL1Hint.io.s3.task := task_s3 // overwrite opcode: if sinkReq can respond, use 
sink_resp_s3.bits.opcode = Grant/GrantData diff --git a/src/main/scala/coupledL2/tl2tl/Slice.scala b/src/main/scala/coupledL2/tl2tl/Slice.scala index 88cf00a34..7f5356b03 100644 --- a/src/main/scala/coupledL2/tl2tl/Slice.scala +++ b/src/main/scala/coupledL2/tl2tl/Slice.scala @@ -108,7 +108,8 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] { mainPipe.io.releaseBufResp_s3.valid := RegNext(releaseBuf.io.r.valid, false.B) mainPipe.io.releaseBufResp_s3.bits := releaseBuf.io.resp.data mainPipe.io.fromReqArb.status_s1 := reqArb.io.status_s1 - mainPipe.io.taskInfo_s1 <> reqArb.io.taskInfo_s1 + mainPipe.io.mshrHintQInfo := reqArb.io.mshrHintQInfo + mainPipe.io.sinkCHintQInfo := reqArb.io.sinkCHintQInfo // priority: nested-ReleaseData / probeAckData [NEW] > mainPipe DS rdata [OLD] // 0/1 might happen at the same cycle with 2 From 9335658982a691650b4e632a8fb07411e712b4a0 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Fri, 3 Apr 2026 20:22:40 +0800 Subject: [PATCH 16/23] fix(MainPipe): fix cmo logic timing Current cmo logic will use result from replace logic by using dirResult.meta. So decouple replace logic and cmo logic. 
--- src/main/scala/coupledL2/Directory.scala | 6 +++++- src/main/scala/coupledL2/RequestArb.scala | 4 ++-- src/main/scala/coupledL2/tl2chi/MainPipe.scala | 9 +++++---- src/main/scala/coupledL2/tl2chi/Slice.scala | 1 + 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index 85aac89d8..7f058b241 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -140,6 +140,7 @@ class Directory(implicit p: Parameters) extends L2Module { val errOnSnp = Bool() val wayOH = Output(UInt(cacheParams.ways.W)) val replWayOH = Output(UInt(cacheParams.ways.W)) + val cmoHitInvalid = Output(Bool()) }) def invalid_way_sel(metaVec: Seq[MetaEntry]) = { @@ -209,6 +210,7 @@ class Directory(implicit p: Parameters) extends L2Module { val reqValid_s3 = RegNext(reqValid_s2, false.B) val req_s2 = RegEnable(io.read.bits, 0.U.asTypeOf(io.read.bits), io.read.fire) val req_s3 = RegEnable(req_s2, 0.U.asTypeOf(req_s2), reqValid_s2) + val cmoWayOH_s3 = RegEnable(UIntToOH(req_s2.cmoWay), reqValid_s2) val refillReqValid_s2 = RegNext(io.read.fire && io.read.bits.refill, false.B) val refillReqValid_s3 = RegNext(refillReqValid_s2, false.B) @@ -293,7 +295,7 @@ class Directory(implicit p: Parameters) extends L2Module { MaskToOH(freeWayMask_s3) ) val hit_s3 = Cat(hitVec).orR || req_s3.cmoAll - val wayOH_s3 = Mux(req_s3.cmoAll, UIntToOH(req_s3.cmoWay), Mux(hit_s3, hitOH, finalReplOH)) + val wayOH_s3 = Mux(req_s3.cmoAll, cmoWayOH_s3, Mux(hit_s3, hitOH, finalReplOH)) val way_s3 = OHToUInt(wayOH_s3) val meta_s3 = Mux1H(wayOH_s3, metaAll_s3) val metaOnHit_s3 = Mux1H(hitOH, metaAll_s3) // only valid when hit @@ -432,6 +434,8 @@ class Directory(implicit p: Parameters) extends L2Module { replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) } + io.cmoHitInvalid := Mux1H(cmoWayOH_s3, metaAll_s3).state === MetaData.INVALID + /* ====== Reset ====== */ 
XSPerfAccumulate("dirRead_cnt", io.read.fire) diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 9156a52c0..fa166d8ba 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -188,8 +188,8 @@ class RequestArb(implicit p: Parameters) extends L2Module io.dirRead_s1.bits.replacerInfo.refill_prefetch := s1_needs_replRead && (mshr_task_s1.bits.opcode === HintAck && mshr_task_s1.bits.dsWen) io.dirRead_s1.bits.refill := s1_needs_replRead io.dirRead_s1.bits.mshrId := task_s1.bits.mshrId - io.dirRead_s1.bits.cmoAll := task_s1.bits.cmoAll - io.dirRead_s1.bits.cmoWay := task_s1.bits.way + io.dirRead_s1.bits.cmoAll := A_task.cmoAll + io.dirRead_s1.bits.cmoWay := A_task.way // block same-set A req io.s1Entrance.valid := mshr_task_s1.valid && s2_ready && mshr_task_s1.bits.metaWen || io.sinkC.fire || io.sinkB.fire diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 20bdc3b4a..1fe722506 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -58,6 +58,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val errOnSnp_s3 = Input(Bool()) val dirWayOH_s3 = Input(UInt(cacheParams.ways.W)) val dirReplWayOH_s3 = Input(UInt(cacheParams.ways.W)) + val cmoHitInvalid = Input(Bool()) val replResp = Flipped(ValidIO(new ReplacerResult())) /* send task to MSHRCtl at stage 3 */ @@ -157,7 +158,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val meta_s3 = dirResult_s3.meta val metaOnHit_s3 = io.metaOnHit_s3 val req_s3 = task_s3.bits - val cmoHitInvalid = io.cmoAllBlock.getOrElse(false.B) && (meta_s3.state === INVALID) + val cmoHitInvalid = io.cmoAllBlock.getOrElse(false.B) && io.cmoHitInvalid val mshr_req_s3 = req_s3.mshrTask val sink_req_s3 = !mshr_req_s3 @@ -642,18 +643,18 @@ class MainPipe(implicit p: 
Parameters) extends TL2CHIL2Module with HasCHIOpcodes val isTXRSP_s3 = Mux( mshr_req_s3, mshr_snpRespX_s3, - req_s3.fromB && !need_mshr_s3 && !hasData_s3 + req_s3.fromB && !need_mshr_s3_b && !hasData_s3 ) val isTXDAT_s3 = Mux( mshr_req_s3, mshr_snpRespDataX_s3 || mshr_cbWrData_s3 || mshr_dct_s3, - req_s3.fromB && !need_mshr_s3 && + req_s3.fromB && !need_mshr_s3_b && (doRespData && (!data_unready_s3 || req_s3.snpHitRelease && req_s3.snpHitReleaseWithData)) ) val isTXDAT_s3_ready = Mux( mshr_req_s3, mshr_snpRespDataX_s3 || mshr_cbWrData_s3 || mshr_dct_s3, - req_s3.fromB && !need_mshr_s3 && !txdat_s3_latch.B && + req_s3.fromB && !need_mshr_s3_b && !txdat_s3_latch.B && (doRespData && (!data_unready_s3 || req_s3.snpHitRelease && req_s3.snpHitReleaseWithData)) ) val isTXREQ_s3 = mshr_req_s3 && (mshr_writeBackFull_s3 || mshr_writeCleanFull_s3 || diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index d006e71bf..8040204fa 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -121,6 +121,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] mainPipe.io.errOnSnp_s3 := directory.io.errOnSnp mainPipe.io.dirWayOH_s3 := directory.io.wayOH mainPipe.io.dirReplWayOH_s3 := directory.io.replWayOH + mainPipe.io.cmoHitInvalid := directory.io.cmoHitInvalid mainPipe.io.fromMSHRCtl <> mshrCtl.io.toMainPipe mainPipe.io.bufResp := sinkC.io.bufResp mainPipe.io.refillBufResp_s3.valid := RegNext(refillBuf.io.r.valid, false.B) From 7600db1c66799d3497e3dd121ab81ef579873efd Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Tue, 7 Apr 2026 22:23:43 +0800 Subject: [PATCH 17/23] perf(Arbiter): perf input conflict of Arbiters --- src/main/scala/coupledL2/CoupledL2.scala | 4 ++- src/main/scala/coupledL2/CustomL1Hint.scala | 2 +- src/main/scala/coupledL2/RequestBuffer.scala | 1 + src/main/scala/coupledL2/SinkC.scala | 2 ++ 
src/main/scala/coupledL2/SourceB.scala | 1 + .../prefetch/BestOffsetPrefetch.scala | 3 ++ .../scala/coupledL2/tl2chi/MMIOBridge.scala | 2 ++ .../coupledL2/tl2chi/TL2CHICoupledL2.scala | 2 ++ src/main/scala/coupledL2/utils/Arb.scala | 36 +++++++++++++++++++ 9 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 src/main/scala/coupledL2/utils/Arb.scala diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index 6a8571967..b536ea51c 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -34,6 +34,7 @@ import coupledL2.prefetch._ import huancun.{BankBitsKey, TPmetaReq, TPmetaResp} import utility.mbist.{MbistInterface, MbistPipeline} import utility.sram.{SramBroadcastBundle, SramHelper} +import coupledL2.utils.ArbPerf trait HasCoupledL2Parameters { val p: Parameters @@ -218,11 +219,12 @@ trait HasCoupledL2Parameters { out <> arb.io.out } - def fastArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None): Unit = { + def fastArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None)(implicit p: Parameters): Unit = { val arb = Module(new FastArbiter[T](chiselTypeOf(out.bits), in.size)) if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } for ((a, req) <- arb.io.in.zip(in)) { a <> req } out <> arb.io.out + ArbPerf(arb, name.getOrElse("fastArb")) } def odOpGen(r: UInt) = { diff --git a/src/main/scala/coupledL2/CustomL1Hint.scala b/src/main/scala/coupledL2/CustomL1Hint.scala index 11d27e812..f9e282977 100644 --- a/src/main/scala/coupledL2/CustomL1Hint.scala +++ b/src/main/scala/coupledL2/CustomL1Hint.scala @@ -107,7 +107,7 @@ class CustomL1Hint(implicit p: Parameters) extends L2Module { enq_s3.valid := enqValid_s3 enq_s3.bits := enqBits_s3 - fastArb(Seq(enq_s3, hint_s1Queue.io.out, flow_s1), hintQueue.io.enq, Some("Hint")) + arb(Seq(enq_s3, hint_s1Queue.io.out, flow_s1), hintQueue.io.enq, 
Some("Hint")) hintQueue.io.deq.ready := io.l1Hint.ready io.l1Hint.valid := hintQueue.io.deq.valid && hintQueue.io.deq.bits.isGrantData diff --git a/src/main/scala/coupledL2/RequestBuffer.scala b/src/main/scala/coupledL2/RequestBuffer.scala index 8e5d46dfc..b9901b4fc 100644 --- a/src/main/scala/coupledL2/RequestBuffer.scala +++ b/src/main/scala/coupledL2/RequestBuffer.scala @@ -99,6 +99,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete val buffer = RegInit(VecInit(Seq.fill(entries)(0.U.asTypeOf(new ReqEntry)))) val issueArb = Module(new FastArbiter(new ReqEntry, entries)) + ArbPerf(issueArb, "issueArb") val chosenQ = Module(new Queue(new ChosenQBundle(log2Ceil(entries)), entries = 1, pipe = true, flow = false)) val chosenQValid = chosenQ.io.deq.valid diff --git a/src/main/scala/coupledL2/SinkC.scala b/src/main/scala/coupledL2/SinkC.scala index 81961860b..18b8f93ed 100644 --- a/src/main/scala/coupledL2/SinkC.scala +++ b/src/main/scala/coupledL2/SinkC.scala @@ -23,6 +23,7 @@ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import org.chipsalliance.cde.config.Parameters import utility.{MemReqSource, XSPerfAccumulate, RRArbiterInit} +import coupledL2.utils.ArbPerf class PipeBufferResp(implicit p: Parameters) extends L2Bundle { val data = Vec(beatSize, UInt((beatBytes * 8).W)) @@ -56,6 +57,7 @@ class SinkC(implicit p: Parameters) extends L2Module { val taskValids = RegInit(VecInit(Seq.fill(bufBlocks)(false.B))) val taskArb = Module(new RRArbiterInit(new TaskBundle, bufBlocks)) val bufValids = taskValids.asUInt | dataValids + ArbPerf(taskArb, "taskArb") val full = bufValids.andR val noSpace = full && hasData diff --git a/src/main/scala/coupledL2/SourceB.scala b/src/main/scala/coupledL2/SourceB.scala index 5fb75629b..ad4b648ea 100644 --- a/src/main/scala/coupledL2/SourceB.scala +++ b/src/main/scala/coupledL2/SourceB.scala @@ -99,6 +99,7 @@ class SourceB(implicit p: Parameters) extends L2Module { /* 
======== Issue ======== */ val issueArb = Module(new FastArbiter(new SourceBReq, entries)) + ArbPerf(issueArb, "issueArb") issueArb.io.in zip probes foreach{ case (i, p) => i.valid := p.valid && p.rdy diff --git a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala index 7e9bf19a1..569186cc7 100644 --- a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala +++ b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala @@ -36,6 +36,7 @@ import coupledL2.{HasCoupledL2Parameters, L2TlbReq, L2ToL1TlbIO, TlbCmd, Pbmt} import coupledL2.utils.ReplacementPolicy import scopt.Read import freechips.rocketchip.util.SeqToAugmentedSeq +import coupledL2.utils.ArbPerf case class BOPParameters( virtualTrain: Boolean = true, @@ -420,6 +421,8 @@ class PrefetchReqBuffer(name: String = "vbop")(implicit p: Parameters) extends B //val replacement = ReplacementPolicy.fromString("plru", REQ_FILTER_SIZE) val tlb_req_arb = Module(new RRArbiterInit(new L2TlbReq, REQ_FILTER_SIZE)) val pf_req_arb = Module(new RRArbiterInit(new PrefetchReq, REQ_FILTER_SIZE)) + ArbPerf(tlb_req_arb, "bop_tlb_req_arb") + ArbPerf(pf_req_arb, "bop_pf_req_arb") def wayMap[T <: Data](f: Int => T) = VecInit((0 until REQ_FILTER_SIZE).map(f)) diff --git a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala index f965d08d6..960066cdf 100644 --- a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala @@ -27,6 +27,7 @@ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import coupledL2.HasCoupledL2Parameters import coupledL2.{MemBackTypeMM, MemPageTypeNC} +import coupledL2.utils.ArbPerf class MMIOBridge()(implicit p: Parameters) extends LazyModule with HasCoupledL2Parameters @@ -382,6 +383,7 @@ class MMIOBridgeImp(outer: MMIOBridge) extends LazyModuleImp(outer) } val txreqArb = Module(new 
RRArbiterInit(chiselTypeOf(io.tx.req.bits), mmioBridgeSize)) + ArbPerf(txreqArb, "mmio_txreq_arb") for ((a, entry) <- txreqArb.io.in.zip(entries)) { val req = entry.io.chi.tx.req val isRead = entry.io.isRead diff --git a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala index 38b51a56a..e9529195f 100644 --- a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala +++ b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala @@ -28,6 +28,7 @@ import org.chipsalliance.cde.config.{Parameters, Field} import scala.math.max import coupledL2._ import coupledL2.prefetch._ +import coupledL2.utils.ArbPerf abstract class TL2CHIL2Bundle(implicit val p: Parameters) extends Bundle with HasCoupledL2Parameters @@ -129,6 +130,7 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base { case slices: Seq[Slice] => // TXREQ val txreq_arb = Module(new RRArbiterInit(new CHIREQ, slices.size + 1)) // plus 1 for MMIO + ArbPerf(txreq_arb, "txreq_arb") val txreq = Wire(DecoupledIO(new CHIREQ)) slices.zip(txreq_arb.io.in.init).foreach { case (s, in) => in <> s.io.out.tx.req } txreq_arb.io.in.last <> mmio.io.tx.req diff --git a/src/main/scala/coupledL2/utils/Arb.scala b/src/main/scala/coupledL2/utils/Arb.scala new file mode 100644 index 000000000..050303517 --- /dev/null +++ b/src/main/scala/coupledL2/utils/Arb.scala @@ -0,0 +1,36 @@ +package coupledL2.utils +import chisel3._ +import chisel3.util._ +import utility.XSPerfAccumulate +import freechips.rocketchip.util.SeqToAugmentedSeq +import org.chipsalliance.cde.config.Parameters +import svsim.CommonCompilationSettings.Timescale.Unit.s +import utility.LockingRRArbiterInit +import utility.FastArbiter +import utility.XSPerfHistogram + +object ArbPerf { + def apply(valids: Seq[Bool], readys: Seq[Bool], outRdy: Bool, name: String)(implicit p: Parameters): Unit = { + require(valids.size == readys.size) + XSPerfHistogram(s"${name}ArbConflict", PopCount(valids), valids.asUInt.orR, 
1, valids.size + 1) + val n = valids.size + val a = valids.zip(readys).zipWithIndex.map { case ((v, r), i) => + val w = RegInit(0.U(64.W)) + when (v && !r && outRdy) { w := w + 1.U } + when (v && r) { w := 0.U } + 0.until(n*2).map(x => ((x.U === w && v && r).asUInt, x)) :+ (((w >= (n*2).U && v && r).asUInt, n*2)) + }.reduce { (left, right) => + left.zip(right).map { case (a, b) => (a._1 + b._1, a._2) } + }.foreach { case (cnt, time) => + XSPerfAccumulate(s"${name}ArbWaitTime_${time}", cnt) + } + } + + def apply[T <: Data](arb: LockingRRArbiterInit[T], name: String)(implicit p: Parameters): Unit = { + apply(arb.io.in.map(_.valid), arb.io.in.map(_.ready), arb.io.out.ready, name) + } + + def apply[T <: Data](arb: FastArbiter[T], name: String)(implicit p: Parameters): Unit = { + apply(arb.io.in.map(_.valid), arb.io.in.map(_.ready), arb.io.out.ready, name) + } +} \ No newline at end of file From f6d57ae74806eaca2d73bf9353519ef806148e0b Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Wed, 8 Apr 2026 18:53:18 +0800 Subject: [PATCH 18/23] fix(MainPipe): fix nestable_meta timing Currently, nestable_meta of snoop uses the result from the replace logic via dirResult.meta. So decouple them. --- src/main/scala/coupledL2/tl2chi/MainPipe.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 1fe722506..afa68ab25 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -214,6 +214,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val nestable_meta_s3 = nestable_dirResult_s3.meta val nestable_meta_has_clients_s3 = nestable_dirResult_s3.meta.clients.orR nestable_dirResult_s3 := dirResult_s3 + nestable_dirResult_s3.meta := metaOnHit_s3 when (req_s3.snpHitRelease) { // Meta states from MSHRs were considered as directory result here. 
// Therefore, meta states were always inferred to be hit when nesting release, no matter the fact that directory From 8402061e72d7c76f0d26abe392a4430a9004141b Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 9 Apr 2026 02:13:11 +0800 Subject: [PATCH 19/23] fix(MainPipe): fix prefetch train timing Current prefetch train will use the meta from replace logic. So decouple them. --- src/main/scala/coupledL2/tl2chi/MainPipe.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index afa68ab25..020d1ceaf 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -715,7 +715,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // train on request(with needHint flag) miss or hit on prefetched block // trigger train also in a_merge here train.valid := task_s3.valid && ((req_acquire_s3 || req_get_s3) && req_s3.needHint.getOrElse(false.B) && - (!dirResult_s3.hit || meta_s3.prefetch.get) || req_s3.mergeA) + (!dirResult_s3.hit || metaOnHit_s3.prefetch.get) || req_s3.mergeA) train.bits.tag := req_s3.tag train.bits.set := req_s3.set train.bits.needT := Mux( @@ -729,8 +729,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes pc := Mux(req_s3.mergeA, req_s3.aMergeTask.pc.getOrElse(0.U), req_s3.pc.getOrElse(0.U)) } train.bits.hit := Mux(req_s3.mergeA, true.B, dirResult_s3.hit) - train.bits.prefetched := Mux(req_s3.mergeA, true.B, meta_s3.prefetch.getOrElse(false.B)) - train.bits.pfsource := meta_s3.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) // TODO + train.bits.prefetched := Mux(req_s3.mergeA, true.B, metaOnHit_s3.prefetch.getOrElse(false.B)) + train.bits.pfsource := Mux(req_s3.mergeA, req_s3.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U), metaOnHit_s3.prefetchSrc.getOrElse(PfSource.NoWhere.id.U)) // TODO 
train.bits.reqsource := req_s3.reqSource } From 51a859e235acfc4b277c1ef316e0e11349c19088 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 9 Apr 2026 02:16:02 +0800 Subject: [PATCH 20/23] fix(Arb): fix FastArbiter timing --- src/main/scala/coupledL2/CoupledL2.scala | 4 +- src/main/scala/coupledL2/RequestBuffer.scala | 2 +- src/main/scala/coupledL2/SinkC.scala | 6 +- src/main/scala/coupledL2/SourceB.scala | 2 +- .../prefetch/BestOffsetPrefetch.scala | 8 +- .../scala/coupledL2/tl2chi/MMIOBridge.scala | 4 +- .../coupledL2/tl2chi/TL2CHICoupledL2.scala | 6 +- src/main/scala/coupledL2/utils/Arb.scala | 109 +++++++++++++++++- .../scala/coupledL2/utils/OHOperation.scala | 22 ++-- 9 files changed, 131 insertions(+), 32 deletions(-) diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index b536ea51c..e03d25002 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -34,7 +34,7 @@ import coupledL2.prefetch._ import huancun.{BankBitsKey, TPmetaReq, TPmetaResp} import utility.mbist.{MbistInterface, MbistPipeline} import utility.sram.{SramBroadcastBundle, SramHelper} -import coupledL2.utils.ArbPerf +import coupledL2.utils._ trait HasCoupledL2Parameters { val p: Parameters @@ -220,7 +220,7 @@ trait HasCoupledL2Parameters { } def fastArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None)(implicit p: Parameters): Unit = { - val arb = Module(new FastArbiter[T](chiselTypeOf(out.bits), in.size)) + val arb = Module(new TwoLevelRRArbiter[T](chiselTypeOf(out.bits), in.size)) if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } for ((a, req) <- arb.io.in.zip(in)) { a <> req } out <> arb.io.out diff --git a/src/main/scala/coupledL2/RequestBuffer.scala b/src/main/scala/coupledL2/RequestBuffer.scala index b9901b4fc..61199d73a 100644 --- a/src/main/scala/coupledL2/RequestBuffer.scala +++ b/src/main/scala/coupledL2/RequestBuffer.scala @@ 
-98,7 +98,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete io.ASet := io.in.bits.set val buffer = RegInit(VecInit(Seq.fill(entries)(0.U.asTypeOf(new ReqEntry)))) - val issueArb = Module(new FastArbiter(new ReqEntry, entries)) + val issueArb = Module(new TwoLevelRRArbiter(new ReqEntry, entries)) ArbPerf(issueArb, "issueArb") val chosenQ = Module(new Queue(new ChosenQBundle(log2Ceil(entries)), entries = 1, pipe = true, flow = false)) val chosenQValid = chosenQ.io.deq.valid diff --git a/src/main/scala/coupledL2/SinkC.scala b/src/main/scala/coupledL2/SinkC.scala index 18b8f93ed..6d59cf589 100644 --- a/src/main/scala/coupledL2/SinkC.scala +++ b/src/main/scala/coupledL2/SinkC.scala @@ -22,8 +22,8 @@ import chisel3.util._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import org.chipsalliance.cde.config.Parameters -import utility.{MemReqSource, XSPerfAccumulate, RRArbiterInit} -import coupledL2.utils.ArbPerf +import utility.{MemReqSource, XSPerfAccumulate} +import coupledL2.utils._ class PipeBufferResp(implicit p: Parameters) extends L2Bundle { val data = Vec(beatSize, UInt((beatBytes * 8).W)) @@ -55,7 +55,7 @@ class SinkC(implicit p: Parameters) extends L2Module { val dataValids = VecInit(beatValids.map(_.asUInt.orR)).asUInt val taskBuf = RegInit(VecInit(Seq.fill(bufBlocks)(0.U.asTypeOf(new TaskBundle)))) val taskValids = RegInit(VecInit(Seq.fill(bufBlocks)(false.B))) - val taskArb = Module(new RRArbiterInit(new TaskBundle, bufBlocks)) + val taskArb = Module(new TwoLevelRRArbiter(new TaskBundle, bufBlocks)) val bufValids = taskValids.asUInt | dataValids ArbPerf(taskArb, "taskArb") diff --git a/src/main/scala/coupledL2/SourceB.scala b/src/main/scala/coupledL2/SourceB.scala index ad4b648ea..713036f5b 100644 --- a/src/main/scala/coupledL2/SourceB.scala +++ b/src/main/scala/coupledL2/SourceB.scala @@ -98,7 +98,7 @@ class SourceB(implicit p: Parameters) extends L2Module { } /* ======== Issue 
======== */ - val issueArb = Module(new FastArbiter(new SourceBReq, entries)) + val issueArb = Module(new TwoLevelRRArbiter(new SourceBReq, entries)) ArbPerf(issueArb, "issueArb") issueArb.io.in zip probes foreach{ case (i, p) => diff --git a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala index 569186cc7..c831fb5a0 100644 --- a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala +++ b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala @@ -26,7 +26,7 @@ package coupledL2.prefetch -import utility.{ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, RRArbiterInit, XSPerfAccumulate} +import utility.{ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, XSPerfAccumulate} import utility.sram.SRAMTemplate import org.chipsalliance.cde.config.Parameters import chisel3.DontCare.:= @@ -36,7 +36,7 @@ import coupledL2.{HasCoupledL2Parameters, L2TlbReq, L2ToL1TlbIO, TlbCmd, Pbmt} import coupledL2.utils.ReplacementPolicy import scopt.Read import freechips.rocketchip.util.SeqToAugmentedSeq -import coupledL2.utils.ArbPerf +import coupledL2.utils._ case class BOPParameters( virtualTrain: Boolean = true, @@ -419,8 +419,8 @@ class PrefetchReqBuffer(name: String = "vbop")(implicit p: Parameters) extends B val valids = Seq.fill(REQ_FILTER_SIZE)(RegInit(false.B)) val entries = Seq.fill(REQ_FILTER_SIZE)(Reg(new BopReqBufferEntry)) //val replacement = ReplacementPolicy.fromString("plru", REQ_FILTER_SIZE) - val tlb_req_arb = Module(new RRArbiterInit(new L2TlbReq, REQ_FILTER_SIZE)) - val pf_req_arb = Module(new RRArbiterInit(new PrefetchReq, REQ_FILTER_SIZE)) + val tlb_req_arb = Module(new TwoLevelRRArbiter(new L2TlbReq, REQ_FILTER_SIZE)) + val pf_req_arb = Module(new TwoLevelRRArbiter(new PrefetchReq, REQ_FILTER_SIZE)) ArbPerf(tlb_req_arb, "bop_tlb_req_arb") ArbPerf(pf_req_arb, "bop_pf_req_arb") diff --git a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala 
b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala index 960066cdf..fece0af4e 100644 --- a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala @@ -27,7 +27,7 @@ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import coupledL2.HasCoupledL2Parameters import coupledL2.{MemBackTypeMM, MemPageTypeNC} -import coupledL2.utils.ArbPerf +import coupledL2.utils._ class MMIOBridge()(implicit p: Parameters) extends LazyModule with HasCoupledL2Parameters @@ -382,7 +382,7 @@ class MMIOBridgeImp(outer: MMIOBridge) extends LazyModuleImp(outer) entry.io.id := i.U } - val txreqArb = Module(new RRArbiterInit(chiselTypeOf(io.tx.req.bits), mmioBridgeSize)) + val txreqArb = Module(new TwoLevelRRArbiter(chiselTypeOf(io.tx.req.bits), mmioBridgeSize)) ArbPerf(txreqArb, "mmio_txreq_arb") for ((a, entry) <- txreqArb.io.in.zip(entries)) { val req = entry.io.chi.tx.req diff --git a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala index e9529195f..ca86718c3 100644 --- a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala +++ b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala @@ -19,7 +19,7 @@ package coupledL2.tl2chi import chisel3._ import chisel3.util._ -import utility.{FastArbiter, Pipeline, ParallelPriorityMux, RegNextN, RRArbiterInit, XSPerfAccumulate} +import utility.{Pipeline, ParallelPriorityMux, RegNextN, XSPerfAccumulate} import freechips.rocketchip.diplomacy._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ @@ -28,7 +28,7 @@ import org.chipsalliance.cde.config.{Parameters, Field} import scala.math.max import coupledL2._ import coupledL2.prefetch._ -import coupledL2.utils.ArbPerf +import coupledL2.utils._ abstract class TL2CHIL2Bundle(implicit val p: Parameters) extends Bundle with HasCoupledL2Parameters @@ -129,7 +129,7 @@ class TL2CHICoupledL2(implicit p: Parameters) extends 
CoupledL2Base { slices match { case slices: Seq[Slice] => // TXREQ - val txreq_arb = Module(new RRArbiterInit(new CHIREQ, slices.size + 1)) // plus 1 for MMIO + val txreq_arb = Module(new TwoLevelRRArbiter(new CHIREQ, slices.size + 1)) // plus 1 for MMIO ArbPerf(txreq_arb, "txreq_arb") val txreq = Wire(DecoupledIO(new CHIREQ)) slices.zip(txreq_arb.io.in.init).foreach { case (s, in) => in <> s.io.out.tx.req } diff --git a/src/main/scala/coupledL2/utils/Arb.scala b/src/main/scala/coupledL2/utils/Arb.scala index 050303517..2c59c1a9f 100644 --- a/src/main/scala/coupledL2/utils/Arb.scala +++ b/src/main/scala/coupledL2/utils/Arb.scala @@ -1,13 +1,9 @@ package coupledL2.utils import chisel3._ import chisel3.util._ -import utility.XSPerfAccumulate import freechips.rocketchip.util.SeqToAugmentedSeq import org.chipsalliance.cde.config.Parameters -import svsim.CommonCompilationSettings.Timescale.Unit.s -import utility.LockingRRArbiterInit -import utility.FastArbiter -import utility.XSPerfHistogram +import utility._ object ArbPerf { def apply(valids: Seq[Bool], readys: Seq[Bool], outRdy: Bool, name: String)(implicit p: Parameters): Unit = { @@ -33,4 +29,107 @@ object ArbPerf { def apply[T <: Data](arb: FastArbiter[T], name: String)(implicit p: Parameters): Unit = { apply(arb.io.in.map(_.valid), arb.io.in.map(_.ready), arb.io.out.ready, name) } + + def apply[T <: Data](arb: L2FastArbiterBase[T], name: String)(implicit p: Parameters): Unit = { + apply(arb.io.in.map(_.valid), arb.io.in.map(_.ready), arb.io.out.ready, name) + } +} + +class L2ArbiterIO[T <: Data](gen: T, n: Int) extends ArbiterIO[T](gen, n) { + val chosenOH = Output(UInt(n.W)) +} + +abstract class L2FastArbiterBase[T <: Data](val gen: T, val n: Int) extends Module { + val io = IO(new L2ArbiterIO[T](gen, n)) + + def maskToOH(seq: Seq[Bool]) = { + seq.zipWithIndex.map{ + case (b, 0) => b + case (b, i) => b && !Cat(seq.take(i)).orR + } + } +} + +class TwoLevelRRArbiter[T <: Data](gen: T, n: Int) extends 
L2FastArbiterBase[T](gen, n) { + if (n == 1) { + // for better codegen + io.out.valid := io.in.head.valid + io.out.bits := io.in.head.bits + io.in.head.ready := io.out.ready + io.chosen := 0.U + io.chosenOH := 1.U + } else if (n == 0) { + io.out.valid := false.B + io.out.bits := 0.U.asTypeOf(io.out.bits) + io.chosen := 0.U + io.chosenOH := 0.U + } else { + val mid = n / 2 + val rest = n - mid + val lowOut = Wire(Decoupled(gen)) + val highOut = Wire(Decoupled(gen)) + val selLow = RegInit(false.B) + val finalSelLow = Wire(Bool()) + + val chosenOHLow = Wire(UInt(mid.W)) + val chosenOHHigh = Wire(UInt(rest.W)) + val validsLow = VecInit(io.in.take(mid).map(_.valid)).asUInt + val validsHigh = VecInit(io.in.drop(mid).map(_.valid)).asUInt + + val pendingMaskLow = RegEnable( + validsLow & (~chosenOHLow).asUInt, // make IDEA happy ... + 0.U(mid.W), + lowOut.fire + ) + val pendingMaskHigh = RegEnable( + validsHigh & (~chosenOHHigh).asUInt, // make IDEA happy ... + 0.U(rest.W), + highOut.fire + ) + + val rrGrantMaskLow = RegEnable(VecInit((0 until mid) map { i => + if(i == 0) false.B else chosenOHLow(i - 1, 0).orR + }).asUInt, 0.U(mid.W), lowOut.fire) + val rrGrantMaskHigh = RegEnable(VecInit((0 until rest) map { i => + if(i == 0) false.B else chosenOHHigh(i - 1, 0).orR + }).asUInt, 0.U(rest.W), highOut.fire) + + val rrSelOHLow = VecInit(maskToOH((rrGrantMaskLow & pendingMaskLow).asBools)).asUInt + val rrSelOHHigh = VecInit(maskToOH((rrGrantMaskHigh & pendingMaskHigh).asBools)).asUInt + val firstOneOHLow = VecInit(maskToOH(validsLow.asBools)).asUInt + val firstOneOHHigh = VecInit(maskToOH(validsHigh.asBools)).asUInt + val rrValidLow = (rrSelOHLow & validsLow).orR + val rrValidHigh = (rrSelOHHigh & validsHigh).orR + chosenOHLow := Mux(rrValidLow, rrSelOHLow, firstOneOHLow) + chosenOHHigh := Mux(rrValidHigh, rrSelOHHigh, firstOneOHHigh) + + lowOut.valid := validsLow.orR + lowOut.bits := Mux1H(chosenOHLow, io.in.take(mid).map(_.bits)) + highOut.valid := validsHigh.orR + 
highOut.bits := Mux1H(chosenOHHigh, io.in.drop(mid).map(_.bits)) + + io.in.take(mid).map(_.ready).zip(chosenOHLow.asBools).foreach{ + case (rdy, grant) => rdy := grant && lowOut.ready + } + io.in.drop(mid).map(_.ready).zip(chosenOHHigh.asBools).foreach{ + case (rdy, grant) => rdy := grant && highOut.ready + } + + io.out.valid := lowOut.valid || highOut.valid + finalSelLow := Mux1H(Seq( + (lowOut.valid && highOut.valid) -> selLow, + (lowOut.valid && !highOut.valid) -> true.B, + (!lowOut.valid && highOut.valid) -> false.B + )) + io.out.bits := Mux(finalSelLow, lowOut.bits, highOut.bits) + lowOut.ready := io.out.ready && finalSelLow + highOut.ready := io.out.ready && !finalSelLow + + io.chosenOH := Cat(Seq.fill(rest)(!finalSelLow).asUInt & chosenOHHigh, Seq.fill(mid)(finalSelLow).asUInt & chosenOHLow) + io.chosen := OHToUInt(io.chosenOH) + + when (io.out.fire) { + selLow := Mux(finalSelLow, !highOut.valid, lowOut.valid) + } + } } \ No newline at end of file diff --git a/src/main/scala/coupledL2/utils/OHOperation.scala b/src/main/scala/coupledL2/utils/OHOperation.scala index 88c3e72a9..465d52c3d 100644 --- a/src/main/scala/coupledL2/utils/OHOperation.scala +++ b/src/main/scala/coupledL2/utils/OHOperation.scala @@ -3,19 +3,19 @@ package coupledL2.utils import chisel3._ import chisel3.util._ import utility.ParallelOR +import freechips.rocketchip.util.SeqToAugmentedSeq object MaskToOH { + def apply(mask: Seq[Bool]): UInt = { + val oh = mask.zipWithIndex.map{ + case (b, 0) => b + case (b, i) => b && !Cat(mask.take(i)).orR + }.asUInt + assert(oh === PriorityEncoderOH(mask.asUInt), "MaskToOH should give the same result as PriorityEncoderOH") + oh + } + def apply(mask: UInt): UInt = { - val width = mask.getWidth - val oh = Wire(Vec(width, Bool())) - for (i <- 0 until width) { - if (i == 0) { - oh(i) := mask(i) - } else { - oh(i) := mask(i) && !ParallelOR(mask(i-1, 0).asBools) - } - } - assert(oh.asUInt === PriorityEncoderOH(mask), "MaskToOH should give the same result as 
PriorityEncoderOH") - oh.asUInt + apply(mask.asBools) } } \ No newline at end of file From 5aa9c2206ab696ad5dfd3b39ea4d29779bd3c93d Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Thu, 9 Apr 2026 16:26:41 +0800 Subject: [PATCH 21/23] fix(BOP): simplify constant value input in arbiter in bop Some input values of the arbiter in bop are the same constant value. So assign these values at the output directly. --- .../coupledL2/prefetch/BestOffsetPrefetch.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala index c831fb5a0..56c9ccfa3 100644 --- a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala +++ b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala @@ -462,9 +462,15 @@ class PrefetchReqBuffer(name: String = "vbop")(implicit p: Parameters) extends B tlb_req_arb.io.out.ready := true.B io.tlb_req.req.valid := RegNext(tlb_req_arb.io.out.valid) io.tlb_req.req.bits := RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid) + io.tlb_req.req.bits.cmd := TlbCmd.read + io.tlb_req.req.bits.size := 3.U + io.tlb_req.req.bits.kill := false.B + io.tlb_req.req.bits.no_translate := false.B + io.tlb_req.req.bits.isPrefetch := true.B io.tlb_req.req_kill := false.B io.tlb_req.resp.ready := true.B io.out_req <> pf_req_arb.io.out + io.out_req.bits.pfSource := MemReqSource.Prefetch2L2BOP.id.U /* s0: entries look up */ val prev_in_valid = RegNext(io.in_req.valid, false.B) @@ -564,12 +570,8 @@ class PrefetchReqBuffer(name: String = "vbop")(implicit p: Parameters) extends B /* tlb & pf */ for((e, i) <- entries.zipWithIndex){ tlb_req_arb.io.in(i).valid := valids(i) && !e.paddrValid && !s1_tlb_fire_oh(i) && !s2_tlb_fire_oh(i) && !s3_tlb_fire_oh(i) && !e.replayCnt.orR + tlb_req_arb.io.in(i).bits := 0.U.asTypeOf(tlb_req_arb.io.in(i).bits) tlb_req_arb.io.in(i).bits.vaddr := e.get_tlb_vaddr() - 
tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read - tlb_req_arb.io.in(i).bits.size := 3.U - tlb_req_arb.io.in(i).bits.kill := false.B - tlb_req_arb.io.in(i).bits.no_translate := false.B - tlb_req_arb.io.in(i).bits.isPrefetch := true.B pf_req_arb.io.in(i).valid := can_send_pf(i) pf_req_arb.io.in(i).bits := e.toPrefetchReq() From 7fd25b150d373c6f5ce9260d72ba34ac8d398304 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Mon, 13 Apr 2026 22:15:13 +0800 Subject: [PATCH 22/23] fix: Using OneHot to replace idx in ReqBuf and MSHRCtl --- src/main/scala/coupledL2/RequestBuffer.scala | 66 ++++++++++--------- src/main/scala/coupledL2/tl2chi/MSHRCtl.scala | 9 ++- src/main/scala/coupledL2/tl2tl/MSHRCtl.scala | 2 +- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/main/scala/coupledL2/RequestBuffer.scala b/src/main/scala/coupledL2/RequestBuffer.scala index 61199d73a..be3180d5f 100644 --- a/src/main/scala/coupledL2/RequestBuffer.scala +++ b/src/main/scala/coupledL2/RequestBuffer.scala @@ -58,13 +58,13 @@ class ReqEntry(entries: Int = 4)(implicit p: Parameters) extends L2Bundle() { } -class ChosenQBundle(idWIdth: Int = 2)(implicit p: Parameters) extends L2Bundle { +class ChosenQBundle(idOHWIdth: Int = 2)(implicit p: Parameters) extends L2Bundle { val bits = new ReqEntry() - val id = UInt(idWIdth.W) + val idOH = UInt(idOHWIdth.W) } class AMergeTask(implicit p: Parameters) extends L2Bundle { - val id = UInt(mshrBits.W) + val idOH = UInt(mshrsAll.W) val task = new TaskBundle() } @@ -100,7 +100,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete val buffer = RegInit(VecInit(Seq.fill(entries)(0.U.asTypeOf(new ReqEntry)))) val issueArb = Module(new TwoLevelRRArbiter(new ReqEntry, entries)) ArbPerf(issueArb, "issueArb") - val chosenQ = Module(new Queue(new ChosenQBundle(log2Ceil(entries)), entries = 1, pipe = true, flow = false)) + val chosenQ = Module(new Queue(new ChosenQBundle(entries), entries = 1, pipe = true, flow = 
false)) val chosenQValid = chosenQ.io.deq.valid /* ======== Enchantment ======== */ @@ -127,7 +127,8 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete a.fromA && (a.opcode === AcquireBlock || a.opcode === AcquirePerm) )) val matched = matchVec.asUInt.orR - val matchSrc = ParallelPriorityMux(matchVec, io.mshrInfo.map(_.bits.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U))) + assert(PopCount(matchVec) <= 1.U, "Multiple late prefetch MSHRs matched") + val matchSrc = Mux1H(matchVec, io.mshrInfo.map(_.bits.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U))) (matched, matchSrc) } @@ -159,9 +160,8 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete in.fromA && (in.opcode === AcquireBlock || in.opcode === AcquirePerm) && !s.bits.mergeA && !(in.param === NtoT && s.bits.param === NtoB) }).asUInt val mergeA = mergeAMask.orR - val mergeAId = OHToUInt(mergeAMask) io.aMergeTask.valid := io.in.valid && mergeA - io.aMergeTask.bits.id := mergeAId + io.aMergeTask.bits.idOH := mergeAMask io.aMergeTask.bits.task := in /* @@ -208,28 +208,27 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete /* ======== Alloc ======== */ io.in.ready := !full || doFlow || mergeA || dup - val insertIdx = PriorityEncoder(buffer.map(!_.valid)) + val insertOH = MaskToOH(buffer.map(!_.valid)) val alloc = !full && io.in.valid && !doFlow && !dup && !mergeA - when(alloc){ - val entry = buffer(insertIdx) - val mpBlock = Cat(io.mainPipeBlock).orR - val pipeBlockOut = io.out.fire && sameSet(in, io.out.bits) - val probeBlock = io.s1Entrance.valid && io.s1Entrance.bits.set === in.set // wait for same-addr req to enter MSHR - val s1Block = pipeBlockOut || probeBlock - - entry.valid := true.B - // when Addr-Conflict / Same-Addr-Dependent / MainPipe-Block / noFreeWay-in-Set, entry not ready - entry.rdy := !conflict(in) && !mpBlock && !s1Block && !noFreeWay(in)// && !Cat(depMask).orR - entry.task := io.in.bits - 
entry.waitMP := Cat( - s1Block, - io.mainPipeBlock(0), - io.mainPipeBlock(1), - 0.U(1.W)) - entry.waitMS := conflictMask(in) - -// entry.depMask := depMask - assert(PopCount(conflictMaskFromA(in)) <= 2.U) + buffer.zip(insertOH.asBools).foreach { case (entry, sel) => + when(alloc && sel){ + val mpBlock = Cat(io.mainPipeBlock).orR + val pipeBlockOut = io.out.fire && sameSet(in, io.out.bits) + val probeBlock = io.s1Entrance.valid && io.s1Entrance.bits.set === in.set // wait for same-addr req to enter MSHR + val s1Block = pipeBlockOut || probeBlock + + entry.valid := true.B + // when Addr-Conflict / Same-Addr-Dependent / MainPipe-Block / noFreeWay-in-Set, entry not ready + entry.rdy := !conflict(in) && !mpBlock && !s1Block && !noFreeWay(in)// && !Cat(depMask).orR + entry.task := io.in.bits + entry.waitMP := Cat( + s1Block, + io.mainPipeBlock(0), + io.mainPipeBlock(1), + 0.U(1.W)) + entry.waitMS := conflictMask(in) + assert(PopCount(conflictMaskFromA(in)) <= 2.U) + } } /* ======== Issue ======== */ @@ -245,7 +244,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete // in such case, we need a place to save it chosenQ.io.enq.valid := issueArb.io.out.valid chosenQ.io.enq.bits.bits := issueArb.io.out.bits - chosenQ.io.enq.bits.id := issueArb.io.chosen + chosenQ.io.enq.bits.idOH := issueArb.io.chosenOH issueArb.io.out.ready := chosenQ.io.enq.ready /* ======== Update rdy and masks ======== */ @@ -294,7 +293,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete // when entry.rdy is no longer true, // we cancel req in chosenQ, with the entry still held in buffer to issue later // val cancel = (canFlow && sameSet(chosenQ.io.deq.bits.bits.task, io.in.bits)) || !buffer(chosenQ.io.deq.bits.id).rdy - val cancel = !buffer(chosenQ.io.deq.bits.id).rdy + val cancel = !Mux1H(chosenQ.io.deq.bits.idOH, buffer.map(_.rdy)) chosenQ.io.deq.ready := io.out.ready || cancel io.out.valid := chosenQValid && !cancel || io.in.valid && 
canFlow @@ -303,8 +302,11 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete else Mux(chosenQValid, chosenQ.io.deq.bits.bits.task, io.in.bits) } - when(chosenQ.io.deq.fire && !cancel) { - buffer(chosenQ.io.deq.bits.id).valid := false.B + buffer.zip(chosenQ.io.deq.bits.idOH.asBools).foreach { + case (e, y) => + when(chosenQ.io.deq.fire && y && !cancel) { + e.valid := false.B + } } // for Dir to choose a free way diff --git a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala index 70526da9a..1fbe0887a 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala @@ -26,6 +26,7 @@ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import coupledL2.prefetch.PrefetchTrain import coupledL2._ +import coupledL2.utils._ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes with HasPerfEvents { val io = IO(new Bundle() { @@ -148,7 +149,7 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.msInfo(i) := m.io.msInfo m.io.nestedwb := io.nestedwb - m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.id === i.U + m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.idOH(i) m.io.aMergeTask.bits := io.aMergeTask.bits.task io.pCrd(i) <> m.io.pCrd @@ -173,7 +174,11 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.toSourceB <> sourceB.io.sourceB /* Arbitrate MSHR task to RequestArbiter */ - fastArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")) + val mshrTask = Wire(Decoupled(new TaskBundle())) + fastArb(mshrs.map(_.io.tasks.mainpipe), mshrTask, Some("mshr_task")) + io.mshrTask <> mshrTask + io.mshrTask.bits.mshrId := OHToUInt(mshrs.map(_.io.tasks.mainpipe.fire)) + assert(Mux(io.mshrTask.fire, io.mshrTask.bits.mshrId === mshrTask.bits.mshrId, true.B), "mshrId should be consistent") /* 
releaseBuf link to MSHR id */ io.releaseBufWriteId := ParallelPriorityMux(resp_sinkC_match_vec, (0 until mshrsAll).map(i => i.U)) diff --git a/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala b/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala index ecaae3404..eca568574 100644 --- a/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala @@ -138,7 +138,7 @@ class MSHRCtl(implicit p: Parameters) extends L2Module with HasPerfEvents { io.msInfo(i) := m.io.msInfo m.io.nestedwb := io.nestedwb - m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.id === i.U + m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.idOH(i) m.io.aMergeTask.bits := io.aMergeTask.bits.task } From 7427574c2d9ccda295f10945fd69de0af7fc5594 Mon Sep 17 00:00:00 2001 From: Frankslu <1653954675@qq.com> Date: Sat, 9 May 2026 11:13:33 +0800 Subject: [PATCH 23/23] submodule: bump utility move TwoLevelRRArbiter and MaskToOH to utility and bump --- src/main/scala/coupledL2/CoupledL2.scala | 17 ++- src/main/scala/coupledL2/Directory.scala | 2 +- src/main/scala/coupledL2/SinkC.scala | 2 +- .../prefetch/BestOffsetPrefetch.scala | 2 +- src/main/scala/coupledL2/tl2chi/MSHRCtl.scala | 8 +- .../scala/coupledL2/tl2chi/MainPipe.scala | 1 - .../coupledL2/tl2chi/TL2CHICoupledL2.scala | 4 +- src/main/scala/coupledL2/tl2tl/MSHRCtl.scala | 6 +- src/main/scala/coupledL2/utils/Arb.scala | 135 ------------------ .../scala/coupledL2/utils/OHOperation.scala | 21 --- src/main/scala/coupledL2/utils/Replacer.scala | 1 + utility | 2 +- 12 files changed, 27 insertions(+), 174 deletions(-) delete mode 100644 src/main/scala/coupledL2/utils/Arb.scala delete mode 100644 src/main/scala/coupledL2/utils/OHOperation.scala diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index e03d25002..f35e6da22 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -212,19 +212,28 @@ trait 
HasCoupledL2Parameters { x(x.getWidth - 1, pageOffsetBits) } - def arb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None): Unit = { + def arb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None) = { val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size)) if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } for ((a, req) <- arb.io.in.zip(in)) { a <> req } out <> arb.io.out + arb } - def fastArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None)(implicit p: Parameters): Unit = { - val arb = Module(new TwoLevelRRArbiter[T](chiselTypeOf(out.bits), in.size)) + def fastArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None) = { + val arb = Module(new FastArbiter[T](chiselTypeOf(out.bits), in.size)) if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } for ((a, req) <- arb.io.in.zip(in)) { a <> req } out <> arb.io.out - ArbPerf(arb, name.getOrElse("fastArb")) + arb + } + + def twoLevelArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None) = { + val arb = Module(new TwoLevelRRArbiter(chiselTypeOf(out.bits), in.size)) + if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } + for ((a, req) <- arb.io.in.zip(in)) { a <> req } + out <> arb.io.out + arb } def odOpGen(r: UInt) = { diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index 7f058b241..b733ca497 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -21,7 +21,7 @@ import chisel3._ import chisel3.util._ import utility.mbist.MbistPipeline import coupledL2.utils._ -import utility.{ChiselDB, Code, MemReqSource, ParallelPriorityMux, RegNextN, XSPerfAccumulate} +import utility.{ChiselDB, Code, MemReqSource, ParallelPriorityMux, RegNextN, XSPerfAccumulate, MaskToOH} import utility.sram.SRAMTemplate import 
org.chipsalliance.cde.config.Parameters import coupledL2.prefetch.PfSource diff --git a/src/main/scala/coupledL2/SinkC.scala b/src/main/scala/coupledL2/SinkC.scala index 6d59cf589..b88f0e339 100644 --- a/src/main/scala/coupledL2/SinkC.scala +++ b/src/main/scala/coupledL2/SinkC.scala @@ -22,7 +22,7 @@ import chisel3.util._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import org.chipsalliance.cde.config.Parameters -import utility.{MemReqSource, XSPerfAccumulate} +import utility.{MemReqSource, XSPerfAccumulate, TwoLevelRRArbiter, ArbPerf} import coupledL2.utils._ class PipeBufferResp(implicit p: Parameters) extends L2Bundle { diff --git a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala index 56c9ccfa3..d3c971020 100644 --- a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala +++ b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala @@ -26,7 +26,7 @@ package coupledL2.prefetch -import utility.{ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, XSPerfAccumulate} +import utility.{ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, XSPerfAccumulate, TwoLevelRRArbiter, ArbPerf} import utility.sram.SRAMTemplate import org.chipsalliance.cde.config.Parameters import chisel3.DontCare.:= diff --git a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala index 1fbe0887a..a81e1d1ae 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala @@ -162,20 +162,20 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.toReqArb.blockG_s1 := false.B /* Acquire downwards to TXREQ*/ - fastArb(mshrs.map(_.io.tasks.txreq), io.toTXREQ, Some("txreq")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.txreq), io.toTXREQ, Some("txreq")), "txreq_arb") /* Response downwards to TXRSP*/ - fastArb(mshrs.map(_.io.tasks.txrsp), io.toTXRSP, 
Some("txrsp")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.txrsp), io.toTXRSP, Some("txrsp")), "txrsp_arb") /* Probe upwards */ val sourceB = Module(new SourceB()) - fastArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")), "source_b_arb") sourceB.io.grantStatus := io.grantStatus io.toSourceB <> sourceB.io.sourceB /* Arbitrate MSHR task to RequestArbiter */ val mshrTask = Wire(Decoupled(new TaskBundle())) - fastArb(mshrs.map(_.io.tasks.mainpipe), mshrTask, Some("mshr_task")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.mainpipe), mshrTask, Some("mshr_task")), "mshr_task_arb") io.mshrTask <> mshrTask io.mshrTask.bits.mshrId := OHToUInt(mshrs.map(_.io.tasks.mainpipe.fire)) assert(Mux(io.mshrTask.fire, io.mshrTask.bits.mshrId === mshrTask.bits.mshrId, true.B), "mshrId should be consistent") diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 020d1ceaf..c5858aad5 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -502,7 +502,6 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.replResp.bits.way, Mux(mshr_req_s3, req_s3.way, dirResult_s3.way) ) - // io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) io.toDS.req_s3.bits.set := req_s3.set io.toDS.req_s3.bits.wen := wen io.toDS.req_s3.bits.ren := ren diff --git a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala index ca86718c3..e5f03b8d7 100644 --- a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala +++ b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala @@ -19,7 +19,7 @@ package coupledL2.tl2chi import chisel3._ import chisel3.util._ -import utility.{Pipeline, ParallelPriorityMux, RegNextN, XSPerfAccumulate} +import utility.{Pipeline, ParallelPriorityMux, RegNextN, 
XSPerfAccumulate, TwoLevelRRArbiter, ArbPerf} import freechips.rocketchip.diplomacy._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ @@ -202,7 +202,7 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base { arbPort } - fastArb(mshrPCrdArbIn, mshrPCrdArbOut, Some("pcrdgrant")) + ArbPerf(twoLevelArb(mshrPCrdArbIn, mshrPCrdArbOut, Some("pcrdgrant")), "pcrdgrant_arb") mshrPCrdGrants.zip(mshrPCrdArbGrants).foreach { case (grant, arb) => grant := arb } diff --git a/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala b/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala index eca568574..7202d67e5 100644 --- a/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala @@ -149,17 +149,17 @@ class MSHRCtl(implicit p: Parameters) extends L2Module with HasPerfEvents { /* Acquire downwards */ val acquireUnit = Module(new AcquireUnit()) - fastArb(mshrs.map(_.io.tasks.source_a), acquireUnit.io.task, Some("source_a")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.source_a), acquireUnit.io.task, Some("source_a")), "source_a_arb") io.sourceA <> acquireUnit.io.sourceA /* Probe upwards */ val sourceB = Module(new SourceB()) - fastArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")), "source_b_arb") sourceB.io.grantStatus := io.grantStatus io.sourceB <> sourceB.io.sourceB /* Arbitrate MSHR task to RequestArbiter */ - fastArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")), "mshr_task_arb") /* Arbitrate prefetchTrains to Prefetcher */ // prefetchOpt.foreach { diff --git a/src/main/scala/coupledL2/utils/Arb.scala b/src/main/scala/coupledL2/utils/Arb.scala deleted file mode 100644 index 2c59c1a9f..000000000 --- a/src/main/scala/coupledL2/utils/Arb.scala +++ /dev/null @@ -1,135 +0,0 @@ -package 
coupledL2.utils -import chisel3._ -import chisel3.util._ -import freechips.rocketchip.util.SeqToAugmentedSeq -import org.chipsalliance.cde.config.Parameters -import utility._ - -object ArbPerf { - def apply(valids: Seq[Bool], readys: Seq[Bool], outRdy: Bool, name: String)(implicit p: Parameters): Unit = { - require(valids.size == readys.size) - XSPerfHistogram(s"${name}ArbConflict", PopCount(valids), valids.asUInt.orR, 1, valids.size + 1) - val n = valids.size - val a = valids.zip(readys).zipWithIndex.map { case ((v, r), i) => - val w = RegInit(0.U(64.W)) - when (v && !r && outRdy) { w := w + 1.U } - when (v && r) { w := 0.U } - 0.until(n*2).map(x => ((x.U === w && v && r).asUInt, x)) :+ (((w >= (n*2).U && v && r).asUInt, n*2)) - }.reduce { (left, right) => - left.zip(right).map { case (a, b) => (a._1 + b._1, a._2) } - }.foreach { case (cnt, time) => - XSPerfAccumulate(s"${name}ArbWaitTime_${time}", cnt) - } - } - - def apply[T <: Data](arb: LockingRRArbiterInit[T], name: String)(implicit p: Parameters): Unit = { - apply(arb.io.in.map(_.valid), arb.io.in.map(_.ready), arb.io.out.ready, name) - } - - def apply[T <: Data](arb: FastArbiter[T], name: String)(implicit p: Parameters): Unit = { - apply(arb.io.in.map(_.valid), arb.io.in.map(_.ready), arb.io.out.ready, name) - } - - def apply[T <: Data](arb: L2FastArbiterBase[T], name: String)(implicit p: Parameters): Unit = { - apply(arb.io.in.map(_.valid), arb.io.in.map(_.ready), arb.io.out.ready, name) - } -} - -class L2ArbiterIO[T <: Data](gen: T, n: Int) extends ArbiterIO[T](gen, n) { - val chosenOH = Output(UInt(n.W)) -} - -abstract class L2FastArbiterBase[T <: Data](val gen: T, val n: Int) extends Module { - val io = IO(new L2ArbiterIO[T](gen, n)) - - def maskToOH(seq: Seq[Bool]) = { - seq.zipWithIndex.map{ - case (b, 0) => b - case (b, i) => b && !Cat(seq.take(i)).orR - } - } -} - -class TwoLevelRRArbiter[T <: Data](gen: T, n: Int) extends L2FastArbiterBase[T](gen, n) { - if (n == 1) { - // for better codegen - 
io.out.valid := io.in.head.valid - io.out.bits := io.in.head.bits - io.in.head.ready := io.out.ready - io.chosen := 0.U - io.chosenOH := 1.U - } else if (n == 0) { - io.out.valid := false.B - io.out.bits := 0.U.asTypeOf(io.out.bits) - io.chosen := 0.U - io.chosenOH := 0.U - } else { - val mid = n / 2 - val rest = n - mid - val lowOut = Wire(Decoupled(gen)) - val highOut = Wire(Decoupled(gen)) - val selLow = RegInit(false.B) - val finalSelLow = Wire(Bool()) - - val chosenOHLow = Wire(UInt(mid.W)) - val chosenOHHigh = Wire(UInt(rest.W)) - val validsLow = VecInit(io.in.take(mid).map(_.valid)).asUInt - val validsHigh = VecInit(io.in.drop(mid).map(_.valid)).asUInt - - val pendingMaskLow = RegEnable( - validsLow & (~chosenOHLow).asUInt, // make IDEA happy ... - 0.U(mid.W), - lowOut.fire - ) - val pendingMaskHigh = RegEnable( - validsHigh & (~chosenOHHigh).asUInt, // make IDEA happy ... - 0.U(rest.W), - highOut.fire - ) - - val rrGrantMaskLow = RegEnable(VecInit((0 until mid) map { i => - if(i == 0) false.B else chosenOHLow(i - 1, 0).orR - }).asUInt, 0.U(mid.W), lowOut.fire) - val rrGrantMaskHigh = RegEnable(VecInit((0 until rest) map { i => - if(i == 0) false.B else chosenOHHigh(i - 1, 0).orR - }).asUInt, 0.U(rest.W), highOut.fire) - - val rrSelOHLow = VecInit(maskToOH((rrGrantMaskLow & pendingMaskLow).asBools)).asUInt - val rrSelOHHigh = VecInit(maskToOH((rrGrantMaskHigh & pendingMaskHigh).asBools)).asUInt - val firstOneOHLow = VecInit(maskToOH(validsLow.asBools)).asUInt - val firstOneOHHigh = VecInit(maskToOH(validsHigh.asBools)).asUInt - val rrValidLow = (rrSelOHLow & validsLow).orR - val rrValidHigh = (rrSelOHHigh & validsHigh).orR - chosenOHLow := Mux(rrValidLow, rrSelOHLow, firstOneOHLow) - chosenOHHigh := Mux(rrValidHigh, rrSelOHHigh, firstOneOHHigh) - - lowOut.valid := validsLow.orR - lowOut.bits := Mux1H(chosenOHLow, io.in.take(mid).map(_.bits)) - highOut.valid := validsHigh.orR - highOut.bits := Mux1H(chosenOHHigh, io.in.drop(mid).map(_.bits)) - - 
io.in.take(mid).map(_.ready).zip(chosenOHLow.asBools).foreach{ - case (rdy, grant) => rdy := grant && lowOut.ready - } - io.in.drop(mid).map(_.ready).zip(chosenOHHigh.asBools).foreach{ - case (rdy, grant) => rdy := grant && highOut.ready - } - - io.out.valid := lowOut.valid || highOut.valid - finalSelLow := Mux1H(Seq( - (lowOut.valid && highOut.valid) -> selLow, - (lowOut.valid && !highOut.valid) -> true.B, - (!lowOut.valid && highOut.valid) -> false.B - )) - io.out.bits := Mux(finalSelLow, lowOut.bits, highOut.bits) - lowOut.ready := io.out.ready && finalSelLow - highOut.ready := io.out.ready && !finalSelLow - - io.chosenOH := Cat(Seq.fill(rest)(!finalSelLow).asUInt & chosenOHHigh, Seq.fill(mid)(finalSelLow).asUInt & chosenOHLow) - io.chosen := OHToUInt(io.chosenOH) - - when (io.out.fire) { - selLow := Mux(finalSelLow, !highOut.valid, lowOut.valid) - } - } -} \ No newline at end of file diff --git a/src/main/scala/coupledL2/utils/OHOperation.scala b/src/main/scala/coupledL2/utils/OHOperation.scala deleted file mode 100644 index 465d52c3d..000000000 --- a/src/main/scala/coupledL2/utils/OHOperation.scala +++ /dev/null @@ -1,21 +0,0 @@ -package coupledL2.utils - -import chisel3._ -import chisel3.util._ -import utility.ParallelOR -import freechips.rocketchip.util.SeqToAugmentedSeq - -object MaskToOH { - def apply(mask: Seq[Bool]): UInt = { - val oh = mask.zipWithIndex.map{ - case (b, 0) => b - case (b, i) => b && !Cat(mask.take(i)).orR - }.asUInt - assert(oh === PriorityEncoderOH(mask.asUInt), "MaskToOH should give the same result as PriorityEncoderOH") - oh - } - - def apply(mask: UInt): UInt = { - apply(mask.asBools) - } -} \ No newline at end of file diff --git a/src/main/scala/coupledL2/utils/Replacer.scala b/src/main/scala/coupledL2/utils/Replacer.scala index a17102aae..598c658df 100644 --- a/src/main/scala/coupledL2/utils/Replacer.scala +++ b/src/main/scala/coupledL2/utils/Replacer.scala @@ -30,6 +30,7 @@ package coupledL2.utils import chisel3._ import 
chisel3.util._ import chisel3.util.random.LFSR +import utility.MaskToOH import freechips.rocketchip.util.{Random, UIntToAugmentedUInt} import freechips.rocketchip.util.property.cover diff --git a/utility b/utility index 84a74fc30..822413bf6 160000 --- a/utility +++ b/utility @@ -1 +1 @@ -Subproject commit 84a74fc3021b7769a0b99bb74213a5c9bc3d48c2 +Subproject commit 822413bf6df085a468212366f0864026ede8cf28