diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index 5fdb5fe63..d86e61ff0 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -262,6 +262,8 @@ class MSHRInfo(implicit p: Parameters) extends L2Bundle with HasTLChannelBits { val mergeA = Bool() // whether the mshr already merge an acquire(avoid alias merge) val w_grantfirst = Bool() + val w_grantlast = Bool() + val w_grant = Bool() val s_release = Bool() val s_refill = Bool() val s_cmoresp = Bool() @@ -399,8 +401,8 @@ class PrefetchRecv extends Bundle { } // custom l2 - l1 interface -class L2ToL1Hint(implicit p: Parameters) extends Bundle { - val sourceId = UInt(32.W) // tilelink sourceID +class L2ToL1Hint(implicit p: Parameters) extends L2Bundle { + val sourceId = UInt(sourceIdBits.W) // tilelink sourceID val isKeyword = Bool() // miss entry keyword } diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index f6e3e8ac9..f35e6da22 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -34,6 +34,7 @@ import coupledL2.prefetch._ import huancun.{BankBitsKey, TPmetaReq, TPmetaResp} import utility.mbist.{MbistInterface, MbistPipeline} import utility.sram.{SramBroadcastBundle, SramHelper} +import coupledL2.utils._ trait HasCoupledL2Parameters { val p: Parameters @@ -211,18 +212,28 @@ trait HasCoupledL2Parameters { x(x.getWidth - 1, pageOffsetBits) } - def arb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None): Unit = { + def arb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None) = { val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size)) if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } for ((a, req) <- arb.io.in.zip(in)) { a <> req } out <> arb.io.out + arb } - def fastArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None): Unit = { + def fastArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None) = { val arb = Module(new FastArbiter[T](chiselTypeOf(out.bits), in.size)) if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } for ((a, req) <- arb.io.in.zip(in)) { a <> req } out <> arb.io.out + arb + } + + def twoLevelArb[T <: Bundle](in: Seq[DecoupledIO[T]], out: DecoupledIO[T], name: Option[String] = None) = { + val arb = Module(new TwoLevelRRArbiter(chiselTypeOf(out.bits), in.size)) + if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") } + for ((a, req) <- arb.io.in.zip(in)) { a <> req } + out <> arb.io.out + arb } def odOpGen(r: UInt) = { @@ -327,7 +338,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has val hartId = Input(UInt(hartIdLen.W)) val pfCtrlFromCore = Input(new PrefetchCtrlFromCore) // val l2_hint = Valid(UInt(32.W)) - val l2_hint = ValidIO(new L2ToL1Hint()) + val l2_hint = ValidIO(new L2ToL1Hint()(l2ECCParams)) val l2_tlb_req = new L2ToL1TlbIO(nRespDups = 1)(l2TlbParams) val debugTopDown = new Bundle { val robTrueCommit = Input(UInt(64.W)) @@ -526,7 +537,7 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has if (enableHintGuidedGrant) { // for timing consideration, hint should latch one cycle before sending to L1 // instead of adding a Pipeline/Queue to latch here, we just set hintQueue in GrantBuf & CustomL1Hint "flow=false" - val l1HintArb = Module(new Arbiter(new L2ToL1Hint(), slices.size)) + val l1HintArb = Module(new Arbiter(new L2ToL1Hint()(l2ECCParams), slices.size)) val slices_l1Hint = slices.zipWithIndex.map { case (s, i) => s.io.l1Hint } diff --git a/src/main/scala/coupledL2/CustomL1Hint.scala b/src/main/scala/coupledL2/CustomL1Hint.scala index 3cd350f31..f9e282977 100644 --- a/src/main/scala/coupledL2/CustomL1Hint.scala +++ b/src/main/scala/coupledL2/CustomL1Hint.scala @@ -26,13 +26,14 @@ import coupledL2.utils._ class HintQueueEntry(implicit p: Parameters) extends L2Bundle { val source = UInt(sourceIdBits.W) - val opcode = UInt(3.W) + val isGrantData = Bool() val isKeyword = Bool() } class CustomL1HintIOBundle(implicit p: Parameters) extends L2Bundle { // input information - val s1 = Flipped(ValidIO(new TaskBundle())) + val mshrHintQInfo = Flipped(ValidIO(new TaskBundle())) + val sinkCHintQInfo = Flipped(ValidIO(new TaskBundle())) val s3 = new L2Bundle { val task = Flipped(ValidIO(new TaskBundle())) val need_mshr = Input(Bool()) @@ -47,76 +48,69 @@ class CustomL1HintIOBundle(implicit p: Parameters) extends L2Bundle { class CustomL1Hint(implicit p: Parameters) extends L2Module { val io = IO(new CustomL1HintIOBundle) - val task_s1 = io.s1 + val mshr_s1 = io.mshrHintQInfo.bits + val mshrMerge_s1 = mshr_s1.aMergeTask + val sinkC_s1 = io.sinkCHintQInfo.bits val task_s3 = io.s3.task - val mshrReq_s1 = task_s1.bits.mshrTask val mshrReq_s3 = task_s3.bits.mshrTask - val mergeA_s1 = task_s1.bits.mergeA val need_mshr_s3 = io.s3.need_mshr - def isGrantData(t: TaskBundle): Bool = t.fromA && t.opcode === GrantData - def isGrant(t: TaskBundle): Bool = t.fromA && t.opcode === Grant - def isHintAck(t: TaskBundle): Bool = t.fromA && t.opcode === HintAck // HintAck has no effect on Hint - def isRelease(t: TaskBundle): Bool = t.fromC && (t.opcode === Release || t.opcode === ReleaseData) - def isMergeGrantData(t: TaskBundle): Bool = t.fromA && t.mergeA && t.aMergeTask.opcode === GrantData - def isMergeGrant(t: TaskBundle): Bool = t.fromA && t.mergeA && t.aMergeTask.opcode === Grant - // ==================== Hint Generation ==================== // Hint for "MSHRTask and ReleaseAck" will fire@s1 - val mshr_GrantData_s1 = task_s1.valid && mshrReq_s1 && (isGrantData(task_s1.bits) || isMergeGrantData(task_s1.bits)) - val mshr_Grant_s1 = task_s1.valid && mshrReq_s1 && (isGrant(task_s1.bits) || isMergeGrant(task_s1.bits)) - val chn_Release_s1 = task_s1.valid && !mshrReq_s1 && isRelease(task_s1.bits) + // val mshr_GrantData_s1 = task_s1.valid && mshrReq_s1 && (isGrantData(task_s1.bits) || isMergeGrantData(task_s1.bits)) + // val mshr_Grant_s1 = task_s1.valid && mshrReq_s1 && (isGrant(task_s1.bits) || isMergeGrant(task_s1.bits)) + // val chn_Release_s1 = task_s1.valid && !mshrReq_s1 && isRelease(task_s1.bits) + val mshr_GrantData_s1 = io.mshrHintQInfo.valid && (mshr_s1.fromA && (mshr_s1.opcode === GrantData || (mshr_s1.mergeA && mshrMerge_s1.opcode === GrantData))) + val mshr_Grant_s1 = io.mshrHintQInfo.valid && (mshr_s1.fromA && (mshr_s1.opcode === Grant || (mshr_s1.mergeA && mshrMerge_s1.opcode === Grant))) + val chn_Release_s1 = io.sinkCHintQInfo.valid + assert(Mux(chn_Release_s1, sinkC_s1.fromC, true.B)) + assert(Mux(chn_Release_s1, sinkC_s1.opcode === Release || sinkC_s1.opcode === ReleaseData, true.B)) - val enqValid_s1 = mshr_GrantData_s1 || mshr_Grant_s1 || chn_Release_s1 - val enqSource_s1 = Mux(task_s1.bits.mergeA, task_s1.bits.aMergeTask.sourceId, task_s1.bits.sourceId) - val enqKeyWord_s1 = Mux(task_s1.bits.mergeA, - task_s1.bits.aMergeTask.isKeyword.getOrElse(false.B), - task_s1.bits.isKeyword.getOrElse(false.B) - ) - val enqOpcode_s1 = ParallelPriorityMux( - Seq( - mshr_Grant_s1 -> Grant, - mshr_GrantData_s1 -> GrantData, - chn_Release_s1 -> ReleaseAck - ) - ) + val enqBits_s1 = Wire(new HintQueueEntry) + // enqBits_s1.source := Mux(task_s1.bits.mergeA, task_s1.bits.aMergeTask.sourceId, task_s1.bits.sourceId) + enqBits_s1.source := Mux1H(Seq( + (io.mshrHintQInfo.valid && mshr_s1.mergeA) -> mshrMerge_s1.sourceId, + (io.mshrHintQInfo.valid && !mshr_s1.mergeA) -> mshr_s1.sourceId, + io.sinkCHintQInfo.valid -> sinkC_s1.sourceId + )) + OneHot.checkOneHot(Cat(io.mshrHintQInfo.valid && mshr_s1.mergeA, io.mshrHintQInfo.valid && !mshr_s1.mergeA, io.sinkCHintQInfo.valid)) + enqBits_s1.isKeyword := Mux(mshr_s1.mergeA, mshrMerge_s1.isKeyword.getOrElse(false.B), mshr_s1.isKeyword.getOrElse(false.B)) + enqBits_s1.isGrantData := mshr_GrantData_s1 // Hint for "chnTask Hit" will fire@s3 - val chn_Grant_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && isGrant(task_s3.bits) - val chn_GrantData_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && isGrantData(task_s3.bits) + val chn_Grant_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && task_s3.bits.fromA && task_s3.bits.opcode === Grant + val chn_GrantData_s3 = task_s3.valid && !mshrReq_s3 && !need_mshr_s3 && task_s3.bits.fromA && task_s3.bits.opcode === GrantData + val enqBits_s3 = Wire(new HintQueueEntry) val enqValid_s3 = chn_Grant_s3 || chn_GrantData_s3 - val enqSource_s3 = task_s3.bits.sourceId - val enqKeyWord_s3 = task_s3.bits.isKeyword.getOrElse(false.B) - val enqOpcode_s3 = ParallelPriorityMux( - Seq( - chn_Grant_s3 -> Grant, - chn_GrantData_s3 -> GrantData - ) - ) + enqBits_s3.source := task_s3.bits.sourceId + enqBits_s3.isKeyword := task_s3.bits.isKeyword.getOrElse(false.B) + enqBits_s3.isGrantData := chn_GrantData_s3 // ==================== Hint Queue ==================== val hintEntries = mshrsAll val hintEntriesWidth = log2Ceil(hintEntries) val hintQueue = Module(new Queue(new HintQueueEntry, hintEntries)) + val canFlow_s1 = !hintQueue.io.deq.valid || hintQueue.io.count === 1.U && hintQueue.io.deq.fire + val valid_s1 = mshr_GrantData_s1 || mshr_Grant_s1 || chn_Release_s1 + val flow_s1, enq_s3 = Wire(Decoupled(new HintQueueEntry)) + // noSpaceForSinkReq in GrantBuffer may ensure that these queues will not overflow + assert(enq_s3.ready || !enq_s3.valid) // this will have at most 2 entries - val hint_s1Queue = Module(new Queue(new HintQueueEntry, 4, flow = true)) - hint_s1Queue.io.enq.valid := enqValid_s1 - hint_s1Queue.io.enq.bits.opcode := enqOpcode_s1 - hint_s1Queue.io.enq.bits.source := enqSource_s1 - hint_s1Queue.io.enq.bits.isKeyword := enqKeyWord_s1 - hint_s1Queue.io.deq.ready := hintQueue.io.enq.ready && !enqValid_s3 - // WARNING:TODO: ensure queue will never overflow - assert(hint_s1Queue.io.enq.ready, "hint_s1Queue should never be full") - assert(hintQueue.io.enq.ready, "hintQueue should never be full") + val hint_s1Queue = Module(new Pipeline(new HintQueueEntry)) + hint_s1Queue.io.in.valid := valid_s1 && (!canFlow_s1 || !flow_s1.ready) + hint_s1Queue.io.in.bits := enqBits_s1 + assert(!valid_s1 || hint_s1Queue.io.in.ready || flow_s1.ready) + + flow_s1.valid := valid_s1 && canFlow_s1 + flow_s1.bits := enqBits_s1 - hintQueue.io.enq.valid := enqValid_s3 || hint_s1Queue.io.deq.valid - hintQueue.io.enq.bits.opcode := Mux(enqValid_s3, enqOpcode_s3, hint_s1Queue.io.deq.bits.opcode) - hintQueue.io.enq.bits.source := Mux(enqValid_s3, enqSource_s3, hint_s1Queue.io.deq.bits.source) - hintQueue.io.enq.bits.isKeyword := Mux(enqValid_s3, enqKeyWord_s3, hint_s1Queue.io.deq.bits.isKeyword) + enq_s3.valid := enqValid_s3 + enq_s3.bits := enqBits_s3 + arb(Seq(enq_s3, hint_s1Queue.io.out, flow_s1), hintQueue.io.enq, Some("Hint")) hintQueue.io.deq.ready := io.l1Hint.ready - io.l1Hint.valid := hintQueue.io.deq.valid && hintQueue.io.deq.bits.opcode === GrantData + io.l1Hint.valid := hintQueue.io.deq.valid && hintQueue.io.deq.bits.isGrantData io.l1Hint.bits.sourceId := hintQueue.io.deq.bits.source io.l1Hint.bits.isKeyword := hintQueue.io.deq.bits.isKeyword } \ No newline at end of file diff --git a/src/main/scala/coupledL2/DataStorage.scala b/src/main/scala/coupledL2/DataStorage.scala index de7015f8f..9fe793430 100644 --- a/src/main/scala/coupledL2/DataStorage.scala +++ b/src/main/scala/coupledL2/DataStorage.scala @@ -27,6 +27,7 @@ class DSRequest(implicit p: Parameters) extends L2Bundle { val way = UInt(wayBits.W) val set = UInt(setBits.W) val wen = Bool() + val ren = Bool() } // mask not used @@ -78,12 +79,12 @@ class DataStorage(implicit p: Parameters) extends L2Module { extraHold = true, withClockGate = true )) - array.io_en := io.en + array.io_en := io.en && (io.req.bits.ren || io.req.bits.wen) private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "L2DataStorage", p(L2ParamKey).hasMbist) val arrayIdx = Cat(io.req.bits.way, io.req.bits.set) val wen = io.req.valid && io.req.bits.wen - val ren = io.req.valid && !io.req.bits.wen + val ren = io.req.valid && !io.req.bits.wen && io.req.bits.ren val arrayWrite = Wire(new DSECCBankBlock) val arrayWriteData = if (enableDataECC) { @@ -121,12 +122,12 @@ class DataStorage(implicit p: Parameters) extends L2Module { io.rdata := dataRead io.error := error - assert(!io.en || !RegNext(io.en, false.B), + assert(!array.io_en || !RegNext(array.io_en, false.B), "Continuous SRAM req prohibited under MCP2!") - assert(!(RegNext(io.en) && (io.req.asUInt =/= RegNext(io.req.asUInt))), + assert(!(RegNext(array.io_en) && (io.req.asUInt =/= RegNext(io.req.asUInt))), s"DataStorage req fails to hold for 2 cycles!") - assert(!(RegNext(io.en && io.req.bits.wen) && (io.wdata.asUInt =/= RegNext(io.wdata.asUInt))), + assert(!(RegNext(array.io_en && io.req.bits.wen) && (io.wdata.asUInt =/= RegNext(io.wdata.asUInt))), s"DataStorage wdata fails to hold for 2 cycles!") } diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index e0762dcf2..b733ca497 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -21,11 +21,12 @@ import chisel3._ import chisel3.util._ import utility.mbist.MbistPipeline import coupledL2.utils._ -import utility.{ChiselDB, Code, MemReqSource, ParallelPriorityMux, RegNextN, XSPerfAccumulate} +import utility.{ChiselDB, Code, MemReqSource, ParallelPriorityMux, RegNextN, XSPerfAccumulate, MaskToOH} import utility.sram.SRAMTemplate import org.chipsalliance.cde.config.Parameters import coupledL2.prefetch.PfSource import freechips.rocketchip.tilelink.TLMessages._ +import freechips.rocketchip.util.SeqToAugmentedSeq class MetaEntry(implicit p: Parameters) extends L2Bundle { val dirty = Bool() @@ -98,6 +99,7 @@ class ReplacerResult(implicit p: Parameters) extends L2Bundle { val meta = new MetaEntry() val mshrId = UInt(mshrBits.W) val retry = Bool() + val validHold = Bool() } class MetaWrite(implicit p: Parameters) extends L2Bundle { @@ -108,7 +110,7 @@ class MetaWrite(implicit p: Parameters) extends L2Bundle { class TagWrite(implicit p: Parameters) extends L2Bundle { val set = UInt(setBits.W) - val way = UInt(wayBits.W) + val wayOH = UInt(cacheParams.ways.W) val wtag = UInt(tagBits.W) } @@ -134,13 +136,19 @@ class Directory(implicit p: Parameters) extends L2Module { val replResp = ValidIO(new ReplacerResult) // used to count occWays for Grant to retry val msInfo = Vec(mshrsAll, Flipped(ValidIO(new MSHRInfo))) + val metaOnHit = new MetaEntry() + val errOnSnp = Bool() + val wayOH = Output(UInt(cacheParams.ways.W)) + val replWayOH = Output(UInt(cacheParams.ways.W)) + val cmoHitInvalid = Output(Bool()) }) - def invalid_way_sel(metaVec: Seq[MetaEntry], repl: UInt) = { + def invalid_way_sel(metaVec: Seq[MetaEntry]) = { val invalid_vec = metaVec.map(_.state === MetaData.INVALID) val has_invalid_way = Cat(invalid_vec).orR - val way = ParallelPriorityMux(invalid_vec.zipWithIndex.map(x => x._1 -> x._2.U(wayBits.W))) - (has_invalid_way, way) + val invalid_oh = MaskToOH(invalid_vec.asUInt) + val invalid_way = OHToUInt(invalid_oh) + (has_invalid_way, invalid_way, invalid_oh) // one-hot of invalid ways } val sets = cacheParams.sets @@ -184,12 +192,7 @@ class Directory(implicit p: Parameters) extends L2Module { val metaArray = Module(new SRAMTemplate(new MetaEntry, sets, ways, singlePort = true, hasMbist = mbist, hasSramCtl = hasSramCtl)) - val tagRead_s3 = Wire(Vec(ways, UInt(tagBits.W))) val metaRead = Wire(Vec(ways, new MetaEntry())) - val errorRead = Wire(Vec(ways, Bool())) - - val resetFinish = RegInit(false.B) - val resetIdx = RegInit((sets - 1).U) // Replacer val repl = ReplacementPolicy.fromString(cacheParams.replacement, ways) @@ -207,9 +210,11 @@ class Directory(implicit p: Parameters) extends L2Module { val reqValid_s3 = RegNext(reqValid_s2, false.B) val req_s2 = RegEnable(io.read.bits, 0.U.asTypeOf(io.read.bits), io.read.fire) val req_s3 = RegEnable(req_s2, 0.U.asTypeOf(req_s2), reqValid_s2) + val cmoWayOH_s3 = RegEnable(UIntToOH(req_s2.cmoWay), reqValid_s2) val refillReqValid_s2 = RegNext(io.read.fire && io.read.bits.refill, false.B) val refillReqValid_s3 = RegNext(refillReqValid_s2, false.B) + val refillReqValid_hold_s3 = RegEnable(refillReqValid_s2, false.B, !RegNext(refillReqValid_s2)) // Tag(ECC) R/W val tagWrite = if (enableTagECC) { @@ -219,31 +224,16 @@ class Directory(implicit p: Parameters) extends L2Module { io.tagWReq.bits.wtag } val tagRead = tagArray.io.r(io.read.fire, io.read.bits.set).resp.data - val bankTagRead = if (enableTagECC) { - tagRead.map(x => - Cat(VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i)(tagBankBits - 1, 0)))) - ) - } else { - tagRead + when (io.tagWReq.valid) { + assert(PopCount(io.tagWReq.bits.wayOH) === 1.U, "Tag write should be one-hot") } - tagRead_s3 := bankTagRead tagArray.io.w( tagWen, tagWrite, io.tagWReq.bits.set, - UIntToOH(io.tagWReq.bits.way) + io.tagWReq.bits.wayOH ) - val bankTagError = if (enableTagECC) { - tagRead.map(x => - VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i))). - map(tag => cacheParams.dataCode.decode(tag).error).reduce(_ | _) - ) - } else { - VecInit(Seq.fill(ways)(false.B)) - } - errorRead := bankTagError - // Meta R/W metaRead := metaArray.io.r(io.read.fire, io.read.bits.set).resp.data metaArray.io.w( @@ -254,8 +244,22 @@ class Directory(implicit p: Parameters) extends L2Module { ) val metaAll_s3 = RegEnable(metaRead, 0.U.asTypeOf(metaRead), reqValid_s2) - val tagAll_s3 = RegEnable(tagRead_s3, 0.U.asTypeOf(tagRead_s3), reqValid_s2) - val errorAll_s3 = RegEnable(errorRead, 0.U.asTypeOf(errorRead), reqValid_s2) + val tagRead_s3 = RegEnable(tagRead, reqValid_s2) + val tagAll_s3 = if (enableTagECC) { + tagRead_s3.map(x => + Cat(VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i)(tagBankBits - 1, 0)))) + ) + } else { + tagRead_s3 + } + val errorAll_s3 = if (enableTagECC) { + tagRead_s3.map(x => + VecInit(Seq.tabulate(tagBankSplit)(i => x(encTagBankBits * (i + 1) - 1, encTagBankBits * i))). + map(tag => cacheParams.dataCode.decode(tag).error).reduce(_ | _) + ) + } else { + VecInit(Seq.fill(ways)(false.B)) + } val tagMatchVec = tagAll_s3.map(_ (tagBits - 1, 0) === req_s3.tag) val metaValidVec = metaAll_s3.map(_.state =/= MetaData.INVALID) @@ -275,34 +279,37 @@ class Directory(implicit p: Parameters) extends L2Module { )).reduceTree(_ | _) val freeWayMask_s3 = RegEnable(~occWayMask_s2, refillReqValid_s2) - val refillRetry = !(freeWayMask_s3.orR) + val refillRetry = RegEnable(occWayMask_s2.andR, refillReqValid_s2) - val hitWay = OHToUInt(hitVec) + // val hitWay = OHToUInt(hitVec) + val hitOH = hitVec.asUInt + assert(PopCount(hitVec) <= 1.U, "Set should not have more than one hit") val replaceWay = WireInit(UInt(wayBits.W), 0.U) - val (inv, invalidWay) = invalid_way_sel(metaAll_s3, replaceWay) - val chosenWay = Mux(inv, invalidWay, replaceWay) - // if chosenWay not in wayMask, then choose a way in wayMask - // for retry bug fixing: if the chosenway cause retry last time, choose another way - /*val finalWay = Mux( - req_s3.wayMask(chosenWay), - chosenWay, - PriorityEncoder(req_s3.wayMask) - )*/ + val replaceOH = WireInit(UInt(ways.W), 0.U) + val (inv, invalidWay, invOH) = invalid_way_sel(metaAll_s3) + val chosenOH = Mux(inv, invOH, replaceOH) // for retry bug fixing: if the chosenway not in freewaymask, choose another way - // TODO: req_s3.wayMask not take into consideration - val finalWay = Mux( - freeWayMask_s3(chosenWay), - chosenWay, - PriorityEncoder(freeWayMask_s3) + val finalReplOH = Mux( + Mux1H(chosenOH, freeWayMask_s3), + chosenOH, + MaskToOH(freeWayMask_s3) ) val hit_s3 = Cat(hitVec).orR || req_s3.cmoAll - val way_s3 = Mux(req_s3.cmoAll, req_s3.cmoWay, Mux(hit_s3, hitWay, finalWay)) - val meta_s3 = metaAll_s3(way_s3) - val tag_s3 = tagAll_s3(way_s3) + val wayOH_s3 = Mux(req_s3.cmoAll, cmoWayOH_s3, Mux(hit_s3, hitOH, finalReplOH)) + val way_s3 = OHToUInt(wayOH_s3) + val meta_s3 = Mux1H(wayOH_s3, metaAll_s3) + val metaOnHit_s3 = Mux1H(hitOH, metaAll_s3) // only valid when hit + val tag_s3 = Mux1H(wayOH_s3, tagAll_s3) val set_s3 = req_s3.set val replacerInfo_s3 = req_s3.replacerInfo + val errorOnSNP_s3 = if (enableTagECC) { + Mux1H(hitOH, errorAll_s3) + } else { + false.B + } + val error_s3 = if (enableTagECC) { - errorAll_s3(way_s3) && reqValid_s3 && !req_s3.cmoAll && meta_s3.state =/= MetaData.INVALID + Mux1H(wayOH_s3, errorAll_s3) && reqValid_s3 && !req_s3.cmoAll && meta_s3.state =/= MetaData.INVALID } else { false.B } @@ -311,10 +318,13 @@ class Directory(implicit p: Parameters) extends L2Module { io.resp.bits.hit := hit_s3 io.resp.bits.way := way_s3 io.resp.bits.meta := meta_s3 + io.metaOnHit := metaOnHit_s3 io.resp.bits.tag := tag_s3 io.resp.bits.set := set_s3 io.resp.bits.error := error_s3 // depends on ECC + io.errOnSnp := errorOnSNP_s3 io.resp.bits.replacerInfo := replacerInfo_s3 + io.wayOH := wayOH_s3 dontTouch(io) dontTouch(metaArray.io) @@ -335,15 +345,19 @@ class Directory(implicit p: Parameters) extends L2Module { repl_state } - replaceWay := repl.get_replace_way(repl_state_s3) + replaceOH := repl.get_replace_way(repl_state_s3) + assert(PopCount(replaceOH) === 1.U, "Replacement way should be one-hot") + replaceWay := OHToUInt(replaceOH) io.replResp.valid := refillReqValid_s3 - io.replResp.bits.tag := tagAll_s3(finalWay) + io.replResp.bits.tag := Mux1H(finalReplOH, tagAll_s3) io.replResp.bits.set := req_s3.set - io.replResp.bits.way := finalWay - io.replResp.bits.meta := metaAll_s3(finalWay) + io.replResp.bits.way := OHToUInt(finalReplOH) + io.replResp.bits.meta := Mux1H(finalReplOH, metaAll_s3) io.replResp.bits.mshrId := req_s3.mshrId io.replResp.bits.retry := refillRetry + io.replResp.bits.validHold := refillReqValid_hold_s3 + io.replWayOH := finalReplOH /* ====== Update ====== */ // PLRU: update replacer only when A hit or refill, at stage 3 @@ -366,34 +380,26 @@ class Directory(implicit p: Parameters) extends L2Module { Some(Module(new SRAMTemplate(Bool(), sets, ways, singlePort = true, shouldReset = true, hasMbist = mbist, hasSramCtl = hasSramCtl))) val origin_bits_r = origin_bit_opt.get.io.r(io.read.fire, io.read.bits.set).resp.data val origin_bits_hold = Wire(Vec(ways, Bool())) - origin_bits_hold := HoldUnless(origin_bits_r, RegNext(io.read.fire, false.B)) - origin_bit_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, hit_s3, false.B), - Mux(resetFinish, req_s3.set, resetIdx), - UIntToOH(way_s3) - ) + origin_bits_hold := RegEnable(origin_bits_r, reqValid_s2) + origin_bit_opt.get.io.w(replacerWen, hit_s3, set_s3, wayOH_s3) val rrip_req_type = WireInit(0.U(4.W)) // [3]: 0-firstuse, 1-reuse; // [2]: 0-acquire, 1-release; // [1]: 0-non-prefetch, 1-prefetch; // [0]: 0-not-refill, 1-refill - rrip_req_type := Cat(origin_bits_hold(way_s3), + rrip_req_type := Cat(Mux1H(hitOH, origin_bits_hold), req_s3.replacerInfo.channel(2), - (!refillReqValid_s3 && req_s3.replacerInfo.channel(0) && req_s3.replacerInfo.opcode === Hint) || (req_s3.replacerInfo.channel(2) && metaAll_s3(way_s3).prefetch.getOrElse(false.B)) || (refillReqValid_s3 && req_s3.replacerInfo.refill_prefetch), + (!refillReqValid_s3 && req_s3.replacerInfo.channel(0) && req_s3.replacerInfo.opcode === Hint) || + (req_s3.replacerInfo.channel(2) && Mux1H(wayOH_s3, metaAll_s3).prefetch.getOrElse(false.B)) || + (refillReqValid_s3 && req_s3.replacerInfo.refill_prefetch), req_s3.refill ) private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "L2Directory", mbist) if(cacheParams.replacement == "srrip"){ - val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3, hit_s3, inv, rrip_req_type) + val next_state_s3 = repl.get_next_state(repl_state_s3, wayOH_s3, hit_s3, inv, rrip_req_type) val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) - replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, repl_init.asUInt), - Mux(resetFinish, set_s3, resetIdx), - 1.U - ) + replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) } else if(cacheParams.replacement == "drrip"){ // Set Dueling @@ -418,36 +424,22 @@ class Directory(implicit p: Parameters) extends L2Module { Mux(match_b, true.B, Mux(PSEL(9)===0.U, false.B, true.B))) // false.B - srrip, true.B - brrip - val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3, hit_s3, inv, repl_type, rrip_req_type) + val next_state_s3 = repl.get_next_state(repl_state_s3, wayOH_s3, hit_s3, inv, repl_type, rrip_req_type) val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) - replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, repl_init.asUInt), - Mux(resetFinish, set_s3, resetIdx), - 1.U - ) + replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) } else { val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3) - replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, 0.U), - Mux(resetFinish, set_s3, resetIdx), - 1.U - ) + replacer_sram_opt.get.io.w(replacerWen, next_state_s3, set_s3, 1.U) } + io.cmoHitInvalid := Mux1H(cmoWayOH_s3, metaAll_s3).state === MetaData.INVALID + /* ====== Reset ====== */ - when(resetIdx === 0.U) { - resetFinish := true.B - } - when(!resetFinish) { - resetIdx := resetIdx - 1.U - } XSPerfAccumulate("dirRead_cnt", io.read.fire) - XSPerfAccumulate("choose_busy_way", reqValid_s3 && !req_s3.wayMask(chosenWay)) + XSPerfAccumulate("choose_busy_way", reqValid_s3 && !Mux1H(chosenOH, req_s3.wayMask)) /* ====== ChiselDB logging for prefetcher lifecycle ====== */ if (cacheParams.enableMonitor && !cacheParams.FPGAPlatform) { @@ -462,7 +454,7 @@ class Directory(implicit p: Parameters) extends L2Module { val pfReqWriteEn = io.metaWReq.valid && wmeta.prefetch.getOrElse(false.B) val pfReqWrite = Wire(new PrefetchDbEntry) val writeHasTag = io.tagWReq.valid && (io.tagWReq.bits.set === io.metaWReq.bits.set) && - (OHToUInt(io.metaWReq.bits.wayOH) === io.tagWReq.bits.way) // try to attach tag when tagWReq coincides with metaWReq + (io.metaWReq.bits.wayOH === io.tagWReq.bits.wayOH) // try to attach tag when tagWReq coincides with metaWReq pfReqWrite.isHit := false.B //useless for write req, just set it to false.B pfReqWrite.setIdx := io.metaWReq.bits.set // when meta write, the set idx to be written @@ -487,7 +479,7 @@ class Directory(implicit p: Parameters) extends L2Module { // Eviction: when Directory issues a replacement for a prefetched block val evictBlockEn = io.replResp.valid && !io.replResp.bits.retry - val evictBlockMeta = metaAll_s3(finalWay) // meta of the block to be evicted + val evictBlockMeta = Mux1H(finalReplOH, metaAll_s3) // meta of the block to be evicted val pfReqEvictEn = evictBlockEn && evictBlockMeta.prefetch.getOrElse(false.B) val pfReqEvict = Wire(new PrefetchDbEntry) diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 6e7c05628..fa166d8ba 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -48,7 +48,8 @@ class RequestArb(implicit p: Parameters) extends L2Module /* send task to mainpipe */ val taskToPipe_s2 = ValidIO(new TaskBundle()) /* send s1 task info to mainpipe to help hint */ - val taskInfo_s1 = ValidIO(new TaskBundle()) + val mshrHintQInfo = ValidIO(new TaskBundle) + val sinkCHintQInfo = ValidIO(new TaskBundle) /* send mshrBuf read request */ val refillBufRead_s2 = ValidIO(new MSHRBufRead) @@ -168,8 +169,10 @@ class RequestArb(implicit p: Parameters) extends L2Module s1_cango := task_s1.valid && !mshr_replRead_stall s1_fire := s1_cango && s2_ready - io.taskInfo_s1.valid := s1_fire - io.taskInfo_s1.bits := task_s1.bits + io.mshrHintQInfo.valid := mshr_task_s1.valid && !mshr_replRead_stall && s2_ready + io.mshrHintQInfo.bits := mshr_task_s1.bits + io.sinkCHintQInfo.valid := io.sinkC.fire + io.sinkCHintQInfo.bits := io.sinkC.bits /* Meta read request */ // ^ only sinkA/B/C tasks need to read directory @@ -185,8 +188,8 @@ class RequestArb(implicit p: Parameters) extends L2Module io.dirRead_s1.bits.replacerInfo.refill_prefetch := s1_needs_replRead && (mshr_task_s1.bits.opcode === HintAck && mshr_task_s1.bits.dsWen) io.dirRead_s1.bits.refill := s1_needs_replRead io.dirRead_s1.bits.mshrId := task_s1.bits.mshrId - io.dirRead_s1.bits.cmoAll := task_s1.bits.cmoAll - io.dirRead_s1.bits.cmoWay := task_s1.bits.way + io.dirRead_s1.bits.cmoAll := A_task.cmoAll + io.dirRead_s1.bits.cmoWay := A_task.way // block same-set A req io.s1Entrance.valid := mshr_task_s1.valid && s2_ready && mshr_task_s1.bits.metaWen || io.sinkC.fire || io.sinkB.fire diff --git a/src/main/scala/coupledL2/RequestBuffer.scala b/src/main/scala/coupledL2/RequestBuffer.scala index 2e5eb941c..be3180d5f 100644 --- a/src/main/scala/coupledL2/RequestBuffer.scala +++ b/src/main/scala/coupledL2/RequestBuffer.scala @@ -58,13 +58,13 @@ class ReqEntry(entries: Int = 4)(implicit p: Parameters) extends L2Bundle() { } -class ChosenQBundle(idWIdth: Int = 2)(implicit p: Parameters) extends L2Bundle { +class ChosenQBundle(idOHWIdth: Int = 2)(implicit p: Parameters) extends L2Bundle { val bits = new ReqEntry() - val id = UInt(idWIdth.W) + val idOH = UInt(idOHWIdth.W) } class AMergeTask(implicit p: Parameters) extends L2Bundle { - val id = UInt(mshrBits.W) + val idOH = UInt(mshrsAll.W) val task = new TaskBundle() } @@ -98,8 +98,9 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete io.ASet := io.in.bits.set val buffer = RegInit(VecInit(Seq.fill(entries)(0.U.asTypeOf(new ReqEntry)))) - val issueArb = Module(new FastArbiter(new ReqEntry, entries)) - val chosenQ = Module(new Queue(new ChosenQBundle(log2Ceil(entries)), entries = 1, pipe = true, flow = false)) + val issueArb = Module(new TwoLevelRRArbiter(new ReqEntry, entries)) + ArbPerf(issueArb, "issueArb") + val chosenQ = Module(new Queue(new ChosenQBundle(entries), entries = 1, pipe = true, flow = false)) val chosenQValid = chosenQ.io.deq.valid /* ======== Enchantment ======== */ @@ -115,6 +116,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete def conflictMask(a: TaskBundle): UInt = VecInit(io.mshrInfo.map(s => s.valid && addrConflict(a, s.bits) && !s.bits.willFree)).asUInt def conflict(a: TaskBundle): Bool = conflictMask(a).orR + def parallelConflict(a: TaskBundle): Bool = ParallelOR(conflictMask(a).asBools) def conflictMaskFromA(a: TaskBundle): UInt = conflictMask(a) & VecInit(io.mshrInfo.map(_.bits.fromA)).asUInt @@ -125,7 +127,8 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete a.fromA && (a.opcode === AcquireBlock || a.opcode === AcquirePerm) )) val matched = matchVec.asUInt.orR - val matchSrc = ParallelPriorityMux(matchVec, io.mshrInfo.map(_.bits.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U))) + assert(PopCount(matchVec) <= 1.U, "Multiple late prefetch MSHRs matched") + val matchSrc = Mux1H(matchVec, io.mshrInfo.map(_.bits.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U))) (matched, matchSrc) } @@ -144,21 +147,21 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete val in = io.in.bits val full = Cat(buffer.map(_.valid)).andR - // - val mshrConflictMask = conflictMask(in) - val mshrConflictMaskFromA = conflictMaskFromA(in) - dontTouch(mshrConflictMask) - dontTouch(mshrConflictMaskFromA) + + // val mshrConflictMask = conflictMask(in) + // val mshrConflictMaskFromA = conflictMaskFromA(in) + // dontTouch(mshrConflictMask) + // dontTouch(mshrConflictMaskFromA) // incoming Acquire can be merged with late_pf MSHR block - val mergeAMask = VecInit(io.mshrInfo.map(s => - s.valid && s.bits.isPrefetch && sameAddr(in, s.bits) && !s.bits.willFree && !s.bits.dirHit && !s.bits.s_refill && + val mergeAMask = VecInit(io.mshrInfo.map { case s => + val mshrInflight = !(s.bits.w_grantlast && s.bits.w_grant) + s.valid && s.bits.isPrefetch && sameAddr(in, s.bits) && !s.bits.dirHit && mshrInflight && in.fromA && (in.opcode === AcquireBlock || in.opcode === AcquirePerm) && !s.bits.mergeA && !(in.param === NtoT && s.bits.param === NtoB) - )).asUInt + }).asUInt val mergeA = mergeAMask.orR - val mergeAId = OHToUInt(mergeAMask) io.aMergeTask.valid := io.in.valid && mergeA - io.aMergeTask.bits.id := mergeAId + io.aMergeTask.bits.idOH := mergeAMask io.aMergeTask.bits.task := in /* @@ -177,7 +180,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete def noFreeWay(task: TaskBundle): Bool = noFreeWayForSet(task.set) // flow not allowed when full, or entries might starve - val canFlow = flow.B && !full && !conflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR && !noFreeWay(in) + val canFlow = flow.B && !full && !parallelConflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR && !noFreeWay(in) val doFlow = canFlow && io.out.ready // val depMask = buffer.map(e => e.valid && sameAddr(io.in.bits, e.task)) @@ -205,37 +208,33 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete /* ======== Alloc ======== */ io.in.ready := !full || doFlow || mergeA || dup - val insertIdx = PriorityEncoder(buffer.map(!_.valid)) + val insertOH = MaskToOH(buffer.map(!_.valid)) val alloc = !full && io.in.valid && !doFlow && !dup && !mergeA - when(alloc){ - val entry = buffer(insertIdx) - val mpBlock = Cat(io.mainPipeBlock).orR - val pipeBlockOut = io.out.fire && sameSet(in, io.out.bits) - val probeBlock = io.s1Entrance.valid && io.s1Entrance.bits.set === in.set // wait for same-addr req to enter MSHR - val s1Block = pipeBlockOut || probeBlock - - entry.valid := true.B - // when Addr-Conflict / Same-Addr-Dependent / MainPipe-Block / noFreeWay-in-Set, entry not ready - entry.rdy := !conflict(in) && !mpBlock && !s1Block && !noFreeWay(in)// && !Cat(depMask).orR - entry.task := io.in.bits - entry.waitMP := Cat( - s1Block, - io.mainPipeBlock(0), - io.mainPipeBlock(1), - 0.U(1.W)) - entry.waitMS := conflictMask(in) - -// entry.depMask := depMask - assert(PopCount(conflictMaskFromA(in)) <= 2.U) + buffer.zip(insertOH.asBools).foreach { case (entry, sel) => + when(alloc && sel){ + val mpBlock = Cat(io.mainPipeBlock).orR + val pipeBlockOut = io.out.fire && sameSet(in, io.out.bits) + val probeBlock = io.s1Entrance.valid && io.s1Entrance.bits.set === in.set // wait for same-addr req to enter MSHR + val s1Block = pipeBlockOut || probeBlock + + entry.valid := true.B + // when Addr-Conflict / Same-Addr-Dependent / MainPipe-Block / noFreeWay-in-Set, entry not ready + entry.rdy := !conflict(in) && !mpBlock && !s1Block && !noFreeWay(in)// && !Cat(depMask).orR + entry.task := io.in.bits + entry.waitMP := Cat( + s1Block, + io.mainPipeBlock(0), + io.mainPipeBlock(1), + 0.U(1.W)) + entry.waitMS := conflictMask(in) + assert(PopCount(conflictMaskFromA(in)) <= 2.U) + } } /* ======== Issue ======== */ issueArb.io.in zip buffer foreach { case(in, e) => - // when io.out.valid, we temporarily stall all entries of the same set - val pipeBlockOut = io.out.valid && sameSet(e.task, io.out.bits) - - in.valid := e.valid && e.rdy && !pipeBlockOut + in.valid := e.valid && e.rdy in.bits := e } @@ -245,7 +244,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete // in such case, we need a place to save it chosenQ.io.enq.valid := issueArb.io.out.valid chosenQ.io.enq.bits.bits := issueArb.io.out.bits - chosenQ.io.enq.bits.id := issueArb.io.chosen + chosenQ.io.enq.bits.idOH := issueArb.io.chosenOH issueArb.io.out.ready := chosenQ.io.enq.ready /* ======== Update rdy and masks ======== */ @@ -294,14 +293,20 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete // when entry.rdy is no longer true, // we cancel req in chosenQ, with the entry still held in buffer to issue later // val cancel = (canFlow && sameSet(chosenQ.io.deq.bits.bits.task, io.in.bits)) || !buffer(chosenQ.io.deq.bits.id).rdy - val cancel = !buffer(chosenQ.io.deq.bits.id).rdy + val cancel = !Mux1H(chosenQ.io.deq.bits.idOH, buffer.map(_.rdy)) chosenQ.io.deq.ready := io.out.ready || cancel io.out.valid := chosenQValid && !cancel || io.in.valid && canFlow - io.out.bits := Mux(canFlow, io.in.bits, chosenQ.io.deq.bits.bits.task) + io.out.bits := { + if (!flow) chosenQ.io.deq.bits.bits.task + else Mux(chosenQValid, chosenQ.io.deq.bits.bits.task, io.in.bits) + } - when(chosenQ.io.deq.fire && !cancel) { - buffer(chosenQ.io.deq.bits.id).valid := false.B + buffer.zip(chosenQ.io.deq.bits.idOH.asBools).foreach { + case (e, y) => + when(chosenQ.io.deq.fire && y && !cancel) { + e.valid := false.B + } } // for Dir to choose a free way diff --git a/src/main/scala/coupledL2/SinkC.scala b/src/main/scala/coupledL2/SinkC.scala index 81961860b..b88f0e339 100644 --- a/src/main/scala/coupledL2/SinkC.scala +++ b/src/main/scala/coupledL2/SinkC.scala @@ -22,7 +22,8 @@ import chisel3.util._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import org.chipsalliance.cde.config.Parameters -import utility.{MemReqSource, XSPerfAccumulate, RRArbiterInit} +import utility.{MemReqSource, XSPerfAccumulate, TwoLevelRRArbiter, ArbPerf} +import coupledL2.utils._ class PipeBufferResp(implicit p: Parameters) extends L2Bundle { val data = Vec(beatSize, UInt((beatBytes * 8).W)) @@ -54,8 +55,9 @@ class SinkC(implicit p: Parameters) extends L2Module { val dataValids = VecInit(beatValids.map(_.asUInt.orR)).asUInt val taskBuf = RegInit(VecInit(Seq.fill(bufBlocks)(0.U.asTypeOf(new TaskBundle)))) val taskValids = RegInit(VecInit(Seq.fill(bufBlocks)(false.B))) - val taskArb = Module(new RRArbiterInit(new TaskBundle, bufBlocks)) + val taskArb = Module(new TwoLevelRRArbiter(new TaskBundle, bufBlocks)) val bufValids = taskValids.asUInt | dataValids + ArbPerf(taskArb, "taskArb") val full = bufValids.andR val noSpace = full && hasData diff --git a/src/main/scala/coupledL2/SourceB.scala b/src/main/scala/coupledL2/SourceB.scala index 5fb75629b..713036f5b 100644 --- a/src/main/scala/coupledL2/SourceB.scala +++ b/src/main/scala/coupledL2/SourceB.scala @@ -98,7 +98,8 @@ class SourceB(implicit p: Parameters) extends L2Module { } /* ======== Issue ======== */ - val issueArb = Module(new FastArbiter(new SourceBReq, entries)) + val issueArb = Module(new TwoLevelRRArbiter(new SourceBReq, entries)) + ArbPerf(issueArb, "issueArb") issueArb.io.in zip probes foreach{ case (i, p) => i.valid := p.valid && p.rdy diff --git a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala index 7e9bf19a1..d3c971020 100644 --- a/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala +++ b/src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala @@ -26,7 +26,7 @@ package coupledL2.prefetch -import utility.{ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, RRArbiterInit, XSPerfAccumulate} +import utility.{ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, XSPerfAccumulate, TwoLevelRRArbiter, ArbPerf} import utility.sram.SRAMTemplate import org.chipsalliance.cde.config.Parameters import chisel3.DontCare.:= @@ -36,6 +36,7 @@ import coupledL2.{HasCoupledL2Parameters, L2TlbReq, L2ToL1TlbIO, TlbCmd, Pbmt} import coupledL2.utils.ReplacementPolicy import scopt.Read import freechips.rocketchip.util.SeqToAugmentedSeq +import coupledL2.utils._ case class BOPParameters( virtualTrain: Boolean = true, @@ -418,8 +419,10 @@ class PrefetchReqBuffer(name: String = "vbop")(implicit p: Parameters) extends B val valids = Seq.fill(REQ_FILTER_SIZE)(RegInit(false.B)) val entries = Seq.fill(REQ_FILTER_SIZE)(Reg(new BopReqBufferEntry)) //val replacement = ReplacementPolicy.fromString("plru", REQ_FILTER_SIZE) - val tlb_req_arb = Module(new RRArbiterInit(new L2TlbReq, REQ_FILTER_SIZE)) - val pf_req_arb = Module(new RRArbiterInit(new PrefetchReq, REQ_FILTER_SIZE)) + val tlb_req_arb = Module(new TwoLevelRRArbiter(new L2TlbReq, REQ_FILTER_SIZE)) + val pf_req_arb = Module(new TwoLevelRRArbiter(new PrefetchReq, REQ_FILTER_SIZE)) + ArbPerf(tlb_req_arb, "bop_tlb_req_arb") + ArbPerf(pf_req_arb, "bop_pf_req_arb") def wayMap[T <: Data](f: Int => T) = VecInit((0 until REQ_FILTER_SIZE).map(f)) @@ -459,9 +462,15 @@ class PrefetchReqBuffer(name: String = "vbop")(implicit p: Parameters) extends B tlb_req_arb.io.out.ready := true.B io.tlb_req.req.valid := RegNext(tlb_req_arb.io.out.valid) io.tlb_req.req.bits := RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid) + io.tlb_req.req.bits.cmd := TlbCmd.read + io.tlb_req.req.bits.size := 3.U + io.tlb_req.req.bits.kill := false.B + io.tlb_req.req.bits.no_translate := false.B + io.tlb_req.req.bits.isPrefetch := true.B io.tlb_req.req_kill := false.B io.tlb_req.resp.ready := true.B io.out_req <> pf_req_arb.io.out + io.out_req.bits.pfSource := MemReqSource.Prefetch2L2BOP.id.U /* s0: entries look up */ val prev_in_valid = RegNext(io.in_req.valid, false.B) @@ -561,12 +570,8 @@ class PrefetchReqBuffer(name: String = "vbop")(implicit p: Parameters) extends B /* tlb & pf */ for((e, i) <- entries.zipWithIndex){ tlb_req_arb.io.in(i).valid := valids(i) && !e.paddrValid && !s1_tlb_fire_oh(i) && !s2_tlb_fire_oh(i) && !s3_tlb_fire_oh(i) && !e.replayCnt.orR + tlb_req_arb.io.in(i).bits := 0.U.asTypeOf(tlb_req_arb.io.in(i).bits) tlb_req_arb.io.in(i).bits.vaddr := e.get_tlb_vaddr() - tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read - tlb_req_arb.io.in(i).bits.size := 3.U - tlb_req_arb.io.in(i).bits.kill := false.B - tlb_req_arb.io.in(i).bits.no_translate := false.B - tlb_req_arb.io.in(i).bits.isPrefetch := true.B pf_req_arb.io.in(i).valid := can_send_pf(i) pf_req_arb.io.in(i).bits := e.toPrefetchReq() diff --git a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala index 410d000cc..fece0af4e 100644 --- a/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/MMIOBridge.scala @@ -27,6 +27,7 @@ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import coupledL2.HasCoupledL2Parameters import coupledL2.{MemBackTypeMM, MemPageTypeNC} +import coupledL2.utils._ class MMIOBridge()(implicit p: Parameters) extends LazyModule with HasCoupledL2Parameters @@ -89,6 +90,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo val id = Input(UInt()) val pCrd = new PCrdQueryBundle val waitOnReadReceipt = Option.when(needRR)(Output(Bool())) + val isRead = Output(Bool()) // only for better timing }) val s_txreq = RegInit(true.B) @@ -115,7 +117,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo val denied = Reg(Bool()) val corrupt = Reg(Bool()) val traceTag = Reg(Bool()) - val isRead = req.opcode === Get + val isRead = RegEnable(io.req.bits.opcode === Get, false.B, io.req.fire) val isBackTypeMM = req.user.lift(MemBackTypeMM).getOrElse(false.B) val isPageTypeNC = req.user.lift(MemPageTypeNC).getOrElse(false.B) @@ -228,11 +230,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo txreq.bits.qos := Fill(QOS_WIDTH, 1.U(1.W)) - 1.U txreq.bits.tgtID := SAM(sam).lookup(txreq.bits.addr) txreq.bits.txnID := io.id - txreq.bits.opcode := ParallelLookUp(req.opcode, Seq( - Get -> ReadNoSnp, - PutFullData -> WriteNoSnpPtl, - PutPartialData -> WriteNoSnpPtl - )) + txreq.bits.opcode := Mux(isRead, ReadNoSnp, WriteNoSnpPtl) txreq.bits.size := req.size txreq.bits.addr := req.address txreq.bits.ns := enableNS.B @@ -325,6 +323,7 @@ class MMIOBridgeEntry(edge: TLEdgeIn)(implicit p: Parameters) extends TL2CHIL2Mo io.pCrd.query.bits.srcID := srcID io.waitOnReadReceipt.foreach(_ := !w_readreceipt.get && s_txreq) + io.isRead := isRead /** * performance counters @@ -383,13 +382,16 @@ class MMIOBridgeImp(outer: MMIOBridge) extends LazyModuleImp(outer) entry.io.id := i.U } - val txreqArb = Module(new RRArbiterInit(chiselTypeOf(io.tx.req.bits), mmioBridgeSize)) - for ((a, req) <- txreqArb.io.in.zip(entries.map(_.io.chi.tx.req))) { - a <> req - val isReadNoSnp = req.bits.opcode === ReadNoSnp - val block = isReadNoSnp && waitOnReadReceipt + val txreqArb = Module(new TwoLevelRRArbiter(chiselTypeOf(io.tx.req.bits), mmioBridgeSize)) + ArbPerf(txreqArb, "mmio_txreq_arb") + for ((a, entry) <- txreqArb.io.in.zip(entries)) { + val req = entry.io.chi.tx.req + val isRead = entry.io.isRead + val block = isRead && waitOnReadReceipt + assert(!req.valid || !isRead || req.bits.opcode === ReadNoSnp) req.ready := a.ready && !block a.valid := req.valid && !block + a.bits := req.bits } io.tx.req <> txreqArb.io.out // arb(entries.map(_.io.chi.tx.req), io.tx.req, Some("mmio_txreq")) diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 7773465e9..b536c1564 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -806,15 +806,14 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { mp_grant.reqSource := 0.U(MemReqSource.reqSourceBits.W) // Add merge grant task for Acquire and late Prefetch - mp_grant.mergeA := mergeA || io.aMergeTask.valid - val merge_task_r = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) - val merge_task = Mux(io.aMergeTask.valid, io.aMergeTask.bits, merge_task_r) - val merge_task_isKeyword = Mux(io.aMergeTask.valid, io.aMergeTask.bits.isKeyword.getOrElse(false.B), merge_task_r.isKeyword.getOrElse(false.B) ) + mp_grant.mergeA := mergeA + + val merge_task = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) mp_grant.aMergeTask.off := merge_task.off mp_grant.aMergeTask.alias.foreach(_ := merge_task.alias.getOrElse(0.U)) mp_grant.aMergeTask.vaddr.foreach(_ := merge_task.vaddr.getOrElse(0.U)) - mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task_isKeyword) + mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task.isKeyword.getOrElse(false.B)) mp_grant.aMergeTask.opcode := odOpGen(merge_task.opcode) mp_grant.aMergeTask.param := MuxLookup( // Acquire -> Grant merge_task.param, @@ -1340,6 +1339,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { io.msInfo.bits.param := req.param io.msInfo.bits.mergeA := mergeA io.msInfo.bits.w_grantfirst := state.w_grantfirst + io.msInfo.bits.w_grantlast := state.w_grantlast + io.msInfo.bits.w_grant := state.w_grant io.msInfo.bits.s_release := state.s_release io.msInfo.bits.s_refill := state.s_refill io.msInfo.bits.s_cmoresp := state.s_cmoresp diff --git a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala index 70526da9a..a81e1d1ae 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHRCtl.scala @@ -26,6 +26,7 @@ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ import coupledL2.prefetch.PrefetchTrain import coupledL2._ +import coupledL2.utils._ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes with HasPerfEvents { val io = IO(new Bundle() { @@ -148,7 +149,7 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.msInfo(i) := m.io.msInfo m.io.nestedwb := io.nestedwb - m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.id === i.U + m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.idOH(i) m.io.aMergeTask.bits := io.aMergeTask.bits.task io.pCrd(i) <> m.io.pCrd @@ -161,19 +162,23 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.toReqArb.blockG_s1 := false.B /* Acquire downwards to TXREQ*/ - fastArb(mshrs.map(_.io.tasks.txreq), io.toTXREQ, Some("txreq")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.txreq), io.toTXREQ, Some("txreq")), "txreq_arb") /* Response downwards to TXRSP*/ - fastArb(mshrs.map(_.io.tasks.txrsp), io.toTXRSP, Some("txrsp")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.txrsp), io.toTXRSP, Some("txrsp")), "txrsp_arb") /* Probe upwards */ val sourceB = Module(new SourceB()) - fastArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")), "source_b_arb") sourceB.io.grantStatus := io.grantStatus io.toSourceB <> sourceB.io.sourceB /* Arbitrate MSHR task to RequestArbiter */ - fastArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")) + val mshrTask = Wire(Decoupled(new TaskBundle())) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.mainpipe), mshrTask, Some("mshr_task")), "mshr_task_arb") + io.mshrTask <> mshrTask + io.mshrTask.bits.mshrId := OHToUInt(mshrs.map(_.io.tasks.mainpipe.fire)) + assert(Mux(io.mshrTask.fire, io.mshrTask.bits.mshrId === mshrTask.bits.mshrId, true.B), "mshrId should be consistent") /* releaseBuf link to MSHR id */ io.releaseBufWriteId := ParallelPriorityMux(resp_sinkC_match_vec, (0 until mshrsAll).map(i => i.U)) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 75b0252b6..c5858aad5 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -34,7 +34,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* receive task from arbiter at stage 2 */ val taskFromArb_s2 = Flipped(ValidIO(new TaskBundle())) /* status from arbiter at stage1 */ - val taskInfo_s1 = Flipped(ValidIO(new TaskBundle())) + val mshrHintQInfo = Flipped(ValidIO(new TaskBundle)) + val sinkCHintQInfo = Flipped(ValidIO(new TaskBundle)) /* handle set conflict in req arb */ val fromReqArb = Input(new Bundle() { @@ -53,6 +54,11 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* get dir result at stage 3 */ val dirResp_s3 = Input(new DirResult()) + val metaOnHit_s3 = Input(new MetaEntry()) + val errOnSnp_s3 = Input(Bool()) + val dirWayOH_s3 = Input(UInt(cacheParams.ways.W)) + val dirReplWayOH_s3 = Input(UInt(cacheParams.ways.W)) + val cmoHitInvalid = Input(Bool()) val replResp = Flipped(ValidIO(new ReplacerResult())) /* send task to MSHRCtl at stage 3 */ @@ -137,6 +143,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* ======== Stage 2 ======== */ val task_s2 = io.taskFromArb_s2 + val reqWayOH_s2 = UIntToOH(task_s2.bits.way) /* ======== Stage 3 ======== */ val task_s3 = RegInit(0.U.asTypeOf(Valid(new TaskBundle))) @@ -144,12 +151,14 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes when (task_s2.valid) { task_s3.bits := task_s2.bits } + val reqWayOH_s3 = RegEnable(reqWayOH_s2, task_s2.valid) /* ======== Enchantment ======== */ val dirResult_s3 = io.dirResp_s3 val meta_s3 = dirResult_s3.meta + val metaOnHit_s3 = io.metaOnHit_s3 val req_s3 = task_s3.bits - val cmoHitInvalid = io.cmoAllBlock.getOrElse(false.B) && (meta_s3.state === INVALID) + val cmoHitInvalid = io.cmoAllBlock.getOrElse(false.B) && io.cmoHitInvalid val mshr_req_s3 = req_s3.mshrTask val sink_req_s3 = !mshr_req_s3 @@ -191,19 +200,21 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val mshr_cbWrData_s3 = mshr_req_s3 && req_s3.toTXDAT && req_s3.chiOpcode.get === CopyBackWrData val meta_has_clients_s3 = meta_s3.clients.orR + val metaOnHit_has_clients_s3 = metaOnHit_s3.clients.orR val req_needT_s3 = needT(req_s3.opcode, req_s3.param) val cmo_cbo_retention_s3 = req_cbo_clean_s3 || req_cbo_flush_s3 val cmo_cbo_s3 = req_cbo_clean_s3 || req_cbo_flush_s3 || req_cbo_inval_s3 - val cache_alias = req_acquire_s3 && dirResult_s3.hit && meta_s3.clients(0) && - meta_s3.alias.getOrElse(0.U) =/= req_s3.alias.getOrElse(0.U) + val cache_alias = req_acquire_s3 && dirResult_s3.hit && metaOnHit_s3.clients(0) && + metaOnHit_s3.alias.getOrElse(0.U) =/= req_s3.alias.getOrElse(0.U) // *NOTICE: 'nestable_*' must not be used in A Channel related logics. val nestable_dirResult_s3 = Wire(chiselTypeOf(dirResult_s3)) val nestable_meta_s3 = nestable_dirResult_s3.meta val nestable_meta_has_clients_s3 = nestable_dirResult_s3.meta.clients.orR nestable_dirResult_s3 := dirResult_s3 + nestable_dirResult_s3.meta := metaOnHit_s3 when (req_s3.snpHitRelease) { // Meta states from MSHRs were considered as directory result here. // Therefore, meta states were always inferred to be hit when nesting release, no matter the fact that directory @@ -219,9 +230,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val l2Error_s3 = io.dirResp_s3.error val mshr_refill_s3 = mshr_accessackdata_s3 || mshr_hintack_s3 || mshr_grant_s3 // needs refill to L2 DS - val replResp_valid_s3 = io.replResp.valid - val replResp_valid_s4 = RegNext(io.replResp.valid, init = false.B) - val replResp_valid_hold = replResp_valid_s3 || replResp_valid_s4 + val replResp_valid_hold = io.replResp.bits.validHold val retry = replResp_valid_hold && io.replResp.bits.retry val need_repl = replResp_valid_hold && io.replResp.bits.meta.state =/= INVALID && req_s3.replTask @@ -229,22 +238,22 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // *NOTICE: A Channel requests should be blocked by RequestBuffer when MSHR nestable, // 'nestable_*' must not be used here. val acquire_on_miss_s3 = req_acquire_s3 || req_prefetch_s3 || req_get_s3 - val acquire_on_hit_s3 = meta_s3.state === BRANCH && req_needT_s3 && !req_prefetch_s3 + val acquire_on_hit_s3 = metaOnHit_s3.state === BRANCH && req_needT_s3 && !req_prefetch_s3 val need_acquire_s3_a = req_s3.fromA && (Mux( dirResult_s3.hit, acquire_on_hit_s3, acquire_on_miss_s3 ) || cmo_cbo_s3) - val need_probe_s3_a = dirResult_s3.hit && meta_has_clients_s3 && ( - req_get_s3 && (meta_s3.state === TRUNK) || - req_cbo_clean_s3 && (meta_s3.state === TRUNK) || + val need_probe_s3_a = dirResult_s3.hit && metaOnHit_has_clients_s3 && ( + req_get_s3 && (metaOnHit_s3.state === TRUNK) || + req_cbo_clean_s3 && (metaOnHit_s3.state === TRUNK) || req_cbo_flush_s3 || req_cbo_inval_s3 ) val need_release_s3_a = dirResult_s3.hit && ( - req_cbo_clean_s3 && (!need_probe_s3_a && meta_s3.dirty) || - req_cbo_flush_s3 && (isValid(meta_s3.state)) || - req_cbo_inval_s3 && (isValid(meta_s3.state)) + req_cbo_clean_s3 && (!need_probe_s3_a && metaOnHit_s3.dirty) || + req_cbo_flush_s3 && (isValid(metaOnHit_s3.state)) || + req_cbo_inval_s3 && (isValid(metaOnHit_s3.state)) ) val need_cmoresp_s3_a = cmo_cbo_s3 val need_compack_s3_a = !cmo_cbo_s3 @@ -267,22 +276,24 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes */ // whether L2 should do forwarding or not val expectFwd = isSnpXFwd(req_s3.chiOpcode.get) - val canFwd = nestable_dirResult_s3.hit && !(nestable_dirResult_s3.meta.tagErr || nestable_dirResult_s3.error) + val canFwd = nestable_dirResult_s3.hit && + !(Mux(req_s3.snpHitRelease, req_s3.snpHitReleaseMeta.tagErr, io.metaOnHit_s3.tagErr) || io.errOnSnp_s3) val doFwd = expectFwd && canFwd val need_pprobe_s3_b_snpStable = req_s3.fromB && ( isSnpOnceX(req_s3.chiOpcode.get) || isSnpQuery(req_s3.chiOpcode.get) || isSnpStashX(req_s3.chiOpcode.get) - ) && dirResult_s3.hit && meta_s3.state === TRUNK && meta_has_clients_s3 + ) && dirResult_s3.hit && metaOnHit_s3.state === TRUNK && metaOnHit_has_clients_s3 val need_pprobe_s3_b_snpToB = req_s3.fromB && ( isSnpToB(req_s3.chiOpcode.get) || req_s3.chiOpcode.get === SnpCleanShared - ) && dirResult_s3.hit && meta_s3.state === TRUNK && meta_has_clients_s3 + ) && dirResult_s3.hit && metaOnHit_s3.state === TRUNK && metaOnHit_has_clients_s3 val need_pprobe_s3_b_snpToN = req_s3.fromB && ( isSnpUniqueX(req_s3.chiOpcode.get) || req_s3.chiOpcode.get === SnpCleanInvalid || isSnpMakeInvalidX(req_s3.chiOpcode.get) - ) && dirResult_s3.hit && meta_has_clients_s3 - val need_pprobe_s3_b_snpNDERR = req_s3.fromB && tagError_s3 && dirResult_s3.hit - val need_pprobe_s3_b = need_pprobe_s3_b_snpStable || need_pprobe_s3_b_snpToB || need_pprobe_s3_b_snpToN || need_pprobe_s3_b_snpNDERR + ) && dirResult_s3.hit && metaOnHit_has_clients_s3 + val need_pprobe_s3_b_snpNDERR = req_s3.fromB && (io.errOnSnp_s3 || io.metaOnHit_s3.tagErr) && dirResult_s3.hit + val need_pprobe_s3_b = need_pprobe_s3_b_snpStable || need_pprobe_s3_b_snpToB || + need_pprobe_s3_b_snpToN || need_pprobe_s3_b_snpNDERR val need_dct_s3_b = doFwd // DCT val need_mshr_s3_b = need_pprobe_s3_b || need_dct_s3_b @@ -305,7 +316,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* ======== Resps to SinkA/B/C Reqs ======== */ val sink_resp_s3 = WireInit(0.U.asTypeOf(Valid(new TaskBundle))) - val sink_resp_s3_a_promoteT = dirResult_s3.hit && isT(meta_s3.state) + val sink_resp_s3_a_promoteT = dirResult_s3.hit && isT(metaOnHit_s3.state) // whether L2 should respond data to HN or not val retToSrc = req_s3.retToSrc.getOrElse(false.B) @@ -482,22 +493,18 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val wen = wen_c || wen_mshr // This is to let io.toDS.req_s3.valid hold for 2 cycles (see DataStorage for details) - val task_s3_valid_hold2 = RegInit(0.U(2.W)) - when(task_s2.valid) { - task_s3_valid_hold2 := "b11".U - }.otherwise { - task_s3_valid_hold2 := task_s3_valid_hold2 >> 1.U - } + val task_s3_valid_hold2 = RegEnable(task_s2.valid, false.B, !RegNext(task_s2.valid)) - io.toDS.en_s3 := task_s3.valid && (ren || wen) - io.toDS.req_s3.valid := task_s3_valid_hold2(0) && (ren || wen) + io.toDS.en_s3 := task_s3.valid + io.toDS.req_s3.valid := task_s3_valid_hold2 io.toDS.req_s3.bits.way := Mux( mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, Mux(mshr_req_s3, req_s3.way, dirResult_s3.way) ) - io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) + io.toDS.req_s3.bits.set := req_s3.set io.toDS.req_s3.bits.wen := wen + io.toDS.req_s3.bits.ren := ren io.toDS.wdata_s3.data := Mux( !mshr_req_s3, c_releaseData_s3, @@ -531,7 +538,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val metaW_valid_s3_b = sinkB_req_s3 && !need_mshr_s3_b && dirResult_s3.hit && (!isSnpOnce(req_s3.chiOpcode.get) || (req_s3.snpHitReleaseToClean && req_s3.snpHitReleaseMeta.dirty)) && !isSnpStashX(req_s3.chiOpcode.get) && !isSnpQuery(req_s3.chiOpcode.get) && ( - meta_s3.state === TIP || meta_s3.state === BRANCH && isSnpToN(req_s3.chiOpcode.get) + metaOnHit_s3.state === TIP || metaOnHit_s3.state === BRANCH && isSnpToN(req_s3.chiOpcode.get) ) val metaW_valid_s3_c = sinkC_req_s3 && dirResult_s3.hit val metaW_valid_s3_mshr = mshr_req_s3 && req_s3.metaWen && !(mshr_refill_s3 && retry) @@ -540,37 +547,37 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val metaW_s3_a_alias = Mux( req_get_s3 || req_prefetch_s3, - meta_s3.alias.getOrElse(0.U), + metaOnHit_s3.alias.getOrElse(0.U), req_s3.alias.getOrElse(0.U) ) val metaW_s3_a = MetaEntry( - dirty = meta_s3.dirty, - state = Mux(req_needT_s3 || sink_resp_s3_a_promoteT, TRUNK, meta_s3.state), + dirty = metaOnHit_s3.dirty, + state = Mux(req_needT_s3 || sink_resp_s3_a_promoteT, TRUNK, metaOnHit_s3.state), clients = Fill(clientBits, Mux(l2Error_s3, false.B, true.B)), alias = Some(metaW_s3_a_alias), accessed = true.B, - tagErr = meta_s3.tagErr, - dataErr = meta_s3.dataErr + tagErr = metaOnHit_s3.tagErr, + dataErr = metaOnHit_s3.dataErr ) val metaW_s3_b = Mux(isSnpToN(req_s3.chiOpcode.get), MetaEntry(), MetaEntry( dirty = false.B, - state = Mux(req_s3.chiOpcode.get === SnpCleanShared, meta_s3.state, BRANCH), - clients = meta_s3.clients, - alias = meta_s3.alias, - accessed = meta_s3.accessed, - tagErr = meta_s3.tagErr, - dataErr = meta_s3.dataErr + state = Mux(req_s3.chiOpcode.get === SnpCleanShared, metaOnHit_s3.state, BRANCH), + clients = metaOnHit_s3.clients, + alias = metaOnHit_s3.alias, + accessed = metaOnHit_s3.accessed, + tagErr = metaOnHit_s3.tagErr, + dataErr = metaOnHit_s3.dataErr ) ) val metaW_s3_c = MetaEntry( - dirty = meta_s3.dirty || wen_c, - state = Mux(isParamFromT(req_s3.param), TIP, meta_s3.state), + dirty = metaOnHit_s3.dirty || wen_c, + state = Mux(isParamFromT(req_s3.param), TIP, metaOnHit_s3.state), clients = Fill(clientBits, !isToN(req_s3.param)), - alias = meta_s3.alias, - accessed = meta_s3.accessed, - tagErr = Mux(wen_c, req_s3.denied, meta_s3.tagErr), - dataErr = Mux(wen_c, req_s3.corrupt, meta_s3.dataErr) // update error when write DS + alias = metaOnHit_s3.alias, + accessed = metaOnHit_s3.accessed, + tagErr = Mux(wen_c, req_s3.denied, metaOnHit_s3.tagErr), + dataErr = Mux(wen_c, req_s3.corrupt, metaOnHit_s3.dataErr) // update error when write DS ) // use merge_meta if mergeA val metaW_s3_mshr = WireInit(Mux(req_s3.mergeA, req_s3.aMergeTask.meta, req_s3.meta)) @@ -578,17 +585,17 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes metaW_s3_mshr.dataErr := req_s3.corrupt val metaW_s3_cmo = MetaEntry() // invalid the block - val metaW_way = Mux( + val metaW_wayOH = Mux( mshr_refill_s3 && req_s3.replTask, - io.replResp.bits.way, // grant always use replResp way - Mux(mshr_req_s3, req_s3.way, dirResult_s3.way) + io.dirReplWayOH_s3, // grant always use replResp way + Mux(mshr_req_s3, reqWayOH_s3, io.dirWayOH_s3) ) io.metaWReq.valid := !resetFinish || task_s3.valid && ( metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr || metaW_valid_s3_cmo ) io.metaWReq.bits.set := Mux(resetFinish, req_s3.set, resetIdx) - io.metaWReq.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) + io.metaWReq.bits.wayOH := Mux(resetFinish, metaW_wayOH, Fill(cacheParams.ways, true.B)) io.metaWReq.bits.wmeta := Mux( resetFinish, ParallelPriorityMux( @@ -600,7 +607,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.tagWReq.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry io.tagWReq.bits.set := req_s3.set - io.tagWReq.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) + io.tagWReq.bits.wayOH := Mux(mshr_refill_s3 && req_s3.replTask, io.dirReplWayOH_s3, reqWayOH_s3) io.tagWReq.bits.wtag := req_s3.tag sink_resp_s3_b_metaWen := metaW_valid_s3_b @@ -636,18 +643,18 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes val isTXRSP_s3 = Mux( mshr_req_s3, mshr_snpRespX_s3, - req_s3.fromB && !need_mshr_s3 && !hasData_s3 + req_s3.fromB && !need_mshr_s3_b && !hasData_s3 ) val isTXDAT_s3 = Mux( mshr_req_s3, mshr_snpRespDataX_s3 || mshr_cbWrData_s3 || mshr_dct_s3, - req_s3.fromB && !need_mshr_s3 && + req_s3.fromB && !need_mshr_s3_b && (doRespData && (!data_unready_s3 || req_s3.snpHitRelease && req_s3.snpHitReleaseWithData)) ) val isTXDAT_s3_ready = Mux( mshr_req_s3, mshr_snpRespDataX_s3 || mshr_cbWrData_s3 || mshr_dct_s3, - req_s3.fromB && !need_mshr_s3 && !txdat_s3_latch.B && + req_s3.fromB && !need_mshr_s3_b && !txdat_s3_latch.B && (doRespData && (!data_unready_s3 || req_s3.snpHitRelease && req_s3.snpHitReleaseWithData)) ) val isTXREQ_s3 = mshr_req_s3 && (mshr_writeBackFull_s3 || mshr_writeCleanFull_s3 || @@ -707,7 +714,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // train on request(with needHint flag) miss or hit on prefetched block // trigger train also in a_merge here train.valid := task_s3.valid && ((req_acquire_s3 || req_get_s3) && req_s3.needHint.getOrElse(false.B) && - (!dirResult_s3.hit || meta_s3.prefetch.get) || req_s3.mergeA) + (!dirResult_s3.hit || metaOnHit_s3.prefetch.get) || req_s3.mergeA) train.bits.tag := req_s3.tag train.bits.set := req_s3.set train.bits.needT := Mux( @@ -721,8 +728,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes pc := Mux(req_s3.mergeA, req_s3.aMergeTask.pc.getOrElse(0.U), req_s3.pc.getOrElse(0.U)) } train.bits.hit := Mux(req_s3.mergeA, true.B, dirResult_s3.hit) - train.bits.prefetched := Mux(req_s3.mergeA, true.B, meta_s3.prefetch.getOrElse(false.B)) - train.bits.pfsource := meta_s3.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) // TODO + train.bits.prefetched := Mux(req_s3.mergeA, true.B, metaOnHit_s3.prefetch.getOrElse(false.B)) + train.bits.pfsource := Mux(req_s3.mergeA, req_s3.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U), metaOnHit_s3.prefetchSrc.getOrElse(PfSource.NoWhere.id.U)) // TODO train.bits.reqsource := req_s3.reqSource } @@ -815,8 +822,8 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // TODO: check this val customL1Hint = Module(new CustomL1Hint) - customL1Hint.io.s1 := io.taskInfo_s1 - // customL1Hint.io.s2 := task_s2 + customL1Hint.io.mshrHintQInfo := io.mshrHintQInfo + customL1Hint.io.sinkCHintQInfo := io.sinkCHintQInfo customL1Hint.io.s3.task := task_s3 // overwrite opcode: if sinkReq can respond, use sink_resp_s3.bits.opcode = Grant/GrantData @@ -915,7 +922,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes // capacity control of TX channels val tx_task_s3 = Wire(Valid(new TaskBundle)) tx_task_s3.valid := task_s3.valid // TODO: review this - tx_task_s3.bits := source_req_s3 + tx_task_s3.bits := req_s3 val tasks = Seq(tx_task_s3, task_s4, task_s5) io.status_vec_toTX.zip(tasks).foreach { case (status, task) => status.valid := task.valid diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index d3d23fb44..8040204fa 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -111,11 +111,17 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] reqBuf.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2 mainPipe.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2 - mainPipe.io.taskInfo_s1 := reqArb.io.taskInfo_s1 + mainPipe.io.mshrHintQInfo := reqArb.io.mshrHintQInfo + mainPipe.io.sinkCHintQInfo := reqArb.io.sinkCHintQInfo mainPipe.io.fromReqArb.status_s1 := reqArb.io.status_s1 mainPipe.io.bufResp := sinkC.io.bufResp mainPipe.io.dirResp_s3 := directory.io.resp.bits mainPipe.io.replResp := directory.io.replResp + mainPipe.io.metaOnHit_s3 := directory.io.metaOnHit + mainPipe.io.errOnSnp_s3 := directory.io.errOnSnp + mainPipe.io.dirWayOH_s3 := directory.io.wayOH + mainPipe.io.dirReplWayOH_s3 := directory.io.replWayOH + mainPipe.io.cmoHitInvalid := directory.io.cmoHitInvalid mainPipe.io.fromMSHRCtl <> mshrCtl.io.toMainPipe mainPipe.io.bufResp := sinkC.io.bufResp mainPipe.io.refillBufResp_s3.valid := RegNext(refillBuf.io.r.valid, false.B) diff --git a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala index 38b51a56a..e5f03b8d7 100644 --- a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala +++ b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala @@ -19,7 +19,7 @@ package coupledL2.tl2chi import chisel3._ import chisel3.util._ -import utility.{FastArbiter, Pipeline, ParallelPriorityMux, RegNextN, RRArbiterInit, XSPerfAccumulate} +import utility.{Pipeline, ParallelPriorityMux, RegNextN, XSPerfAccumulate, TwoLevelRRArbiter, ArbPerf} import freechips.rocketchip.diplomacy._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.tilelink.TLMessages._ @@ -28,6 +28,7 @@ import org.chipsalliance.cde.config.{Parameters, Field} import scala.math.max import coupledL2._ import coupledL2.prefetch._ +import coupledL2.utils._ abstract class TL2CHIL2Bundle(implicit val p: Parameters) extends Bundle with HasCoupledL2Parameters @@ -128,7 +129,8 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base { slices match { case slices: Seq[Slice] => // TXREQ - val txreq_arb = Module(new RRArbiterInit(new CHIREQ, slices.size + 1)) // plus 1 for MMIO + val txreq_arb = Module(new TwoLevelRRArbiter(new CHIREQ, slices.size + 1)) // plus 1 for MMIO + ArbPerf(txreq_arb, "txreq_arb") val txreq = Wire(DecoupledIO(new CHIREQ)) slices.zip(txreq_arb.io.in.init).foreach { case (s, in) => in <> s.io.out.tx.req } txreq_arb.io.in.last <> mmio.io.tx.req @@ -200,7 +202,7 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base { arbPort } - fastArb(mshrPCrdArbIn, mshrPCrdArbOut, Some("pcrdgrant")) + ArbPerf(twoLevelArb(mshrPCrdArbIn, mshrPCrdArbOut, Some("pcrdgrant")), "pcrdgrant_arb") mshrPCrdGrants.zip(mshrPCrdArbGrants).foreach { case (grant, arb) => grant := arb } diff --git a/src/main/scala/coupledL2/tl2chi/TXDAT.scala b/src/main/scala/coupledL2/tl2chi/TXDAT.scala index 98e37a11b..1b71a9da1 100644 --- a/src/main/scala/coupledL2/tl2chi/TXDAT.scala +++ b/src/main/scala/coupledL2/tl2chi/TXDAT.scala @@ -62,13 +62,13 @@ class TXDAT(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes { val queueCnt = queue.io.count // TODO: this may be imprecise, review this later val pipeStatus_s1_s5 = io.pipeStatusVec - val pipeStatus_s1_s2 = pipeStatus_s1_s5.take(2) - val pipeStatus_s2 = pipeStatus_s1_s2.tail - val pipeStatus_s3_s5 = pipeStatus_s1_s5.drop(2) + val pipeStatus_s1_s3 = pipeStatus_s1_s5.take(3) + val pipeStatus_s2_s3 = pipeStatus_s1_s3.tail + val pipeStatus_s4_s5 = pipeStatus_s1_s5.drop(3) // inflightCnt equals the number of reqs on s2~s5 that may flow into TXDAT soon, plus queueCnt. // The calculation of inflightCnt might be imprecise and leads to false positive back pressue. - val inflightCnt = PopCount(Cat(pipeStatus_s3_s5.map(s => s.valid && s.bits.toTXDAT && (s.bits.fromB || s.bits.mshrTask)))) + - PopCount(Cat(pipeStatus_s2.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXDAT, s.bits.fromB)))) + + val inflightCnt = PopCount(Cat(pipeStatus_s4_s5.map(s => s.valid && s.bits.toTXDAT && (s.bits.fromB || s.bits.mshrTask)))) + + PopCount(Cat(pipeStatus_s2_s3.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXDAT, s.bits.fromB)))) + queueCnt assert(inflightCnt <= mshrsAll.U, "in-flight overflow at TXDAT") diff --git a/src/main/scala/coupledL2/tl2chi/TXRSP.scala b/src/main/scala/coupledL2/tl2chi/TXRSP.scala index 53079f4f6..1a136b9b8 100644 --- a/src/main/scala/coupledL2/tl2chi/TXRSP.scala +++ b/src/main/scala/coupledL2/tl2chi/TXRSP.scala @@ -51,13 +51,13 @@ class TXRSP(implicit p: Parameters) extends TL2CHIL2Module { val queueCnt = queue.io.count // TODO: this may be imprecise, review this later val pipeStatus_s1_s5 = io.pipeStatusVec - val pipeStatus_s1_s2 = pipeStatus_s1_s5.take(2) - val pipeStatus_s2 = pipeStatus_s1_s2.tail - val pipeStatus_s3_s5 = pipeStatus_s1_s5.drop(2) + val pipeStatus_s1_s3 = pipeStatus_s1_s5.take(3) + val pipeStatus_s2_s3 = pipeStatus_s1_s3.tail + val pipeStatus_s4_s5 = pipeStatus_s1_s5.drop(3) // inflightCnt equals the number of reqs on s2~s5 that may flow into TXRSP soon, plus queueCnt. // The calculation of inflightCnt might be imprecise and leads to false positive back pressue. - val inflightCnt = PopCount(Cat(pipeStatus_s3_s5.map(s => s.valid && s.bits.toTXRSP && (s.bits.fromB || s.bits.mshrTask)))) + - PopCount(Cat(pipeStatus_s2.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXRSP, s.bits.fromB)))) + + val inflightCnt = PopCount(Cat(pipeStatus_s4_s5.map(s => s.valid && s.bits.toTXRSP && (s.bits.fromB || s.bits.mshrTask)))) + + PopCount(Cat(pipeStatus_s2_s3.map(s => s.valid && Mux(s.bits.mshrTask, s.bits.toTXRSP, s.bits.fromB)))) + queueCnt assert(inflightCnt <= mshrsAll.U, "in-flight overflow at TXRSP") diff --git a/src/main/scala/coupledL2/tl2tl/MSHR.scala b/src/main/scala/coupledL2/tl2tl/MSHR.scala index 05c5611bb..902d1f403 100644 --- a/src/main/scala/coupledL2/tl2tl/MSHR.scala +++ b/src/main/scala/coupledL2/tl2tl/MSHR.scala @@ -372,15 +372,14 @@ class MSHR(implicit p: Parameters) extends L2Module { mp_grant.corrupt := corrupt // Add merge grant task for Acquire and late Prefetch - mp_grant.mergeA := mergeA || io.aMergeTask.valid - val merge_task_r = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) - val merge_task = Mux(io.aMergeTask.valid, io.aMergeTask.bits, merge_task_r) - val merge_task_isKeyword = Mux(io.aMergeTask.valid, io.aMergeTask.bits.isKeyword.getOrElse(false.B), merge_task_r.isKeyword.getOrElse(false.B) ) + mp_grant.mergeA := mergeA + + val merge_task = RegEnable(io.aMergeTask.bits, 0.U.asTypeOf(new TaskBundle), io.aMergeTask.valid) mp_grant.aMergeTask.off := merge_task.off mp_grant.aMergeTask.alias.foreach(_ := merge_task.alias.getOrElse(0.U)) mp_grant.aMergeTask.vaddr.foreach(_ := merge_task.vaddr.getOrElse(0.U)) - mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task_isKeyword) + mp_grant.aMergeTask.isKeyword.foreach(_ := merge_task.isKeyword.getOrElse(false.B)) mp_grant.aMergeTask.opcode := odOpGen(merge_task.opcode) mp_grant.aMergeTask.param := MuxLookup( // Acquire -> Grant merge_task.param, @@ -571,6 +570,8 @@ class MSHR(implicit p: Parameters) extends L2Module { io.msInfo.bits.param := req.param io.msInfo.bits.mergeA := mergeA io.msInfo.bits.w_grantfirst := state.w_grantfirst + io.msInfo.bits.w_grantlast := state.w_grantlast + io.msInfo.bits.w_grant := state.w_grant io.msInfo.bits.s_refill := state.s_refill io.msInfo.bits.s_release := state.s_release io.msInfo.bits.s_cmoresp := true.B diff --git a/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala b/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala index ecaae3404..7202d67e5 100644 --- a/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala +++ b/src/main/scala/coupledL2/tl2tl/MSHRCtl.scala @@ -138,7 +138,7 @@ class MSHRCtl(implicit p: Parameters) extends L2Module with HasPerfEvents { io.msInfo(i) := m.io.msInfo m.io.nestedwb := io.nestedwb - m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.id === i.U + m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.idOH(i) m.io.aMergeTask.bits := io.aMergeTask.bits.task } @@ -149,17 +149,17 @@ class MSHRCtl(implicit p: Parameters) extends L2Module with HasPerfEvents { /* Acquire downwards */ val acquireUnit = Module(new AcquireUnit()) - fastArb(mshrs.map(_.io.tasks.source_a), acquireUnit.io.task, Some("source_a")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.source_a), acquireUnit.io.task, Some("source_a")), "source_a_arb") io.sourceA <> acquireUnit.io.sourceA /* Probe upwards */ val sourceB = Module(new SourceB()) - fastArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.source_b), sourceB.io.task, Some("source_b")), "source_b_arb") sourceB.io.grantStatus := io.grantStatus io.sourceB <> sourceB.io.sourceB /* Arbitrate MSHR task to RequestArbiter */ - fastArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")) + ArbPerf(twoLevelArb(mshrs.map(_.io.tasks.mainpipe), io.mshrTask, Some("mshr_task")), "mshr_task_arb") /* Arbitrate prefetchTrains to Prefetcher */ // prefetchOpt.foreach { diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index 3f96031ce..caaea8b20 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -34,6 +34,9 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { val io = IO(new Bundle() { /* receive task from arbiter at stage 2 */ val taskFromArb_s2 = Flipped(ValidIO(new TaskBundle())) + /* receive s1 info for Hint */ + val mshrHintQInfo = Flipped(ValidIO(new TaskBundle)) + val sinkCHintQInfo = Flipped(ValidIO(new TaskBundle)) /* handle set conflict in req arb */ val fromReqArb = Input(new Bundle() { @@ -97,8 +100,6 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { /* send Hint to L1 */ val l1Hint = DecoupledIO(new L2ToL1Hint()) - /* receive s1 info for Hint */ - val taskInfo_s1 = Flipped(ValidIO(new TaskBundle())) /* send prefetchTrain to Prefetch to trigger a prefetch req */ val prefetchTrain = prefetchOpt.map(_ => DecoupledIO(new PrefetchTrain)) @@ -304,19 +305,16 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { val wen = wen_c || wen_mshr // This is to let io.toDS.req_s3.valid hold for 2 cycles (see DataStorage for details) - val task_s3_valid_hold2 = RegInit(0.U(2.W)) - when(task_s2.valid) { - task_s3_valid_hold2 := "b11".U - }.otherwise { - task_s3_valid_hold2 := task_s3_valid_hold2 >> 1.U - } + val task_s3_valid_hold2 = RegEnable(task_s2.valid, false.B, !RegNext(task_s2.valid)) - io.toDS.en_s3 := task_s3.valid && (ren || wen) - io.toDS.req_s3.valid := task_s3_valid_hold2(0) && (ren || wen) + io.toDS.en_s3 := task_s3.valid + io.toDS.req_s3.valid := task_s3_valid_hold2 io.toDS.req_s3.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, Mux(mshr_req_s3, req_s3.way, dirResult_s3.way)) - io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) + // io.toDS.req_s3.bits.set := Mux(mshr_req_s3, req_s3.set, dirResult_s3.set) + io.toDS.req_s3.bits.set := req_s3.set io.toDS.req_s3.bits.wen := wen + io.toDS.req_s3.bits.ren := ren io.toDS.wdata_s3.data := Mux( !mshr_req_s3, c_releaseData_s3, // Among all sinkTasks, only C-Release writes DS @@ -404,7 +402,7 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { io.tagWReq.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry io.tagWReq.bits.set := req_s3.set - io.tagWReq.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) + io.tagWReq.bits.wayOH := UIntToOH(Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way)) io.tagWReq.bits.wtag := req_s3.tag /* ======== Interact with Channels (C & D) ======== */ @@ -547,7 +545,8 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { val customL1Hint = Module(new CustomL1Hint) - customL1Hint.io.s1 := io.taskInfo_s1 + customL1Hint.io.mshrHintQInfo := io.mshrHintQInfo + customL1Hint.io.sinkCHintQInfo := io.sinkCHintQInfo customL1Hint.io.s3.task := task_s3 // overwrite opcode: if sinkReq can respond, use sink_resp_s3.bits.opcode = Grant/GrantData diff --git a/src/main/scala/coupledL2/tl2tl/Slice.scala b/src/main/scala/coupledL2/tl2tl/Slice.scala index 88cf00a34..7f5356b03 100644 --- a/src/main/scala/coupledL2/tl2tl/Slice.scala +++ b/src/main/scala/coupledL2/tl2tl/Slice.scala @@ -108,7 +108,8 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] { mainPipe.io.releaseBufResp_s3.valid := RegNext(releaseBuf.io.r.valid, false.B) mainPipe.io.releaseBufResp_s3.bits := releaseBuf.io.resp.data mainPipe.io.fromReqArb.status_s1 := reqArb.io.status_s1 - mainPipe.io.taskInfo_s1 <> reqArb.io.taskInfo_s1 + mainPipe.io.mshrHintQInfo := reqArb.io.mshrHintQInfo + mainPipe.io.sinkCHintQInfo := reqArb.io.sinkCHintQInfo // priority: nested-ReleaseData / probeAckData [NEW] > mainPipe DS rdata [OLD] // 0/1 might happen at the same cycle with 2 diff --git a/src/main/scala/coupledL2/utils/Replacer.scala b/src/main/scala/coupledL2/utils/Replacer.scala index 9ca7c623a..598c658df 100644 --- a/src/main/scala/coupledL2/utils/Replacer.scala +++ b/src/main/scala/coupledL2/utils/Replacer.scala @@ -30,6 +30,7 @@ package coupledL2.utils import chisel3._ import chisel3.util._ import chisel3.util.random.LFSR +import utility.MaskToOH import freechips.rocketchip.util.{Random, UIntToAugmentedUInt} import freechips.rocketchip.util.property.cover @@ -330,28 +331,28 @@ class StaticRRIP(n_ways: Int) extends ReplacementPolicy { def access(touch_ways: Seq[Valid[UInt]]) = {} def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare - override def get_next_state(state: UInt, touch_way: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { + override def get_next_state(state: UInt, touch_wayOH: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { val State = Wire(Vec(n_ways, UInt(2.W))) val nextState = Wire(Vec(n_ways, UInt(2.W))) State.zipWithIndex.map { case (e, i) => e := state(2*i+1,2*i) } // hit-Promotion, miss-Insertion & Aging - val increcement = 3.U(2.W) - State(touch_way) + val increcement = 3.U(2.W) - Mux1H(touch_wayOH, State) // req_type[3]: 0-firstuse, 1-reuse; req_type[2]: 0-acquire, 1-release; // req_type[1]: 0-non-prefetch, 1-prefetch; req_type[0]: 0-not-refill, 1-refill // rrpv: non-pref_hit/non-pref_refill(miss)/non-pref_release_reuse = 0; // pref_hit do nothing; pref_refill = 1; non-pref_release_firstuse/pref_release = 2; - nextState.zipWithIndex.map { case (e, i) => - e := Mux(i.U === touch_way, + nextState.zip(State).zip(touch_wayOH.asBools).map { case ((e, s), w) => + e := Mux(w, // for touch_way - MuxCase(State(i), Seq( + MuxCase(s, Seq( ((req_type(2,0) === 0.U && hit) || req_type(2,0) === 1.U || req_type === 12.U) -> 0.U, (req_type(2,0) === 3.U) -> 1.U, (req_type === 4.U || req_type(2,0) === 6.U) -> 2.U )), // for other ways - Mux(hit || invalid, State(i), State(i)+increcement) + Mux(hit || invalid, s, s+increcement) ) } Cat(nextState.map(x=>x).reverse) @@ -392,7 +393,7 @@ class BRRIP(n_ways: Int) extends ReplacementPolicy { def access(touch_ways: Seq[Valid[UInt]]) = {} def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare - override def get_next_state(state: UInt, touch_way: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { + override def get_next_state(state: UInt, touch_wayOH: UInt, hit: Bool, invalid: Bool, req_type: UInt): UInt = { val State = Wire(Vec(n_ways, UInt(2.W))) val nextState = Wire(Vec(n_ways, UInt(2.W))) State.zipWithIndex.map { case (e, i) => @@ -400,21 +401,21 @@ class BRRIP(n_ways: Int) extends ReplacementPolicy { } // hit-Promotion, miss-Insertion & Aging - val increcement = 3.U(2.W) - State(touch_way) + val increcement = 3.U(2.W) - Mux1H(touch_wayOH, State) // req_type[3]: 0-firstuse, 1-reuse; req_type[2]: 0-acquire, 1-release; // req_type[1]: 0-non-prefetch, 1-prefetch; req_type[0]: 0-not-refill, 1-refill // rrpv: non-pref_hit/non-pref_refill(miss)/non-pref_release_reuse = 0; // pref_hit do nothing; pref_refill = 1; non-pref_release_firstuse/pref_release = 3; - nextState.zipWithIndex.map { case (e, i) => - e := Mux(i.U === touch_way, + nextState.zip(State).zip(touch_wayOH.asBools).map { case ((e, s), w) => + e := Mux(w, // for touch_way - MuxCase(State(i), Seq( + MuxCase(s, Seq( ((req_type(2,0) === 0.U && hit) || req_type(2,0) === 1.U || req_type === 12.U) -> 0.U, (req_type(2,0) === 3.U) -> 1.U, (req_type === 4.U || req_type(2,0) === 6.U) -> 3.U )), // for other ways - Mux(hit || invalid, State(i), State(i)+increcement) + Mux(hit || invalid, s, s+increcement) ) } /* val random = (rand.nextInt(32)).U @@ -465,8 +466,8 @@ class DRRIP(n_ways: Int) extends ReplacementPolicy { def hit = {} def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare - override def get_next_state(state: UInt, touch_way: UInt, hit: Bool, invalid: Bool, chosen_type: Bool, req_type: UInt): UInt = { - Mux(chosen_type, repl_BRRIP.get_next_state(state, touch_way, hit, invalid, req_type), repl_SRRIP.get_next_state(state, touch_way, hit, invalid, req_type)) + override def get_next_state(state: UInt, touch_wayOH: UInt, hit: Bool, invalid: Bool, chosen_type: Bool, req_type: UInt): UInt = { + Mux(chosen_type, repl_BRRIP.get_next_state(state, touch_wayOH, hit, invalid, req_type), repl_SRRIP.get_next_state(state, touch_wayOH, hit, invalid, req_type)) } def get_replace_way(state: UInt): UInt = { val RRPVVec = Wire(Vec(n_ways, UInt(2.W))) @@ -482,7 +483,8 @@ class DRRIP(n_ways: Int) extends ReplacementPolicy { } e := !(isLarger.contains(true.B)) } - PriorityEncoder(lrrWayVec) + // PriorityEncoder(lrrWayVec) + MaskToOH(lrrWayVec.asUInt) } } \ No newline at end of file diff --git a/utility b/utility index 84a74fc30..822413bf6 160000 --- a/utility +++ b/utility @@ -1 +1 @@ -Subproject commit 84a74fc3021b7769a0b99bb74213a5c9bc3d48c2 +Subproject commit 822413bf6df085a468212366f0864026ede8cf28