From 2bd7cbd99e9da1ce9c1b96ba56b0a697aa5bf0b7 Mon Sep 17 00:00:00 2001 From: Ding Haonan Date: Thu, 26 Mar 2026 15:33:06 +0800 Subject: [PATCH 01/17] chore(build): update mill for JDK 21+ support (#481) --- .github/workflows/main.yml | 6 ++++-- .mill-version | 2 +- build.sc | 31 +++++++++++++++++++------------ 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 334fefc04..6bfe2d706 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,12 +67,14 @@ jobs: sudo bash install-verilator.sh - name: Setup Scala - uses: olafurpg/setup-scala@v10 + uses: olafurpg/setup-scala@v11 + with: + java-version: openjdk@1.17 - name: Setup Mill uses: jodersky/setup-mill@v0.2.3 with: - mill-version: 0.11.1 + mill-version: 0.12.3 # - name: Check scalafmt # run: make checkformat diff --git a/.mill-version b/.mill-version index af88ba824..d61567cd1 100644 --- a/.mill-version +++ b/.mill-version @@ -1 +1 @@ -0.11.1 +0.12.3 \ No newline at end of file diff --git a/build.sc b/build.sc index 79ac6da49..82da554b9 100644 --- a/build.sc +++ b/build.sc @@ -5,9 +5,8 @@ import os.Path import publish._ import $file.common import $file.`rocket-chip`.common -import $file.`rocket-chip`.common import $file.`rocket-chip`.cde.common -import $file.`rocket-chip`.hardfloat.build +import $file.`rocket-chip`.hardfloat.common val defaultScalaVersion = "2.13.15" @@ -16,7 +15,9 @@ def defaultVersions = Map( "chisel-plugin" -> ivy"org.chipsalliance:::chisel-plugin:7.0.0" ) -trait HasChisel extends ScalaModule { +val pwd = os.Path(sys.env("MILL_WORKSPACE_ROOT")) + +trait HasChisel extends SbtModule { def chiselModule: Option[ScalaModule] = None def chiselPluginJar: T[Option[PathRef]] = None @@ -37,18 +38,24 @@ trait HasChisel extends ScalaModule { object rocketchip extends `rocket-chip`.common.RocketChipModule with HasChisel { - val rcPath = os.pwd / "rocket-chip" + val rcPath = pwd / "rocket-chip" override def 
millSourcePath = rcPath def mainargsIvy = ivy"com.lihaoyi::mainargs:0.7.0" def json4sJacksonIvy = ivy"org.json4s::json4s-jackson:4.0.7" - object macros extends `rocket-chip`.common.MacrosModule with HasChisel { + object macros extends `rocket-chip`.common.MacrosModule with SbtModule { + + def scalaVersion: T[String] = T(defaultScalaVersion) + def scalaReflectIvy = ivy"org.scala-lang:scala-reflect:${scalaVersion}" } - object cde extends `rocket-chip`.cde.common.CDEModule with HasChisel { + object cde extends `rocket-chip`.cde.common.CDEModule with ScalaModule { + + def scalaVersion: T[String] = T(defaultScalaVersion) + override def millSourcePath = rcPath / "cde" / "cde" } @@ -64,8 +71,8 @@ object rocketchip extends `rocket-chip`.common.RocketChipModule with HasChisel { } -object utility extends SbtModule with HasChisel { - override def millSourcePath = os.pwd / "utility" +object utility extends HasChisel { + override def millSourcePath = pwd / "utility" override def moduleDeps = super.moduleDeps ++ Seq(rocketchip) @@ -74,14 +81,14 @@ object utility extends SbtModule with HasChisel { ) } -object huancun extends SbtModule with HasChisel { - override def millSourcePath = os.pwd / "HuanCun" +object huancun extends HasChisel { + override def millSourcePath = pwd / "HuanCun" override def moduleDeps = super.moduleDeps ++ Seq( rocketchip, utility ) } -object CoupledL2 extends SbtModule with HasChisel with millbuild.common.CoupledL2Module { +object CoupledL2 extends HasChisel with $file.common.CoupledL2Module { override def millSourcePath = millOuterCtx.millSourcePath @@ -91,7 +98,7 @@ object CoupledL2 extends SbtModule with HasChisel with millbuild.common.CoupledL def huancunModule: ScalaModule = huancun - object test extends SbtModuleTests with TestModule.ScalaTest + object test extends SbtTests with TestModule.ScalaTest override def scalacOptions = super.scalacOptions() ++ Agg("-deprecation", "-feature") From 6f3425129c6af0cece6dc7ebe730917175d16154 Mon Sep 17 00:00:00 
2001 From: Zhu Yu Date: Sat, 7 Mar 2026 11:06:52 +0800 Subject: [PATCH 02/17] fix(AsyncBridge): handle lcredit return sequence safely * Add power handshake to confirm sink Tx Queues are drained before power-down * Change return l-credit in DEACTIVATE controlled by credits (from L2 not from Sink) --- .../coupledL2/tl2chi/chi/AsyncBridge.scala | 51 +++++++++++++++---- .../coupledL2/tl2chi/chi/LinkLayer.scala | 2 +- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala index 675a68b38..7d7947539 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala @@ -50,6 +50,12 @@ class AsyncPortIO( val tx = new AsyncDownwardsLinkIO(params) val rx = Flipped(new AsyncUpwardsLinkIO(params)) } + +class ChannelWithActive[T <: Data](gen: T) extends Bundle { + val channel = new ChannelIO(gen) + val active = Bool() +} + /* * This module enhances the standard async bridge by adding a front-end shadow buffer * to decouple local processing from asynchronous latency and provide instant credit @@ -124,8 +130,9 @@ object FromAsyncBundle { async: AsyncBundle[UInt], params: AsyncQueueParams = AsyncQueueParams(), name: Option[String] = None, - lcrdvReady: Option[Bool]= None - ) = { + lcrdvReady: Option[Bool]= None, + withPowerAck: Boolean = false + ): = Data { val gen = chiselTypeOf(async.mem.head) val out = Wire(new ChannelIO(gen)) val sink = Module(new AsyncQueueSink(gen, params)) @@ -137,7 +144,15 @@ object FromAsyncBundle { // flitpend and lcrdv are assigned independently out.flitpend := DontCare out.lcrdv := DontCare - out + // extend out with 'Active' to indicate sink Queue is NOT empty + if (withPowerAck) { + val result = Wire(new ChannelWithActive(gen)) + result.channel <> out + result.active := sink.io.deq.valid + result + } else { + out + } } def bitPulse[T <: Data]( @@ -232,14 +247,31 @@ class 
CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit val async = Flipped(new AsyncPortIO(params)) val deq = new PortIO val resetFinish = Output(Bool()) + val powerAck = new Bundle { + val QACTIVE = Output(Bool()) + val QACCEPTn = Output(Bool()) + val QREQ = Input(Bool()) + } }) val txreq_lcrdvReady = Wire(Bool()) val txrsp_lcrdvReady = Wire(Bool()) val txdat_lcrdvReady = Wire(Bool()) - io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady)) - io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady)) - io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady)) + +// io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady)) +// io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady)) +// io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady)) + + val txreq = FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady), true).asInstanceOf[ChannelWithActive[UInt]] + val txrsp = FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady), true).asInstanceOf[ChannelWithActive[UInt]] + val txdat = FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady), true).asInstanceOf[ChannelWithActive[UInt]] + io.deq.tx.req <> txreq.channel + io.deq.tx.rsp <> txrsp.channel + io.deq.tx.dat <> txdat.channel + // Add handshake to confirm Sink Tx Queue is completely drained + val txActive = txreq.active || txrsp.active || txdat.active + io.powerAck.QACTIVE := txActive + io.powerAck.QACCEPTn := !(io.powerAck.QREQ && !txActive) io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, params, 
Some("txreq_lcrdv")) io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv")) @@ -322,6 +354,7 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit For tx channel, add l-credit manager module to generate 'ready' to block tx flit to DownStream CHI a. The maximum number of L-Credits in tx channel is 4 inside bridge b. Use L-Credits number more than 4 in CoupledL2 to cover lcrdv sync delay from DownStream CHI to CoupledL2 + c. Normal flits are controlled by credits (from AsyncBridge Sink), Return flits in DEACTIVATE are controlled by credits (from L2) */ val txin = WireInit(0.U asTypeOf(Flipped(new DecoupledPortIO()))) //fake Decoupled IO to provide flitv val txout = WireInit(0.U asTypeOf(new PortIO))//fake LCredit IO to provide lcrdv @@ -334,9 +367,9 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit Decoupled2LCredit(txin.tx.req, txout.tx.req, LinkState(txState), Some("txreq")) Decoupled2LCredit(txin.tx.rsp, txout.tx.rsp, LinkState(txState), Some("txrsp")) Decoupled2LCredit(txin.tx.dat, txout.tx.dat, LinkState(txState), Some("txdat")) - txreq_lcrdvReady := txin.tx.req.ready - txrsp_lcrdvReady := txin.tx.rsp.ready - txdat_lcrdvReady := txin.tx.dat.ready + txreq_lcrdvReady := txin.tx.req.ready || txState === LinkStates.DEACTIVATE + txrsp_lcrdvReady := txin.tx.rsp.ready || txState === LinkStates.DEACTIVATE + txdat_lcrdvReady := txin.tx.dat.ready || txState === LinkStates.DEACTIVATE dontTouch(io) } diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index b939cfd4b..b747693ff 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -365,7 +365,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { ) io.out.syscoreq := RegNext(!exitco, init = false.B) - io.out.txsactive := RegNext(!exitcoDone, init = 
false.B) + io.out.txsactive := RegNext(!exitcoDone && io.out.syscoreq && io.out.syscoack, init = false.B) val retryAckCnt = RegInit(0.U(64.W)) val pCrdGrantCnt = RegInit(0.U(64.W)) From 114023588ec0b8cbf32c9d923d81676ec556bbe4 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Sat, 7 Mar 2026 15:53:50 +0800 Subject: [PATCH 03/17] fix compile --- src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala index 7d7947539..2261a17be 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala @@ -132,7 +132,7 @@ object FromAsyncBundle { name: Option[String] = None, lcrdvReady: Option[Bool]= None, withPowerAck: Boolean = false - ): = Data { + ): Data = { val gen = chiselTypeOf(async.mem.head) val out = Wire(new ChannelIO(gen)) val sink = Module(new AsyncQueueSink(gen, params)) From 0fd8899ad28580bb0b912f452b4612355e98991a Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Sat, 7 Mar 2026 16:37:29 +0800 Subject: [PATCH 04/17] remove txsactive sysco related --- src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index b747693ff..e4e10daaf 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -365,7 +365,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { ) io.out.syscoreq := RegNext(!exitco, init = false.B) - io.out.txsactive := RegNext(!exitcoDone && io.out.syscoreq && io.out.syscoack, init = false.B) + io.out.txsactive := RegNext(!exitcoDone, init = false.B) val retryAckCnt = RegInit(0.U(64.W)) val pCrdGrantCnt = RegInit(0.U(64.W)) From 6a6fccb0b5f9ab884e240359f0e5b4fbe210ac2b 
Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Thu, 26 Mar 2026 14:49:38 +0800 Subject: [PATCH 05/17] fix(AsyncBridge): extend lcrdv AsyncQueue depth to 16 --- .../scala/coupledL2/tl2chi/chi/AsyncBridge.scala | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala index 2261a17be..f4a62cae9 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala @@ -254,6 +254,9 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit } }) + val txState = RegInit(LinkStates.STOP) + val rxState = RegInit(LinkStates.STOP) + val txreq_lcrdvReady = Wire(Bool()) val txrsp_lcrdvReady = Wire(Bool()) val txdat_lcrdvReady = Wire(Bool()) @@ -271,11 +274,13 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit // Add handshake to confirm Sink Tx Queue is completely drained val txActive = txreq.active || txrsp.active || txdat.active io.powerAck.QACTIVE := txActive - io.powerAck.QACCEPTn := !(io.powerAck.QREQ && !txActive) + io.powerAck.QACCEPTn := !(io.powerAck.QREQ && !txActive && txState === LinkStates.STOP) - io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv")) - io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv")) - io.async.tx.dat.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv")) + //extend AsyncQueue depth to 16 for tx lcrdv + val queueParams = params.copy(depth = 16) + io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, queueParams, Some("txreq_lcrdv")) + io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, queueParams, Some("txrsp_lcrdv")) + io.async.tx.dat.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, queueParams, Some("txdat_lcrdv")) val async_rx_rsp = 
ToAsyncBundleWithBuf.channel(io.deq.rx.rsp, params, Some("rxrsp_flit")) val async_rx_dat = ToAsyncBundleWithBuf.channel(io.deq.rx.dat, params, Some("rxdat_flit")) @@ -326,9 +331,6 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit /* Duplicate Link Monitor tx/rx state FSM by using deq.rx deq.tx active signals which outuput to DownStream CHI */ - val txState = RegInit(LinkStates.STOP) - val rxState = RegInit(LinkStates.STOP) - Seq(txState, rxState).zip(MixedVecInit(Seq(io.deq.tx, io.deq.rx))).foreach { case (state, link) => state := MuxLookup(Cat(link.linkactivereq, link.linkactiveack), LinkStates.STOP)(Seq( Cat(true.B, false.B) -> LinkStates.ACTIVATE, From c2bf2d6712a97086eb7a4cf2e818780e028c2668 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Thu, 26 Mar 2026 14:53:33 +0800 Subject: [PATCH 06/17] fix(LinkLayer): Enable txactive after sysco handshake done --- src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index e4e10daaf..c30ab96e9 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -357,6 +357,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { //exit coherecy + deactive tx/rx when l2 flush done val exitco = io.exitco.getOrElse(false.B) val exitcoDone = !io.out.syscoreq && !io.out.syscoack && RegNext(true.B, init = false.B) + val txActiveEnable = !exitcoDone && io.out.syscoreq && io.out.syscoack io.out.tx.linkactivereq := RegNext(!exitcoDone, init = false.B) io.out.rx.linkactiveack := RegNext( @@ -365,7 +366,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { ) io.out.syscoreq := RegNext(!exitco, init = false.B) - io.out.txsactive := RegNext(!exitcoDone, init = false.B) + io.out.txsactive := RegNext(txActiveEnable, init = 
false.B) val retryAckCnt = RegInit(0.U(64.W)) val pCrdGrantCnt = RegInit(0.U(64.W)) From 1d9123ffabfb6ce47c6c6ffe87277a0fbd56ea96 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Thu, 26 Mar 2026 15:01:46 +0800 Subject: [PATCH 07/17] fix: CMOALL allocate MSHR, the tag should come from dirResult NOT from SinkA --- src/main/scala/coupledL2/Directory.scala | 2 +- src/main/scala/coupledL2/tl2chi/MainPipe.scala | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index cbe3e2efa..968e16f90 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -284,7 +284,7 @@ class Directory(implicit p: Parameters) extends L2Module { chosenWay, PriorityEncoder(freeWayMask_s3) ) - val hit_s3 = Cat(hitVec).orR || req_s3.cmoAll + val hit_s3 = Cat(hitVec).orR || (req_s3.cmoAll && VecInit(metaAll_s3.map(_.state =/= MetaData.INVALID))(req_s3.cmoWay)) val way_s3 = Mux(req_s3.cmoAll, req_s3.cmoWay, Mux(hit_s3, hitWay, finalWay)) val meta_s3 = metaAll_s3(way_s3) val tag_s3 = tagAll_s3(way_s3) diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 0a6811aa8..e6da39ef7 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -296,6 +296,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.toMSHRCtl.mshr_alloc_s3.bits.state := alloc_state io.toMSHRCtl.mshr_alloc_s3.bits.task match { case task => task := req_s3 + task.tag := Mux(io.cmoAllBlock.getOrElse(false.B), dirResult_s3.tag, req_s3.tag) task.bufIdx := 0.U(bufIdxBits.W) task.mshrTask := false.B task.aliasTask.foreach(_ := cache_alias) From 96250c2cf616bb1d5d62eda01e7f0c389c563a50 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Thu, 26 Mar 2026 15:07:02 +0800 Subject: [PATCH 08/17] fix(SinkA): fix CMOALL should operate line by line --- 
src/main/scala/coupledL2/SinkA.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/SinkA.scala b/src/main/scala/coupledL2/SinkA.scala index 794d449d7..7a7839544 100644 --- a/src/main/scala/coupledL2/SinkA.scala +++ b/src/main/scala/coupledL2/SinkA.scala @@ -178,7 +178,7 @@ class SinkA(implicit p: Parameters) extends L2Module { }.otherwise { way.foreach { _ := wayVal + 1.U } } - when (mshrValid) { + when (!mshrValid) { state.foreach { _ := sCMOREQ } }.otherwise { state.foreach { _ := sWAITMSHR } From 96d436eb2764000da54f93979c3b166647824788 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Thu, 26 Mar 2026 16:11:30 +0800 Subject: [PATCH 09/17] fix: remove change of lcrdv AsyncQueue depth change --- src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala index f4a62cae9..9be6b087f 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala @@ -276,11 +276,9 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit io.powerAck.QACTIVE := txActive io.powerAck.QACCEPTn := !(io.powerAck.QREQ && !txActive && txState === LinkStates.STOP) - //extend AsyncQueue depth to 16 for tx lcrdv - val queueParams = params.copy(depth = 16) - io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, queueParams, Some("txreq_lcrdv")) - io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, queueParams, Some("txrsp_lcrdv")) - io.async.tx.dat.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, queueParams, Some("txdat_lcrdv")) + io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv")) + io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv")) + io.async.tx.dat.lcrdv <> 
ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv")) val async_rx_rsp = ToAsyncBundleWithBuf.channel(io.deq.rx.rsp, params, Some("rxrsp_flit")) val async_rx_dat = ToAsyncBundleWithBuf.channel(io.deq.rx.dat, params, Some("rxdat_flit")) From e6fde00244c205122bfb230338da02d0cf180890 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Thu, 26 Mar 2026 17:58:34 +0800 Subject: [PATCH 10/17] fix(AsyncBridge): add shadow buffer(16) for tx lcrdv --- .../coupledL2/tl2chi/chi/AsyncBridge.scala | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala index 9be6b087f..faa20bf93 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala @@ -98,6 +98,33 @@ object ToAsyncBundleWithBuf { (source.io.async, deqReady) } + + def bitPulse( + bit: Bool, + params: AsyncQueueParams = AsyncQueueParams(), + name: Option[String] = None + ) = { + /* + 1. Shadow Buffer (depth=16, flow mode for low latency) + */ + val shadow_buffer = Module(new Queue(Bool(), 16, flow = true, pipe = false)) + if (name.isDefined) { shadow_buffer.suggestName("lcrdvShadowBuffer_" + name.get) } + shadow_buffer.io.enq.valid := bit + shadow_buffer.io.enq.bits := DontCare + /* + 2. 
AsyncQueueSource (depth =4) + */ + val source = Module(new AsyncQueueSource(UInt(0.W), params)) + if (name.isDefined) { source.suggestName("asyncQBitSource_" + name.get) } + source.io.enq.valid := shadow_buffer.io.deq.valid + source.io.enq.bits := DontCare + + shadow_buffer.io.deq.ready := source.io.enq.ready + + source.io.async + + } + } object ToAsyncBundle { def channel[T <: Data]( @@ -276,9 +303,9 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit io.powerAck.QACTIVE := txActive io.powerAck.QACCEPTn := !(io.powerAck.QREQ && !txActive && txState === LinkStates.STOP) - io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv")) - io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv")) - io.async.tx.dat.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv")) + io.async.tx.req.lcrdv <> ToAsyncBundleWithBuf.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv")) + io.async.tx.rsp.lcrdv <> ToAsyncBundleWithBuf.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv")) + io.async.tx.dat.lcrdv <> ToAsyncBundleWithBuf.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv")) val async_rx_rsp = ToAsyncBundleWithBuf.channel(io.deq.rx.rsp, params, Some("rxrsp_flit")) val async_rx_dat = ToAsyncBundleWithBuf.channel(io.deq.rx.dat, params, Some("rxdat_flit")) From e2deb32e963bd33d73b714a2b04c8b22ccad547b Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Fri, 27 Mar 2026 21:29:42 +0800 Subject: [PATCH 11/17] fix(LinkLayer): txsactive enable should NOT wait for sysco done --- src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index c30ab96e9..e4e10daaf 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ 
b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -357,7 +357,6 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { //exit coherecy + deactive tx/rx when l2 flush done val exitco = io.exitco.getOrElse(false.B) val exitcoDone = !io.out.syscoreq && !io.out.syscoack && RegNext(true.B, init = false.B) - val txActiveEnable = !exitcoDone && io.out.syscoreq && io.out.syscoack io.out.tx.linkactivereq := RegNext(!exitcoDone, init = false.B) io.out.rx.linkactiveack := RegNext( @@ -366,7 +365,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { ) io.out.syscoreq := RegNext(!exitco, init = false.B) - io.out.txsactive := RegNext(txActiveEnable, init = false.B) + io.out.txsactive := RegNext(!exitcoDone, init = false.B) val retryAckCnt = RegInit(0.U(64.W)) val pCrdGrantCnt = RegInit(0.U(64.W)) From 036a0793b8ea50b29ee745aa367f1d4206d437f8 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Fri, 27 Mar 2026 21:39:30 +0800 Subject: [PATCH 12/17] fix: add coherency gating at the transaction layer TXREQ arbiter - Add coEnable signal from LinkMonitor (coEnable = syscoreq && syscoack) - Gate TXREQ arbiter output with req_pass = coEnable || is_mmio - Apply bidirectional gating to both valid and ready signals: * txreq.valid = arb.out.valid && req_pass * arb.out.ready = txreq.ready && req_pass - mmio requests are always allowed regardless of coEnable state --- .../scala/coupledL2/tl2chi/TL2CHICoupledL2.scala | 12 ++++++++++-- src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala index 56947b114..dc9e1db9b 100644 --- a/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala +++ b/src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala @@ -125,6 +125,9 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base { ) } + //Coherency enable from 
Link Layer: when 1 cacheable request can be sent; otherwise they are gated + val coEnable = WireInit(false.B) + slices match { case slices: Seq[Slice] => // TXREQ @@ -132,7 +135,12 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base { val txreq = Wire(DecoupledIO(new CHIREQ)) slices.zip(txreq_arb.io.in.init).foreach { case (s, in) => in <> s.io.out.tx.req } txreq_arb.io.in.last <> mmio.io.tx.req - txreq <> txreq_arb.io.out + //Coherency Gating cacheable request, mmio always pass + val is_mmio = txreq_arb.io.chosen === slices.size.U + val req_pass = coEnable || is_mmio + txreq.valid := txreq_arb.io.out.valid && req_pass + txreq.bits := txreq_arb.io.out.bits + txreq_arb.io.out.ready := txreq.ready && req_pass txreq.bits.txnID := setSliceID(txreq_arb.io.out.bits.txnID, txreq_arb.io.chosen, mmio.io.tx.req.fire) // TXRSP @@ -267,7 +275,7 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base { linkMonitor.io.exitco.foreach { _ := Cat(slices.zipWithIndex.map { case (s, i) => s.io.l2FlushDone.getOrElse(false.B)}).andR && io_cpu_halt.getOrElse(false.B) } - + coEnable := linkMonitor.io.coEnable /** * performance counters */ diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index e4e10daaf..e9be325bb 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -328,6 +328,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { val in = Flipped(new DecoupledPortIO()) val out = new PortIO val nodeID = Input(UInt(NODEID_WIDTH.W)) + val coEnable = Output(Bool()) val exitco = Option.when(cacheParams.enableL2Flush) (Input(Bool())) }) // val s_stop :: s_activate :: s_run :: s_deactivate :: Nil = Enum(4) @@ -357,6 +358,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { //exit coherecy + deactive tx/rx when l2 flush done val exitco = 
io.exitco.getOrElse(false.B) val exitcoDone = !io.out.syscoreq && !io.out.syscoack && RegNext(true.B, init = false.B) + io.coEnable := io.out.syscoreq && io.out.syscoack io.out.tx.linkactivereq := RegNext(!exitcoDone, init = false.B) io.out.rx.linkactiveack := RegNext( @@ -365,7 +367,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes { ) io.out.syscoreq := RegNext(!exitco, init = false.B) - io.out.txsactive := RegNext(!exitcoDone, init = false.B) + io.out.txsactive := RegNext(!exitcoDone, init = false.B) val retryAckCnt = RegInit(0.U(64.W)) val pCrdGrantCnt = RegInit(0.U(64.W)) From ed4a2a5436d298fff7c2620a10da1624a3ca046a Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Thu, 2 Apr 2026 18:49:20 +0800 Subject: [PATCH 13/17] fix(AsyncBridge): restore AsyncBridge performance commit * Shadow Buffer(32) in rx channel to receive flit from noc * When Shadow Buffer has > 16 space, respond with lcrdv right after flitv --- src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala index faa20bf93..73c34e2a3 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala @@ -79,15 +79,15 @@ object ToAsyncBundleWithBuf { /* 1. Shadow Buffer (depth=16, flow mode for low latency) */ - val shadow_buffer = Module(new Queue(chiselTypeOf(chn.flit), 16, flow = true, pipe = false)) + val shadow_buffer = Module(new Queue(chiselTypeOf(chn.flit), 32, flow = true, pipe = false)) if (name.isDefined) { shadow_buffer.suggestName("shadowBuffer_" + name.get) } shadow_buffer.io.enq.valid := chn.flitv shadow_buffer.io.enq.bits := chn.flit /* 2.
For rx channel (CMN->L2), send out lcrdv right after a flit entering Shadow buffer if has space */ - val deqReady = shadow_buffer.io.deq.ready - dontTouch(deqReady) + val hasSpace = shadow_buffer.io.count <= 16.U + dontTouch(hasSpace) assert(!chn.flitv || shadow_buffer.io.enq.ready, s"${name.getOrElse("ToAsyncBundle")}: Shadow buffer overflow!") /* 3. AsyncQueueSource (depth=4) @@ -96,7 +96,7 @@ object ToAsyncBundleWithBuf { if (name.isDefined) { source.suggestName("asyncQSource_" + name.get) } source.io.enq <> shadow_buffer.io.deq - (source.io.async, deqReady) + (source.io.async, hasSpace) } def bitPulse( From 5f3d78a3144a16983b9bb1990ae870f3be5683cb Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Wed, 8 Apr 2026 18:37:44 +0800 Subject: [PATCH 14/17] fix(L2Param): change enableCHIAsyncBridge params type --- src/main/scala/coupledL2/L2Param.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/L2Param.scala b/src/main/scala/coupledL2/L2Param.scala index 764c5dd29..afcaf8d2c 100644 --- a/src/main/scala/coupledL2/L2Param.scala +++ b/src/main/scala/coupledL2/L2Param.scala @@ -103,7 +103,7 @@ case class L2Param( // L2 Flush enableL2Flush: Boolean = false, // AsyncBridge - enableCHIAsyncBridge: Option[Boolean] = None, + enableCHIAsyncBridge: Boolean = true, // Performance analysis enablePerf: Boolean = true, // RollingDB From 811939fb50e9233b9b89cfaa405e06b06eaa71e7 Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Fri, 3 Apr 2026 11:18:25 +0800 Subject: [PATCH 15/17] fix(Linklayer): add Tx shadow buffer to cover lcrdv latency from AsyncBridge --- .../scala/coupledL2/tl2chi/chi/LinkLayer.scala | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index e9be325bb..2aa9cfc2b 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -264,6 
+264,9 @@ class Decoupled2LCredit[T <: Bundle]( val out = ChannelIO(gen.cloneType) val state = Input(new LinkState()) }) + // Shadow Buffer (depth=32, flow mode for low latency) + val shadow_buffer = Module(new Queue(gen, 32, flow = true, pipe = false)) + shadow_buffer.io.enq <> io.in val out = Wire(io.out.cloneType) @@ -279,8 +282,11 @@ class Decoupled2LCredit[T <: Bundle]( val lcreditsMaxAll = lcreditsMax + overlcreditVal val lcreditPool = RegInit(overlcreditVal.U(log2Up(lcreditsMaxAll+1).W)) - val returnLCreditValid = !io.in.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= overlcreditVal.U - val flitv = io.in.fire || returnLCreditValid + val returnLCreditValid = !shadow_buffer.io.deq.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= overlcreditVal.U + val can_issue_flit = shadow_buffer.io.deq.valid && lcreditPool=/=0.U && !disableFlit + shadow_buffer.io.deq.ready := can_issue_flit + + val flitv = can_issue_flit || returnLCreditValid when (acceptLCredit) { when (!flitv) { @@ -293,12 +299,10 @@ class Decoupled2LCredit[T <: Bundle]( } } - io.in.ready := lcreditPool =/= 0.U && !disableFlit - io.out <> out out.flitpend := RegNext(true.B, init = false.B) // TODO out.flitv := RegNext(flitv, init = false.B) - out.flit := RegEnable(Mux(io.in.valid, Cat(io.in.bits.getElements.map(_.asUInt)), 0.U /* LCrdReturn */), flitv) + out.flit := RegEnable(Mux(shadow_buffer.io.deq.valid, Cat(shadow_buffer.io.deq.bits.getElements.map(_.asUInt)), 0.U /* LCrdReturn */), flitv) /** * performance counters From 98925d1b2424196a1c3c375d42dd0635cee93d2a Mon Sep 17 00:00:00 2001 From: Zhu Yu Date: Wed, 8 Apr 2026 18:49:16 +0800 Subject: [PATCH 16/17] Revert "fix(Linklayer): add Tx shadow buffer to cover lcrdv latency from AsyncBridge" This reverts commit 5f59e1f5d4500ebdd9c8583a187c9382c6988912. 
--- .../scala/coupledL2/tl2chi/chi/LinkLayer.scala | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index 2aa9cfc2b..e9be325bb 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -264,9 +264,6 @@ class Decoupled2LCredit[T <: Bundle]( val out = ChannelIO(gen.cloneType) val state = Input(new LinkState()) }) - // Shadow Buffer (depth=32, flow mode for low latency) - val shadow_buffer = Module(new Queue(gen, 32, flow = true, pipe = false)) - shadow_buffer.io.enq <> io.in val out = Wire(io.out.cloneType) @@ -282,11 +279,8 @@ class Decoupled2LCredit[T <: Bundle]( val lcreditsMaxAll = lcreditsMax + overlcreditVal val lcreditPool = RegInit(overlcreditVal.U(log2Up(lcreditsMaxAll+1).W)) - val returnLCreditValid = !shadow_buffer.io.deq.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= overlcreditVal.U - val can_issue_flit = shadow_buffer.io.deq.valid && lcreditPool=/=0.U && !disableFlit - shadow_buffer.io.deq.ready := can_issue_flit - - val flitv = can_issue_flit || returnLCreditValid + val returnLCreditValid = !io.in.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= overlcreditVal.U + val flitv = io.in.fire || returnLCreditValid when (acceptLCredit) { when (!flitv) { @@ -299,10 +293,12 @@ class Decoupled2LCredit[T <: Bundle]( } } + io.in.ready := lcreditPool =/= 0.U && !disableFlit + io.out <> out out.flitpend := RegNext(true.B, init = false.B) // TODO out.flitv := RegNext(flitv, init = false.B) - out.flit := RegEnable(Mux(shadow_buffer.io.deq.valid, Cat(shadow_buffer.io.deq.bits.getElements.map(_.asUInt)), 0.U /* LCrdReturn */), flitv) + out.flit := RegEnable(Mux(io.in.valid, Cat(io.in.bits.getElements.map(_.asUInt)), 0.U /* LCrdReturn */), flitv) /** * performance counters From 222bd71f1e2f351ae368dccf618556e868665e63 Mon Sep 17 00:00:00 2001 
From: Zhu Yu Date: Thu, 9 Apr 2026 11:00:49 +0800 Subject: [PATCH 17/17] fix(LinkLayer): fix when params enableCHIAsyncBridge remove Option attribute --- src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala index e9be325bb..f4b9aac81 100644 --- a/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala +++ b/src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala @@ -274,7 +274,7 @@ class Decoupled2LCredit[T <: Bundle]( // The maximum number of L-Credits that a receiver can provide is 15. val lcreditsMax = 15 - val enableCHIAsync = cacheParams.enableCHIAsyncBridge.getOrElse(false) + val enableCHIAsync = cacheParams.enableCHIAsyncBridge val overlcreditVal = if(enableCHIAsync) overlcreditNum.getOrElse(0) else 0 val lcreditsMaxAll = lcreditsMax + overlcreditVal val lcreditPool = RegInit(overlcreditVal.U(log2Up(lcreditsMaxAll+1).W))