Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
2bd7cbd
chore(build): update mill for JDK 21+ support (#481)
Kumonda221-CrO3 Mar 26, 2026
6f34251
fix(AsyncBridge): handle lcredit return sequence safely
yulightenyu Mar 7, 2026
1140235
fix compile
yulightenyu Mar 7, 2026
0fd8899
remove txsactive sysco related
yulightenyu Mar 7, 2026
6a6fccb
fix(AsyncBridge): extend lcrdv AsyncQueue depth to 16
yulightenyu Mar 26, 2026
c2bf2d6
fix(LinkLayer): Enable txactive after sysco handshake done
yulightenyu Mar 26, 2026
1d9123f
fix: CMOALL allocate MSHR, the tag should come from dirResult NOT fro…
yulightenyu Mar 26, 2026
96250c2
fix(SinkA): fix CMOALL should operate line by line
yulightenyu Mar 26, 2026
96d436e
fix: remove change of lcrdv AsyncQueue depth change
yulightenyu Mar 26, 2026
e6fde00
fix(AsyncBridge): add shadow buffer(16) for tx lcrdv
yulightenyu Mar 26, 2026
e2deb32
fix(LinkLayer): txsactive enable should NOT wait for sysco done
yulightenyu Mar 27, 2026
036a079
fix: add coherency gating at the transaction layer TXREQ arbiter
yulightenyu Mar 27, 2026
ed4a2a5
fix(AsyncBridge): restore AsyncBridge performance commit
yulightenyu Apr 2, 2026
5f3d78a
fix(L2Param): change enableCHIAsyncBridge params type
yulightenyu Apr 8, 2026
811939f
fix(Linklayer): add Tx shadow buffer to cover lcrdv latency from Asyn…
yulightenyu Apr 3, 2026
98925d1
Revert "fix(Linklayer): add Tx shadow buffer to cover lcrdv latency f…
yulightenyu Apr 8, 2026
222bd71
fix(LinkLayer): fix when params enableCHIAsyncBridge remove Option at…
yulightenyu Apr 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,14 @@ jobs:
sudo bash install-verilator.sh

- name: Setup Scala
uses: olafurpg/setup-scala@v10
uses: olafurpg/setup-scala@v11
with:
java-version: openjdk@1.17

- name: Setup Mill
uses: jodersky/setup-mill@v0.2.3
with:
mill-version: 0.11.1
mill-version: 0.12.3

# - name: Check scalafmt
# run: make checkformat
Expand Down
2 changes: 1 addition & 1 deletion .mill-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.11.1
0.12.3
31 changes: 19 additions & 12 deletions build.sc
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@ import os.Path
import publish._
import $file.common
import $file.`rocket-chip`.common
import $file.`rocket-chip`.common
import $file.`rocket-chip`.cde.common
import $file.`rocket-chip`.hardfloat.build
import $file.`rocket-chip`.hardfloat.common

val defaultScalaVersion = "2.13.15"

Expand All @@ -16,7 +15,9 @@ def defaultVersions = Map(
"chisel-plugin" -> ivy"org.chipsalliance:::chisel-plugin:7.0.0"
)

trait HasChisel extends ScalaModule {
val pwd = os.Path(sys.env("MILL_WORKSPACE_ROOT"))

trait HasChisel extends SbtModule {
def chiselModule: Option[ScalaModule] = None

def chiselPluginJar: T[Option[PathRef]] = None
Expand All @@ -37,18 +38,24 @@ trait HasChisel extends ScalaModule {

object rocketchip extends `rocket-chip`.common.RocketChipModule with HasChisel {

val rcPath = os.pwd / "rocket-chip"
val rcPath = pwd / "rocket-chip"
override def millSourcePath = rcPath

def mainargsIvy = ivy"com.lihaoyi::mainargs:0.7.0"

def json4sJacksonIvy = ivy"org.json4s::json4s-jackson:4.0.7"

object macros extends `rocket-chip`.common.MacrosModule with HasChisel {
object macros extends `rocket-chip`.common.MacrosModule with SbtModule {

def scalaVersion: T[String] = T(defaultScalaVersion)

def scalaReflectIvy = ivy"org.scala-lang:scala-reflect:${scalaVersion}"
}

object cde extends `rocket-chip`.cde.common.CDEModule with HasChisel {
object cde extends `rocket-chip`.cde.common.CDEModule with ScalaModule {

def scalaVersion: T[String] = T(defaultScalaVersion)

override def millSourcePath = rcPath / "cde" / "cde"
}

Expand All @@ -64,8 +71,8 @@ object rocketchip extends `rocket-chip`.common.RocketChipModule with HasChisel {

}

object utility extends SbtModule with HasChisel {
override def millSourcePath = os.pwd / "utility"
object utility extends HasChisel {
override def millSourcePath = pwd / "utility"

override def moduleDeps = super.moduleDeps ++ Seq(rocketchip)

Expand All @@ -74,14 +81,14 @@ object utility extends SbtModule with HasChisel {
)
}

object huancun extends SbtModule with HasChisel {
override def millSourcePath = os.pwd / "HuanCun"
object huancun extends HasChisel {
override def millSourcePath = pwd / "HuanCun"
override def moduleDeps = super.moduleDeps ++ Seq(
rocketchip, utility
)
}

object CoupledL2 extends SbtModule with HasChisel with millbuild.common.CoupledL2Module {
object CoupledL2 extends HasChisel with $file.common.CoupledL2Module {

override def millSourcePath = millOuterCtx.millSourcePath

Expand All @@ -91,7 +98,7 @@ object CoupledL2 extends SbtModule with HasChisel with millbuild.common.CoupledL

def huancunModule: ScalaModule = huancun

object test extends SbtModuleTests with TestModule.ScalaTest
object test extends SbtTests with TestModule.ScalaTest

override def scalacOptions = super.scalacOptions() ++ Agg("-deprecation", "-feature")

Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/coupledL2/Directory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ class Directory(implicit p: Parameters) extends L2Module {
chosenWay,
PriorityEncoder(freeWayMask_s3)
)
val hit_s3 = Cat(hitVec).orR || req_s3.cmoAll
val hit_s3 = Cat(hitVec).orR || (req_s3.cmoAll && VecInit(metaAll_s3.map(_.state =/= MetaData.INVALID))(req_s3.cmoWay))
val way_s3 = Mux(req_s3.cmoAll, req_s3.cmoWay, Mux(hit_s3, hitWay, finalWay))
val meta_s3 = metaAll_s3(way_s3)
val tag_s3 = tagAll_s3(way_s3)
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/coupledL2/L2Param.scala
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ case class L2Param(
// L2 Flush
enableL2Flush: Boolean = false,
// AsyncBridge
enableCHIAsyncBridge: Option[Boolean] = None,
enableCHIAsyncBridge: Boolean = true,
// Performance analysis
enablePerf: Boolean = true,
// RollingDB
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/coupledL2/SinkA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class SinkA(implicit p: Parameters) extends L2Module {
}.otherwise {
way.foreach { _ := wayVal + 1.U }
}
when (mshrValid) {
when (!mshrValid) {
state.foreach { _ := sCMOREQ }
}.otherwise {
state.foreach { _ := sWAITMSHR }
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/coupledL2/tl2chi/MainPipe.scala
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes
io.toMSHRCtl.mshr_alloc_s3.bits.state := alloc_state
io.toMSHRCtl.mshr_alloc_s3.bits.task match { case task =>
task := req_s3
task.tag := Mux(io.cmoAllBlock.getOrElse(false.B), dirResult_s3.tag, req_s3.tag)
task.bufIdx := 0.U(bufIdxBits.W)
task.mshrTask := false.B
task.aliasTask.foreach(_ := cache_alias)
Expand Down
12 changes: 10 additions & 2 deletions src/main/scala/coupledL2/tl2chi/TL2CHICoupledL2.scala
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,22 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base {
)
}

// Coherency enable from the Link Layer: when high, cacheable requests may be sent; otherwise they are gated
val coEnable = WireInit(false.B)

slices match {
case slices: Seq[Slice] =>
// TXREQ
val txreq_arb = Module(new RRArbiterInit(new CHIREQ, slices.size + 1)) // plus 1 for MMIO
val txreq = Wire(DecoupledIO(new CHIREQ))
slices.zip(txreq_arb.io.in.init).foreach { case (s, in) => in <> s.io.out.tx.req }
txreq_arb.io.in.last <> mmio.io.tx.req
txreq <> txreq_arb.io.out
//Coherency Gating cacheable request, mmio always pass
val is_mmio = txreq_arb.io.chosen === slices.size.U
val req_pass = coEnable || is_mmio
txreq.valid := txreq_arb.io.out.valid && req_pass
txreq.bits := txreq_arb.io.out.bits
txreq_arb.io.out.ready := txreq.ready && req_pass
txreq.bits.txnID := setSliceID(txreq_arb.io.out.bits.txnID, txreq_arb.io.chosen, mmio.io.tx.req.fire)

// TXRSP
Expand Down Expand Up @@ -267,7 +275,7 @@ class TL2CHICoupledL2(implicit p: Parameters) extends CoupledL2Base {
linkMonitor.io.exitco.foreach { _ :=
Cat(slices.zipWithIndex.map { case (s, i) => s.io.l2FlushDone.getOrElse(false.B)}).andR && io_cpu_halt.getOrElse(false.B)
}

coEnable := linkMonitor.io.coEnable
/**
* performance counters
*/
Expand Down
98 changes: 79 additions & 19 deletions src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ class AsyncPortIO(
val tx = new AsyncDownwardsLinkIO(params)
val rx = Flipped(new AsyncUpwardsLinkIO(params))
}

/**
 * Bundle that pairs a [[ChannelIO]] with an additional `active` flag.
 *
 * @param gen payload type handed to the wrapped ChannelIO
 */
class ChannelWithActive[T <: Data](gen: T) extends Bundle {
// The wrapped link channel, built from `gen`.
val channel = new ChannelIO(gen)
// NOTE(review): FromAsyncBundle.channel drives this from its sink queue's deq.valid
// (i.e. "sink queue is not empty") — confirm any other producer follows the same meaning.
val active = Bool()
}

/*
* This module enhances the standard async bridge by adding a front-end shadow buffer
* to decouple local processing from asynchronous latency and provide instant credit
Expand All @@ -73,15 +79,15 @@ object ToAsyncBundleWithBuf {
/*
1. Shadow Buffer (depth=16, flow mode for low latency)
*/
val shadow_buffer = Module(new Queue(chiselTypeOf(chn.flit), 16, flow = true, pipe = false))
val shadow_buffer = Module(new Queue(chiselTypeOf(chn.flit), 32, flow = true, pipe = false))
if (name.isDefined) { shadow_buffer.suggestName("shadowBuffer_" + name.get) }
shadow_buffer.io.enq.valid := chn.flitv
shadow_buffer.io.enq.bits := chn.flit
/*
2. For rx channel (CMN->L2), send out lcrdv right after a flit enters the shadow buffer, provided the buffer has space
*/
val deqReady = shadow_buffer.io.deq.ready
dontTouch(deqReady)
val hasSpace = shadow_buffer.io.count <= 16.U
dontTouch(hasSpace)
assert(!chn.flitv || shadow_buffer.io.enq.ready, s"${name.getOrElse("ToAsyncBundle")}: Shadow buffer overflow!")
/*
3. AsyncQueueSource (depth=4)
Expand All @@ -90,8 +96,35 @@ object ToAsyncBundleWithBuf {
if (name.isDefined) { source.suggestName("asyncQSource_" + name.get) }
source.io.enq <> shadow_buffer.io.deq

(source.io.async, deqReady)
(source.io.async, hasSpace)
}

/**
 * Carries a one-bit pulse into an asynchronous crossing through a shadow buffer.
 *
 * Every cycle in which `bit` is high enqueues one entry into a 16-deep
 * flow-through queue; the queue drains into an AsyncQueueSource whenever the
 * source accepts. Only the existence of an entry matters: the payload is
 * DontCare and the async source carries a zero-width UInt.
 *
 * @param bit    pulse to transport; one buffer entry is created per asserted cycle
 * @param params async queue parameters forwarded to the AsyncQueueSource
 * @param name   optional suffix for the generated instance names and the
 *               overflow assertion message
 * @return the `async` port of the internal AsyncQueueSource
 */
def bitPulse(
  bit: Bool,
  params: AsyncQueueParams = AsyncQueueParams(),
  name: Option[String] = None
) = {
  /*
    1. Shadow Buffer (depth=16, flow mode for low latency)
   */
  val shadow_buffer = Module(new Queue(Bool(), 16, flow = true, pipe = false))
  name.foreach { n => shadow_buffer.suggestName("lcrdvShadowBuffer_" + n) }
  shadow_buffer.io.enq.valid := bit
  shadow_buffer.io.enq.bits := DontCare
  // enq.ready is not fed back to the producer, so a pulse that arrives while the
  // buffer is full would be dropped silently. Flag it, mirroring the overflow
  // check done for the flit shadow buffer in `channel`.
  assert(!bit || shadow_buffer.io.enq.ready, s"${name.getOrElse("ToAsyncBundleWithBuf.bitPulse")}: Shadow buffer overflow!")

  /*
    2. AsyncQueueSource (depth comes from `params`)
   */
  val source = Module(new AsyncQueueSource(UInt(0.W), params))
  name.foreach { n => source.suggestName("asyncQBitSource_" + n) }
  source.io.enq.valid := shadow_buffer.io.deq.valid
  source.io.enq.bits := DontCare

  // Pop one buffered pulse each time the async source accepts an entry.
  shadow_buffer.io.deq.ready := source.io.enq.ready

  source.io.async
}

}
object ToAsyncBundle {
def channel[T <: Data](
Expand Down Expand Up @@ -124,8 +157,9 @@ object FromAsyncBundle {
async: AsyncBundle[UInt],
params: AsyncQueueParams = AsyncQueueParams(),
name: Option[String] = None,
lcrdvReady: Option[Bool]= None
) = {
lcrdvReady: Option[Bool]= None,
withPowerAck: Boolean = false
): Data = {
val gen = chiselTypeOf(async.mem.head)
val out = Wire(new ChannelIO(gen))
val sink = Module(new AsyncQueueSink(gen, params))
Expand All @@ -137,7 +171,15 @@ object FromAsyncBundle {
// flitpend and lcrdv are assigned independently
out.flitpend := DontCare
out.lcrdv := DontCare
out
// extend out with 'Active' to indicate sink Queue is NOT empty
if (withPowerAck) {
val result = Wire(new ChannelWithActive(gen))
result.channel <> out
result.active := sink.io.deq.valid
result
} else {
out
}
}

def bitPulse[T <: Data](
Expand Down Expand Up @@ -232,18 +274,38 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
val async = Flipped(new AsyncPortIO(params))
val deq = new PortIO
val resetFinish = Output(Bool())
val powerAck = new Bundle {
val QACTIVE = Output(Bool())
val QACCEPTn = Output(Bool())
val QREQ = Input(Bool())
}
})

val txState = RegInit(LinkStates.STOP)
val rxState = RegInit(LinkStates.STOP)

val txreq_lcrdvReady = Wire(Bool())
val txrsp_lcrdvReady = Wire(Bool())
val txdat_lcrdvReady = Wire(Bool())
io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady))
io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady))
io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady))

io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv"))
io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv"))
io.async.tx.dat.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv"))
// io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady))
// io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady))
// io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady))

val txreq = FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady), true).asInstanceOf[ChannelWithActive[UInt]]
val txrsp = FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady), true).asInstanceOf[ChannelWithActive[UInt]]
val txdat = FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady), true).asInstanceOf[ChannelWithActive[UInt]]
io.deq.tx.req <> txreq.channel
io.deq.tx.rsp <> txrsp.channel
io.deq.tx.dat <> txdat.channel
// Add handshake to confirm Sink Tx Queue is completely drained
val txActive = txreq.active || txrsp.active || txdat.active
io.powerAck.QACTIVE := txActive
io.powerAck.QACCEPTn := !(io.powerAck.QREQ && !txActive && txState === LinkStates.STOP)

io.async.tx.req.lcrdv <> ToAsyncBundleWithBuf.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv"))
io.async.tx.rsp.lcrdv <> ToAsyncBundleWithBuf.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv"))
io.async.tx.dat.lcrdv <> ToAsyncBundleWithBuf.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv"))

val async_rx_rsp = ToAsyncBundleWithBuf.channel(io.deq.rx.rsp, params, Some("rxrsp_flit"))
val async_rx_dat = ToAsyncBundleWithBuf.channel(io.deq.rx.dat, params, Some("rxdat_flit"))
Expand Down Expand Up @@ -294,9 +356,6 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
/*
Duplicate the Link Monitor tx/rx state FSM by using the deq.rx / deq.tx active signals which are output to DownStream CHI
*/
val txState = RegInit(LinkStates.STOP)
val rxState = RegInit(LinkStates.STOP)

Seq(txState, rxState).zip(MixedVecInit(Seq(io.deq.tx, io.deq.rx))).foreach { case (state, link) =>
state := MuxLookup(Cat(link.linkactivereq, link.linkactiveack), LinkStates.STOP)(Seq(
Cat(true.B, false.B) -> LinkStates.ACTIVATE,
Expand All @@ -322,6 +381,7 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
For tx channel, add l-credit manager module to generate 'ready' to block tx flit to DownStream CHI
a. The maximum number of L-Credits in tx channel is 4 inside bridge
b. Use L-Credits number more than 4 in CoupledL2 to cover lcrdv sync delay from DownStream CHI to CoupledL2
c. Normal flits are controlled by credits (from AsyncBridge Sink), Return flits in DEACTIVATE are controlled by credits (from L2)
*/
val txin = WireInit(0.U asTypeOf(Flipped(new DecoupledPortIO()))) //fake Decoupled IO to provide flitv
val txout = WireInit(0.U asTypeOf(new PortIO))//fake LCredit IO to provide lcrdv
Expand All @@ -334,9 +394,9 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
Decoupled2LCredit(txin.tx.req, txout.tx.req, LinkState(txState), Some("txreq"))
Decoupled2LCredit(txin.tx.rsp, txout.tx.rsp, LinkState(txState), Some("txrsp"))
Decoupled2LCredit(txin.tx.dat, txout.tx.dat, LinkState(txState), Some("txdat"))
txreq_lcrdvReady := txin.tx.req.ready
txrsp_lcrdvReady := txin.tx.rsp.ready
txdat_lcrdvReady := txin.tx.dat.ready
txreq_lcrdvReady := txin.tx.req.ready || txState === LinkStates.DEACTIVATE
txrsp_lcrdvReady := txin.tx.rsp.ready || txState === LinkStates.DEACTIVATE
txdat_lcrdvReady := txin.tx.dat.ready || txState === LinkStates.DEACTIVATE

dontTouch(io)
}
4 changes: 3 additions & 1 deletion src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ class Decoupled2LCredit[T <: Bundle](

// The maximum number of L-Credits that a receiver can provide is 15.
val lcreditsMax = 15
val enableCHIAsync = cacheParams.enableCHIAsyncBridge.getOrElse(false)
val enableCHIAsync = cacheParams.enableCHIAsyncBridge
val overlcreditVal = if(enableCHIAsync) overlcreditNum.getOrElse(0) else 0
val lcreditsMaxAll = lcreditsMax + overlcreditVal
val lcreditPool = RegInit(overlcreditVal.U(log2Up(lcreditsMaxAll+1).W))
Expand Down Expand Up @@ -328,6 +328,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes {
val in = Flipped(new DecoupledPortIO())
val out = new PortIO
val nodeID = Input(UInt(NODEID_WIDTH.W))
val coEnable = Output(Bool())
val exitco = Option.when(cacheParams.enableL2Flush) (Input(Bool()))
})
// val s_stop :: s_activate :: s_run :: s_deactivate :: Nil = Enum(4)
Expand Down Expand Up @@ -357,6 +358,7 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes {
//exit coherency + deactivate tx/rx when l2 flush done
val exitco = io.exitco.getOrElse(false.B)
val exitcoDone = !io.out.syscoreq && !io.out.syscoack && RegNext(true.B, init = false.B)
io.coEnable := io.out.syscoreq && io.out.syscoack

io.out.tx.linkactivereq := RegNext(!exitcoDone, init = false.B)
io.out.rx.linkactiveack := RegNext(
Expand Down
Loading