Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
525a343
Add Modal deploy guardrail tooling
therealityreport Jun 10, 2026
4c1c05c
Address Modal deploy guardrail review
therealityreport Jun 10, 2026
dea0566
Harden Modal canary output
therealityreport Jun 10, 2026
9ca1486
Allow loopback internal admin proxy
therealityreport Jun 10, 2026
b7539ae
Remove Instagram comments backfill caps
therealityreport Jun 12, 2026
1a53af9
Resume Instagram comments from audit cursors
therealityreport Jun 12, 2026
0bd9dc2
Add audit cursor retry batching tools
therealityreport Jun 12, 2026
9e4ac79
Add audit cursor retry admin action
therealityreport Jun 12, 2026
e6626b7
Reduce Instagram scraper static asset traffic
therealityreport Jun 12, 2026
cb7b432
Fix Instagram comment recovery batching
therealityreport Jun 13, 2026
1748381
Add priority Instagram comment recovery lane
therealityreport Jun 13, 2026
aa55bd7
checkpoint: IG public comments speed/correctness baseline
therealityreport Jun 16, 2026
7e8cdf9
feat: capture backend social scraper updates
therealityreport Jun 16, 2026
aa121e5
Merge branch 'codex/publish/trr-backend/20260610-modal-deploy-guardra…
therealityreport Jun 16, 2026
030b107
Merge branch 'codex/source/trr-backend/20260616-takeover-all' into co…
therealityreport Jun 16, 2026
a949a29
WS1a+WS3: fetcher throughput/correctness + persistence integrity
therealityreport Jun 16, 2026
67d1b44
chore: refresh backend requirements lock
therealityreport Jun 16, 2026
0faaca0
chore: match backend lock to linux ci
therealityreport Jun 16, 2026
1e3b364
test: harden backend api route assertions
therealityreport Jun 16, 2026
b5586a1
fix: keep blocked instagram comments retryable
therealityreport Jun 16, 2026
19578a4
bug#2 fetcher gate: accept expected_count_unknown, stamp diagnostic_m…
therealityreport Jun 16, 2026
3103e45
WS4 R1: date-window targeting for IG public comments backfill
therealityreport Jun 16, 2026
70b4e20
test: stabilize backend route and timeout assertions
therealityreport Jun 16, 2026
d5c27f8
test: stabilize backend full-suite assertions
therealityreport Jun 16, 2026
0ed7f0e
test: remove brittle duplicate socials route count
therealityreport Jun 16, 2026
65233a1
fix: address backend pr review blockers
therealityreport Jun 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ SOCIAL_THREADS_COOKIE_AUTO_REFRESH=true
SOCIAL_THREADS_COOKIE_VALIDATION_TTL_SECONDS=900
SOCIAL_THREADS_COOKIE_REFRESH_HEADLESS=true
SOCIAL_THREADS_COOKIE_REFRESH_TIMEOUT_SECONDS=120
# Modal Threads full-history discovery defaults to the Decodo browser/API proxy
# Modal Threads full-history discovery defaults to the Decodo browser/HTTP proxy
# when DECODO_USERNAME and DECODO_PASSWORD are present. The generated Decodo
# username rotates by default; sticky sessions require USE_STICKY_PROXY=true.
SOCIAL_THREADS_POSTS_SCRAPLING_ENABLED=true
Expand All @@ -427,12 +427,19 @@ SOCIAL_THREADS_POSTS_USE_STICKY_PROXY=false
SOCIAL_THREADS_POSTS_PROXY_SESSION_TTL_SECONDS=600

# ----------------------------------------------
# Decodo usage / cost alert (daily poll). All optional — absent => poll is a no-op.
# Decodo residential proxy and usage/cost alert settings.
# TRR social scraping uses custom TRR code. Decodo is only the residential proxy
# provider for those lanes; SCRAPER_API_TOKEN / Decodo Web Scraping API is not
# required for Instagram, Threads, TikTok, or SocialBlade scraping.
# Usage/cost API fields are optional; when they are absent, the daily usage poll is a no-op.
# ----------------------------------------------
# Decodo Public API auth: token (preferred) OR username/password Basic.
DECODO_API_TOKEN=
DECODO_API_USERNAME=
DECODO_API_PASSWORD=
# Optional explicit proxy URL. Prefer setting this through the Decodo plugin
# after `smoke-proxy-rotation.mjs --live --max-requests 1` passes.
DECODO_PROXY_URL=
# Optional auth-scheme prefix for the token (e.g. Bearer / Token); blank sends the raw key.
DECODO_API_AUTH_SCHEME=
# Optional: scope the traffic query by proxy type if your account requires it.
Expand Down Expand Up @@ -494,6 +501,8 @@ TRR_MODAL_GOOGLE_NEWS_FUNCTION=run_google_news_sync
TRR_MODAL_REDDIT_REFRESH_FUNCTION=run_reddit_refresh
TRR_MODAL_SOCIAL_JOB_FUNCTION=run_social_job
TRR_MODAL_SOCIAL_JOB_CONCURRENCY_LIMIT=8
TRR_MODAL_SOCIAL_COMMENTS_JOB_CONCURRENCY_LIMIT=10
TRR_MODAL_SOCIAL_COMMENTS_RECOVERY_JOB_CONCURRENCY_LIMIT=10
TRR_MODAL_SOCIAL_RECOVERY_FUNCTION=sweep_social_dispatch_queue
TRR_MODAL_SOCIALBLADE_FUNCTION=run_socialblade_scrape
# Backward-compatible fallback secret knob for local/dev only.
Expand Down Expand Up @@ -536,12 +545,12 @@ SOCIAL_INSTAGRAM_COMMENTS_PROXY_URLS=
SOCIAL_INSTAGRAM_COMMENTS_USE_STICKY_PROXY=false
SOCIAL_INSTAGRAM_COMMENTS_PROXY_SHARD_SESSIONS=true
SOCIAL_INSTAGRAM_COMMENTS_PROXY_SESSION_TTL_SECONDS=600
SOCIAL_INSTAGRAM_COMMENTS_MAX_POSTS_PER_RUN=50
SOCIAL_INSTAGRAM_COMMENTS_MAX_COMMENTS_PER_POST=200
SOCIAL_INSTAGRAM_COMMENT_PAGINATION_MAX_PAGES=250
SOCIAL_INSTAGRAM_COMMENTS_MAX_POSTS_PER_RUN=0
SOCIAL_INSTAGRAM_COMMENTS_MAX_COMMENTS_PER_POST=0
SOCIAL_INSTAGRAM_COMMENT_PAGINATION_MAX_PAGES=0
SOCIAL_INSTAGRAM_COMMENT_PAGINATION_MAX_SECONDS=180
SOCIAL_INSTAGRAM_COMMENTS_LAUNCH_AUTH_CHECK=false
SOCIAL_INSTAGRAM_REPLY_PAGINATION_MAX_PAGES=100
SOCIAL_INSTAGRAM_REPLY_PAGINATION_MAX_PAGES=0
SOCIAL_INSTAGRAM_REPLY_PAGINATION_MAX_SECONDS=120
# Opt-in comments strategy that keeps one continuous post/session flow and saves once per post.
SOCIAL_INSTAGRAM_COMMENTS_SINGLE_SESSION_LOAD_ALL_ENABLED=false
Expand All @@ -551,10 +560,15 @@ SOCIAL_INSTAGRAM_COMMENTS_SINGLE_SESSION_RENDERED_STALL_LIMIT=3
SOCIAL_INSTAGRAM_COMMENTS_SINGLE_SESSION_RENDERED_DEADLINE_SECONDS=45
SOCIAL_INSTAGRAM_COMMENTS_SINGLE_SESSION_MAX_IN_MEMORY_ROWS=5000
SOCIAL_INSTAGRAM_COMMENTS_HEADLESS=true
INSTAGRAM_BROWSER_NETWORK_POLICY_ENABLED=true
INSTAGRAM_BROWSER_BLOCK_STATIC_ASSETS=true
INSTAGRAM_BROWSER_DISABLE_EXTRA_RESOURCES=true
INSTAGRAM_BROWSER_NETWORK_POLICY_REPORT_ONLY=false
SOCIAL_INSTAGRAM_COMMENTS_PROFILE_SHARD_COUNT=8
SOCIAL_INSTAGRAM_COMMENTS_GLOBAL_RATE_LIMIT_MODE=file_lock
# Per-post comments fetch overlap. Default 1 preserves serial fetch/persist behavior; clamp is 1..8.
SOCIAL_INSTAGRAM_COMMENTS_PER_POST_CONCURRENCY=1
SOCIAL_INSTAGRAM_COMMENTS_MAX_SHARD_COUNT=1000
SOCIAL_INSTAGRAM_COMMENTS_GLOBAL_RATE_LIMIT_MODE=advisory
# Per-post comments fetch overlap. Modal defaults use 2 with advisory global pacing; values are clamped to 1..8.
SOCIAL_INSTAGRAM_COMMENTS_PER_POST_CONCURRENCY=2
SOCIAL_INSTAGRAM_CATALOG_BACKFILL_FORCE_DETAIL_FETCH=true
SOCIAL_INSTAGRAM_DETAILS_REFRESH_LOCAL_WORKERS=4
SOCIAL_INSTAGRAM_DETAILS_REFRESH_MAX_DETAIL_FETCHES=500
Expand All @@ -566,7 +580,7 @@ SOCIAL_INLINE_COMMENTS_WORKERS=4
SOCIAL_WORKER_MIN_STAGE_RUNNERS=6
SOCIAL_WORKER_ALLOW_STAGE_DISABLE=0
SOCIAL_WORKER_POOL_POSTS=8
SOCIAL_WORKER_POOL_COMMENTS=8
SOCIAL_WORKER_POOL_COMMENTS=10
SOCIAL_WORKER_POOL_MEDIA_MIRROR=6
SOCIAL_WORKER_POOL_COMMENT_MEDIA_MIRROR=6
SOCIAL_WORKER_POOL_SHARED_ACCOUNT_DISCOVERY=3
Expand All @@ -577,7 +591,7 @@ SOCIAL_WORKER_POOL_SEASON_MATERIALIZE=10
SOCIAL_WORKER_POOL_ANALYTICS_REFRESH=4
SOCIAL_WORKER_POOL_INTERVAL_SEC=2
SOCIAL_RUN_IN_FLIGHT_CAP=24
SOCIAL_POSTS_COMMENTS_PLATFORM_CAP_INSTAGRAM=8
SOCIAL_POSTS_COMMENTS_PLATFORM_CAP_INSTAGRAM=10
SOCIAL_POSTS_COMMENTS_PLATFORM_CAP_TIKTOK=1
SOCIAL_POSTS_COMMENTS_PLATFORM_CAP_TWITTER=1
SOCIAL_POSTS_COMMENTS_PLATFORM_CAP_YOUTUBE=1
Expand All @@ -588,7 +602,7 @@ SOCIAL_JOB_STALE_SECONDS=300
SOCIAL_JOB_STALE_SECONDS_YOUTUBE_POSTS=900
SOCIAL_JOB_STALE_SECONDS_YOUTUBE_COMMENTS=600
SOCIAL_MODAL_DISPATCH_RETRY_DELAY_SECONDS=120
SOCIAL_MODAL_DISPATCH_LIMIT=8
SOCIAL_MODAL_DISPATCH_LIMIT=25
SOCIAL_PLATFORM_CAP_PER_ACCOUNT_SCALING=false
SOCIAL_DB_UPSERT_BATCH_SIZE_COMMENTS=300
SOCIAL_DB_UPSERT_BATCH_SIZE_POSTS=100
Expand Down
74 changes: 74 additions & 0 deletions .tmp-backfill-monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python
"""Poll the BravoTV backfill run and print one line per meaningful change."""
import sys
import time

import psycopg2
from dotenv import dotenv_values

# Run id to monitor: the first CLI argument when one is given, otherwise the
# BravoTV backfill run this script was originally written for.
RUN = sys.argv[1] if len(sys.argv) > 1 else "a8c979c2-fc96-4056-9af5-64b26d57ac7b"
# Key/value pairs parsed from the backend's local .env file.
ENV = dotenv_values("/Users/thomashulihan/Projects/TRR/TRR-Backend/.env")
# DSN passed to psycopg2.connect(); None when TRR_DB_URL is missing from the file.
# NOTE(review): with a None DSN psycopg2 presumably falls back to libpq defaults
# rather than failing -- confirm that is acceptable for this monitor.
URL = ENV.get("TRR_DB_URL")

# Most recent snapshot seen by the polling loop; used to print only what changed.
prev = {}


def snapshot():
    """Read the current state of run ``RUN`` from Postgres.

    A new connection is opened on every call (10 second connect timeout) and
    is closed before the function returns or raises.

    Returns:
        A dict with these keys:

        * ``run_status`` / ``launch_state`` -- ``status`` and
          ``config->>'launch_state'`` from ``social.scrape_runs``; both are
          ``None`` when no row matches the run id.
        * ``jobs`` -- ``{"<job_type>:<status>": count}`` aggregated from
          ``social.scrape_jobs``.
        * ``frontier`` -- one-line summary of a single
          ``social.shared_account_run_frontiers`` row, or ``None`` when the
          run has no frontier row.
        * ``failures`` -- at most three ``"<job_type>: <error>"`` strings for
          failed jobs, ordered by ``completed_at`` descending, with the error
          message truncated to 140 characters.

    Raises:
        Whatever ``psycopg2`` raises for connection or query errors.
    """
    conn = psycopg2.connect(URL, connect_timeout=10)
    try:
        with conn.cursor() as cur:
            cur.execute(
                "select status, config->>'launch_state' from social.scrape_runs where id=%s",
                (RUN,),
            )
            row = cur.fetchone() or (None, None)
            snap = {"run_status": row[0], "launch_state": row[1]}

            cur.execute(
                """select job_type, status, count(*) from social.scrape_jobs
                where run_id=%s group by 1,2 order by 1,2""",
                (RUN,),
            )
            snap["jobs"] = {f"{jt}:{st}": n for jt, st, n in cur.fetchall()}

            cur.execute(
                """select pages_scanned, posts_checked, posts_saved, status, exhausted
                from social.shared_account_run_frontiers where run_id=%s limit 1""",
                (RUN,),
            )
            frontier = cur.fetchone()
            # Always set the key. The polling loop merges snapshots with
            # dict.update(), so omitting it would leave a stale frontier in
            # place and re-report the same "-> None" change on every poll.
            snap["frontier"] = (
                f"pages={frontier[0]} checked={frontier[1]} saved={frontier[2]} "
                f"status={frontier[3]} exhausted={frontier[4]}"
                if frontier is not None
                else None
            )

            cur.execute(
                """select job_type, left(coalesce(error_message,''),140) from social.scrape_jobs
                where run_id=%s and status='failed' order by completed_at desc limit 3""",
                (RUN,),
            )
            snap["failures"] = [f"{jt}: {em}" for jt, em in cur.fetchall()]
        return snap
    finally:
        conn.close()


# Poll once a minute, printing one line per meaningful change, until the run
# reaches a terminal status.
while True:
    try:
        current = snapshot()
    except Exception as exc:  # noqa: BLE001
        # Report each distinct error once instead of once per poll.
        summary = str(exc)[:120].replace("\n", " ")
        if summary != prev.get("err"):
            print(f"MONITOR-ERROR: {summary}", flush=True)
            prev["err"] = summary
        time.sleep(60)
        continue

    # A successful poll clears the remembered error so a repeat is reported again.
    prev.pop("err", None)

    for field in ("run_status", "launch_state", "frontier"):
        before = prev.get(field)
        after = current.get(field)
        if before != after:
            print(f"{field}: {before} -> {after}", flush=True)

    if prev.get("jobs") != current["jobs"]:
        print(f"jobs: {current['jobs']}", flush=True)

    # Only failures absent from the previous snapshot are printed.
    already_reported = prev.get("failures", [])
    for entry in current["failures"]:
        if entry not in already_reported:
            print(f"FAILED-JOB: {entry}", flush=True)

    prev.update(current)

    final_status = current.get("run_status")
    if final_status in ("completed", "failed", "cancelled"):
        print(f"TERMINAL: run {final_status}", flush=True)
        break
    time.sleep(60)
1 change: 0 additions & 1 deletion api/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ async def require_internal_admin(request: Request) -> dict:
detail="Authentication service unavailable",
headers={"x-error-code": "AUTH_SERVICE_UNAVAILABLE"},
) from exc

if current_user:
raise HTTPException(status_code=403, detail="Allowlist admin access required")
if attempted_internal_admin_verification:
Expand Down
17 changes: 17 additions & 0 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
import time
import uuid
from contextlib import asynccontextmanager
from pathlib import Path

# Load the repository-level .env (one directory above this file's folder)
# before the rest of the app is imported. override=False keeps any variables
# that are already set in the process environment.
try:
    from dotenv import load_dotenv
except Exception:  # pragma: no cover - python-dotenv is expected locally, optional in slim runtimes.
    load_dotenv = None  # type: ignore[assignment]
else:
    load_dotenv(Path(__file__).resolve().parents[1] / ".env", override=False)

from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
Expand Down Expand Up @@ -51,6 +60,7 @@
expected_supabase_issuer,
expected_supabase_project_ref,
)
from trr_backend.socials.read_models.account_profile.common import instagram_comment_rollup_health

configure_runtime_observability(service_name="trr-backend-api")

Expand Down Expand Up @@ -776,6 +786,13 @@ def admin_health_db_pressure(_: InternalAdminUser = None) -> dict[str, object]:
return snapshot


@app.get("/admin/health/instagram-comment-rollups")
@app.get("/api/v1/admin/health/instagram-comment-rollups")
def admin_health_instagram_comment_rollups(
    sample_limit: int = 25,
    _: InternalAdminUser = None,
) -> dict[str, object]:
    """Internal exact-count health check for persisted Instagram comment rollups."""
    # Registered under both the bare and the /api/v1 path. The route only
    # forwards ``sample_limit`` to the read-model helper and returns its result.
    # NOTE(review): ``_`` is presumably the internal-admin auth dependency -- confirm.
    rollup_health = instagram_comment_rollup_health(sample_limit=sample_limit)
    return rollup_health


@app.get("/health/runtime")
async def health_runtime() -> dict[str, object]:
"""DB-free runtime snapshot for local control-plane diagnostics."""
Expand Down
Loading
Loading