Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,28 @@
logger = logging.getLogger(__name__)


def _resolve_upload_dir(base_dir: str, user_id: str) -> str:
"""Resolve the per-user upload directory under ``python_uploads``.

``user_id`` originates from an untrusted request header, so it must be
treated as a single, opaque directory name. Rejecting anything that is not
a single non-".."/non-absolute path segment (and verifying containment)
prevents path traversal such as ``user_id: ../../../tmp`` from escaping the
uploads root and writing files to arbitrary locations on the server.
"""
uploads_root = (Path(base_dir) / "python_uploads").resolve()
user_path = Path(user_id)
if user_path.is_absolute() or len(user_path.parts) != 1 or user_id in (".", ".."):
raise ValueError("user_id must be a single safe path segment")

upload_dir = (uploads_root / user_path).resolve()
try:
upload_dir.relative_to(uploads_root)
except ValueError as exc:
raise ValueError("user_id must stay inside the uploads directory") from exc
return str(upload_dir)


def _resolve_upload_path(upload_dir: str, filename: str) -> str:
upload_dir_path = Path(upload_dir).resolve()
filename_path = Path(filename)
Expand Down Expand Up @@ -51,7 +73,10 @@ async def python_file_upload(
):
base_dir = CFG.SYSTEM_APP.work_dir

upload_dir = os.path.join(base_dir, "python_uploads", user_id)
try:
upload_dir = _resolve_upload_dir(base_dir, user_id)
except ValueError:
return Result.failed(msg="Invalid user identifier")
os.makedirs(upload_dir, exist_ok=True)

file_path = _resolve_upload_path(upload_dir, file.filename)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Regression tests for issue #3104: the python file-upload endpoint must not
let an untrusted ``user_id`` header escape the uploads directory via path
traversal.

Suggested location in the repo:
packages/dbgpt-app/tests/openapi/test_python_upload_path_traversal.py

Run (from the repo, with dev deps installed):
pytest packages/dbgpt-app/tests/openapi/test_python_upload_path_traversal.py -v
"""
import os

import pytest

from dbgpt_app.openapi.api_v1.python_upload_api import _resolve_upload_dir


def test_traversal_user_id_is_rejected():
with pytest.raises(ValueError):
_resolve_upload_dir("/srv/work", "../../../../../../tmp/DBGPT_PWN")


@pytest.mark.parametrize("bad", ["..", "/etc", "a/b", "../evil", "../../etc/cron.d"])
def test_unsafe_user_ids_are_rejected(bad):
with pytest.raises(ValueError):
_resolve_upload_dir("/srv/work", bad)


@pytest.mark.parametrize("good", ["default", "admin", "user_42", "3f2b-uuid", "user@example.com"])
def test_legit_user_ids_stay_inside_uploads_root(tmp_path, good):
uploads_root = os.path.realpath(os.path.join(str(tmp_path), "python_uploads"))
resolved = os.path.realpath(_resolve_upload_dir(str(tmp_path), good))
assert resolved.startswith(uploads_root + os.sep)
# the user_id is preserved as the leaf directory name
assert os.path.basename(resolved) == good