[WIP] SWE-Agent recipe adapted to the new black-box Agent Framework #91
[WIP] SWE-Agent recipe adapted to the new black-box Agent Framework #91 — wangtiance wants to merge 1 commit into
Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces the SWE-agent framework integration for VERL, enabling reinforcement learning on software engineering tasks. The implementation includes a robust configuration system, Docker-based sandboxing, dataset preparation utilities for synthetic tasks, and a specialized reward function for patch-based evaluation. Feedback focuses on improving code quality and robustness, specifically by removing redundant initializations and unused imports, tightening file permissions for lock files, enhancing the regex used for git diff parsing, and ensuring that configuration parsing errors are not silently swallowed.
| from transformers import AutoTokenizer | ||
| tokenizer = AutoTokenizer.from_pretrained(model_config.path, trust_remote_code=True) | ||
|
|
||
| load_balancer = GlobalRequestLoadBalancer.remote( | ||
| server_actor_ids=server_addresses, | ||
| ) | ||
|
|
||
| gateway_count = int(OmegaConf.select(config, "actor_rollout_ref.rollout.agent_framework.gateway_count", default=1)) | ||
| servers = list(zip(server_addresses, server_handles, strict=True)) | ||
|
|
||
| from transformers import AutoTokenizer | ||
| tokenizer = AutoTokenizer.from_pretrained(model_config.path, trust_remote_code=True) | ||
|
|
||
| from verl.agent.gateway.runtime import GatewayServingRuntime |
There was a problem hiding this comment.
`AutoTokenizer` is imported and instantiated twice within the `create` method, with identical arguments both times. The second import and `from_pretrained` call are redundant and should be removed to improve readability and avoid loading the tokenizer a second time.
| from transformers import AutoTokenizer | |
| tokenizer = AutoTokenizer.from_pretrained(model_config.path, trust_remote_code=True) | |
| load_balancer = GlobalRequestLoadBalancer.remote( | |
| server_actor_ids=server_addresses, | |
| ) | |
| gateway_count = int(OmegaConf.select(config, "actor_rollout_ref.rollout.agent_framework.gateway_count", default=1)) | |
| servers = list(zip(server_addresses, server_handles, strict=True)) | |
| from transformers import AutoTokenizer | |
| tokenizer = AutoTokenizer.from_pretrained(model_config.path, trust_remote_code=True) | |
| from verl.agent.gateway.runtime import GatewayServingRuntime | |
| from transformers import AutoTokenizer | |
| tokenizer = AutoTokenizer.from_pretrained(model_config.path, trust_remote_code=True) | |
| load_balancer = GlobalRequestLoadBalancer.remote( | |
| server_actor_ids=server_addresses, | |
| ) | |
| gateway_count = int(OmegaConf.select(config, "actor_rollout_ref.rollout.agent_framework.gateway_count", default=1)) | |
| servers = list(zip(server_addresses, server_handles, strict=True)) | |
| from verl.agent.gateway.runtime import GatewayServingRuntime |
| import uuid | ||
| from typing import Any, Optional | ||
|
|
||
| import numpy as np |
| while True: | ||
| for slot_idx in range(max_parallel_tasks_per_worker): | ||
| lock_path = os.path.join(lock_dir, f"slot_{slot_idx}.lock") | ||
| fd = os.open(lock_path, os.O_CREAT | os.O_RDWR | getattr(os, "O_CLOEXEC", 0), 0o666) |
There was a problem hiding this comment.
Creating the lock files with mode 0o666 requests world-readable and world-writable permissions (the effective mode is still reduced by the process umask). It is recommended to use a more restrictive mode such as 0o600 (owner read/write only) to enhance security, especially for lock files in shared temporary directories.
| fd = os.open(lock_path, os.O_CREAT | os.O_RDWR | getattr(os, "O_CLOEXEC", 0), 0o666) | |
| fd = os.open(lock_path, os.O_CREAT | os.O_RDWR | getattr(os, "O_CLOEXEC", 0), 0o600) |
| """Extract set of changed files from a patch.""" | ||
| if not patch: | ||
| return set() | ||
| pattern = r"diff --git a/(.+?) b/(.+)" |
There was a problem hiding this comment.
| return json.loads(val) | ||
| except (json.JSONDecodeError, TypeError): | ||
| return {} |
There was a problem hiding this comment.
| exec_dir: Working directory for the subprocess | ||
| (avoids YAML parsing issues with ``docker`` subdir). | ||
| swe_agent_timeout: Overall timeout in seconds. | ||
| proxy_port: ModelProxy port (for logging only). |
Adapted from https://github.com/verl-project/verl-recipe/tree/main/swe_agent, replacing the AgentLoopManager with the new gateway-based agent framework (see verl-project/verl#5931 and zackcxb/verl#1).