# File size: 3,394 bytes; ref 2e07357 (header left over from page extraction).
"""

Data models for the Ad Fraud Investigation Environment.

Defines the Action, Observation, and State types used by the OpenEnv interface.
The agent works through a queue of ads, investigating them and rendering verdicts.

"""

from __future__ import annotations

from typing import Dict, List, Literal, Optional

from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field


class AdReviewAction(Action):
    """A single move made by the ad fraud investigation agent.

    ``action_type`` selects one of three kinds of move:

    - ``investigate``: spend budget to reveal information about an ad
    - ``verdict``: approve, reject, or escalate an ad
    - ``link_accounts``: flag two ads as part of the same fraud network

    Every field after ``ad_id`` is optional at the schema level and defaults
    to ``None``; each field's description names the ``action_type`` it
    belongs to. This class declares no cross-field validation itself.
    """

    # --- Common to every action -------------------------------------------
    action_type: Literal["investigate", "verdict", "link_accounts"]
    ad_id: str = Field(
        default=...,
        description="Target ad identifier (e.g. 'ad_001')",
    )

    # --- action_type == "investigate" -------------------------------------
    investigation_target: Optional[
        Literal[
            "advertiser_history",
            "landing_page",
            "payment_method",
            "targeting_overlap",
            "creative_similarity",
            "campaign_structure",
        ]
    ] = Field(
        default=None,
        description="What to investigate (required for action_type='investigate')",
    )

    # --- action_type == "verdict" -----------------------------------------
    verdict: Optional[Literal["approve", "reject", "escalate"]] = Field(
        default=None,
        description="Verdict decision (required for action_type='verdict')",
    )
    # Bounded to the closed interval [0.0, 1.0] by the ge/le constraints.
    confidence: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Agent's confidence in verdict (0.0-1.0)",
    )

    # --- action_type == "link_accounts" -----------------------------------
    linked_ad_id: Optional[str] = Field(
        default=None,
        description="Other ad in suspected fraud ring (required for action_type='link_accounts')",
    )
    link_reason: Optional[str] = Field(
        default=None,
        description="Why the agent believes these ads are connected",
    )


class AdReviewObservation(Observation):
    """What the agent receives back after each step.

    Most fields are free-form text so that LLM agents can reason about the
    content naturally; ``queue_status`` carries the structured data intended
    for programmatic access. Every field has an empty default (``""``, an
    empty list, or an empty dict).
    """

    # --- Natural-language fields ------------------------------------------
    queue_summary: str = Field(description="Natural language overview of the ad queue", default="")
    current_ad_info: str = Field(description="Details of the ad currently in focus", default="")
    investigation_findings: str = Field(description="Accumulated investigation results", default="")
    verdict_history_summary: str = Field(
        description="Summary of verdicts rendered so far",
        default="",
    )
    feedback: str = Field(
        description="Natural language feedback on the last action taken",
        default="",
    )

    # --- Structured fields --------------------------------------------------
    # default_factory gives each instance its own fresh list/dict.
    available_ads: List[str] = Field(
        description="Ad IDs still pending review",
        default_factory=list,
    )
    queue_status: Dict = Field(
        description="Structured status: total_ads, reviewed, pending, budget, step",
        default_factory=dict,
    )


class AdFraudState(State):
    """Internal environment state, exposed via the ``state()`` property.

    According to the original notes for this class, the ``State`` base
    supplies ``episode_id`` and ``step_count`` and is configured with
    ``extra='allow'`` so that custom fields are permitted (the base class is
    defined outside this module — confirm there).

    All fields declared here have defaults, so the model can be constructed
    with no arguments.
    """

    task_id: str = Field(default="")
    total_ads: int = Field(default=0)
    reviewed_count: int = Field(default=0)
    remaining_budget: int = Field(default=0)
    # default_factory gives each instance its own fresh dict.
    verdicts: Dict = Field(default_factory=dict)
    # None until a score has been assigned.
    grader_score: Optional[float] = Field(default=None)