Creating a human review job programmatically

Whether you are on the free plan or a paid plan, you can create human review jobs directly in code, using either run_test_suite / runTestSuite or the RunManager.

run_test_suite / runTestSuite

from dataclasses import dataclass

from autoblocks.testing.evaluators import BaseHasAllSubstrings
from autoblocks.testing.models import BaseTestCase
from autoblocks.testing.models import CreateHumanReviewJob
from autoblocks.testing.run import run_test_suite
from autoblocks.testing.util import md5

@dataclass
class TestCase(BaseTestCase):
    """Test case pairing an input string with a list of expected substrings."""

    input: str
    expected_substrings: list[str]

    def hash(self) -> str:
        """Return the unique identifier for this test case."""
        # The identifier is the md5 of the input.
        identifier = md5(self.input)
        return identifier

class HasAllSubstrings(BaseHasAllSubstrings[TestCase, str]):
    """Evaluator that feeds ``TestCase.expected_substrings`` and the string output to ``BaseHasAllSubstrings``."""

    id = "has-all-substrings"

    def test_case_mapper(self, test_case: TestCase) -> list[str]:
        """Return the substrings listed on the test case."""
        substrings = test_case.expected_substrings
        return substrings

    def output_mapper(self, output: str) -> str:
        """Return the output unchanged; it is already a string."""
        return output

# Replace with your test cases
test_cases = [
    TestCase(
        input="hello world",
        expected_substrings=["hello", "world"],
    ),
]

# Passed to run_test_suite below as the human_review_job argument
review_job = CreateHumanReviewJob(
    assignee_email_address="example@example.com",
    name="Review for accuracy",
)

run_test_suite(
    id="my-test-suite",
    test_cases=test_cases,
    fn=lambda test_case: test_case.input,  # Replace with your LLM call
    evaluators=[HasAllSubstrings()],  # Replace with your evaluators
    human_review_job=review_job,
)

Run Manager

from dataclasses import dataclass

from autoblocks.testing.models import BaseTestCase
from autoblocks.testing.models import HumanReviewField
from autoblocks.testing.models import HumanReviewFieldContentType
from autoblocks.testing.run import RunManager
from autoblocks.testing.util import md5


# Update with your test case type
@dataclass
class TestCase(BaseTestCase):
    """Test case holding a single input string."""

    input: str

    def serialize_for_human_review(self) -> list[HumanReviewField]:
        """Return the input as a single text field named "Input"."""
        input_field = HumanReviewField(
            name="Input",
            value=self.input,
            content_type=HumanReviewFieldContentType.TEXT,
        )
        return [input_field]

    def hash(self) -> str:
        """Return ``md5(self.input)`` as this test case's hash."""
        key = md5(self.input)
        return key


# Update with your output type
@dataclass
class Output:
    """Output type wrapping a single output string."""

    output: str

    def serialize_for_human_review(self) -> list[HumanReviewField]:
        """Return the output as a single text field named "Output"."""
        output_field = HumanReviewField(
            name="Output",
            value=self.output,
            content_type=HumanReviewFieldContentType.TEXT,
        )
        return [output_field]


# Run manager parameterized with the test case and output types defined above
run = RunManager[TestCase, Output](
    test_id="test-id",
)

run.start()
# Add results from your test suite here
run.add_result(
    test_case=TestCase(input="Hello, world!"),
    output=Output(output="Hi, world!"),
)
run.end()

# Request the human review job; in this example it is called after run.end().
# Replace the address below with the reviewer's email address.
run.create_human_review_job(
    assignee_email_address="example@example.com",
    name="Review for accuracy",
)

Using the results

You can use the results of a human review job for a variety of purposes, such as:

  • Fine-tuning an evaluation model
  • Few-shot examples for your LLM judges
  • Improving your core product based on expert feedback
  • and more!