Commit
·
6fffc74
1
Parent(s):
9a2680b
Add an abstract CodeTesting Flow and working CF_CodeTesting Flow.
Browse files- CF_Code.py +1 -0
- CF_Code.yaml +4 -5
- CF_CodeCollab.yaml +18 -10
- CF_CodeCritic.yaml +1 -0
- CF_CodeDebug.py +6 -0
- CF_CodeDebug.yaml +64 -0
- CF_CodeReflect.yaml +20 -23
- CF_CodeTesting.py +32 -0
- CF_CodeTesting.yaml +85 -0
- CF_CodeWithPlan.yaml +3 -4
- CF_Plan.yaml +3 -4
- CF_PlanCollab.yaml +18 -10
- CF_PlanCritic.yaml +1 -1
- CF_PlanReflect.yaml +8 -7
- CodeTesting.py +68 -0
- FixedReply_CodeReflect.py +6 -0
- FixedReply_CodeReflect.yaml +25 -0
- __init__.py +15 -0
- src/data_transformations/__init__.py +2 -0
- src/data_transformations/correctness_flag.py +15 -0
- src/data_transformations/test_results_summary_generation.py +102 -0
CF_Code.py
CHANGED
@@ -4,6 +4,7 @@ from flows.flow_verse import load_class
|
|
4 |
repository_id = os.environ.get("OpenAIChatAtomicFlow")
|
5 |
OpenAIChatAtomicFlow = load_class(repository_id, "OpenAIChatAtomicFlow")
|
6 |
|
|
|
7 |
class CF_Code(OpenAIChatAtomicFlow):
|
8 |
def __init__(self, **kwargs):
|
9 |
super().__init__(**kwargs)
|
|
|
4 |
repository_id = os.environ.get("OpenAIChatAtomicFlow")
|
5 |
OpenAIChatAtomicFlow = load_class(repository_id, "OpenAIChatAtomicFlow")
|
6 |
|
7 |
+
|
8 |
class CF_Code(OpenAIChatAtomicFlow):
|
9 |
def __init__(self, **kwargs):
|
10 |
super().__init__(**kwargs)
|
CF_Code.yaml
CHANGED
@@ -73,13 +73,12 @@ input_keys:
|
|
73 |
|
74 |
output_keys:
|
75 |
- "code"
|
76 |
-
|
77 |
-
|
78 |
-
code_extractor:
|
79 |
-
_target_: flows.message_annotators.RegexFirstOccurrenceExtractor
|
80 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
81 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
82 |
-
|
|
|
83 |
strip: True
|
84 |
assert_unique: True
|
85 |
verbose: True
|
|
|
73 |
|
74 |
output_keys:
|
75 |
- "code"
|
76 |
+
output_data_transformations:
|
77 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
|
|
|
|
78 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
79 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
80 |
+
input_key: "raw_response"
|
81 |
+
output_key: "code"
|
82 |
strip: True
|
83 |
assert_unique: True
|
84 |
verbose: True
|
CF_CodeCollab.yaml
CHANGED
@@ -27,7 +27,7 @@ subflows_config:
|
|
27 |
_target_: langchain.PromptTemplate
|
28 |
template: |2-
|
29 |
# Feedback on the last proposed solution
|
30 |
-
{{
|
31 |
|
32 |
|
33 |
Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the code in the following format:
|
@@ -37,15 +37,23 @@ subflows_config:
|
|
37 |
otherwise, reply:
|
38 |
"Final answer."
|
39 |
input_variables:
|
40 |
-
-
|
41 |
partial_variables:
|
42 |
code_placeholder: "{{python_code}}"
|
43 |
template_format: jinja2
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
verbose: True
|
50 |
output_keys:
|
51 |
- "code"
|
@@ -55,7 +63,7 @@ subflows_config:
|
|
55 |
class_name: CF_CodeCritic
|
56 |
overrides:
|
57 |
name: CodeCritic
|
58 |
-
|
59 |
-
- _target_: flows.
|
60 |
old_key2new_key:
|
61 |
-
raw_response: "
|
|
|
27 |
_target_: langchain.PromptTemplate
|
28 |
template: |2-
|
29 |
# Feedback on the last proposed solution
|
30 |
+
{{code_feedback}}
|
31 |
|
32 |
|
33 |
Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the code in the following format:
|
|
|
37 |
otherwise, reply:
|
38 |
"Final answer."
|
39 |
input_variables:
|
40 |
+
- code_feedback
|
41 |
partial_variables:
|
42 |
code_placeholder: "{{python_code}}"
|
43 |
template_format: jinja2
|
44 |
+
default_human_input_key: "code_feedback"
|
45 |
+
output_data_transformations:
|
46 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
47 |
+
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
48 |
+
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
49 |
+
input_key: "raw_response"
|
50 |
+
output_key: "code"
|
51 |
+
strip: True
|
52 |
+
assert_unique: True
|
53 |
+
verbose: True
|
54 |
+
- _target_: flows.data_transformations.EndOfInteraction
|
55 |
+
end_of_interaction_string: "Final answer"
|
56 |
+
output_key: "end_of_interaction"
|
57 |
verbose: True
|
58 |
output_keys:
|
59 |
- "code"
|
|
|
63 |
class_name: CF_CodeCritic
|
64 |
overrides:
|
65 |
name: CodeCritic
|
66 |
+
output_data_transformations:
|
67 |
+
- _target_: flows.data_transformations.KeyRename
|
68 |
old_key2new_key:
|
69 |
+
raw_response: "code_feedback"
|
CF_CodeCritic.yaml
CHANGED
@@ -73,3 +73,4 @@ input_keys:
|
|
73 |
- "output_description"
|
74 |
- "io_examples_and_explanation"
|
75 |
- "code"
|
|
|
|
73 |
- "output_description"
|
74 |
- "io_examples_and_explanation"
|
75 |
- "code"
|
76 |
+
output_keys: []
|
CF_CodeDebug.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flows.base_flows import GeneratorCriticFlow
|
2 |
+
|
3 |
+
|
4 |
+
class CF_CodeDebug(GeneratorCriticFlow):
    """Generator-critic flow exposed under the ``CF_CodeDebug`` class name.

    The class adds no behaviour of its own; construction is delegated
    entirely to ``GeneratorCriticFlow``.
    """

    def __init__(self, **kwargs):
        # Forward every keyword argument untouched to the parent constructor.
        super().__init__(**kwargs)
|
CF_CodeDebug.yaml
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: "CodeDebug_Flow"
|
2 |
+
verbose: True
|
3 |
+
description: "ToDO: add description"
|
4 |
+
|
5 |
+
reset_generator_every_round: False
|
6 |
+
reset_critic_every_round: True
|
7 |
+
max_rounds: 2 # ToDo: To increase to 4
|
8 |
+
early_exit_key: "end_of_interaction"
|
9 |
+
|
10 |
+
input_keys:
|
11 |
+
- "problem_description"
|
12 |
+
- "input_description"
|
13 |
+
- "output_description"
|
14 |
+
- "io_examples_and_explanation"
|
15 |
+
- "public_tests_individual_io"
|
16 |
+
output_keys:
|
17 |
+
- "code"
|
18 |
+
|
19 |
+
subflows_config:
|
20 |
+
- _target_: flows.flow_verse.instantiate_flow
|
21 |
+
repository_id: ${oc.env:CC_FLOWS}
|
22 |
+
class_name: CF_Code
|
23 |
+
overrides:
|
24 |
+
name: "CodeGenerator"
|
25 |
+
model_name: "gpt-4"
|
26 |
+
human_message_prompt_template:
|
27 |
+
template: |2-
|
28 |
+
{{query}}
|
29 |
+
|
30 |
+
|
31 |
+
Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
|
32 |
+
```python
|
33 |
+
{{code_placeholder}}
|
34 |
+
```
|
35 |
+
input_variables:
|
36 |
+
- query
|
37 |
+
partial_variables:
|
38 |
+
code_placeholder: "{{python_code}}"
|
39 |
+
input_data_transformations:
|
40 |
+
- _target_: flows.data_transformations.KeyRename
|
41 |
+
old_key2new_key:
|
42 |
+
"test_results_summary": "query"
|
43 |
+
output_data_transformations:
|
44 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
45 |
+
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
46 |
+
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
47 |
+
input_key: "raw_response"
|
48 |
+
output_key: "code"
|
49 |
+
strip: True
|
50 |
+
assert_unique: True
|
51 |
+
verbose: True
|
52 |
+
- _target_: flows.data_transformations.EndOfInteraction
|
53 |
+
end_of_interaction_string: "Final answer"
|
54 |
+
output_key: "end_of_interaction"
|
55 |
+
verbose: True
|
56 |
+
output_keys:
|
57 |
+
- "code"
|
58 |
+
- "end_of_interaction"
|
59 |
+
|
60 |
+
- _target_: flows.flow_verse.instantiate_flow
|
61 |
+
repository_id: ${oc.env:CC_FLOWS}
|
62 |
+
class_name: CF_CodeTesting
|
63 |
+
overrides:
|
64 |
+
name: "CodeTestingCritic"
|
CF_CodeReflect.yaml
CHANGED
@@ -12,7 +12,6 @@ input_keys:
|
|
12 |
- "input_description"
|
13 |
- "output_description"
|
14 |
- "io_examples_and_explanation"
|
15 |
-
|
16 |
output_keys:
|
17 |
- "code"
|
18 |
|
@@ -23,29 +22,27 @@ subflows_config:
|
|
23 |
overrides:
|
24 |
name: "CodeGenerator"
|
25 |
model_name: "gpt-4"
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
verbose: True
|
32 |
output_keys:
|
33 |
- "code"
|
34 |
- "end_of_interaction"
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
input_keys: []
|
40 |
-
outputs_transformations:
|
41 |
-
- _target_: flows.outputs_transformations.Rename
|
42 |
-
old_key2new_key:
|
43 |
-
raw_response: "query"
|
44 |
-
fixed_reply: |2-
|
45 |
-
Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
|
46 |
-
If that is not the case, provide the corrected version of the code in the following format:
|
47 |
-
```python
|
48 |
-
{{python_code}}
|
49 |
-
```
|
50 |
-
otherwise, reply:
|
51 |
-
"Final answer."
|
|
|
12 |
- "input_description"
|
13 |
- "output_description"
|
14 |
- "io_examples_and_explanation"
|
|
|
15 |
output_keys:
|
16 |
- "code"
|
17 |
|
|
|
22 |
overrides:
|
23 |
name: "CodeGenerator"
|
24 |
model_name: "gpt-4"
|
25 |
+
input_data_transformations:
|
26 |
+
- _target_: flows.data_transformations.KeyRename
|
27 |
+
old_key2new_key:
|
28 |
+
"code_reflect_message": "query"
|
29 |
+
output_data_transformations:
|
30 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
31 |
+
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
32 |
+
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
33 |
+
input_key: "raw_response"
|
34 |
+
output_key: "code"
|
35 |
+
strip: True
|
36 |
+
assert_unique: True
|
37 |
+
verbose: True
|
38 |
+
- _target_: flows.data_transformations.EndOfInteraction
|
39 |
+
end_of_interaction_string: "Final answer"
|
40 |
+
output_key: "end_of_interaction"
|
41 |
verbose: True
|
42 |
output_keys:
|
43 |
- "code"
|
44 |
- "end_of_interaction"
|
45 |
+
|
46 |
+
- _target_: flows.flow_verse.instantiate_flow
|
47 |
+
repository_id: ${oc.env:CC_FLOWS}
|
48 |
+
class_name: FixedReply_CodeReflect
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CF_CodeTesting.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict
|
2 |
+
|
3 |
+
from flows import utils
|
4 |
+
from .src.evaluation import testing_utils_codeforces
|
5 |
+
from .CodeTesting import CodeTesting
|
6 |
+
|
7 |
+
log = utils.get_pylogger(__name__)
|
8 |
+
|
9 |
+
# ToDo: Add flags to control whether hidden, public, or both sets of tests should be used for evaluation
|
10 |
+
|
11 |
+
|
12 |
+
class CF_CodeTesting(CodeTesting):
    """Concrete ``CodeTesting`` flow that runs a candidate solution on the public tests.

    Test cases are read from the ``public_tests_individual_io`` entry of the
    input data and the candidate solution from its ``code`` entry.
    """

    REQUIRED_KEYS_CONFIG = []
    REQUIRED_KEYS_KWARGS = []

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _get_test_data(self, input_data: Dict):
        """Return the input-output pairs stored under ``public_tests_individual_io``."""
        return input_data["public_tests_individual_io"]

    def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
        """Evaluate ``input_data["code"]`` against ``test_data`` and return the results.

        The dictionary returned by
        ``testing_utils_codeforces.evaluate_solution_for_problem`` is handed
        back after the ``input`` field of every entry in
        ``public_tests_results`` has been joined into one newline-separated
        string (modified in place).
        """
        evaluation = testing_utils_codeforces.evaluate_solution_for_problem(
            candidate_solution=input_data["code"],
            public_tests_io=test_data,
        )

        # presumably each "input" is a list of lines -- verify against the evaluator
        for single_result in evaluation["public_tests_results"]:
            joined_input = "\n".join(single_result["input"])
            single_result["input"] = joined_input

        return evaluation
|
CF_CodeTesting.yaml
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
description: "ToDo: add description"
|
2 |
+
input_keys:
|
3 |
+
- "code"
|
4 |
+
- "public_tests_individual_io"
|
5 |
+
output_keys:
|
6 |
+
- "all_tests_passed"
|
7 |
+
- "tests_results_summary"
|
8 |
+
output_data_transformations:
|
9 |
+
- _target_: CC_flows.src.data_transformations.CorrectnessFlag # ToDo: This import style would not work if the flow is synced in the current implementation (the outer directory is a hash and not the name; figure out how to do the import robustly; e.g., using relative imports)
|
10 |
+
input_key: "public_tests_results" # ToDo: Add support for nested keys and update this to raw_response.public_tests_results?
|
11 |
+
output_key: "all_tests_passed"
|
12 |
+
- _target_: CC_flows.src.data_transformations.TestResultsSummaryGeneration
|
13 |
+
output_key: "tests_results_summary"
|
14 |
+
|
15 |
+
single_test_error_message: True
|
16 |
+
|
17 |
+
no_error_template: |2-
|
18 |
+
${.issue_title}
|
19 |
+
All of the executed tests passed.
|
20 |
+
|
21 |
+
compilation_error_template: |2-
|
22 |
+
${.issue_title}
|
23 |
+
The execution resulted in a compilation error.
|
24 |
+
## Compilation error message:
|
25 |
+
{{error_message}}
|
26 |
+
timeout_error_template: |2-
|
27 |
+
${.issue_title}
|
28 |
+
The execution timed out, the solution is not efficient enough.
|
29 |
+
runtime_error_template: |2-
|
30 |
+
${.issue_title}
|
31 |
+
The execution resulted in a runtime error on the following test.
|
32 |
+
## [Failed test] Input
|
33 |
+
```
|
34 |
+
{{test_input}}
|
35 |
+
```
|
36 |
+
## [Failed test] Runtime error message
|
37 |
+
{{error_message}}
|
38 |
+
single_test_error_template: |2-
|
39 |
+
${.issue_title}
|
40 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
|
41 |
+
## [Failed test] Input
|
42 |
+
```
|
43 |
+
{{test_input}}
|
44 |
+
```
|
45 |
+
## [Failed test] Expected output
|
46 |
+
```
|
47 |
+
{{expected_output}}
|
48 |
+
```
|
49 |
+
## [Failed test] Generated output
|
50 |
+
```
|
51 |
+
{{generated_output}}
|
52 |
+
```
|
53 |
+
all_tests_header: |2-
|
54 |
+
${.issue_title}
|
55 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
|
56 |
+
test_error_template: |2-
|
57 |
+
## [Failed test {{idx}}]
|
58 |
+
### [Failed test {{idx}}] Input
|
59 |
+
```
|
60 |
+
{{test_input}}
|
61 |
+
```
|
62 |
+
### [Failed test {{idx}}] Expected output
|
63 |
+
```
|
64 |
+
{{expected_output}}
|
65 |
+
```
|
66 |
+
### [Failed test {{idx}}] Generated output
|
67 |
+
```
|
68 |
+
{{generated_output}}
|
69 |
+
```
|
70 |
+
tests_separator: "\n\n"
|
71 |
+
|
72 |
+
issue_title: "# Issue with the last proposed solution"
|
73 |
+
|
74 |
+
feedback_title: "# Feedback on the last proposed solution"
|
75 |
+
|
76 |
+
no_code_template: |2-
|
77 |
+
${.feedback_title}
|
78 |
+
The code was not provided in the correct output format specified in the request or it was not provided at all.
|
79 |
+
feedback_only_template: |2-
|
80 |
+
${.feedback_title}
|
81 |
+
{{feedback_content}}
|
82 |
+
feedback_and_issue_template: |2-
|
83 |
+
{{issue_description}}
|
84 |
+
|
85 |
+
{{feedback_content}}
|
CF_CodeWithPlan.yaml
CHANGED
@@ -81,12 +81,11 @@ input_keys:
|
|
81 |
output_keys:
|
82 |
- "code"
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
_target_: flows.message_annotators.RegexFirstOccurrenceExtractor
|
87 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
88 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
89 |
-
|
90 |
strip: True
|
91 |
assert_unique: True
|
92 |
verbose: True
|
|
|
81 |
output_keys:
|
82 |
- "code"
|
83 |
|
84 |
+
output_data_transformations:
|
85 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
|
|
86 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
87 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
88 |
+
output_key: "code"
|
89 |
strip: True
|
90 |
assert_unique: True
|
91 |
verbose: True
|
CF_Plan.yaml
CHANGED
@@ -75,13 +75,12 @@ input_keys:
|
|
75 |
output_keys:
|
76 |
- "plan"
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
_target_: flows.message_annotators.RegexFirstOccurrenceExtractor
|
81 |
regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
|
82 |
regex_fallback:
|
83 |
- '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
|
84 |
-
|
85 |
strip: True
|
86 |
assert_unique: True
|
87 |
verbose: True
|
|
|
75 |
output_keys:
|
76 |
- "plan"
|
77 |
|
78 |
+
output_data_transformations:
|
79 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
|
|
80 |
regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
|
81 |
regex_fallback:
|
82 |
- '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
|
83 |
+
output_key: "plan"
|
84 |
strip: True
|
85 |
assert_unique: True
|
86 |
verbose: True
|
CF_PlanCollab.yaml
CHANGED
@@ -27,7 +27,7 @@ subflows_config:
|
|
27 |
_target_: langchain.PromptTemplate
|
28 |
template: |2-
|
29 |
# Feedback on the last proposed conceptual solution
|
30 |
-
{{
|
31 |
|
32 |
|
33 |
Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the conceptual solution in the following format:
|
@@ -36,15 +36,23 @@ subflows_config:
|
|
36 |
otherwise, reply:
|
37 |
"Final answer."
|
38 |
input_variables:
|
39 |
-
-
|
40 |
partial_variables:
|
41 |
plan_placeholder: "{{conceptual_solution}}"
|
42 |
template_format: jinja2
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
verbose: True
|
49 |
output_keys:
|
50 |
- "plan"
|
@@ -54,7 +62,7 @@ subflows_config:
|
|
54 |
class_name: CF_PlanCritic
|
55 |
overrides:
|
56 |
name: PlanCritic
|
57 |
-
|
58 |
-
- _target_: flows.
|
59 |
old_key2new_key:
|
60 |
-
raw_response: "
|
|
|
27 |
_target_: langchain.PromptTemplate
|
28 |
template: |2-
|
29 |
# Feedback on the last proposed conceptual solution
|
30 |
+
{{plan_feedback}}
|
31 |
|
32 |
|
33 |
Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the conceptual solution in the following format:
|
|
|
36 |
otherwise, reply:
|
37 |
"Final answer."
|
38 |
input_variables:
|
39 |
+
- plan_feedback
|
40 |
partial_variables:
|
41 |
plan_placeholder: "{{conceptual_solution}}"
|
42 |
template_format: jinja2
|
43 |
+
default_human_input_key: "plan_feedback"
|
44 |
+
output_data_transformations:
|
45 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
46 |
+
regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
|
47 |
+
regex_fallback:
|
48 |
+
- '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
|
49 |
+
output_key: "plan"
|
50 |
+
strip: True
|
51 |
+
assert_unique: True
|
52 |
+
verbose: True
|
53 |
+
- _target_: flows.data_transformations.EndOfInteraction
|
54 |
+
end_of_interaction_string: "Final answer"
|
55 |
+
output_key: "end_of_interaction"
|
56 |
verbose: True
|
57 |
output_keys:
|
58 |
- "plan"
|
|
|
62 |
class_name: CF_PlanCritic
|
63 |
overrides:
|
64 |
name: PlanCritic
|
65 |
+
output_data_transformations:
|
66 |
+
- _target_: flows.data_transformations.KeyRename
|
67 |
old_key2new_key:
|
68 |
+
raw_response: "plan_feedback"
|
CF_PlanCritic.yaml
CHANGED
@@ -74,4 +74,4 @@ input_keys:
|
|
74 |
- "plan"
|
75 |
|
76 |
output_keys:
|
77 |
-
- "
|
|
|
74 |
- "plan"
|
75 |
|
76 |
output_keys:
|
77 |
+
- "plan_feedback"
|
CF_PlanReflect.yaml
CHANGED
@@ -26,21 +26,22 @@ subflows_config:
|
|
26 |
response_annotators:
|
27 |
end_of_interaction_annotator:
|
28 |
_target_: flows.message_annotators.EndOfInteraction
|
29 |
-
|
30 |
key: "end_of_interaction"
|
31 |
verbose: True
|
32 |
output_keys:
|
33 |
- "plan"
|
34 |
- "end_of_interaction"
|
35 |
-
- _target_: flows.
|
|
|
|
|
36 |
overrides:
|
37 |
name: "PlanFixedReplyCritic"
|
38 |
description: "ToDo: Add description"
|
39 |
-
input_keys:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
raw_response: "query"
|
44 |
fixed_reply: |2-
|
45 |
Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
|
46 |
If that is not the case, provide the corrected version of the conceptual solution in the following format:
|
|
|
26 |
response_annotators:
|
27 |
end_of_interaction_annotator:
|
28 |
_target_: flows.message_annotators.EndOfInteraction
|
29 |
+
end_of_interaction_string: "Final answer"
|
30 |
key: "end_of_interaction"
|
31 |
verbose: True
|
32 |
output_keys:
|
33 |
- "plan"
|
34 |
- "end_of_interaction"
|
35 |
+
- _target_: flows.flow_verse.instantiate_flow
|
36 |
+
repository_id: ${oc.env:CC_FLOWS}
|
37 |
+
class_name: CF_Reflect
|
38 |
overrides:
|
39 |
name: "PlanFixedReplyCritic"
|
40 |
description: "ToDo: Add description"
|
41 |
+
input_keys:
|
42 |
+
- "plan"
|
43 |
+
output_keys:
|
44 |
+
- "query"
|
|
|
45 |
fixed_reply: |2-
|
46 |
Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
|
47 |
If that is not the case, provide the corrected version of the conceptual solution in the following format:
|
CodeTesting.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from copy import deepcopy
|
2 |
+
from typing import Optional, Any, List, Dict
|
3 |
+
|
4 |
+
from flows import utils
|
5 |
+
from flows.base_flows.abstract import AtomicFlow
|
6 |
+
|
7 |
+
log = utils.get_pylogger(__name__)
|
8 |
+
|
9 |
+
|
10 |
+
class CodeTesting(AtomicFlow):
    """Abstract flow that tests a piece of code against input-output pairs.

    Subclasses provide the two hooks ``_get_test_data`` (where the test cases
    come from) and ``_run_tests`` (how they are executed); ``run`` chains them.
    """

    REQUIRED_KEYS_CONFIG = []
    REQUIRED_KEYS_KWARGS = []

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _get_test_data(self, input_data: Dict):
        """Retrieve (or generate) the input-output pairs used to test the implementation.

        Raises:
            NotImplementedError: always; subclasses must override this hook.
        """
        raise NotImplementedError()

    def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
        """Run the tests in ``test_data`` for the given ``input_data``.

        Raises:
            NotImplementedError: always; subclasses must override this hook.
        """
        raise NotImplementedError()

    @classmethod
    def instantiate_from_config(cls, config):
        """Build a flow instance from a configuration mapping.

        A deep copy of ``config`` is passed on as ``flow_config``, and the
        ``input_data_transformations`` / ``output_data_transformations``
        entries are set up through ``cls._set_up_data_transformations``.

        Raises:
            KeyError: if either transformation entry is missing from ``config``.
        """
        flow_config = deepcopy(config)

        kwargs = {"flow_config": flow_config}
        kwargs["input_data_transformations"] = cls._set_up_data_transformations(
            config["input_data_transformations"]
        )
        kwargs["output_data_transformations"] = cls._set_up_data_transformations(
            config["output_data_transformations"]
        )

        # ~~~ Instantiate flow ~~~
        return cls(**kwargs)

    def run(self,
            input_data: Dict[str, Any],
            private_keys: Optional[List[str]] = None,
            keys_to_ignore_for_hash: Optional[List[str]] = None) -> Dict[str, Any]:
        """Fetch the test data, run the tests and return their raw results.

        Args:
            input_data: Data passed to both ``_get_test_data`` and ``_run_tests``.
            private_keys: Not used by this method. Defaults to ``None`` rather
                than a shared mutable list.
            keys_to_ignore_for_hash: Not used by this method. Defaults to
                ``None`` rather than a shared mutable list.

        Returns:
            Whatever ``_run_tests`` returns.
        """
        # ~~~ Retrieve the test data ~~~
        test_data = self._get_test_data(input_data)

        # ~~~ Run tests ~~~
        return self._run_tests(input_data, test_data)
|
47 |
+
|
48 |
+
# from typing import Optional, Any, List, Dict
|
49 |
+
#
|
50 |
+
# from flows.base_flows.abstract import AtomicFlow
|
51 |
+
# from flows.utils.general_helpers import validate_parameters
|
52 |
+
#
|
53 |
+
# class CodeTester(AtomicFlow):
|
54 |
+
# REQUIRED_KEYS_CONFIG = []
|
55 |
+
# REQUIRED_KEYS_KWARGS = []
|
56 |
+
#
|
57 |
+
# def __init__(self, **kwargs):
|
58 |
+
# super().__init__(**kwargs)
|
59 |
+
#
|
60 |
+
# @classmethod
|
61 |
+
# def _validate_parameters(cls, kwargs):
|
62 |
+
# validate_parameters(cls, kwargs) # is this necessary?
|
63 |
+
#
|
64 |
+
# def run(self,
|
65 |
+
# input_data: Dict[str, Any],
|
66 |
+
# private_keys: Optional[List[str]] = [],
|
67 |
+
# keys_to_ignore_for_hash: Optional[List[str]] = []) -> Dict[str, Any]:
|
68 |
+
|
FixedReply_CodeReflect.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flows.base_flows import FixedReplyAtomicFlow
|
2 |
+
|
3 |
+
|
4 |
+
class FixedReply_CodeReflect(FixedReplyAtomicFlow):
    """Fixed-reply flow exposed under the ``FixedReply_CodeReflect`` class name.

    The class adds no behaviour of its own; construction is delegated
    entirely to ``FixedReplyAtomicFlow``.
    """

    def __init__(self, **kwargs):
        # Forward every keyword argument untouched to the parent constructor.
        super().__init__(**kwargs)
|
FixedReply_CodeReflect.yaml
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: "CodeReflectCritic"
|
2 |
+
description: "A flow that prompts the user to reflect on their code and provide a corrected version if necessary."
|
3 |
+
verbose: True
|
4 |
+
|
5 |
+
input_keys:
|
6 |
+
- "code"
|
7 |
+
|
8 |
+
output_keys:
|
9 |
+
- "code_reflect_message"
|
10 |
+
output_data_transformations:
|
11 |
+
- _target_: flows.data_transformations.KeyRename
|
12 |
+
old_key2new_key:
|
13 |
+
raw_response: "code_reflect_message"
|
14 |
+
|
15 |
+
fixed_reply: |2-
|
16 |
+
Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
|
17 |
+
If that is not the case, provide the corrected version of the code in the following format:
|
18 |
+
```python
|
19 |
+
{{python_code}}
|
20 |
+
```
|
21 |
+
otherwise, reply:
|
22 |
+
"Final answer."
|
23 |
+
|
24 |
+
|
25 |
+
|
__init__.py
CHANGED
@@ -1,16 +1,31 @@
|
|
|
|
1 |
from .CF_Code import CF_Code
|
|
|
|
|
|
|
2 |
from .CF_CodeReflect import CF_CodeReflect
|
3 |
|
|
|
4 |
from .CF_CodeCritic import CF_CodeCritic
|
5 |
from .CF_CodeCollab import CF_CodeCollab
|
6 |
|
|
|
7 |
from .CF_Plan import CF_Plan
|
8 |
from .CF_CodeWithPlan import CF_CodeWithPlan
|
9 |
from .CF_Plan_Code import CF_Plan_Code
|
10 |
|
|
|
11 |
from .CF_PlanReflect import CF_PlanReflect
|
12 |
from .CF_PlanReflect_Code import CF_PlanReflect_Code
|
13 |
|
|
|
14 |
from .CF_PlanCritic import CF_PlanCritic
|
15 |
from .CF_PlanCollab import CF_PlanCollab
|
16 |
from .CF_PlanCollab_Code import CF_PlanCollab_Code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# cf-code
|
2 |
from .CF_Code import CF_Code
|
3 |
+
|
4 |
+
# cf-code_reflect
|
5 |
+
from .FixedReply_CodeReflect import FixedReply_CodeReflect
|
6 |
from .CF_CodeReflect import CF_CodeReflect
|
7 |
|
8 |
+
# cf-code_collab
|
9 |
from .CF_CodeCritic import CF_CodeCritic
|
10 |
from .CF_CodeCollab import CF_CodeCollab
|
11 |
|
12 |
+
# cf-plan-code (and cf-plan_oracle-code)
|
13 |
from .CF_Plan import CF_Plan
|
14 |
from .CF_CodeWithPlan import CF_CodeWithPlan
|
15 |
from .CF_Plan_Code import CF_Plan_Code
|
16 |
|
17 |
+
# cf-plan_reflect-code
|
18 |
from .CF_PlanReflect import CF_PlanReflect
|
19 |
from .CF_PlanReflect_Code import CF_PlanReflect_Code
|
20 |
|
21 |
+
# cf-plan_collab-code
|
22 |
from .CF_PlanCritic import CF_PlanCritic
|
23 |
from .CF_PlanCollab import CF_PlanCollab
|
24 |
from .CF_PlanCollab_Code import CF_PlanCollab_Code
|
25 |
+
|
26 |
+
# cf-code_debug
|
27 |
+
from .CF_CodeTesting import CF_CodeTesting
|
28 |
+
from .CF_CodeDebug import CF_CodeDebug
|
29 |
+
|
30 |
+
# from .CF_Debug import CF_Debug
|
31 |
+
# from .CF_CodeTestDebug import CF_CodeTestDebug
|
src/data_transformations/__init__.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from .correctness_flag import CorrectnessFlag
|
2 |
+
from .test_results_summary_generation import TestResultsSummaryGeneration
|
src/data_transformations/correctness_flag.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, Any
|
2 |
+
|
3 |
+
from flows.data_transformations.abstract import DataTransformation
|
4 |
+
|
5 |
+
|
6 |
+
class CorrectnessFlag(DataTransformation):
    """Data transformation that flags whether every test result has a truthy status."""

    def __init__(self, output_key, input_key):
        """Store the key to write the flag to and the key holding the test results.

        Args:
            output_key: Passed to the ``DataTransformation`` constructor; the
                flag is written to ``data_dict[self.output_key]``.
            input_key: Key, inside ``data_dict["raw_response"]``, of the
                collection of test results.
        """
        super().__init__(output_key)
        self.input_key = input_key

    def __call__(self, data_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Write the all-tests-passed flag into ``data_dict`` and return it.

        The flag is ``True`` only when the ``status`` of every entry in
        ``data_dict["raw_response"][self.input_key]`` is truthy.
        """
        results = data_dict["raw_response"][self.input_key]
        statuses = [entry["status"] for entry in results]
        data_dict[self.output_key] = all(statuses)
        return data_dict
|
src/data_transformations/test_results_summary_generation.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, Any
|
2 |
+
|
3 |
+
import jinja2
|
4 |
+
|
5 |
+
from flows.data_transformations.abstract import DataTransformation
|
6 |
+
|
7 |
+
|
8 |
+
class TestResultsSummaryGeneration(DataTransformation):
    """Data transformation that turns raw test results into a textual summary.

    The templates and options used to build the summary are supplied as
    keyword arguments and kept in ``self.params``.
    """

    def __init__(self, output_key, **kwargs):
        """Store the templates and options used to build the summary.

        Args:
            output_key: Passed to the ``DataTransformation`` constructor.
            **kwargs: Templates and options, stored as ``self.params``.
        """
        super().__init__(output_key)
        self.params = kwargs

    def __call__(self, data_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Store the summary under ``"test_results_summary"`` and return ``data_dict``.

        When ``data_dict["all_tests_passed"]`` is truthy the unrendered
        ``no_error_template`` is used; otherwise the summary is derived from
        ``data_dict["raw_response"]``.
        """
        # NOTE(review): the summary is written to the literal key
        # "test_results_summary"; ``output_key`` is not consulted here.
        if data_dict["all_tests_passed"]:
            # the execution did not result in any errors
            data_dict["test_results_summary"] = self.params["no_error_template"]
            return data_dict

        data_dict["test_results_summary"] = self._describe_failure(data_dict["raw_response"])
        return data_dict

    def _render(self, template_key, context):
        """Render the Jinja2 template stored under ``template_key`` with ``context``."""
        environment = jinja2.Environment(loader=jinja2.BaseLoader())
        return environment.from_string(self.params[template_key]).render(**context)

    def _describe_failure(self, test_data):
        """Build the message for a compilation, timeout, runtime or logical error."""
        if not test_data["compilation_status"]:
            # compilation error occurred
            context = {
                "error_message": test_data["compilation_error_message"].strip(),
            }
            return self._render("compilation_error_template", context)

        if test_data["timeout_error"]:
            # timeout error occurred; this template is used without rendering
            return self.params["timeout_error_template"]

        # code compiled successfully without timeouts: collect the failed tests
        failed_tests = [
            outcome
            for outcome in test_data["public_tests_results"]
            if not outcome["status"]
        ]

        # A failed test with no generated output is treated as a runtime
        # error; when several exist, the last one is reported.
        crashed_tests = [
            outcome for outcome in failed_tests if outcome["generated_output"] is None
        ]
        runtime_error_test = crashed_tests[-1] if crashed_tests else None

        if runtime_error_test:
            context = {
                "test_input": runtime_error_test["input"],
                "error_message": runtime_error_test["error_message"].strip(),
            }
            return self._render("runtime_error_template", context)

        # otherwise the failure is a logical error
        if self.params["single_test_error_message"]:
            # report only the first failed test
            first_failed_test = failed_tests[0]
            context = {
                "test_input": first_failed_test["input"],
                "expected_output": first_failed_test["expected_output"],
                "generated_output": first_failed_test["generated_output"],
            }
            return self._render("single_test_error_template", context)

        # report every failed test, numbered from 1, after a common header
        sections = [self.params["all_tests_header"]]
        for position, outcome in enumerate(failed_tests, start=1):
            context = {
                "idx": position,
                "test_input": outcome["input"],
                "expected_output": outcome["expected_output"],
                "generated_output": outcome["generated_output"],
            }
            sections.append(self._render("test_error_template", context))

        return self.params["tests_separator"].join(sections)
|