martinjosifoski
/

CC_flows

Model card Files Files and versions Community

martinjosifoski committed on Jun 30, 2023

Commit

6fffc74

1 Parent(s): 9a2680b

Add an abstract CodeTesting Flow and working CF_CodeTesting Flow.

Browse files

Files changed (21) hide show

CF_Code.py +1 -0
CF_Code.yaml +4 -5
CF_CodeCollab.yaml +18 -10
CF_CodeCritic.yaml +1 -0
CF_CodeDebug.py +6 -0
CF_CodeDebug.yaml +64 -0
CF_CodeReflect.yaml +20 -23
CF_CodeTesting.py +32 -0
CF_CodeTesting.yaml +85 -0
CF_CodeWithPlan.yaml +3 -4
CF_Plan.yaml +3 -4
CF_PlanCollab.yaml +18 -10
CF_PlanCritic.yaml +1 -1
CF_PlanReflect.yaml +8 -7
CodeTesting.py +68 -0
FixedReply_CodeReflect.py +6 -0
FixedReply_CodeReflect.yaml +25 -0
__init__.py +15 -0
src/data_transformations/__init__.py +2 -0
src/data_transformations/correctness_flag.py +15 -0
src/data_transformations/test_results_summary_generation.py +102 -0

CF_Code.py CHANGED Viewed

@@ -4,6 +4,7 @@ from flows.flow_verse import load_class
 repository_id = os.environ.get("OpenAIChatAtomicFlow")
 OpenAIChatAtomicFlow = load_class(repository_id, "OpenAIChatAtomicFlow")
 class CF_Code(OpenAIChatAtomicFlow):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

 repository_id = os.environ.get("OpenAIChatAtomicFlow")
 OpenAIChatAtomicFlow = load_class(repository_id, "OpenAIChatAtomicFlow")
 class CF_Code(OpenAIChatAtomicFlow):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

CF_Code.yaml CHANGED Viewed

@@ -73,13 +73,12 @@ input_keys:
 output_keys:
   - "code"
-response_annotators:
-  code_extractor:
-    _target_: flows.message_annotators.RegexFirstOccurrenceExtractor
     regex: '(?<=```python)([\s\S]*?)(?=```)'
     regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
-    key: "code"
     strip: True
     assert_unique: True
     verbose: True

 output_keys:
   - "code"
+output_data_transformations:
+  - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
     regex: '(?<=```python)([\s\S]*?)(?=```)'
     regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
+    input_key: "raw_response"
+    output_key: "code"
     strip: True
     assert_unique: True
     verbose: True

CF_CodeCollab.yaml CHANGED Viewed

@@ -27,7 +27,7 @@ subflows_config:
         _target_: langchain.PromptTemplate
         template: |2-
           # Feedback on the last proposed solution
-          {{query}}
           Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the code in the following format:
@@ -37,15 +37,23 @@ subflows_config:
           otherwise, reply:
           "Final answer."
         input_variables:
-          - query
         partial_variables:
           code_placeholder: "{{python_code}}"
         template_format: jinja2
-      response_annotators:
-        end_of_interaction_annotator:
-          _target_: flows.message_annotators.EndOfInteraction
-          end_of_interaction_message: "Final answer"
-          key: "end_of_interaction"
           verbose: True
       output_keys:
         - "code"
@@ -55,7 +63,7 @@ subflows_config:
     class_name: CF_CodeCritic
     overrides:
       name: CodeCritic
-      outputs_transformations:
-        - _target_: flows.outputs_transformations.Rename
           old_key2new_key:
-            raw_response: "query"

         _target_: langchain.PromptTemplate
         template: |2-
           # Feedback on the last proposed solution
+          {{code_feedback}}
           Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the code in the following format:
           otherwise, reply:
           "Final answer."
         input_variables:
+          - code_feedback
         partial_variables:
           code_placeholder: "{{python_code}}"
         template_format: jinja2
+      default_human_input_key: "code_feedback"
+      output_data_transformations:
+        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
+          regex: '(?<=```python)([\s\S]*?)(?=```)'
+          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
+          input_key: "raw_response"
+          output_key: "code"
+          strip: True
+          assert_unique: True
+          verbose: True
+        - _target_: flows.data_transformations.EndOfInteraction
+          end_of_interaction_string: "Final answer"
+          output_key: "end_of_interaction"
           verbose: True
       output_keys:
         - "code"
     class_name: CF_CodeCritic
     overrides:
       name: CodeCritic
+      output_data_transformations:
+        - _target_: flows.data_transformations.KeyRename
           old_key2new_key:
+            raw_response: "code_feedback"

CF_CodeCritic.yaml CHANGED Viewed

@@ -73,3 +73,4 @@ input_keys:
   - "output_description"
   - "io_examples_and_explanation"
   - "code"

   - "output_description"
   - "io_examples_and_explanation"
   - "code"
+output_keys: []

CF_CodeDebug.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from flows.base_flows import GeneratorCriticFlow
+class CF_CodeDebug(GeneratorCriticFlow):
+    """GeneratorCriticFlow subclass exposed under the name ``CF_CodeDebug``; it adds no behavior of its own."""
+    def __init__(self, **kwargs):
+        # All keyword arguments are forwarded unchanged to GeneratorCriticFlow.
+        super().__init__(**kwargs)

CF_CodeDebug.yaml ADDED Viewed

	@@ -0,0 +1,64 @@

+name: "CodeDebug_Flow"
+verbose: True
+description: "ToDO: add description"
+reset_generator_every_round: False
+reset_critic_every_round: True
+max_rounds: 2 # ToDo: To increase to 4
+early_exit_key: "end_of_interaction"
+input_keys:
+  - "problem_description"
+  - "input_description"
+  - "output_description"
+  - "io_examples_and_explanation"
+  - "public_tests_individual_io"
+output_keys:
+  - "code"
+subflows_config:
+  - _target_: flows.flow_verse.instantiate_flow
+    repository_id: ${oc.env:CC_FLOWS}
+    class_name: CF_Code
+    overrides:
+      name: "CodeGenerator"
+      model_name: "gpt-4"
+      human_message_prompt_template:
+        template: |2-
+          {{query}}
+          Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
+          ```python
+          {{code_placeholder}}
+          ```
+        input_variables:
+          - query
+        partial_variables:
+          code_placeholder: "{{python_code}}"
+      input_data_transformations:
+        - _target_: flows.data_transformations.KeyRename
+          old_key2new_key:
+            "test_results_summary": "query"
+      output_data_transformations:
+        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
+          regex: '(?<=```python)([\s\S]*?)(?=```)'
+          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
+          input_key: "raw_response"
+          output_key: "code"
+          strip: True
+          assert_unique: True
+          verbose: True
+        - _target_: flows.data_transformations.EndOfInteraction
+          end_of_interaction_string: "Final answer"
+          output_key: "end_of_interaction"
+          verbose: True
+      output_keys:
+        - "code"
+        - "end_of_interaction"
+  - _target_: flows.flow_verse.instantiate_flow
+    repository_id: ${oc.env:CC_FLOWS}
+    class_name: CF_CodeTesting
+    overrides:
+      name: "CodeTestingCritic"

CF_CodeReflect.yaml CHANGED Viewed

@@ -12,7 +12,6 @@ input_keys:
   - "input_description"
   - "output_description"
   - "io_examples_and_explanation"
 output_keys:
   - "code"
@@ -23,29 +22,27 @@ subflows_config:
     overrides:
       name: "CodeGenerator"
       model_name: "gpt-4"
-      response_annotators:
-        end_of_interaction_annotator:
-          _target_: flows.message_annotators.EndOfInteraction
-          end_of_interaction_message: "Final answer"
-          key: "end_of_interaction"
           verbose: True
       output_keys:
         - "code"
         - "end_of_interaction"
-  - _target_: flows.base_flows.FixedReplyAtomicFlow.instantiate_with_overrides
-    overrides:
-      name: "CodeFixedReplyCritic"
-      description: "ToDo: Add description"
-      input_keys: []
-      outputs_transformations:
-        - _target_: flows.outputs_transformations.Rename
-          old_key2new_key:
-            raw_response: "query"
-      fixed_reply: |2-
-        Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
-        If that is not the case, provide the corrected version of the code in the following format:
-        ```python
-        {{python_code}}
-        ```
-        otherwise, reply:
-        "Final answer."

   - "input_description"
   - "output_description"
   - "io_examples_and_explanation"
 output_keys:
   - "code"
     overrides:
       name: "CodeGenerator"
       model_name: "gpt-4"
+      input_data_transformations:
+        - _target_: flows.data_transformations.KeyRename
+          old_key2new_key:
+            "code_reflect_message": "query"
+      output_data_transformations:
+        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
+          regex: '(?<=```python)([\s\S]*?)(?=```)'
+          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
+          input_key: "raw_response"
+          output_key: "code"
+          strip: True
+          assert_unique: True
+          verbose: True
+        - _target_: flows.data_transformations.EndOfInteraction
+          end_of_interaction_string: "Final answer"
+          output_key: "end_of_interaction"
           verbose: True
       output_keys:
         - "code"
         - "end_of_interaction"
+  - _target_: flows.flow_verse.instantiate_flow
+    repository_id: ${oc.env:CC_FLOWS}
+    class_name: FixedReply_CodeReflect

CF_CodeTesting.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from typing import Any, Dict
+from flows import utils
+from .src.evaluation import testing_utils_codeforces
+from .CodeTesting import CodeTesting
+log = utils.get_pylogger(__name__)
+# ToDo: Add flags to control whether hidden, public, or both sets of tests should be used for evaluation
+class CF_CodeTesting(CodeTesting):
+    REQUIRED_KEYS_CONFIG = []
+    REQUIRED_KEYS_KWARGS = []
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+    def _get_test_data(self, input_data: Dict):
+        """This function retrieves (or generates) input-output pairs that will be used to test the implementation."""
+        return input_data["public_tests_individual_io"]
+    def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
+        testing_results = testing_utils_codeforces.evaluate_solution_for_problem(
+            candidate_solution=input_data["code"],
+            public_tests_io=test_data
+        )
+        for test_output in testing_results["public_tests_results"]:
+            test_output["input"] = "\n".join(test_output["input"])
+        return testing_results

CF_CodeTesting.yaml ADDED Viewed

	@@ -0,0 +1,85 @@

+description: "ToDo: add description"
+input_keys:
+  - "code"
+  - "public_tests_individual_io"
+output_keys:
+  - "all_tests_passed"
+  - "tests_results_summary"
+output_data_transformations:
+  - _target_: CC_flows.src.data_transformations.CorrectnessFlag  # ToDo: This import style would not work if the flow is synced in the current implementation (the outer directory is a hash and not the name; figure out how to do the import robustly; e.g., using relative imports)
+    input_key: "public_tests_results"  # ToDo: Add support for nested keys and update this to raw_response.public_tests_results?
+    output_key: "all_tests_passed"
+  - _target_: CC_flows.src.data_transformations.TestResultsSummaryGeneration
+    output_key: "tests_results_summary"
+    single_test_error_message: True
+    no_error_template: |2-
+      ${.issue_title}
+      All of the executed tests passed.
+    compilation_error_template: |2-
+      ${.issue_title}
+      The execution resulted in a compilation error.
+      ## Compilation error message:
+      {{error_message}}
+    timeout_error_template: |2-
+      ${.issue_title}
+      The execution timed out, the solution is not efficient enough.
+    runtime_error_template: |2-
+      ${.issue_title}
+      The execution resulted in a runtime error on the following test.
+      ## [Failed test] Input
+      ```
+      {{test_input}}
+      ```
+      ## [Failed test] Runtime error message
+      {{error_message}}
+    single_test_error_template: |2-
+      ${.issue_title}
+      The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
+      ## [Failed test] Input
+      ```
+      {{test_input}}
+      ```
+      ## [Failed test] Expected output
+      ```
+      {{expected_output}}
+      ```
+      ## [Failed test] Generated output
+      ```
+      {{generated_output}}
+      ```
+    all_tests_header: |2-
+      ${.issue_title}
+      The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
+    test_error_template: |2-
+      ## [Failed test {{idx}}]
+      ### [Failed test {{idx}}] Input
+      ```
+      {{test_input}}
+      ```
+      ### [Failed test {{idx}}] Expected output
+      ```
+      {{expected_output}}
+      ```
+      ### [Failed test {{idx}}] Generated output
+      ```
+      {{generated_output}}
+      ```
+    tests_separator: "\n\n"
+    issue_title: "# Issue with the last proposed solution"
+    feedback_title: "# Feedback on the last proposed solution"
+    no_code_template: |2-
+      ${.feedback_title}
+      The code was not provided in the correct output format specified in the request or it was not provided at all.
+    feedback_only_template: |2-
+      ${.feedback_title}
+      {{feedback_content}}
+    feedback_and_issue_template: |2-
+      {{issue_description}}
+      {{feedback_content}}

CF_CodeWithPlan.yaml CHANGED Viewed

@@ -81,12 +81,11 @@ input_keys:
 output_keys:
   - "code"
-response_annotators:
-  code_extractor:
-    _target_: flows.message_annotators.RegexFirstOccurrenceExtractor
     regex: '(?<=```python)([\s\S]*?)(?=```)'
     regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
-    key: "code"
     strip: True
     assert_unique: True
     verbose: True

 output_keys:
   - "code"
+output_data_transformations:
+  - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
     regex: '(?<=```python)([\s\S]*?)(?=```)'
     regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
+    output_key: "code"
     strip: True
     assert_unique: True
     verbose: True

CF_Plan.yaml CHANGED Viewed

@@ -75,13 +75,12 @@ input_keys:
 output_keys:
   - "plan"
-response_annotators:
-  plan_extractor:
-    _target_: flows.message_annotators.RegexFirstOccurrenceExtractor
     regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
     regex_fallback:
       - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
-    key: "plan"
     strip: True
     assert_unique: True
     verbose: True

 output_keys:
   - "plan"
+output_data_transformations:
+  - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
     regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
     regex_fallback:
       - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
+    output_key: "plan"
     strip: True
     assert_unique: True
     verbose: True

CF_PlanCollab.yaml CHANGED Viewed

@@ -27,7 +27,7 @@ subflows_config:
         _target_: langchain.PromptTemplate
         template: |2-
           # Feedback on the last proposed conceptual solution
-          {{query}}
           Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the conceptual solution in the following format:
@@ -36,15 +36,23 @@ subflows_config:
           otherwise, reply:
           "Final answer."
         input_variables:
-          - query
         partial_variables:
           plan_placeholder: "{{conceptual_solution}}"
         template_format: jinja2
-      response_annotators:
-        end_of_interaction_annotator:
-          _target_: flows.message_annotators.EndOfInteraction
-          end_of_interaction_message: "Final answer"
-          key: "end_of_interaction"
           verbose: True
       output_keys:
         - "plan"
@@ -54,7 +62,7 @@ subflows_config:
     class_name: CF_PlanCritic
     overrides:
       name: PlanCritic
-      outputs_transformations:
-        - _target_: flows.outputs_transformations.Rename
           old_key2new_key:
-            raw_response: "query"

         _target_: langchain.PromptTemplate
         template: |2-
           # Feedback on the last proposed conceptual solution
+          {{plan_feedback}}
           Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the conceptual solution in the following format:
           otherwise, reply:
           "Final answer."
         input_variables:
+          - plan_feedback
         partial_variables:
           plan_placeholder: "{{conceptual_solution}}"
         template_format: jinja2
+      default_human_input_key: "plan_feedback"
+      output_data_transformations:
+        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
+          regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
+          regex_fallback:
+            - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
+          output_key: "plan"
+          strip: True
+          assert_unique: True
+          verbose: True
+        - _target_: flows.data_transformations.EndOfInteraction
+          end_of_interaction_string: "Final answer"
+          output_key: "end_of_interaction"
           verbose: True
       output_keys:
         - "plan"
     class_name: CF_PlanCritic
     overrides:
       name: PlanCritic
+      output_data_transformations:
+        - _target_: flows.data_transformations.KeyRename
           old_key2new_key:
+            raw_response: "plan_feedback"

CF_PlanCritic.yaml CHANGED Viewed

@@ -74,4 +74,4 @@ input_keys:
   - "plan"
 output_keys:
-  - "query"

   - "plan"
 output_keys:
+  - "plan_feedback"

CF_PlanReflect.yaml CHANGED Viewed

@@ -26,21 +26,22 @@ subflows_config:
       response_annotators:
         end_of_interaction_annotator:
           _target_: flows.message_annotators.EndOfInteraction
-          end_of_interaction_message: "Final answer"
           key: "end_of_interaction"
           verbose: True
       output_keys:
         - "plan"
         - "end_of_interaction"
-  - _target_: flows.base_flows.FixedReplyAtomicFlow.instantiate_with_overrides
     overrides:
       name: "PlanFixedReplyCritic"
       description: "ToDo: Add description"
-      input_keys: []
-      outputs_transformations:
-        - _target_: flows.outputs_transformations.Rename
-          old_key2new_key:
-            raw_response: "query"
       fixed_reply: |2-
         Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
         If that is not the case, provide the corrected version of the conceptual solution in the following format:

       response_annotators:
         end_of_interaction_annotator:
           _target_: flows.message_annotators.EndOfInteraction
+          end_of_interaction_string: "Final answer"
           key: "end_of_interaction"
           verbose: True
       output_keys:
         - "plan"
         - "end_of_interaction"
+  - _target_: flows.flow_verse.instantiate_flow
+    repository_id: ${oc.env:CC_FLOWS}
+    class_name: CF_Reflect
     overrides:
       name: "PlanFixedReplyCritic"
       description: "ToDo: Add description"
+      input_keys:
+        - "plan"
+      output_keys:
+        - "query"
       fixed_reply: |2-
         Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
         If that is not the case, provide the corrected version of the conceptual solution in the following format:

CodeTesting.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from copy import deepcopy
+from typing import Optional, Any, List, Dict
+from flows import utils
+from flows.base_flows.abstract import AtomicFlow
+log = utils.get_pylogger(__name__)
+class CodeTesting(AtomicFlow):
+    REQUIRED_KEYS_CONFIG = []
+    REQUIRED_KEYS_KWARGS = []
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+    def _get_test_data(self, input_data: Dict):
+        """This function retrieves (or generates) input-output pairs that will be used to test the implementation."""
+        raise NotImplementedError()
+    def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
+        raise NotImplementedError()
+    @classmethod
+    def instantiate_from_config(cls, config):
+        flow_config = deepcopy(config)
+        kwargs = {"flow_config": flow_config}
+        kwargs["input_data_transformations"] = cls._set_up_data_transformations(config["input_data_transformations"])
+        kwargs["output_data_transformations"] = cls._set_up_data_transformations(config["output_data_transformations"])
+        # ~~~ Instantiate flow ~~~
+        return cls(**kwargs)
+    def run(self,
+            input_data: Dict[str, Any],
+            private_keys: Optional[List[str]] = [],
+            keys_to_ignore_for_hash: Optional[List[str]] = []) -> Dict[str, Any]:
+        # ~~~ Retrieve the test data ~~~
+        test_data = self._get_test_data(input_data)
+        # ~~~ Run tests ~~~
+        response = self._run_tests(input_data, test_data)
+        return response
+# from typing import Optional, Any, List, Dict
+#
+# from flows.base_flows.abstract import AtomicFlow
+# from flows.utils.general_helpers import validate_parameters
+#
+# class CodeTester(AtomicFlow):
+#     REQUIRED_KEYS_CONFIG = []
+#     REQUIRED_KEYS_KWARGS = []
+#
+#     def __init__(self, **kwargs):
+#         super().__init__(**kwargs)
+#
+#     @classmethod
+#     def _validate_parameters(cls, kwargs):
+#         validate_parameters(cls, kwargs)  # is this necessary?
+#
+#     def run(self,
+#             input_data: Dict[str, Any],
+#             private_keys: Optional[List[str]] = [],
+#             keys_to_ignore_for_hash: Optional[List[str]] = []) -> Dict[str, Any]:

FixedReply_CodeReflect.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from flows.base_flows import FixedReplyAtomicFlow
+class FixedReply_CodeReflect(FixedReplyAtomicFlow):
+    """FixedReplyAtomicFlow subclass exposed under the name ``FixedReply_CodeReflect``; it adds no behavior of its own."""
+    def __init__(self, **kwargs):
+        # All keyword arguments are forwarded unchanged to FixedReplyAtomicFlow.
+        super().__init__(**kwargs)

FixedReply_CodeReflect.yaml ADDED Viewed

	@@ -0,0 +1,25 @@

+name: "CodeReflectCritic"
+description: "A flow that prompts the user to reflect on their code and provide a corrected version if necessary."
+verbose: True
+input_keys:
+  - "code"
+output_keys:
+  - "code_reflect_message"
+output_data_transformations:
+  - _target_: flows.data_transformations.KeyRename
+    old_key2new_key:
+      raw_response: "code_reflect_message"
+fixed_reply: |2-
+    Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
+    If that is not the case, provide the corrected version of the code in the following format:
+    ```python
+    {{python_code}}
+    ```
+    otherwise, reply:
+    "Final answer."

__init__.py CHANGED Viewed

@@ -1,16 +1,31 @@
 from .CF_Code import CF_Code
 from .CF_CodeReflect import CF_CodeReflect
 from .CF_CodeCritic import CF_CodeCritic
 from .CF_CodeCollab import CF_CodeCollab
 from .CF_Plan import CF_Plan
 from .CF_CodeWithPlan import CF_CodeWithPlan
 from .CF_Plan_Code import CF_Plan_Code
 from .CF_PlanReflect import CF_PlanReflect
 from .CF_PlanReflect_Code import CF_PlanReflect_Code
 from .CF_PlanCritic import CF_PlanCritic
 from .CF_PlanCollab import CF_PlanCollab
 from .CF_PlanCollab_Code import CF_PlanCollab_Code

+# cf-code
 from .CF_Code import CF_Code
+# cf-code_reflect
+from .FixedReply_CodeReflect import FixedReply_CodeReflect
 from .CF_CodeReflect import CF_CodeReflect
+# cf-code_collab
 from .CF_CodeCritic import CF_CodeCritic
 from .CF_CodeCollab import CF_CodeCollab
+# cf-plan-code (and cf-plan_oracle-code)
 from .CF_Plan import CF_Plan
 from .CF_CodeWithPlan import CF_CodeWithPlan
 from .CF_Plan_Code import CF_Plan_Code
+# cf-plan_reflect-code
 from .CF_PlanReflect import CF_PlanReflect
 from .CF_PlanReflect_Code import CF_PlanReflect_Code
+# cf-plan_collab-code
 from .CF_PlanCritic import CF_PlanCritic
 from .CF_PlanCollab import CF_PlanCollab
 from .CF_PlanCollab_Code import CF_PlanCollab_Code
+# cf-code_debug
+from .CF_CodeTesting import CF_CodeTesting
+from .CF_CodeDebug import CF_CodeDebug
+# from .CF_Debug import CF_Debug
+# from .CF_CodeTestDebug import CF_CodeTestDebug

src/data_transformations/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .correctness_flag import CorrectnessFlag
2	+ from .test_results_summary_generation import TestResultsSummaryGeneration

src/data_transformations/correctness_flag.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from typing import Dict, Any
+from flows.data_transformations.abstract import DataTransformation
+class CorrectnessFlag(DataTransformation):
+    def __init__(self, output_key, input_key):
+        super().__init__(output_key)
+        self.input_key = input_key
+    def __call__(self, data_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
+        test_data = data_dict["raw_response"]
+        all_tests_passed = all([test_result["status"] for test_result in test_data[self.input_key]])
+        data_dict[self.output_key] = all_tests_passed
+        return data_dict

src/data_transformations/test_results_summary_generation.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from typing import Dict, Any
+import jinja2
+from flows.data_transformations.abstract import DataTransformation
+class TestResultsSummaryGeneration(DataTransformation):
+    def __init__(self, output_key, **kwargs):
+        super().__init__(output_key)
+        self.params = kwargs
+    def __call__(self, data_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
+        if data_dict["all_tests_passed"]:
+            # the execution did not result in any errors
+            data_dict["test_results_summary"] = self.params["no_error_template"]
+            return data_dict
+        test_data = data_dict["raw_response"]
+        if not test_data["compilation_status"]:
+            # compilation error occurred
+            kwargs = {
+                "error_message": test_data["compilation_error_message"].strip(),
+            }
+            message_content = (
+                jinja2.Environment(loader=jinja2.BaseLoader())
+                .from_string(self.params["compilation_error_template"])
+                .render(**kwargs)
+            )
+        elif test_data["timeout_error"]:
+            # timeout error occurred
+            message_content = self.params["timeout_error_template"]
+        else:
+            # code compiled successfully without timeouts
+            # retrieve the failed tests
+            failed_tests = [
+                test_result
+                for test_result in test_data["public_tests_results"]
+                if not test_result["status"]
+            ]
+            runtime_error_test = None
+            for test_result in failed_tests:
+                if test_result["generated_output"] is None:
+                    # runtime error occurred
+                    runtime_error_test = test_result
+            if runtime_error_test:
+                # construct the error message for the runtime error
+                kwargs = {
+                    "test_input": runtime_error_test["input"],
+                    "error_message": runtime_error_test["error_message"].strip(),
+                }
+                message_content = (
+                    jinja2.Environment(loader=jinja2.BaseLoader())
+                    .from_string(self.params["runtime_error_template"])
+                    .render(**kwargs)
+                )
+            else:
+                # construct the error message corresponding to a logical error
+                if self.params["single_test_error_message"]:
+                    # construct the error message for a single (the first) failed test
+                    first_failed_test = failed_tests[0]
+                    kwargs = {
+                        "test_input": first_failed_test["input"],
+                        "expected_output": first_failed_test["expected_output"],
+                        "generated_output": first_failed_test["generated_output"],
+                    }
+                    message_content = (
+                        jinja2.Environment(loader=jinja2.BaseLoader())
+                        .from_string(self.params["single_test_error_template"])
+                        .render(**kwargs)
+                    )
+                else:
+                    # construct the error message covering all failed tests
+                    parts = [self.params["all_tests_header"]]
+                    for idx, test_result in enumerate(failed_tests):
+                        kwargs = {
+                            "idx": idx + 1,
+                            "test_input": test_result["input"],
+                            "expected_output": test_result["expected_output"],
+                            "generated_output": test_result["generated_output"],
+                        }
+                        parts.append(
+                            jinja2.Environment(loader=jinja2.BaseLoader())
+                            .from_string(self.params["test_error_template"])
+                            .render(**kwargs)
+                        )
+                    message_content = self.params["tests_separator"].join(parts)
+        data_dict["test_results_summary"] = message_content
+        return data_dict