Daryl Fung committed
Commit fe2f6e9 · 0 Parent(s)

initial commit
Files changed (4)
  1. Dockerfile +19 -0
  2. clearml.conf +265 -0
  3. main.py +48 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+ COPY clearml.conf $HOME/clearml.conf
+
+ COPY --chown=user . $HOME/app
+
+ CMD ["python", "main.py"]
clearml.conf ADDED
@@ -0,0 +1,265 @@
+ # ClearML SDK configuration file
+ api {
+     # Notice: 'host' is the api server (default port 8008), not the web server.
+     api_server: https://api.clear.ml
+     web_server: https://app.clear.ml
+     files_server: https://files.clear.ml
+     # Credentials are generated using the webapp, https://app.clear.ml/settings
+     # Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
+     credentials {"access_key": ${CLEARML_API_ACCESS_KEY}, "secret_key": ${CLEARML_API_SECRET_KEY}}
+ }
+ sdk {
+     # ClearML - default SDK configuration
+
+     storage {
+         cache {
+             # Defaults to <system_temp_folder>/clearml_cache
+             default_base_dir: "~/.clearml/cache"
+             # default_cache_manager_size: 100
+         }
+
+         direct_access: [
+             # Objects matching are considered to be available for direct access, i.e. they will not be downloaded
+             # or cached, and any download request will return a direct reference.
+             # Objects are specified in glob format, available for url and content_type.
+             { url: "file://*" }  # file-urls are always directly referenced
+         ]
+     }
+
+     metrics {
+         # History size for debug files per metric/variant. For each metric/variant combination with an attached file
+         # (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
+         # X files are stored in the upload destination for each metric/variant combination.
+         file_history_size: 100
+
+         # Max history size for matplotlib imshow files per plot title.
+         # File names for the uploaded images will be recycled in such a way that no more than
+         # X images are stored in the upload destination for each matplotlib plot title.
+         matplotlib_untitled_history_size: 100
+
+         # Limit the number of digits after the dot in plot reporting (reducing plot report size)
+         # plot_max_num_digits: 5
+
+         # Settings for generated debug images
+         images {
+             format: JPEG
+             quality: 87
+             subsampling: 0
+         }
+
+         # Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to true, each series should have its own graph)
+         tensorboard_single_series_per_graph: false
+     }
+
+     network {
+         # Number of retries before failing to upload file
+         file_upload_retries: 3
+
+         metrics {
+             # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
+             # a specific iteration
+             file_upload_threads: 4
+
+             # Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
+             # being sent for upload
+             file_upload_starvation_warning_sec: 120
+         }
+
+         iteration {
+             # Max number of retries when getting frames if the server returned an error (http code 500)
+             max_retries_on_server_error: 5
+             # Backoff factor for consecutive retry attempts.
+             # SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
+             retry_backoff_factor_sec: 10
+         }
+     }
+     aws {
+         s3 {
+             # S3 credentials, used for read/write access by various SDK elements
+
+             # The following settings will be used for any bucket not specified below in the "credentials" section
+             # ---------------------------------------------------------------------------------------------------
+             region: ""
+             # Specify explicit keys
+             key: ""
+             secret: ""
+             # Or enable credentials chain to let Boto3 pick the right credentials.
+             # This includes picking credentials from environment variables,
+             # credential file and IAM role using metadata service.
+             # Refer to the latest Boto3 docs
+             use_credentials_chain: false
+             # Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials".
+             extra_args: {}
+             # ---------------------------------------------------------------------------------------------------
+
+
+             credentials: [
+                 # specifies key/secret credentials to use when handling s3 urls (read or write)
+                 # {
+                 #     bucket: "my-bucket-name"
+                 #     key: "my-access-key"
+                 #     secret: "my-secret-key"
+                 # },
+                 # {
+                 #     # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
+                 #     host: "my-minio-host:9000"
+                 #     key: "12345678"
+                 #     secret: "12345678"
+                 #     multipart: false
+                 #     secure: false
+                 # }
+             ]
+         }
+         boto3 {
+             pool_connections: 512
+             max_multipart_concurrency: 16
+             multipart_threshold: 8388608  # 8MB
+             multipart_chunksize: 8388608  # 8MB
+         }
+     }
+     google.storage {
+         # # Default project and credentials file
+         # # Will be used when no bucket configuration is found
+         # project: "clearml"
+         # credentials_json: "/path/to/credentials.json"
+         # pool_connections: 512
+         # pool_maxsize: 1024
+
+         # # Specific credentials per bucket and sub directory
+         # credentials = [
+         #     {
+         #         bucket: "my-bucket"
+         #         subdir: "path/in/bucket"  # Not required
+         #         project: "clearml"
+         #         credentials_json: "/path/to/credentials.json"
+         #     },
+         # ]
+     }
+     azure.storage {
+         # max_connections: 2
+
+         # containers: [
+         #     {
+         #         account_name: "clearml"
+         #         account_key: "secret"
+         #         # container_name:
+         #     }
+         # ]
+     }
+
+     log {
+         # debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
+         null_log_propagate: false
+         task_log_buffer_capacity: 66
+
+         # disable urllib info and lower levels
+         disable_urllib3_info: true
+     }
+
+     development {
+         # Development-mode options
+
+         # dev task reuse window
+         task_reuse_time_window_in_hours: 72.0
+
+         # Run VCS repository detection asynchronously
+         vcs_repo_detect_async: true
+
+         # Store uncommitted git/hg source code diff in experiment manifest when training in development mode
+         # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
+         store_uncommitted_code_diff: true
+
+         # Support stopping an experiment in case it was externally stopped, status was changed or task was reset
+         support_stopping: true
+
+         # Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
+         default_output_uri: ""
+
+         # Default auto generated requirements optimize for smaller requirements
+         # If True, analyze the entire repository regardless of the entry point.
+         # If False, first analyze the entry point script; if it does not contain references to other local files,
+         # do not analyze the entire repository.
+         force_analyze_entire_repo: false
+
+         # If set to true, the *clearml* update message will not be printed to the console
+         # this value can be overwritten with os environment variable CLEARML_SUPPRESS_UPDATE_MESSAGE=1
+         suppress_update_message: false
+
+         # If this flag is true (default is false), instead of analyzing the code with Pigar, analyze with `pip freeze`
+         detect_with_pip_freeze: false
+
+         # Log specific environment variables. OS environments are listed in the "Environment" section
+         # of the Hyper-Parameters.
+         # Multiple selected variables are supported, including the suffix '*'.
+         # For example: "AWS_*" will log any OS environment variable starting with 'AWS_'.
+         # This value can be overwritten with os environment variable CLEARML_LOG_ENVIRONMENT="[AWS_*, CUDA_VERSION]"
+         # Example: log_os_environments: ["AWS_*", "CUDA_VERSION"]
+         log_os_environments: []
+
+         # Development mode worker
+         worker {
+             # Status report period in seconds
+             report_period_sec: 2
+
+             # The number of events to report
+             report_event_flush_threshold: 100
+
+             # ping to the server - check connectivity
+             ping_period_sec: 30
+
+             # Log all stdout & stderr
+             log_stdout: true
+
+             # Carriage return (\r) support. If zero (0), \r is treated as \n and flushed to the backend.
+             # Carriage return flush support in seconds, flush consecutive line feeds (\r) every X (default: 10) seconds
+             console_cr_flush_period: 10
+
+             # compatibility feature, report memory usage for the entire machine
+             # default (false), report only on the running process and its sub-processes
+             report_global_mem_used: false
+
+             # if provided, start resource reporting after this amount of seconds
+             # report_start_sec: 30
+         }
+     }
+
+     # Apply top-level environment section from configuration into os.environ
+     apply_environment: false
+     # Top-level environment section is in the form of:
+     #   environment {
+     #     key: value
+     #     ...
+     #   }
+     # and is applied to the OS environment as `key=value` for each key/value pair
+
+     # Apply top-level files section from configuration into local file system
+     apply_files: false
+     # Top-level files section allows auto-generating files at designated paths with predefined contents
+     # and target format. Options include:
+     #   contents: the target file's content, typically a string (or any base type int/float/list/dict etc.)
+     #   format: a custom format for the contents. Currently supported value is `base64` to automatically decode a
+     #       base64-encoded contents string, otherwise ignored
+     #   path: the target file's path, may include ~ and inplace env vars
+     #   target_format: format used to encode contents before writing into the target file. Supported values are json,
+     #       yaml, yml and bytes (in which case the file will be written in binary mode). Default is text mode.
+     #   overwrite: overwrite the target file in case it exists. Default is true.
+     #
+     # Example:
+     #   files {
+     #     myfile1 {
+     #       contents: "The quick brown fox jumped over the lazy dog"
+     #       path: "/tmp/fox.txt"
+     #     }
+     #     myjsonfile {
+     #       contents: {
+     #         some {
+     #           nested {
+     #             value: [1, 2, 3, 4]
+     #           }
+     #         }
+     #       }
+     #       path: "/tmp/test.json"
+     #       target_format: json
+     #     }
+     #   }
+ }
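Note that the `credentials` block above expands `${CLEARML_API_ACCESS_KEY}` and `${CLEARML_API_SECRET_KEY}` from the environment, and the Dockerfile copies this file to `$HOME/clearml.conf`, where the SDK looks for it by default. A minimal sketch of how the container is expected to resolve credentials at runtime, assuming the two variables are injected into the container environment (the variable names come from the config comment; the placeholder values and the printout are illustrative only):

import os

# Assumption: in a real deployment these are injected by the container runtime
# (e.g. docker run -e ... or the hosting platform's secret store), not hard-coded.
os.environ.setdefault("CLEARML_API_ACCESS_KEY", "<access-key>")
os.environ.setdefault("CLEARML_API_SECRET_KEY", "<secret-key>")

# Importing clearml after the variables are set lets ~/clearml.conf expand
# ${CLEARML_API_ACCESS_KEY} / ${CLEARML_API_SECRET_KEY} from os.environ.
from clearml import Task

# Same call pattern main.py uses to fetch the trained model's task.
task = Task.get_task(os.getenv("MODEL_TASK_ID"))
print(task.id, task.status)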
main.py ADDED
@@ -0,0 +1,48 @@
+ from fastapi import FastAPI
+ from clearml import Task, Dataset
+ import ast
+ import uvicorn
+ import os
+ from pycaret.classification import load_model, predict_model
+ from tvDatafeed import Interval as TVInterval
+
+ interval_dict = {i.value: i for i in TVInterval}
+
+ # load model
+ model_task = Task.get_task(os.getenv("MODEL_TASK_ID"))
+ model_filename = model_task.artifacts['model'].get_local_copy()
+ model = load_model(model_filename[:-4])
+
+ # get data info
+ dataset_id = model_task.data.hyperparams['Args']['dataset_id'].value
+ data_params = Dataset.get(dataset_id).get_metadata()
+
+ # create get_data definition from task
+ parsed_script = ast.parse(Task.get_task(task_name='get_data').export_task()['script']['diff'])
+ get_data_script = [node for node in ast.walk(parsed_script) if isinstance(node, ast.FunctionDef) and node.name == 'get_data'][0]
+ get_data_module = ast.Module([get_data_script])
+ get_data_module.type_ignores = []
+ get_data = compile(get_data_module, filename='<string>', mode='exec')
+ exec(get_data)
+
+ # create process_data definition from task
+ parsed_script = ast.parse(Task.get_task(task_name='process_data').export_task()['script']['diff'])
+ process_data_script = [node for node in ast.walk(parsed_script) if isinstance(node, ast.FunctionDef) and node.name == 'process_data'][0]
+ process_data_module = ast.Module([process_data_script])
+ process_data_module.type_ignores = []
+ process_data = compile(process_data_module, filename='<string>', mode='exec')
+ exec(process_data)
+
+
+ app = FastAPI()
+
+ @app.get(f'/{data_params["exchange"]}/{data_params["symbol"]}')
+ def predict():
+     data = get_data(data_params['symbol'], data_params['exchange'], data_params['interval'], 100)
+     processed_data = process_data(data, int(data_params['window_length']), int(data_params['target_length']), training=False)
+     predictions = predict_model(model, processed_data[-1:])
+     return predictions[['prediction_label', 'prediction_score']]
+
+
+ if __name__ == '__main__':
+     uvicorn.run(app)
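Once the container is running, predictions are served from a single GET route whose path segments come from the dataset metadata loaded at startup. A minimal client-side sketch, assuming uvicorn's default bind address of 127.0.0.1:8000 and placeholder exchange/symbol values (the real path is whatever `data_params["exchange"]` and `data_params["symbol"]` resolve to); `requests` is not part of requirements.txt and is assumed to be available on the client:

import requests

# Hypothetical path segments -- substitute the exchange/symbol recorded in the
# dataset metadata used to train the model.
exchange, symbol = "BINANCE", "BTCUSDT"

resp = requests.get(f"http://127.0.0.1:8000/{exchange}/{symbol}", timeout=30)
resp.raise_for_status()

# The response carries prediction_label and prediction_score; the exact JSON
# shape depends on how FastAPI encodes the returned DataFrame slice.
print(resp.json())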
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ clearml==1.13.2
+ fastapi==0.104.1
+ pycaret==3.2.0
+ git+https://github.com/rongardF/tvdatafeed.git
+ uvicorn==0.24.0.post1