Daryl Fung committed
Commit fe2f6e9 · 0 Parent(s)

initial commit
Files changed (4)
  1. Dockerfile +19 -0
  2. clearml.conf +265 -0
  3. main.py +48 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+ COPY clearml.conf $HOME/clearml.conf
+
+ COPY --chown=user . $HOME/app
+
+ CMD ["python", "main.py"]
clearml.conf ADDED
@@ -0,0 +1,265 @@
+ # ClearML SDK configuration file
+ api {
+     # Notice: 'host' is the api server (default port 8008), not the web server.
+     api_server: https://api.clear.ml
+     web_server: https://app.clear.ml
+     files_server: https://files.clear.ml
+     # Credentials are generated using the webapp, https://app.clear.ml/settings
+     # Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
+     credentials {"access_key": ${CLEARML_API_ACCESS_KEY}, "secret_key": ${CLEARML_API_SECRET_KEY}}
+ }
+ sdk {
+     # ClearML - default SDK configuration
+
+     storage {
+         cache {
+             # Defaults to <system_temp_folder>/clearml_cache
+             default_base_dir: "~/.clearml/cache"
+             # default_cache_manager_size: 100
+         }
+
+         direct_access: [
+             # Objects matching are considered to be available for direct access, i.e. they will not be downloaded
+             # or cached, and any download request will return a direct reference.
+             # Objects are specified in glob format, available for url and content_type.
+             { url: "file://*" }  # file-urls are always directly referenced
+         ]
+     }
+
+     metrics {
+         # History size for debug files per metric/variant. For each metric/variant combination with an attached file
+         # (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
+         # X files are stored in the upload destination for each metric/variant combination.
+         file_history_size: 100
+
+         # Max history size for matplotlib imshow files per plot title.
+         # File names for the uploaded images will be recycled in such a way that no more than
+         # X images are stored in the upload destination for each matplotlib plot title.
+         matplotlib_untitled_history_size: 100
+
+         # Limit the number of digits after the dot in plot reporting (reducing plot report size)
+         # plot_max_num_digits: 5
+
+         # Settings for generated debug images
+         images {
+             format: JPEG
+             quality: 87
+             subsampling: 0
+         }
+
+         # Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to true, each series should have its own graph)
+         tensorboard_single_series_per_graph: false
+     }
+
+     network {
+         # Number of retries before failing to upload file
+         file_upload_retries: 3
+
+         metrics {
+             # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
+             # a specific iteration
+             file_upload_threads: 4
+
+             # Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
+             # being sent for upload
+             file_upload_starvation_warning_sec: 120
+         }
+
+         iteration {
+             # Max number of retries when getting frames if the server returned an error (http code 500)
+             max_retries_on_server_error: 5
+             # Backoff factor for consecutive retry attempts.
+             # SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
+             retry_backoff_factor_sec: 10
+         }
+     }
+     aws {
+         s3 {
+             # S3 credentials, used for read/write access by various SDK elements
+
+             # The following settings will be used for any bucket not specified below in the "credentials" section
+             # ---------------------------------------------------------------------------------------------------
+             region: ""
+             # Specify explicit keys
+             key: ""
+             secret: ""
+             # Or enable credentials chain to let Boto3 pick the right credentials.
+             # This includes picking credentials from environment variables,
+             # credential file and IAM role using metadata service.
+             # Refer to the latest Boto3 docs
+             use_credentials_chain: false
+             # Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials".
+             extra_args: {}
+             # ---------------------------------------------------------------------------------------------------
+
+
+             credentials: [
+                 # specifies key/secret credentials to use when handling s3 urls (read or write)
+                 # {
+                 #     bucket: "my-bucket-name"
+                 #     key: "my-access-key"
+                 #     secret: "my-secret-key"
+                 # },
+                 # {
+                 #     # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
+                 #     host: "my-minio-host:9000"
+                 #     key: "12345678"
+                 #     secret: "12345678"
+                 #     multipart: false
+                 #     secure: false
+                 # }
+             ]
+         }
+         boto3 {
+             pool_connections: 512
+             max_multipart_concurrency: 16
+             multipart_threshold: 8388608  # 8MB
+             multipart_chunksize: 8388608  # 8MB
+         }
+     }
+     google.storage {
+         # # Default project and credentials file
+         # # Will be used when no bucket configuration is found
+         # project: "clearml"
+         # credentials_json: "/path/to/credentials.json"
+         # pool_connections: 512
+         # pool_maxsize: 1024
+
+         # # Specific credentials per bucket and sub directory
+         # credentials = [
+         #     {
+         #         bucket: "my-bucket"
+         #         subdir: "path/in/bucket"  # Not required
+         #         project: "clearml"
+         #         credentials_json: "/path/to/credentials.json"
+         #     },
+         # ]
+     }
+     azure.storage {
+         # max_connections: 2
+
+         # containers: [
+         #     {
+         #         account_name: "clearml"
+         #         account_key: "secret"
+         #         # container_name:
+         #     }
+         # ]
+     }
+
+     log {
+         # debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
+         null_log_propagate: false
+         task_log_buffer_capacity: 66
+
+         # disable urllib info and lower levels
+         disable_urllib3_info: true
+     }
+
+     development {
+         # Development-mode options
+
+         # dev task reuse window
+         task_reuse_time_window_in_hours: 72.0
+
+         # Run VCS repository detection asynchronously
+         vcs_repo_detect_async: true
+
+         # Store uncommitted git/hg source code diff in experiment manifest when training in development mode
+         # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
+         store_uncommitted_code_diff: true
+
+         # Support stopping an experiment in case it was externally stopped, status was changed or task was reset
+         support_stopping: true
+
+         # Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
+         default_output_uri: ""
+
+         # Default auto generated requirements optimize for smaller requirements
+         # If True, analyze the entire repository regardless of the entry point.
+         # If False, first analyze the entry point script; if it does not contain references to other local files,
+         # do not analyze the entire repository.
+         force_analyze_entire_repo: false
+
+         # If set to true, the *clearml* update message will not be printed to the console
+         # this value can be overwritten with os environment variable CLEARML_SUPPRESS_UPDATE_MESSAGE=1
+         suppress_update_message: false
+
+         # If this flag is true (default is false), instead of analyzing the code with Pigar, analyze with `pip freeze`
+         detect_with_pip_freeze: false
+
+         # Log specific environment variables. OS environments are listed in the "Environment" section
+         # of the Hyper-Parameters.
+         # Multiple selected variables are supported, including the suffix '*'.
+         # For example: "AWS_*" will log any OS environment variable starting with 'AWS_'.
+         # This value can be overwritten with os environment variable CLEARML_LOG_ENVIRONMENT="[AWS_*, CUDA_VERSION]"
+         # Example: log_os_environments: ["AWS_*", "CUDA_VERSION"]
+         log_os_environments: []
+
+         # Development mode worker
+         worker {
+             # Status report period in seconds
+             report_period_sec: 2
+
+             # The number of events to report
+             report_event_flush_threshold: 100
+
+             # ping to the server - check connectivity
+             ping_period_sec: 30
+
+             # Log all stdout & stderr
+             log_stdout: true
+
+             # Carriage return (\r) support. If zero (0), \r is treated as \n and flushed to the backend.
+             # Carriage return flush support in seconds, flush consecutive line feeds (\r) every X (default: 10) seconds
+             console_cr_flush_period: 10
+
+             # compatibility feature, report memory usage for the entire machine
+             # default (false), report only on the running process and its sub-processes
+             report_global_mem_used: false
+
+             # if provided, start resource reporting after this amount of seconds
+             # report_start_sec: 30
+         }
+     }
+
+     # Apply top-level environment section from configuration into os.environ
+     apply_environment: false
+     # Top-level environment section is in the form of:
+     #   environment {
+     #     key: value
+     #     ...
+     #   }
+     # and is applied to the OS environment as `key=value` for each key/value pair
+
+     # Apply top-level files section from configuration into local file system
+     apply_files: false
+     # Top-level files section allows auto-generating files at designated paths with predefined contents
+     # and target format. Options include:
+     #   contents: the target file's content, typically a string (or any base type int/float/list/dict etc.)
+     #   format: a custom format for the contents. Currently supported value is `base64` to automatically decode a
+     #       base64-encoded contents string, otherwise ignored
+     #   path: the target file's path, may include ~ and inplace env vars
+     #   target_format: format used to encode contents before writing into the target file. Supported values are json,
+     #       yaml, yml and bytes (in which case the file will be written in binary mode). Default is text mode.
+     #   overwrite: overwrite the target file in case it exists. Default is true.
+     #
+     # Example:
+     #   files {
+     #     myfile1 {
+     #       contents: "The quick brown fox jumped over the lazy dog"
+     #       path: "/tmp/fox.txt"
+     #     }
+     #     myjsonfile {
+     #       contents: {
+     #         some {
+     #           nested {
+     #             value: [1, 2, 3, 4]
+     #           }
+     #         }
+     #       }
+     #       path: "/tmp/test.json"
+     #       target_format: json
+     #     }
+     #   }
+ }
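Note that the `credentials` block above expands `${CLEARML_API_ACCESS_KEY}` and `${CLEARML_API_SECRET_KEY}` from the environment, and the Dockerfile copies this file to `$HOME/clearml.conf`, where the SDK looks for it by default. A minimal sketch of how the container is expected to resolve credentials at runtime, assuming the two variables are injected into the container environment (the variable names come from the config comment; the placeholder values and the printout are illustrative only):

import os

# Assumption: in a real deployment these are injected by the container runtime
# (e.g. docker run -e ... or the hosting platform's secret store), not hard-coded.
os.environ.setdefault("CLEARML_API_ACCESS_KEY", "<access-key>")
os.environ.setdefault("CLEARML_API_SECRET_KEY", "<secret-key>")

# Importing clearml after the variables are set lets ~/clearml.conf expand
# ${CLEARML_API_ACCESS_KEY} / ${CLEARML_API_SECRET_KEY} from os.environ.
from clearml import Task

# Same call pattern main.py uses to fetch the trained model's task.
task = Task.get_task(os.getenv("MODEL_TASK_ID"))
print(task.id, task.status)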
main.py ADDED
@@ -0,0 +1,48 @@
+ from fastapi import FastAPI
+ from clearml import Task, Dataset
+ import ast
+ import uvicorn
+ import os
+ from pycaret.classification import load_model, predict_model
+ from tvDatafeed import Interval as TVInterval
+
+ interval_dict = {i.value: i for i in TVInterval}
+
+ # load model
+ model_task = Task.get_task(os.getenv("MODEL_TASK_ID"))
+ model_filename = model_task.artifacts['model'].get_local_copy()
+ model = load_model(model_filename[:-4])
+
+ # get data info
+ dataset_id = model_task.data.hyperparams['Args']['dataset_id'].value
+ data_params = Dataset.get(dataset_id).get_metadata()
+
+ # create get_data definition from task
+ parsed_script = ast.parse(Task.get_task(task_name='get_data').export_task()['script']['diff'])
+ get_data_script = [node for node in ast.walk(parsed_script) if isinstance(node, ast.FunctionDef) and node.name == 'get_data'][0]
+ get_data_module = ast.Module([get_data_script])
+ get_data_module.type_ignores = []
+ get_data = compile(get_data_module, filename='<string>', mode='exec')
+ exec(get_data)
+
+ # create process_data definition from task
+ parsed_script = ast.parse(Task.get_task(task_name='process_data').export_task()['script']['diff'])
+ process_data_script = [node for node in ast.walk(parsed_script) if isinstance(node, ast.FunctionDef) and node.name == 'process_data'][0]
+ process_data_module = ast.Module([process_data_script])
+ process_data_module.type_ignores = []
+ process_data = compile(process_data_module, filename='<string>', mode='exec')
+ exec(process_data)
+
+
+ app = FastAPI()
+
+ @app.get(f'/{data_params["exchange"]}/{data_params["symbol"]}')
+ def predict():
+     data = get_data(data_params['symbol'], data_params['exchange'], data_params['interval'], 100)
+     processed_data = process_data(data, int(data_params['window_length']), int(data_params['target_length']), training=False)
+     predictions = predict_model(model, processed_data[-1:])
+     return predictions[['prediction_label', 'prediction_score']]
+
+
+ if __name__ == '__main__':
+     uvicorn.run(app)
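Once the container is running, predictions are served from a single GET route whose path segments come from the dataset metadata loaded at startup. A minimal client-side sketch, assuming uvicorn's default bind address of 127.0.0.1:8000 and placeholder exchange/symbol values (the real path is whatever `data_params["exchange"]` and `data_params["symbol"]` resolve to); `requests` is not part of requirements.txt and is assumed to be available on the client:

import requests

# Hypothetical path segments -- substitute the exchange/symbol recorded in the
# dataset metadata used to train the model.
exchange, symbol = "BINANCE", "BTCUSDT"

resp = requests.get(f"http://127.0.0.1:8000/{exchange}/{symbol}", timeout=30)
resp.raise_for_status()

# The response carries prediction_label and prediction_score; the exact JSON
# shape depends on how FastAPI encodes the returned DataFrame slice.
print(resp.json())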
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ clearml==1.13.2
+ fastapi==0.104.1
+ pycaret==3.2.0
+ git+https://github.com/rongardF/tvdatafeed.git
+ uvicorn==0.24.0.post1