diff --git a/workflows/api_urls.py b/workflows/api_urls.py
index 17bc89a..b385706 100644
--- a/workflows/api_urls.py
+++ b/workflows/api_urls.py
@@ -1,6 +1,7 @@
 from django.conf.urls import patterns, include, url
 from rest_framework import routers
 from workflows import api_views
+from workflows import views_integration
 
 router = routers.DefaultRouter()
 router.register(r'workflows', api_views.WorkflowViewSet)
@@ -11,6 +12,8 @@ urlpatterns = patterns('',
-    url(r'^', include(router.urls)),
-    url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework'))
-)
+    url(r'^create_workflow[/]?$', views_integration.CreateWorkflowAPIView.as_view(), name='create_workflow'),
+
+    url(r'^', include(router.urls)),
+    url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework'))
+)
diff --git a/workflows/pdmanager/__init__.py b/workflows/pdmanager/__init__.py
new file mode 100644
index 0000000..e69de29
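The new `create_workflow` route is deliberately left open at the DRF level (no authentication classes) and is instead guarded by a shared secret inside `CreateWorkflowAPIView` (see `workflows/views_integration.py` below). A minimal sketch of how the PD_Manager side can exercise it; the base URL is an assumption, and the secret must match `settings.PD_MANAGER_SECRET` on the ClowdFlows server:

    import requests

    # Hypothetical deployment URL; the route lives in the API urlconf.
    BASE_URL = "https://clowdflows.example.org/api"

    # The shared secret must equal settings.PD_MANAGER_SECRET on the server.
    resp = requests.get("%s/create_workflow" % BASE_URL,
                        params={"secret": "<PD_MANAGER_SECRET>"},
                        allow_redirects=False)

    # On success the view answers with a redirect to the new workflow's page.
    print(resp.status_code)              # expected: 302
    print(resp.headers.get("Location"))  # URL of the newly created workflow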
diff --git a/workflows/pdmanager/library.py b/workflows/pdmanager/library.py
new file mode 100644
index 0000000..5b45656
--- /dev/null
+++ b/workflows/pdmanager/library.py
@@ -0,0 +1,190 @@
+import datetime
+import re
+
+import numpy as np
+import pandas as pd
+import requests
+from django.conf import settings
+
+
+def auth():
+    """Obtain an OAuth bearer token from the PD_Manager Db and return it as a
+    ready-to-use requests headers dict."""
+    username = settings.PD_MANAGER_DB_USERNAME
+    password = settings.PD_MANAGER_DB_PASSWORD
+    url = "%s/oauth/token" % settings.PD_MANAGER_DB_URL
+
+    response = requests.post(url,
+                             data={'username': username,
+                                   'password': password,
+                                   'grant_type': 'password'})
+
+    acc_token = str(response.json()['access_token'])
+    return {"Authorization": "bearer %s" % acc_token}
+
+
+def pandas_dataframe_to_arff(df):
+    """Serialise a pandas DataFrame to an ARFF string.
+
+    Numeric columns become NUMERIC attributes; all other columns become
+    nominal attributes whose values are the distinct non-null entries.
+    Missing values are encoded as '?'.
+    """
+    import arff
+
+    d = {'relation': 'aaaa', 'attributes': []}
+    data_tr = []
+
+    # attributes and data (collected column-wise first)
+    for c in df.columns:
+        if df[c].dtype in [np.dtype('int64'), np.dtype('float64')]:
+            d['attributes'].append((c, 'NUMERIC'))
+        else:
+            att_values = [v for v in df[c].unique() if not pd.isnull(v)]
+            d['attributes'].append((c, att_values))
+
+        # handle missing values in the column
+        data_tr.append(['?' if pd.isnull(el) else el for el in list(df[c])])
+
+    # transpose to row-wise data (np.transpose() does not work well with strings)
+    d['data'] = []
+    for i in range(len(df)):
+        d['data'].append([data_tr[j][i] for j in range(df.shape[1])])
+
+    return arff.dumps(d)
+
+
+def download_dataset(ds_id, h):
+    """Fetch dataset `ds_id` from the PD_Manager central Db and return the parsed JSON."""
+    url = 'https://pdmanager.3dnetmedical.com/api/datasets/%s' % ds_id
+
+    response = requests.get(url, headers=h)
+    print(response)
+    if response.status_code != 200:
+        raise Exception("Error downloading data from central Db")
+
+    return response.json()
+
+
+def parse_observations(l_observations, column_names=None, pat_id=None, obs_type=None):
+    """Parse a list of raw observation strings into a long-format DataFrame.
+
+    :param l_observations: list of strings of the format
+        "1498941988645, 59.0, 168.47, 304.0, 0.0;" (epoch milliseconds first)
+    :param column_names: names of the remaining columns (besides time);
+        auto-generated as var_00, var_01, ... when omitted
+    :param pat_id: patient identifier attached to every row
+    :param obs_type: observation type, e.g. finger_tapping or gait
+    :return: DataFrame in long format with the columns
+        pat_id, time, obs_type, variable, value
+    """
+    df = []
+    if len(l_observations) == 0:
+        return []
+    for l in l_observations:
+        # an empty line marks the end of the observation block
+        if len(l.strip()) == 0:
+            break
+        l = l.strip()
+        if l.find(";") >= 0:
+            l = l.split(";")[0]
+        df.append([float(v) for v in l.split(",")])
+
+    if column_names is None:
+        columns = ["time"] + ["var_{:02d}".format(j) for j in range(len(l.split(",")) - 1)]
+    else:
+        if len(l.split(",")) - 1 == len(column_names):
+            columns = ["time"] + column_names
+        else:
+            raise Exception("Observation parsing problem: {} column names are "
+                            "needed but {} were provided!".format(len(l.split(",")) - 1,
+                                                                  len(column_names)))
+
+    df = pd.DataFrame(df, columns=columns)
+    # timestamps arrive as epoch milliseconds
+    df['time'] = df.apply(lambda r: datetime.datetime.fromtimestamp(r["time"] / 1000.0), axis=1)
+    df['pat_id'] = pat_id
+    df['obs_type'] = obs_type
+
+    # transform to long format
+    df = pd.melt(df, id_vars="pat_id time obs_type".split(),
+                 var_name="variable", value_name="value")
+
+    return df
+
+
+def convert(name):
+    """Convert a camelCase name (e.g. fingerTappingSimple) to snake_case."""
+    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
+    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
+
+
+def pdmanager_import_dataset(input_dict):
+    """Import a dataset from the main PD_Manager database and return it as ARFF."""
+
+    # --- dataset id, target variable and predictor variables ----------------
+    d_id = input_dict['ds_id']
+    target, target_val_prefix = ('gait_var_00', 'Gait')
+
+    # non-motor observation types
+    l_nm = "fingerTappingSimple fingerTappingAlternate".split()
+
+    # motor observation types
+    l_m = ["gait"]
+    # -------------------------------------------------------------------------
+
+    dict_json = download_dataset(d_id, auth())
+
+    num_p = len(dict_json["Values"])
+    print("Parsing data for {} patients".format(num_p))
+
+    df = []
+    if num_p >= 2:
+        for j, v in enumerate(dict_json["Values"]):  # for each patient
+            pat_id = "pat_{:02d}_{}_{}".format(j, v["PatientInfo"]["gender"].replace(" ", ""),
+                                               v["PatientInfo"]["age"].replace(" ", ""))
+
+            # non-motor observations
+            if len(v["NonMotor"].keys()) > 0:
+                for t in l_nm:
+                    df_ = parse_observations(v["NonMotor"][t], pat_id=pat_id, obs_type=convert(t))
+                    df = pd.concat([df, df_]) if len(df) > 0 else df_
+
+            # motor observations
+            if len(v["Motor"].keys()) > 0:
+                for t in l_m:
+                    df_ = parse_observations(v["Motor"][t], pat_id=pat_id, obs_type=convert(t))
+                    df = pd.concat([df, df_]) if len(df) > 0 else df_
+
+    df = df.sort_values("pat_id time obs_type variable".split()).reset_index(drop=True)
+
+    # align non-motor and motor data recorded on the same day
+    df['date'] = df['time'].dt.date
+
+    df_ = df.groupby("pat_id date obs_type variable".split()).value.max().reset_index()
+    df_['obs_type_var'] = df_['obs_type'] + "_" + df_['variable']
+    df_['pat_date'] = df_.apply(lambda r: r['pat_id'] + "_" + str(r['date']), axis=1)
+
+    df_ = df_.pivot(index="pat_date", columns="obs_type_var", values="value")
+
+    # the target must not contain nulls - drop all such rows
+    df_ = df_.dropna(subset=[target])
+    # map the numeric gait scores 0.0 .. 4.0 to nominal values Gait_0 .. Gait_4
+    for score in range(5):
+        df_.loc[df_[target] == float(score), target] = '{}_{}'.format(target_val_prefix, score)
+
+    num_predictors = df_.shape[1] - 1
+
+    # drop rows for which we have no non-motor data at all
+    df_['num_nulls'] = df_.isnull().sum(axis=1)
+    df_ = df_[df_.num_nulls < num_predictors]
+    df_ = df_.drop(['num_nulls'], axis=1)
+
+    arff_str = pandas_dataframe_to_arff(df_)
+
+    # side effect: also dump the ARFF to a local sample file
+    with open("pdm_sample_dataset_%d_rows.arff" % len(df_), "w") as f:
+        f.write(arff_str)
+
+    return {'arff_data': str(arff_str)}
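`pandas_dataframe_to_arff` builds the dict structure consumed by `arff.dumps`; the bare `import arff` suggests the liac-arff package, though that is an assumption. A toy round trip, runnable wherever liac-arff is installed:

    import numpy as np
    import pandas as pd

    from workflows.pdmanager.library import pandas_dataframe_to_arff

    # One numeric and one nominal column, including a missing numeric value.
    df = pd.DataFrame({'speed': [1.5, 2.0, np.nan],
                       'gait_class': ['Gait_0', 'Gait_1', 'Gait_0']},
                      columns=['speed', 'gait_class'])

    print(pandas_dataframe_to_arff(df))
    # Produces roughly:
    #   @RELATION aaaa
    #   @ATTRIBUTE speed NUMERIC
    #   @ATTRIBUTE gait_class {Gait_0, Gait_1}
    #   @DATA
    #   1.5,Gait_0
    #   2.0,Gait_1
    #   ?,Gait_0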
diff --git a/workflows/pdmanager/package_data/categories/a6a59bb7-5f2c-41b7-a110-7f20a7152cd1.json b/workflows/pdmanager/package_data/categories/a6a59bb7-5f2c-41b7-a110-7f20a7152cd1.json
new file mode 100644
index 0000000..390d667
--- /dev/null
+++ b/workflows/pdmanager/package_data/categories/a6a59bb7-5f2c-41b7-a110-7f20a7152cd1.json
@@ -0,0 +1,9 @@
+{
+    "model": "workflows.category",
+    "fields": {
+        "name": "PD_Manager",
+        "parent": null,
+        "order": 1,
+        "uid": "a6a59bb7-5f2c-41b7-a110-7f20a7152cd1"
+    }
+}
\ No newline at end of file
diff --git a/workflows/pdmanager/package_data/widgets/6ca99fb5-e8c4-4649-80e1-7e1872e2b819.json b/workflows/pdmanager/package_data/widgets/6ca99fb5-e8c4-4649-80e1-7e1872e2b819.json
new file mode 100644
index 0000000..3a3ba55
--- /dev/null
+++ b/workflows/pdmanager/package_data/widgets/6ca99fb5-e8c4-4649-80e1-7e1872e2b819.json
@@ -0,0 +1,56 @@
+[
+    {
+        "model": "workflows.abstractwidget",
+        "fields": {
+            "category": "a6a59bb7-5f2c-41b7-a110-7f20a7152cd1",
+            "treeview_image": "",
+            "uid": "6ca99fb5-e8c4-4649-80e1-7e1872e2b819",
+            "windows_queue": false,
+            "package": "pdmanager",
+            "interaction_view": "",
+            "has_progress_bar": false,
+            "image": "",
+            "description": "Imports data from the central database of the PD_Manager project.",
+            "static_image": "project_logo.png",
+            "action": "pdmanager_import_dataset",
+            "visualization_view": "",
+            "streaming_visualization_view": "",
+            "post_interact_action": "",
+            "wsdl_method": "",
+            "wsdl": "",
+            "interactive": false,
+            "is_streaming": false,
+            "order": 1,
+            "name": "Import PD_Manager data"
+        }
+    },
+    {
+        "model": "workflows.abstractinput",
+        "fields": {
+            "widget": "6ca99fb5-e8c4-4649-80e1-7e1872e2b819",
+            "name": "dataset_id",
+            "short_name": "ds",
+            "default": "",
+            "description": "The ID of the dataset to be imported",
+            "required": true,
+            "multi": false,
+            "parameter_type": "text",
+            "variable": "ds_id",
+            "parameter": true,
+            "order": 1,
+            "uid": "91c0e707-5a95-4a5b-b061-4133d55f95fb"
+        }
+    },
+    {
+        "model": "workflows.abstractoutput",
+        "fields": {
+            "widget": "6ca99fb5-e8c4-4649-80e1-7e1872e2b819",
+            "name": "Imported Data",
+            "short_name": "arf",
+            "description": "Imported data in ARFF format",
+            "variable": "arff_data",
+            "order": 1,
+            "uid": "4670b2b1-7bf5-4516-8a20-759df02966ea"
+        }
+    }
+]
\ No newline at end of file
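The widget's `action` names the function that ClowdFlows calls in the package's `library.py`; inputs arrive as a dict keyed by each abstract input's `variable`, and outputs are read from a dict keyed by each abstract output's `variable`. In isolation, the import widget therefore reduces to roughly this call (the dataset id is a placeholder; configured `PD_MANAGER_DB_*` settings and network access to the central Db are assumed):

    from workflows.pdmanager.library import pdmanager_import_dataset

    # Key matches the abstract input's "variable" field; the id is a placeholder.
    output = pdmanager_import_dataset({'ds_id': '12345'})

    # Key matches the abstract output's "variable" field.
    arff_string = output['arff_data']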
"y": 29, + "x": 30, + "type": "regular" + }, + { + "inputs": [ + { + "name": "Learner", + "short_name": "lea", + "inner_output": null, + "multi_id": 0, + "required": false, + "value": null, + "options": [], + "parameter_type": null, + "variable": "learner", + "pk": 45, + "outer_output": null, + "parameter": false, + "order": 1, + "description": "used to build the classifier.\r\n\r\nProvided by:\r\na Weka Classification widget" + }, + { + "name": "Instances", + "short_name": "ins", + "inner_output": null, + "multi_id": 0, + "required": false, + "value": null, + "options": [], + "parameter_type": null, + "variable": "instances", + "pk": 46, + "outer_output": null, + "parameter": false, + "order": 2, + "description": "to be used by the provided learner to create a classifier.\r\n\r\nProvided by:\r\nArff to Weka Instances" + } + ], + "name": "Build Classifier", + "abstract_widget": "4ff5974f-495a-424f-ba5c-c86afa766825", + "workflow": null, + "outputs": [ + { + "name": "Classifier", + "short_name": "cla", + "outer_input": null, + "variable": "classifier", + "pk": 37, + "inner_input": null, + "order": 1, + "description": "based on the provided data and learner.\r\n\r\nUsed by:\r\nWeka Apply Classifier" + } + ], + "abstract_widget_package": "weka_local", + "y": 53, + "x": 511, + "type": "regular" + }, + { + "inputs": [ + { + "name": "Classifier", + "short_name": "cls", + "inner_output": null, + "multi_id": 0, + "required": false, + "value": null, + "options": [], + "parameter_type": null, + "variable": "classifier", + "pk": 47, + "outer_output": null, + "parameter": false, + "order": 1, + "description": "" + }, + { + "name": "Image type", + "short_name": "imt", + "inner_output": null, + "multi_id": 0, + "required": false, + "value": "vector", + "options": [ + { + "name": "Raster image (PNG file)", + "value": "raster" + }, + { + "name": "Vector image (SVG file)", + "value": "vector" + } + ], + "parameter_type": "select", + "variable": "img_type", + "pk": 48, + "outer_output": null, + "parameter": true, + "order": 1, + "description": "Whether to create a raster (PNG) or vector (SVG) image" + } + ], + "name": "Display WEKA Decision Tree", + "abstract_widget": "3a2411b7-9e72-418d-81b1-4009e714e46b", + "workflow": null, + "outputs": [], + "abstract_widget_package": "weka_local", + "y": 52, + "x": 758, + "type": "regular" + }, + { + "inputs": [ + { + "name": "arff file", + "short_name": "arf", + "inner_output": null, + "multi_id": 0, + "required": false, + "value": null, + "options": [], + "parameter_type": null, + "variable": "arff", + "pk": 49, + "outer_output": null, + "parameter": false, + "order": 1, + "description": "" + }, + { + "name": "Class index", + "short_name": "cla", + "inner_output": null, + "multi_id": 0, + "required": false, + "value": "", + "options": [], + "parameter_type": "text", + "variable": "class_index", + "pk": 50, + "outer_output": null, + "parameter": true, + "order": 1, + "description": "" + } + ], + "name": "Arff to Weka Instances", + "abstract_widget": "50ea6b8c-c528-433b-9062-a62691902103", + "workflow": null, + "outputs": [ + { + "name": "Instances", + "short_name": "ins", + "outer_input": null, + "variable": "instances", + "pk": 38, + "inner_input": null, + "order": 1, + "description": "" + } + ], + "abstract_widget_package": "weka_local", + "y": 191, + "x": 283, + "type": "regular" + }, + { + "inputs": [ + { + "name": "dataset_id", + "short_name": "ds", + "inner_output": null, + "multi_id": 0, + "required": true, + "value": "", + "options": [], + "parameter_type": 
"text", + "variable": "ds_id", + "pk": 63, + "outer_output": null, + "parameter": true, + "order": 1, + "description": "The ID of the dataset to be imported" + } + ], + "name": "Import PD_Manager data", + "abstract_widget": "6ca99fb5-e8c4-4649-80e1-7e1872e2b819", + "workflow": null, + "outputs": [ + { + "name": "Imported Data", + "short_name": "arf", + "outer_input": null, + "variable": "arff_data", + "pk": 53, + "inner_input": null, + "order": 1, + "description": "Imported data in ARFF format" + } + ], + "abstract_widget_package": "pdmanager", + "y": 230, + "x": 23, + "type": "regular" + } + ], + "connections": [ + { + "output_id": 38, + "input_id": 46 + }, + { + "output_id": 36, + "input_id": 45 + }, + { + "output_id": 37, + "input_id": 47 + }, + { + "output_id": 53, + "input_id": 49 + } + ], + "name": "PD_Manager Integration Workflow - user not defined", + "description": "" +} diff --git a/workflows/pdmanager/static/pdmanager/icons/treeview/project_logo.png b/workflows/pdmanager/static/pdmanager/icons/treeview/project_logo.png new file mode 100644 index 0000000..2780f25 Binary files /dev/null and b/workflows/pdmanager/static/pdmanager/icons/treeview/project_logo.png differ diff --git a/workflows/pdmanager/static/pdmanager/icons/widget/project_logo.png b/workflows/pdmanager/static/pdmanager/icons/widget/project_logo.png new file mode 100644 index 0000000..2780f25 Binary files /dev/null and b/workflows/pdmanager/static/pdmanager/icons/widget/project_logo.png differ diff --git a/workflows/views_integration.py b/workflows/views_integration.py new file mode 100644 index 0000000..0ca53db --- /dev/null +++ b/workflows/views_integration.py @@ -0,0 +1,68 @@ +import json + +from django.conf import settings +from django.contrib.auth.models import User +from django.shortcuts import redirect +from rest_framework.views import APIView + +from .models import Workflow + + +# ==================== +# Helper classes +# ==================== +class CreateWorkflow(object): + """ + Helper class for creating a workflow for an existing CF user + """ + + def __init__(self, username): + self.username = username + u_l = User.objects.filter(username=username) + self.user = u_l[0] + pass + + def create_workflow(self): + # Create a workflow + + json_data = json.loads(open("workflows/pdmanager/predefined_workflow.json").read()) + + w = Workflow() + w.user = self.user + w.public = False + w.import_from_json(json_data,{},{}) + w.name = "PD_Manager Integration Workflow - %s" % (self.user.username) + w.save() + + w_import = w.widgets.filter(name__icontains="import")[0] + i_ds = w_import.inputs.all()[0] + i_ds.value = settings.PD_MANAGER_SECRET + i_ds.save() + + return w.id + + +# ==================== +# API Views +# ==================== + +class CreateWorkflowAPIView(APIView): + """ + An API view which creates a predifened workflow + """ + + authentication_classes = () + permission_classes = () + + def get(self, request, *args, **kw): + + username = settings.PD_MANAGER_CF_USERNAME + data = {'secret': request.GET.get('secret', None)} + + if data['secret'] == settings.PD_MANAGER_SECRET: + registerHelperClass = CreateWorkflow(username) + workflow_id = registerHelperClass.create_workflow() + + new_workflow = Workflow.objects.get(id=workflow_id) + + return redirect(new_workflow.get_absolute_url())