9 changes: 6 additions & 3 deletions workflows/api_urls.py
@@ -1,6 +1,7 @@
 from django.conf.urls import patterns, include, url
 from rest_framework import routers
 from workflows import api_views
+from workflows import views_integration
 
 router = routers.DefaultRouter()
 router.register(r'workflows', api_views.WorkflowViewSet)
@@ -11,6 +12,8 @@
 
 
 urlpatterns = patterns('',
-    url(r'^', include(router.urls)),
-    url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework'))
-)
+    url(r'^create_workflow[/]?$', views_integration.CreateWorkflowAPIView.as_view(), name='create_workflow'),
+
+    url(r'^', include(router.urls)),
+    url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework'))
+)
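
For context, a minimal sketch of exercising the new endpoint from outside. The host, payload fields, and auth header below are illustrative assumptions, not taken from this diff; CreateWorkflowAPIView in views_integration defines the actual contract:

import requests

# Hypothetical values: host, token, and payload fields are placeholders.
response = requests.post(
    "http://localhost:8000/api/create_workflow/",
    data={"name": "My workflow"},
    headers={"Authorization": "Token <api-token>"},
)
print(response.status_code, response.text)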
Empty file added workflows/pdmanager/__init__.py
190 changes: 190 additions & 0 deletions workflows/pdmanager/library.py
@@ -0,0 +1,190 @@
import datetime
import json
import re

import numpy as np
import pandas as pd
import requests
from django.conf import settings


def auth():
    """Fetch an OAuth2 bearer token from the PD_Manager database and return
    it as a headers dict ready to pass to requests."""
    username = settings.PD_MANAGER_DB_USERNAME
    password = settings.PD_MANAGER_DB_PASSWORD
    url = "%s/oauth/token" % settings.PD_MANAGER_DB_URL

    response = requests.post(
        url, data={'username': username, 'password': password,
                   'grant_type': 'password'})

    access_token = response.json()['access_token']

    return {"Authorization": "bearer %s" % access_token}


def pandas_dataframe_to_arff(df):
    import arff  # liac-arff; imported lazily to keep it an optional dependency

    d = {'relation': 'aaaa', 'attributes': []}
    data_tr = []

    # build attribute declarations and collect data column by column
    for c in df.columns:
        if df[c].dtype in [np.dtype('int64'), np.dtype('float64')]:
            d['attributes'].append((c, 'NUMERIC'))
        else:
            # nominal attribute: enumerate the distinct non-null values
            att_values = [v for v in df[c].unique() if not pd.isnull(v)]
            d['attributes'].append((c, att_values))

        # encode missing values as '?', as ARFF expects
        col_data = ['?' if pd.isnull(el) else el for el in list(df[c])]
        data_tr.append(col_data)

    # transpose data manually (np.transpose() does not work well with strings)
    d['data'] = []
    for i in range(len(df)):
        d['data'].append([data_tr[j][i] for j in range(df.shape[1])])

    return arff.dumps(d)
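
# Illustrative usage, not part of the module - a frame with one numeric and
# one nominal column, including a missing value:
#
#     df = pd.DataFrame({'age': [61.0, 72.0], 'gender': ['male', None]})
#     print(pandas_dataframe_to_arff(df))
#
# The null shows up as '?' in the ARFF data section, as handled above.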


def download_dataset(ds_id, h):
    url = 'https://pdmanager.3dnetmedical.com/api/datasets/%s' % ds_id

    response = requests.get(url, headers=h)
    print(response)
    if response.status_code != 200:
        raise Exception("Error downloading data from central DB")

    return json.loads(response.text)

def parse_observations(l_observations, column_names=None, pat_id=None, obs_type=None):
    """Parse raw observation strings into a long-format DataFrame.

    :param l_observations: list of strings of the form "1498941988645, 59.0, 168.47, 304.0, 0.0;"
    :param column_names: names of the columns that follow the leading timestamp
    :param pat_id: patient identifier attached to every row
    :param obs_type: observation type, e.g. finger_tapping or gait
    :return: DataFrame with columns pat_id, time, obs_type, variable and value,
        or an empty DataFrame if there are no observations
    """

    if len(l_observations) == 0:
        # an empty DataFrame (rather than a list) keeps pd.concat in the caller happy
        return pd.DataFrame()

    df = []
    for l in l_observations:
        l = l.strip()
        if len(l) == 0:
            break
        # a line may carry a trailing ';' terminator - keep only the part before it
        if l.find(";") >= 0:
            l = l.split(";")[0]
        df.append([float(v) for v in l.split(",")])

    if column_names is None:
        columns = ["time"] + ["var_{:02d}".format(j) for j in range(len(l.split(",")) - 1)]
    else:
        if len(l.split(",")) - 1 == len(column_names):
            columns = ["time"] + column_names
        else:
            raise Exception("Observation parsing problem: {} column names are needed "
                            "but {} were provided!".format(len(l.split(",")) - 1,
                                                           len(column_names)))

    df = pd.DataFrame(df, columns=columns)
    # timestamps are milliseconds since the epoch
    df['time'] = df.apply(lambda r: datetime.datetime.fromtimestamp(r["time"] / 1000.0), axis=1)
    df['pat_id'] = pat_id
    df['obs_type'] = obs_type

    # transform to long format
    df = pd.melt(df, id_vars="pat_id time obs_type".split(), var_name="variable", value_name="value")

    return df
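
# Illustrative usage, not part of the module - a single raw gait sample:
#
#     obs = ["1498941988645, 59.0, 168.47, 304.0, 0.0;"]
#     parse_observations(obs, pat_id="pat_00", obs_type="gait")
#
# yields four long-format rows (variables var_00..var_03) sharing one timestamp.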

def convert(name):
    """Convert CamelCase to snake_case, e.g. 'fingerTappingSimple' -> 'finger_tapping_simple'."""
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()



def pdmanager_import_dataset(input_dict):
    """Import a dataset from the main PD_Manager database and return it as ARFF."""

    # --- dataset id, target variable and predictor variables ----------------
    d_id = input_dict['ds_id']
    target, target_val_prefix = ('gait_var_00', 'Gait')

    # non-motor observation types
    l_nm = "fingerTappingSimple fingerTappingAlternate".split()

    # motor observation types
    l_m = ["gait"]
    # -------------------------------------------------------------------------

    dict_json = download_dataset(d_id, auth())

    num_p = len(dict_json["Values"])
    print("Parsing data for {} patients".format(num_p))

    if num_p < 2:
        raise Exception("Expected data for at least two patients, got {}".format(num_p))

    df = []
    for j, v in enumerate(dict_json["Values"]):  # for each patient
        pat_id = "pat_{:02d}_{}_{}".format(j, v["PatientInfo"]["gender"].replace(" ", ""),
                                           v["PatientInfo"]["age"].replace(" ", ""))

        # non-motor observations
        if len(v["NonMotor"].keys()) > 0:
            for t in l_nm:
                df_ = parse_observations(v["NonMotor"][t], pat_id=pat_id, obs_type=convert(t))
                df = pd.concat([df, df_]) if len(df) > 0 else df_

        # motor observations
        if len(v["Motor"].keys()) > 0:
            for t in l_m:
                df_ = parse_observations(v["Motor"][t], pat_id=pat_id, obs_type=convert(t))
                df = pd.concat([df, df_]) if len(df) > 0 else df_

    df = df.sort_values("pat_id time obs_type variable".split()).reset_index(drop=True)

    # align non-motor and motor data recorded on the same day
    df['date'] = df['time'].dt.date

    df_ = df.groupby("pat_id date obs_type variable".split()).value.max().reset_index()
    df_['obs_type_var'] = df_['obs_type'] + "_" + df_['variable']
    df_['pat_date'] = df_.apply(lambda r: r['pat_id'] + "_" + str(r['date']), axis=1)

    df_ = df_.pivot(index="pat_date", columns="obs_type_var", values="value")

    # the target must not contain nulls - drop all such rows
    df_ = df_.dropna(subset=[target])

    # map the numeric target scores 0.0-4.0 onto nominal class labels
    for level in range(5):
        df_.loc[df_[target] == float(level), target] = '{}_{}'.format(target_val_prefix, level)

    num_predictors = df_.shape[1] - 1

    # drop rows that carry no predictor values at all
    df_['num_nulls'] = df_.isnull().sum(axis=1)
    df_ = df_[df_.num_nulls < num_predictors]
    df_ = df_.drop(['num_nulls'], axis=1)

    arff_str = pandas_dataframe_to_arff(df_)

    with open("pdm_sample_dataset_%d_rows.arff" % len(df_), "w") as f:
        f.write(arff_str)

    return {'arff_data': str(arff_str)}
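
For reviewers who want to run the importer outside the widget runner, a minimal sketch. It assumes the PD_MANAGER_* Django settings are configured, and '42' is a placeholder dataset ID, not a value from this PR:

from workflows.pdmanager.library import pdmanager_import_dataset

result = pdmanager_import_dataset({'ds_id': '42'})  # '42' is a placeholder ID
print(result['arff_data'][:200])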
@@ -0,0 +1,9 @@
{
    "model": "workflows.category",
    "fields": {
        "name": "PD_Manager",
        "parent": null,
        "order": 1,
        "uid": "a6a59bb7-5f2c-41b7-a110-7f20a7152cd1"
    }
}
@@ -0,0 +1,56 @@
[
    {
        "model": "workflows.abstractwidget",
        "fields": {
            "category": "a6a59bb7-5f2c-41b7-a110-7f20a7152cd1",
            "treeview_image": "",
            "uid": "6ca99fb5-e8c4-4649-80e1-7e1872e2b819",
            "windows_queue": false,
            "package": "pdmanager",
            "interaction_view": "",
            "has_progress_bar": false,
            "image": "",
            "description": "Imports data from the central database of the PD_Manager project.",
            "static_image": "project_logo.png",
            "action": "pdmanager_import_dataset",
            "visualization_view": "",
            "streaming_visualization_view": "",
            "post_interact_action": "",
            "wsdl_method": "",
            "wsdl": "",
            "interactive": false,
            "is_streaming": false,
            "order": 1,
            "name": "Import PD_Manager data"
        }
    },
    {
        "model": "workflows.abstractinput",
        "fields": {
            "widget": "6ca99fb5-e8c4-4649-80e1-7e1872e2b819",
            "name": "dataset_id",
            "short_name": "ds",
            "default": "",
            "description": "The ID of the dataset to be imported",
            "required": true,
            "multi": false,
            "parameter_type": "text",
            "variable": "ds_id",
            "parameter": true,
            "order": 1,
            "uid": "91c0e707-5a95-4a5b-b061-4133d55f95fb"
        }
    },
    {
        "model": "workflows.abstractoutput",
        "fields": {
            "widget": "6ca99fb5-e8c4-4649-80e1-7e1872e2b819",
            "name": "Imported Data",
            "short_name": "arf",
            "description": "Imported data in ARFF format",
            "variable": "arff_data",
            "order": 1,
            "uid": "4670b2b1-7bf5-4516-8a20-759df02966ea"
        }
    }
]
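
Taken together, these fixtures wire the widget to the code in library.py: "category" points at the PD_Manager category uid defined above, "action" names the function to call, the input's "variable" becomes the "ds_id" key of input_dict, and the output's "variable" must match a key of the returned dict. A minimal sketch of the dispatch the workflow engine effectively performs (the engine's internals are simplified here; this is an assumption for illustration, not code from this PR):

from workflows.pdmanager import library

action = getattr(library, "pdmanager_import_dataset")  # the widget's "action" field
outputs = action({"ds_id": "42"})                       # keyed by the input's "variable"
arff_data = outputs["arff_data"]                        # matched via the output's "variable"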