from importlib.resources import files
from os import listdir
import requests
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
[docs]
def getExampleBlobData(numSamples):
    """Generate a standardised example data set drawn from a single blob.

    The points are produced by ``make_blobs`` with one centre, a cluster
    standard deviation of 30 and a fixed ``random_state`` of 0, so repeated
    calls with the same ``numSamples`` yield the same data.

    Parameters
    ----------
    numSamples : int
        Number of points to generate (forwarded to ``make_blobs`` as
        ``n_samples``).

    Returns
    -------
    Xscaled : np.ndarray
        The generated points after ``StandardScaler.fit_transform``.
    """
    # make_blobs returns (points, labels); only the points are needed here.
    blobPoints = make_blobs(
        n_samples=numSamples, centers=1, cluster_std=30, random_state=0
    )[0]
    return StandardScaler().fit_transform(blobPoints)
[docs]
def getExampleSquareData(numSamples):
    """Get example data set with uniform square distribution.

    Both coordinates are drawn independently from ``np.random.uniform(-10, 10)``
    (NumPy's global random state), paired into two-column rows and then
    standardised with ``StandardScaler``.

    Parameters
    ----------
    numSamples : int
        Number of points to draw.

    Returns
    -------
    Xscaled : np.ndarray
        Standardised points, one row per sample with the x and y coordinates
        as the two columns.
    """
    # Draw all x values first, then all y values, so the values consumed from
    # the global random state keep the same order for a given seed.
    x = np.random.uniform(-10, 10, numSamples)
    y = np.random.uniform(-10, 10, numSamples)
    # Pair the coordinates column-wise without a Python-level loop.
    X = np.column_stack((x, y))
    return StandardScaler().fit_transform(X)
[docs]
def getCaseStudyData():
    """Get case study data set (skeleton).

    Downloads the whitespace-separated ``body.dat.txt`` table, keeps the
    columns listed in ``selectedCols`` (labelled by ``columns``) and
    standardises every column except ``Gender``.

    Returns
    -------
    Xscaled : np.ndarray
        Scaled dataset with one column per non-gender entry of ``columns``
        (10 feature columns).

    Raises
    ------
    requests.RequestException
        If the download fails, times out or answers with an HTTP error status.
    """
    # Load the data. The timeout prevents the call from blocking forever on an
    # unresponsive server, and the status check stops an HTTP error page from
    # being parsed as numbers.
    r = requests.get('http://jse.amstat.org/datasets/body.dat.txt', timeout=30)
    r.raise_for_status()
    # One row of floats per non-blank line; blank lines would otherwise yield
    # empty rows and a ragged array.
    data = np.array([
        [float(field) for field in line.split()]
        for line in r.text.splitlines()
        if line.strip()
    ])
    # Order the columns in the similar order that appears in previous studies.
    columns = ['AnkleDiam', 'KneeDiam', 'WristDiam', 'Bitro', 'Biil', 'ElbowDiam', 'ChestDiam', 'ChestDp', 'Biac', 'Height', 'Gender']
    selectedCols = [8, 7, 6, 2, 1, 5, 4, 3, 0, 23, 24]
    df = pd.DataFrame(data[:, selectedCols], columns=columns)
    # Map the entries in the gender column into strings
    gender = {1.0: 'male', 0.0: 'female'}
    df['Gender'] = df['Gender'].map(gender)
    # Generate the feature set: every column except the gender label.
    featNames = [name for name in columns if name != 'Gender']
    X = df[featNames].values
    # Standardise all features
    return StandardScaler().fit_transform(X)