/**
 * Returns the position of the column whose id equals `column_id`.
 *
 * @param {Array<Object>} columns - list of column objects (each carries its id under `key_id`)
 * @param {*} column_id - id to look for
 * @returns {number} index of the first matching column, or -1 when there is none
 */
function find_column_index_by_id(columns, column_id) {
    return columns.findIndex(col => col[key_id] === column_id);
}

/**
 * Replaces the column with the given id inside `columns`.
 * Does nothing when no column with that id exists, so a miss never creates
 * a stray "-1" property on the array.
 *
 * @param {Array<Object>} columns - list that is modified in place
 * @param {*} column_id - id of the column to replace
 * @param {Object} replacement - object stored at the found position
 * @returns {boolean} true when a column was replaced
 */
function replace_column_by_id(columns, column_id, replacement) {
    const index = find_column_index_by_id(columns, column_id);
    if (index === -1) {
        return false;
    }
    columns[index] = replacement;
    return true;
}

/**
 * Walks over `column_values_initially` and decides per column which data type to use,
 * writing the result into `column_values_cleaned` and `column_values_filtered`.
 *
 * - ids listed in `predefined_data_types` for categorical/date are formatted as such,
 * - columns currently typed categorical are tentatively formatted as numerical:
 *   columns containing exactly the values 0 and 1 only get their values replaced,
 *   columns with a numeric mean and more than two distinct values become numerical
 *   (and the matching drop-down option is selected).
 */
function identify_best_suitable_datatype() {
    column_values_initially.forEach(function (column) {
        const column_id = column[key_id];

        // NOTE(review): this flag is only set in the predefined-numerical branch and only read
        // inside the sibling `else if` branch further down, so it is always false where it is
        // read. Predefined numerical columns are therefore left untouched - confirm whether
        // they were meant to be converted.
        let is_numerical = false;

        if (predefined_data_types[id_data_type__categorical].indexOf(column_id) > -1) {
            // The initial column object itself is modified here (no copy is made).
            column[key_data_type] = id_data_type__categorical;
            format_datatypes_regarding_datatype([column]);

            // The statistics are recomputed from the entry already stored in column_values_cleaned.
            const cleaned_index = find_column_index_by_id(column_values_cleaned, column_id);
            if (cleaned_index > -1) {
                column_values_cleaned[cleaned_index] =
                    get_descriptive_statistical_measures([column_values_cleaned[cleaned_index]])[0];
                replace_column_by_id(column_values_filtered, column_id, column_values_cleaned[cleaned_index]);
            }
        } else if (predefined_data_types[id_data_type__date].indexOf(column_id) > -1) {
            // The initial column object itself is modified here (no copy is made).
            column[key_data_type] = id_data_type__date;
            format_datatypes_regarding_datatype([column]);

            // Descriptive statistics are intentionally not recomputed for date columns here.
            replace_column_by_id(column_values_cleaned, column_id, column);
            replace_column_by_id(column_values_filtered, column_id, column);
        } else if (predefined_data_types[id_data_type__numerical].indexOf(column_id) > -1) {
            is_numerical = true;
        } else if (column[key_data_type] === id_data_type__categorical) {
            // Work on a deep copy so the tentative numerical formatting does not touch the original.
            const copied_column = JSON.parse(JSON.stringify(column));
            copied_column[key_data_type] = id_data_type__numerical;
            format_datatypes_regarding_datatype([copied_column]);

            // Distinct values; indexOf never matches NaN, so NaN entries are dropped as well.
            const unique_values = copied_column[key_column_values]
                .filter((value, index, values) => values.indexOf(value) === index)
                .filter(value => value !== undefined && value !== "" && value !== "undefined");

            // check for just 0 and 1 values
            const has_only_zero_and_one = unique_values.length === 2
                && unique_values.indexOf(0) > -1
                && unique_values.indexOf(1) > -1;

            if (has_only_zero_and_one) {
                const cleaned_index = find_column_index_by_id(column_values_cleaned, copied_column[key_id]);
                if (cleaned_index > -1) {
                    // Only the values are taken over; the stored column keeps its data type.
                    column_values_cleaned[cleaned_index][key_column_values] = copied_column[key_column_values];
                    column_values_cleaned[cleaned_index] =
                        get_descriptive_statistical_measures([column_values_cleaned[cleaned_index]])[0];
                    replace_column_by_id(column_values_filtered, copied_column[key_id], column_values_cleaned[cleaned_index]);
                }
            } else if (is_numerical || (!isNaN(copied_column.descriptive_statistics[statistics_key__mean_value]) && unique_values.length > 2)) {
                // Reflect the new data type in the column's drop-down.
                d3.select('#' + copied_column[key_id] + id_drop_down_data_type_ending)
                    .selectAll("option")
                    .property("selected", function (d) {
                        return d === id_data_type__numerical;
                    });

                replace_column_by_id(column_values_cleaned, copied_column[key_id], copied_column);
                replace_column_by_id(column_values_filtered, copied_column[key_id], copied_column);
            }
        }
    });
}

/**
 * Formats the given column according to its data type and stores it in both
 * `column_values_cleaned` and `column_values_filtered` (replacing the entry with the same id).
 *
 * @param {Object} copied_column - column object to format and store
 */
function format_datatype_of_column_and_save(copied_column) {
    format_datatypes_regarding_datatype([copied_column]);
    replace_column_by_id(column_values_cleaned, copied_column[key_id], copied_column);
    replace_column_by_id(column_values_filtered, copied_column[key_id], copied_column);
}

/**
 * Removes outliers from every column in `column_values_cleaned` and stores independent
 * deep copies of the result in `column_values_cleaned` and `column_values_filtered`.
 *
 * - categorical: values whose category has a relative frequency below
 *   `threshold_categorical_outlier` are set to undefined,
 * - numerical / date: the values are replaced by the precomputed
 *   `statistics_key__outliers_removed` list of the column's descriptive statistics.
 */
function autoremove_outliers() {
    column_values_cleaned.forEach(function (column) {
        const column_id = column[key_id];
        const data_type = column[key_data_type];
        let copied_column = JSON.parse(JSON.stringify(column));

        if (data_type === id_data_type__categorical) {
            const rare_categories = copied_column.descriptive_statistics[statistics_key__categories]
                .filter(category => category[statistics_key__relativeFrequency] < threshold_categorical_outlier);
            // Set lookup instead of scanning the outlier list for every single value.
            const outlier_values = new Set(rare_categories.map(category => category[statistics_key__unique_value]));

            const values = copied_column[key_column_values];
            values.forEach(function (value, index) {
                if (outlier_values.has(value)) {
                    values[index] = undefined;
                }
            });

            copied_column = get_descriptive_statistical_measures([copied_column])[0];

            replace_column_by_id(column_values_cleaned, column_id, JSON.parse(JSON.stringify(copied_column)));
            replace_column_by_id(column_values_filtered, column_id, JSON.parse(JSON.stringify(copied_column)));
        } else if (data_type === id_data_type__numerical || data_type === id_data_type__date) {
            // ToDO check for other descriptive statistics, it could be that everything is null in the end and this is not right
            copied_column[key_column_values] = copied_column.descriptive_statistics[statistics_key__outliers_removed];
            copied_column = get_descriptive_statistical_measures([copied_column])[0];

            replace_column_by_id(column_values_cleaned, column_id, JSON.parse(JSON.stringify(copied_column)));
            replace_column_by_id(column_values_filtered, column_id, JSON.parse(JSON.stringify(copied_column)));

            // The statistics are computed a second time on the stored (JSON round-tripped) copy;
            // this order is kept exactly as it was.
            const cleaned_index = find_column_index_by_id(column_values_cleaned, column_id);
            if (cleaned_index > -1) {
                column_values_cleaned[cleaned_index] =
                    get_descriptive_statistical_measures([column_values_cleaned[cleaned_index]])[0];
                replace_column_by_id(
                    column_values_filtered,
                    column_id,
                    JSON.parse(JSON.stringify(column_values_cleaned[cleaned_index]))
                );
            }
        }
    });
}

/**
 * Returns a deep copy of `column_values_cleaned`, sorted descending by
 * `changes_key_datatype_change` and, for equal values, descending by the number of
 * entries under `key_removed_during_data_formatting`. Columns that tie on both
 * criteria compare as equal. `column_values_cleaned` itself is not reordered.
 *
 * @returns {Array<Object>} sorted deep copy of the cleaned columns
 */
function compute_get_data_cleansing_changes() {
    //TODO use this for sorting the views in data cleansing popup view
    const columns_data_cleansing_changes_sorted = JSON.parse(JSON.stringify(column_values_cleaned));

    columns_data_cleansing_changes_sorted.sort(function (a, b) {
        if (a[changes_key_datatype_change] < b[changes_key_datatype_change]) {
            return 1;
        }
        if (a[changes_key_datatype_change] !== b[changes_key_datatype_change]) {
            return -1;
        }
        // Returns 0 for equal lengths so the comparator stays consistent.
        return b[key_removed_during_data_formatting].length - a[key_removed_during_data_formatting].length;
    });

    return columns_data_cleansing_changes_sorted;
}