forked from Desbordante/desbordante-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d3941bb
commit 3d1cf76
Showing
4 changed files
with
341 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
year,position,percent | ||
2020,director,10% | ||
2020,other,50% | ||
2020,manager,40% | ||
2021,manager,35% | ||
2021,other,55% | ||
2021,director,10% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
year,employee_grade,avg_salary | ||
2020,24,1000 | ||
2020,40,7000 | ||
2020,32,5000 | ||
2020,29,3000 | ||
2020,49,10000 | ||
2021,50,15000 | ||
2021,25,1500 | ||
2021,30,6000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
import desbordante | ||
import pandas | ||
from tabulate import tabulate | ||
|
||
TABLE = 'examples/datasets/salary.csv' | ||
TIME_LIMIT_SECONDS = 3 | ||
|
||
def print_data_frame(data_frame, title = None): | ||
print_table(data_frame, 'keys', title) | ||
|
||
def print_table(table, headers = None, title = None): | ||
if title is not None: | ||
print(title) | ||
|
||
print(tabulate(table, headers=headers, tablefmt='psql')) | ||
|
||
def print_attribute_symbols(table): | ||
print('Attribute symbols:') | ||
|
||
counter = 1 | ||
|
||
for column in table: | ||
print(f'{column} -- {counter}') | ||
counter += 1 | ||
|
||
def print_desc_ods_with_comments(desc_ods): | ||
print('descending ods:', len(desc_ods)) | ||
|
||
def print_asc_ods_with_comments(asc_ods, table): | ||
print('ascending ods:', len(asc_ods)) | ||
|
||
for od in asc_ods: | ||
print(od) | ||
|
||
print() | ||
print(f'Dependency "{asc_ods[0]}" means that ordering the table') | ||
print('inside each equivalence class from "year" by attribute "avg_salary"') | ||
print('automatically entails ordering by attribute "employee_grade".') | ||
|
||
print() | ||
print('We have 2 equivalence classes in "year": [2020] and [2021].') | ||
print('Let\'s split the table into two tables based on these classes.') | ||
|
||
table_part1 = table.iloc[:5,:] | ||
table_part2 = table.iloc[5:,:] | ||
|
||
print() | ||
print_data_frame(table_part1, 'Part 1: this part of table corresponds to class [2020]') | ||
|
||
print() | ||
print('Let\'s sort it by attribute "avg_salary".') | ||
|
||
table_part1_sorted = table_part1.sort_values('avg_salary') | ||
|
||
print() | ||
print_data_frame(table_part1_sorted, 'Sorted part 1:') | ||
|
||
print() | ||
print('We can see that this sort entails automatic ordering by') | ||
print('attribute "employee_grade".') | ||
|
||
print() | ||
print_data_frame(table_part2, 'Part 2: this part of table corresponds to class [2021]') | ||
|
||
print() | ||
print('Let\'s sort it by attribute "avg_salary".') | ||
|
||
table_part2_sorted = table_part2.sort_values('avg_salary') | ||
|
||
print() | ||
print_data_frame(table_part2_sorted, 'Sorted part 2:') | ||
|
||
print() | ||
print('We can see that this sort entails automatic ordering by') | ||
print('attribute "employee_grade" too.') | ||
|
||
print() | ||
print(f'Dependency "{asc_ods[1]}" is similar to the first and means that') | ||
print('ordering the table inside each equivalence class from "year" by') | ||
print('attribute "employee_grade" automatically entails ordering by') | ||
print('attribute "avg_salary". This can be seen in the tables above.') | ||
|
||
print() | ||
print('In other words, these dependencies indicate that the ordering of') | ||
print('average salary entails an automatic ordering of the employee grade') | ||
print('and vice versa.') | ||
|
||
def print_simple_ods_with_comments(simple_ods, table): | ||
print('simple ods:', len(simple_ods)) | ||
|
||
for od in simple_ods: | ||
print(od) | ||
|
||
print() | ||
print('These dependencies mean that inside each equivalence class from') | ||
print('an attribute from their context the constancy of the attribute') | ||
print('from the right side of the dependency can be traced.') | ||
|
||
employee_grade_classes = [f'[{i}]' for i in table['employee_grade']] | ||
employee_grade_classes_str = ', '.join(employee_grade_classes) | ||
|
||
print() | ||
print(f'For example, let\'s look at "{simple_ods[0]}". The context of this') | ||
print('dependency is attribute "employee_grade". We have 8 equivalence classes') | ||
print(f'in "employee_grade": {employee_grade_classes_str}.') | ||
print('Since all the elements of attribute "employee_grade" are different,') | ||
print('each of these classes contains only one element, so constancy within') | ||
print('each class occurs automatically.') | ||
|
||
print() | ||
print('To better understand such dependencies, refer to the second example.') | ||
|
||
if __name__ == '__main__': | ||
algo = desbordante.od.algorithms.Fastod() | ||
algo.load_data(table=(TABLE, ',', True)) | ||
algo.execute(time_limit=TIME_LIMIT_SECONDS) | ||
|
||
asc_ods = algo.get_asc_ods() | ||
desc_ods = algo.get_desc_ods() | ||
simple_ods = algo.get_simple_ods() | ||
|
||
table = pandas.read_csv(TABLE) | ||
|
||
print_data_frame(table) | ||
print() | ||
print_attribute_symbols(table) | ||
print() | ||
print_desc_ods_with_comments(desc_ods) | ||
print() | ||
print_asc_ods_with_comments(asc_ods, table) | ||
print() | ||
print_simple_ods_with_comments(simple_ods, table) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
import desbordante | ||
import pandas | ||
from tabulate import tabulate | ||
|
||
TABLE = 'examples/datasets/position_distribution.csv' | ||
TIME_LIMIT_SECONDS = 3 | ||
|
||
COLOR_CODES = { | ||
'bold_underline_red': '\u001b[1;4;31m', | ||
'bold_red': '\u001b[1;31m', | ||
'bold_green': '\033[1;32m', | ||
'bold_yellow': '\033[1;33m', | ||
'bold_blue': '\033[1;34m', | ||
'bold_magenta': '\033[1;35m', | ||
'bold_cyan': '\033[1;36m', | ||
'bold_white': '\033[1;37m', | ||
'red': '\u001b[31m', | ||
'green': '\033[32m', | ||
'yellow': '\033[33m', | ||
'blue': '\033[34m', | ||
'magenta': '\033[35m', | ||
'cyan': '\033[36m', | ||
'white': '\033[37m', | ||
'default': '\033[0m' | ||
} | ||
|
||
def make_text_colored(text, color): | ||
return f'{COLOR_CODES[color]}{text}{COLOR_CODES["default"]}' | ||
|
||
def print_data_frame(data_frame, title = None): | ||
print_table(data_frame, 'keys', title) | ||
|
||
def print_table(table, headers = None, title = None): | ||
if title is not None: | ||
print(title) | ||
|
||
print(tabulate(table, headers=headers, tablefmt='psql')) | ||
|
||
def print_attribute_symbols(table): | ||
print('Attribute symbols:') | ||
|
||
counter = 1 | ||
|
||
for column in table: | ||
print(f'{column} -- {counter}') | ||
counter += 1 | ||
|
||
def print_desc_ods_with_comments(desc_ods): | ||
print('descending ods:', len(desc_ods)) | ||
|
||
def print_asc_ods_with_comments(asc_ods, table): | ||
print('ascending ods:', len(asc_ods)) | ||
|
||
for od in asc_ods: | ||
print(od) | ||
|
||
print() | ||
print(f'Dependency "{asc_ods[0]}" means that ordering the table by attribute') | ||
print('"percent" automatically entails ordering by attribute "position".') | ||
print('Moreover, this is observed regardless of other attributes, since the') | ||
print('dependency context is empty.') | ||
|
||
print() | ||
print('Let\'s sort it by attribute "percent".') | ||
|
||
table_sorted = table.sort_values('percent') | ||
|
||
print() | ||
print_data_frame(table_sorted, 'Sorted table:') | ||
|
||
print() | ||
print('We can see that this sort entails automatic ordering by attribute') | ||
print('"position".') | ||
|
||
print() | ||
print(f'Dependency "{asc_ods[1]}" is similar to the first and means that') | ||
print('ordering the table by attribute "position" automatically entails') | ||
print('ordering by attribute "percent". This can be seen in the table above.') | ||
|
||
print() | ||
print('In other words, these dependencies indicate that the ordering of') | ||
print('percents entails an automatic ordering of the positions and vice') | ||
print('versa.') | ||
|
||
def print_simple_ods_with_comments(simple_ods, table): | ||
print('simple ods:', len(simple_ods)) | ||
|
||
for od in simple_ods: | ||
print(od) | ||
|
||
print() | ||
print(f'Dependency "{simple_ods[0]}" means that inside each equivalence') | ||
print('class from "percent" the constancy of the attribute "position" can') | ||
print('be traced.') | ||
|
||
percent_values = list(table['percent']) | ||
percent_classes = set([f'class [{i}] with {percent_values.count(i)} element{"" if percent_values.count(i) == 1 else "s"}' | ||
for i in percent_values]) | ||
|
||
print() | ||
print('We have 5 equivalence classes in "percent":') | ||
|
||
for c in percent_classes: | ||
print(c) | ||
|
||
print() | ||
print('This table shows the constancy of values from attribute "position"') | ||
print('within each equivalence class from "percent". For clarity, lines') | ||
print('containing different equivalence classes are colored differently.') | ||
|
||
table1_headers = [i for i in table] | ||
table1_rows = [list(str(i) for i in r) for r in table.values] | ||
|
||
table1_rows[0] = [make_text_colored(i, 'bold_red') for i in table1_rows[0]] | ||
table1_rows[1] = [make_text_colored(i, 'green') for i in table1_rows[1]] | ||
table1_rows[2] = [make_text_colored(i, 'yellow') for i in table1_rows[2]] | ||
table1_rows[3] = [make_text_colored(i, 'blue') for i in table1_rows[3]] | ||
table1_rows[4] = [make_text_colored(i, 'magenta') for i in table1_rows[4]] | ||
table1_rows[5] = [make_text_colored(i, 'bold_red') for i in table1_rows[5]] | ||
|
||
print() | ||
print_table(table1_rows, table1_headers) | ||
|
||
print() | ||
print(f'Dependency "{simple_ods[1]}" contains 2 attributes ("year" and') | ||
print('"position") in its context and means the following: in the context') | ||
print('of one year and one position the constancy of percents is observed.') | ||
print('That is, in those tuples in which the year and position are the same,') | ||
print('the same percent value is observed.') | ||
|
||
print() | ||
print('The following table shows these observations.') | ||
|
||
table2_headers = list(table1_headers) | ||
table2_rows = [list(str(i) for i in r) for r in table.values] | ||
|
||
table2_rows[0] = [make_text_colored(i, 'red') for i in table2_rows[0]] | ||
table2_rows[1] = [make_text_colored(i, 'green') for i in table2_rows[1]] | ||
table2_rows[2] = [make_text_colored(i, 'yellow') for i in table2_rows[2]] | ||
table2_rows[3] = [make_text_colored(i, 'blue') for i in table2_rows[3]] | ||
table2_rows[4] = [make_text_colored(i, 'magenta') for i in table2_rows[4]] | ||
table2_rows[5] = [make_text_colored(i, 'cyan') for i in table2_rows[5]] | ||
|
||
print() | ||
print_table(table2_rows, table2_headers) | ||
|
||
print() | ||
print('Consider the following two tables. In the first, dependency') | ||
print(f'"{simple_ods[1]}" continues to exist. But in the second one no') | ||
print('longer exists, since it is violated in third tuple, where the pair') | ||
print('(2020, director) corresponds to 20%.') | ||
|
||
table3_headers = list(table1_headers) | ||
table3_rows = list(table2_rows) | ||
|
||
table3_rows.insert(0, table3_rows[0]) | ||
table3_rows.insert(0, table3_rows[0]) | ||
|
||
print() | ||
print_table(table3_rows, table3_headers, f'Dependency "{simple_ods[1]}" continues to exist:') | ||
|
||
table4_headers = list(table1_headers) | ||
table4_rows = list(table3_rows) | ||
|
||
error_tuple = [str(i) for i in table.values[0]] | ||
error_tuple[2] = '20%' | ||
error_tuple = [make_text_colored(i, 'bold_underline_red') for i in error_tuple] | ||
|
||
table4_rows[2] = error_tuple | ||
|
||
print() | ||
print_table(table4_rows, table4_headers, f'Dependency "{simple_ods[1]}" no longer exists:') | ||
|
||
if __name__ == '__main__': | ||
algo = desbordante.od.algorithms.Fastod() | ||
algo.load_data(table=(TABLE, ',', True)) | ||
algo.execute(time_limit=TIME_LIMIT_SECONDS) | ||
|
||
asc_ods = algo.get_asc_ods() | ||
desc_ods = algo.get_desc_ods() | ||
simple_ods = algo.get_simple_ods() | ||
|
||
table = pandas.read_csv(TABLE) | ||
|
||
print_data_frame(table) | ||
print() | ||
print_attribute_symbols(table) | ||
print() | ||
print_desc_ods_with_comments(desc_ods) | ||
print() | ||
print_asc_ods_with_comments(asc_ods, table) | ||
print() | ||
print_simple_ods_with_comments(simple_ods, table) |