Refactor create_hierarchy_dict function to improve readability and add documentation
Browse files
isco.py
CHANGED
|
@@ -1,71 +1,42 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
import csv
|
| 4 |
|
| 5 |
|
| 6 |
-
def create_hierarchy_dict(
|
| 7 |
"""
|
| 8 |
-
Creates a dictionary where keys are nodes and values are sets of parent nodes representing the hierarchy of the ISCO-08
|
|
|
|
|
|
|
| 9 |
|
| 10 |
Args:
|
| 11 |
-
-
|
| 12 |
|
| 13 |
Returns:
|
| 14 |
- A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes.
|
| 15 |
"""
|
| 16 |
isco_hierarchy = {}
|
| 17 |
|
| 18 |
-
with open(
|
| 19 |
reader = csv.DictReader(csvfile)
|
| 20 |
for row in reader:
|
| 21 |
# Extract unit group level code (4 digits)
|
| 22 |
unit_code = row["unit"].zfill(4)
|
|
|
|
| 23 |
# Extract the parent code for the unit group level, which is the minor group level (3 digits)
|
| 24 |
-
|
| 25 |
|
| 26 |
# Add the unit code to the hierarchy with its parent code
|
| 27 |
-
isco_hierarchy[unit_code] = {
|
| 28 |
|
| 29 |
-
# Additionally, we can add the parent's parent codes
|
| 30 |
-
|
| 31 |
major_code = unit_code[0]
|
| 32 |
-
sub_major_code = unit_code[:2]
|
| 33 |
isco_hierarchy[unit_code].update({major_code, sub_major_code})
|
| 34 |
|
| 35 |
return isco_hierarchy
|
| 36 |
|
| 37 |
|
| 38 |
-
def create_hierarchy_tree(hierarchy_dict: dict) -> tuple:
    """
    Builds a parent/children lookup and a code-to-name mapping from a nested hierarchy.

    Each node is a dictionary with a "name" entry and an optional "children" entry
    (a list of nodes of the same shape). The code of a node is the text in front of
    the first "=" in its name, with surrounding whitespace removed, so a node named
    "11 = Chief executives" has the code "11".

    Args:
    - hierarchy_dict: The root node of the nested hierarchy.

    Returns:
    - tree: A defaultdict mapping each code to {"children": [child codes], "parent": parent code}.
      The root node has None as its parent. Because it is a defaultdict, looking up an
      unknown code inserts an empty entry for it.
    - code_to_node: A dictionary mapping from code to the full node name.

    Raises:
    - KeyError: If a node has no "name" entry.
    """
    # Imported locally: the module's top-level imports do not provide defaultdict,
    # so relying on a global name would fail with NameError at call time.
    from collections import defaultdict

    tree = defaultdict(lambda: {"children": [], "parent": None})
    code_to_node = {}

    def add_node(parent_code, node):
        # Registers `node` under its parent, then walks its children depth-first.
        code = node["name"].split("=")[0].strip()
        code_to_node[code] = node["name"]
        tree[code]["parent"] = parent_code
        # Compare against None explicitly: an empty-string code is still a real
        # parent and must receive its children.
        if parent_code is not None:
            tree[parent_code]["children"].append(code)
        for child in node.get("children", []):
            add_node(code, child)

    add_node(None, hierarchy_dict)  # Root node has no parent
    return tree, code_to_node
|
| 64 |
-
|
| 65 |
-
|
| 66 |
# Example usage:
|
| 67 |
-
# hierarchy_dict =
|
| 68 |
-
# tree, code_to_node = create_hierarchy_tree(hierarchy_dict)
|
| 69 |
# print(hierarchy)
|
| 70 |
-
# print(code_to_node)
|
| 71 |
-
# print(tree)
|
|
|
|
| 1 |
+
"""This module provides functionality for creating a hierarchy tree and a mapping from ISCO code to node name."""
|
| 2 |
+
|
| 3 |
import csv
|
| 4 |
|
| 5 |
|
| 6 |
+
def create_hierarchy_dict(file: str) -> dict:
    """
    Creates a dictionary that maps each ISCO-08 unit group code to the set of its parent group codes.

    The input CSV file must have a column named 'unit'. Each value in that column is stripped of
    surrounding whitespace and left-padded with zeros to 4 characters, which restores leading
    zeros that were lost when the codes were stored as numbers. Rows whose 'unit' value is
    missing or blank are skipped.

    A csv file with the ISCO-08 structure can be downloaded from the International Labour Organization (ILO) at
    https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv

    Args:
    - file: A string representing the path to the CSV file containing the ISCO-08 unit codes.
      The file is read as UTF-8; a leading byte-order mark is ignored.

    Returns:
    - A dictionary where keys are 4-character unit codes and values are sets holding the
      minor (3 characters), sub-major (2 characters) and major (1 character) group codes,
      e.g. {"1111": {"111", "11", "1"}}.

    Raises:
    - KeyError: If the file contains data rows but no 'unit' column.
    - OSError: If the file cannot be opened.
    """
    isco_hierarchy = {}

    # "utf-8-sig" drops a leading BOM; with plain decoding the first header would be
    # read as "\ufeffunit" and the 'unit' lookup below would fail.
    with open(file, newline="", encoding="utf-8-sig") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # DictReader fills fields missing from a short row with None.
            raw_unit = (row["unit"] or "").strip()
            if not raw_unit:
                # Padding an empty value would invent the bogus code "0000".
                continue

            # Unit group level code (4 digits)
            unit_code = raw_unit.zfill(4)

            # The parents are the prefixes of the unit code: minor group (3 digits),
            # sub-major group (2 digits) and major group (1 digit).
            minor_code = unit_code[:3]
            sub_major_code = unit_code[:2]
            major_code = unit_code[:1]

            isco_hierarchy[unit_code] = {minor_code, sub_major_code, major_code}

    return isco_hierarchy
|
| 38 |
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# Example usage:
|
| 41 |
+
# hierarchy_dict = create_hierarchy_dict("ISCO_structure.csv")
|
|
|
|
| 42 |
# print(hierarchy_dict)
|
|
|
|
|
|