Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
ivci:nuva-utils [2024/01/05 13:48] – created fkaag | ivci:nuva-utils [2025/04/25 09:03] (current) – fkaag | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== Python utilities to handle NUVA ====== | ====== Python utilities to handle NUVA ====== | ||
- | A [[https://github.com/fkaag71/ | + | [[https://pypi.org/project/ |
- | The functions supported so far are: | + | It can be installed with the command: |
- | < | + | < |
- | get_nuva_version() | + | pip install nuva-utils |
</ | </ | ||
- | Returns the version index for the last publication of NUVA. | ||
+ | The functions supported so far are: | |
<code python> | <code python> | ||
- | get_nuva(version) | + | def nuva_version(): |
- | </ | + | """ |
- | Uploads in the current | + | |
+ | """ | ||
+ | def nuva_core_graph(): | ||
+ | """ | ||
+ | Returns the core graph of NUVA as an RDFLib graph | |
+ | :return: the core graph | ||
+ | """ | ||
+ | def nuva_add_codes_to_graph(g, | ||
+ | """ | ||
+ | Adds the alignments for an external code system. | ||
- | <code python> | + | g: The graph where the alignments are to be added |
- | split_nuva() | + | codesystem: The code system of the aligments |
- | </ | + | |
- | From the uploaded **nuva_ivci.rdf** file, creates | + | """ |
- | * **nuva_core.ttl** including | + | def nuva_add_lang(g,lang): |
- | * **nuva_lang_XX.ttl** includes | + | """ |
- | * **nuva_refcode_YYY.ttl** includes | + | Adds a language graph to a base graph |
+ | """ | ||
+ | def nuva_get_vaccines(g, | ||
+ | """ | ||
+ | Return a Dict of all NUVA vaccines and their properties | ||
+ | """ | ||
+ | def nuva_translate(g, | ||
+ | """ | ||
+ | Extracts from a graph the translation across 2 languages | ||
+ | """ | ||
+ | def nuva_optimize(g,codesystem,onlyAbstract): | ||
+ | """ | ||
+ | Determines the optimal mapping of a code system to NUVA, either full or limited to abstract vaccines. | ||
+ | Returns a dictionary with three items: | ||
+ | - bestcodes, a dictionary of all NUVA concepts | ||
+ | - revcodes, a dictionary of all codes in the code system | ||
+ | - metrics, the computed metrics of the code system | ||
- | <code python> | + | For each NUVA concept, bestcodes is formed by: |
- | refturtle_to_map(code) | + | - label: the English label of the concept |
- | </ | + | - isAbstract: whether the concept is abstract |
- | Starting from the **nuva_refcode_YYY.ttl** file for the given code, creates a simple CSV file **nuva_refcode_YYY.csv** with alignments between | + | - nbequiv: |
+ | - blur: the number of concepts covered by the narrowest codes for the NUVA concept. If nbequiv is not 0, blur should be 1 | ||
+ | - codes: the list of codes with the given blur | ||
- | <code python> | + | For each code in the code system, revcodes is formed by: |
- | map_to_turtle(code) | + | - label: the English label of the corresponding NUVA concept |
- | </code> | + | - cardinality: |
- | Assuming that the **nuva_refcode_YYY.csv** file has been copied to work file **nuva_code_YYY.csv**, | + | - may: the list of these NUVA concepts |
+ | - blur: the number of NUVA concepts | ||
+ | - best: the list of these NUVA concepts, that is a subset of " | ||
- | <code python> | + | The metrics is formed by: |
- | query_core(q) | + | - completeness: |
+ | - precision: the inverse of the average blur over all the codes in the code system, when using the optimal one for each concept. | |
+ | - redundancy: for the NUVA concepts that have exact alignments in the code system, the average number of such alignments. | ||
+ | """ | ||
</ | </ | ||
- | Runs a SPARQL query q against the core graph loaded from **nuva_core.ttl** | ||
- | <code python> | + | Here is an example of use: |
- | query_code(q, | + | - Retrieve the NUVA version |
- | </ | + | - Retrieve the NUVA core graph |
- | Runs a SPARQL query q against a graph formed by merging **nuva_core.ttl** and the work file **nuva_code_YYY.ttl**, | + | - Complement it with ATC alignments |
+ | - Complement it with French labels | ||
+ | - Display the list of vaccines | ||
+ | - Display | ||
+ | - Determine | ||
+ | <code Python> | ||
+ | import os | ||
+ | import nuva_utils | ||
+ | from pathlib import Path | ||
+ | from nuva_utils.nuva_utils import * | ||
+ | # Here the main program - Adapt the work directory to your environment | ||
+ | os.chdir(str(Path.home())+"/ | ||
+ | version = nuva_version() | ||
+ | print(version) | ||
+ | |||
+ | g = nuva_core_graph() | ||
+ | print ("Core graph loaded" | ||
+ | |||
+ | codes = [] | ||
+ | csv_file = open(" | ||
+ | reader = csv.DictReader(csv_file, | ||
+ | codesystem = reader.fieldnames[0] | ||
+ | for row in reader: | ||
+ | codes.append(row) | ||
+ | |||
+ | nuva_add_codes_to_graph(g, | ||
+ | nuva_add_lang(g,' | ||
+ | vaccines = nuva_get_vaccines(g,' | ||
+ | print(vaccines) | ||
+ | trans = nuva_translate(g,' | ||
+ | print(trans) | ||
+ | eval_codes = nuva_optimize(g, | ||
+ | bestcodes = eval_codes[' | ||
+ | revcodes = eval_codes[' | ||
+ | metrics = eval_codes[' | ||
+ | |||
+ | rev_fname = f" | ||
+ | best_fname= f" | ||
+ | metrics_fname=f" | ||
+ | |||
+ | print (" | ||
+ | best_file = open(best_fname,' | ||
+ | best_writer = csv.writer(best_file, | ||
+ | best_writer.writerow([" | ||
+ | for nuva_code in bestcodes: | ||
+ | best_writer.writerow([nuva_code, | ||
+ | bestcodes[nuva_code][' | ||
+ | best_file.close | ||
+ | |||
+ | print (" | ||
+ | rev_file = open(rev_fname,' | ||
+ | rev_writer = csv.writer(rev_file, | ||
+ | rev_writer.writerow([codesystem," | ||
+ | for extcode in revcodes: | ||
+ | rev_writer.writerow([extcode, | ||
+ | revcodes[extcode][' | ||
+ | revcodes[extcode][' | ||
+ | rev_file.close | ||
+ | |||
+ | nbnuva = len(bestcodes) | ||
+ | nbcodes = len(revcodes) | ||
+ | |||
+ | print (f" | ||
+ | print (f" | ||
+ | print (" | ||
+ | print (f" | ||
+ | print (" | ||
+ | print (" | ||
+ | </ |