Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | |||
ivci:nuva-utils [2024/03/20 13:21] – fkaag | ivci:nuva-utils [2025/04/25 09:03] (current) – fkaag | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== Python utilities to handle NUVA ====== | ====== Python utilities to handle NUVA ====== | ||
- | A [[https://github.com/fkaag71/ | + | [[https://pypi.org/project/ |
- | The supported functions are so far: | + | It can be installed with the command: |
- | < | + | < |
- | get_nuva_version() | + | pip install nuva-utils |
</ | </ | ||
- | Returns the version index for the last publication of NUVA. | ||
+ | The functions supported so far are: | |
<code python> | <code python> | ||
- | get_nuva(version) | + | def nuva_version(): |
- | </ | + | """ |
- | Uploads in the current | + | |
+ | """ | ||
+ | def nuva_core_graph(): | ||
+ | """ | ||
+ | Returns the core graph of NUVA as a RDFLib graph | ||
+ | :return: the core graph | ||
+ | """ | ||
+ | def nuva_add_codes_to_graph(g, | ||
+ | """ | ||
+ | Adds the alignments for an external code system. | ||
- | <code python> | + | g: The graph where the alignments are to be added |
- | split_nuva() | + | codesystem: The code system of the alignments |
- | </ | + | |
- | From the uploaded **nuva_ivci.rdf** file, creates | + | """ |
- | * **nuva_core.ttl** including | + | def nuva_add_lang(g,lang): |
- | * **nuva_lang_XX.ttl** includes | + | """ |
- | * **nuva_refcode_YYY.ttl** includes | + | Adds a language graph to a base graph |
+ | """ | ||
+ | def nuva_get_vaccines(g, | ||
+ | """ | ||
+ | Return a Dict of all NUVA vaccines and their properties | ||
+ | """ | ||
+ | def nuva_translate(g, | ||
+ | """ | ||
+ | Extracts from a graph the translation across 2 languages | ||
+ | """ | ||
+ | def nuva_optimize(g,codesystem,onlyAbstract): | ||
+ | """ | ||
+ | Determines the optimal mapping of a code system to NUVA, either full or limited to abstract vaccines. | ||
+ | Returns a dictionary with three items: | ||
+ | - bestcodes, a dictionary of all NUVA concepts | ||
+ | - revcodes, a dictionary of all codes in the code system | ||
+ | - metrics, the computed metrics of the code system | ||
- | <code python> | + | For each NUVA concept, bestcodes is formed by: |
- | refturtle_to_map(code) | + | - label: the English label of the concept |
- | </ | + | - isAbstract: whether the concept is abstract |
- | Starting from the **nuva_refcode_YYY.ttl** file for the given code, creates a simple CSV file **nuva_refcode_YYY.csv** with alignments between | + | - nbequiv: |
+ | - blur: the number of concepts covered by the narrowest codes for the NUVA concept. If nbequiv is not 0, blur should be 1 | ||
+ | - codes: the list of codes with the given blur | ||
- | <code python> | + | For each code in the code system, revcodes is formed by: |
- | map_to_turtle(code) | + | - label: the English label of the corresponding NUVA concept |
+ | - cardinality: | ||
+ | - may: the list of these NUVA concepts | ||
+ | - blur: the number of NUVA concepts for which the given code is the best possible one | ||
+ | - best: the list of these NUVA concepts, that is a subset of " | ||
+ | |||
+ | The metrics are formed by: | |
+ | - completeness: | ||
+ | - precision: the inverse of the average blur over all the codes in the code system, when using the optimal one for each concept. | |
+ | - redundancy: for the NUVA concepts that have exact alignments in the code system, the average number of such alignments. | ||
+ | """ | ||
</ | </ | ||
- | Assuming that the **nuva_refcode_YYY.csv** file has been copied to work file **nuva_code_YYY.csv**, | ||
- | Note that the refcode file contains | + | Here is an example of use: |
+ | - Retrieve | ||
+ | - Retrieve | ||
+ | - Complement it with ATC alignments | ||
+ | - Complement it with French | ||
+ | - Display the list of vaccines | ||
+ | - Display a translation table from English to French | ||
+ | - Determine the best possible mapping | ||
- | < | + | < |
- | query_core(q) | + | import os |
- | </ | + | import nuva_utils |
- | Runs a SPARQL query q against the core graph loaded | + | from pathlib import Path |
+ | from nuva_utils.nuva_utils import | ||
- | <code python> | + | # Here the main program - Adapt the work directory |
- | query_code(q, | + | |
- | </ | + | |
- | Runs a SPARQL query q against a graph formed by merging **nuva_core.ttl** and the work file **nuva_code_YYY.ttl**, | + | |
+ | os.chdir(str(Path.home())+"/ | ||
+ | version = nuva_version() | ||
+ | print(version) | ||
- | < | + | g = nuva_core_graph() |
- | eval_code(code) | + | print ("Core graph loaded" |
- | </ | + | |
- | Produces the metrics for a code system, given a **nuva_code_YYY.csv** file for alignments. | + | |
- | Subproducts are: | + | codes = [] |
- | * **nuva_reverse_YYY.csv** : file with all NUVA codes matching a given external code | + | csv_file = open(" |
- | * **nuva_best_YYY.csv**: file with the best possible external code for a given NUVA code | + | reader = csv.DictReader(csv_file, |
+ | codesystem = reader.fieldnames[0] | ||
+ | for row in reader: | ||
+ | codes.append(row) | ||
- | An example use sequence is included in the file: | + | nuva_add_codes_to_graph(g, |
- | <code python> | + | nuva_add_lang(g,' |
- | # Here the main program - Adapt the work directory to your environment | + | vaccines = nuva_get_vaccines(g,' |
+ | print(vaccines) | ||
+ | trans = nuva_translate(g,' | ||
+ | print(trans) | ||
+ | eval_codes = nuva_optimize(g, | ||
+ | bestcodes = eval_codes[' | ||
+ | revcodes = eval_codes[' | ||
+ | metrics = eval_codes[' | ||
- | os.chdir(str(Path.home())+"/Documents/ | + | rev_fname = f"{codesystem}/nuva_reverse_{codesystem}.csv" |
- | get_nuva(get_nuva_version()) | + | best_fname= f"{codesystem}/ |
- | split_nuva() | + | metrics_fname=f"{codesystem}/ |
- | refturtle_to_map(" | + | |
- | shutil.copyfile(" | + | |
- | map_to_turtle("CVX") | + | |
- | q = """ | + | print ("Create best codes report |
- | # All vaccines against smallpox | + | best_file = open(best_fname,' |
- | SELECT ?vcode ?vl WHERE { | + | best_writer = csv.writer(best_file, |
- | ?dis rdfs: | + | best_writer.writerow([" |
- | ?dis rdfs: | + | for nuva_code |
- | ?vac rdfs: | + | |
- | ?vac rdfs:label ?vl . | + | bestcodes[nuva_code][' |
- | ?vac skos: | + | best_file.close |
- | ?vac nuvs: | + | |
- | ?val nuvs: | + | |
- | } | + | |
- | """ | + | |
- | res = query_core(q) | + | |
- | for row in res: | + | |
- | print (f"{row.vcode} - {row.vl}" | + | |
- | res = eval_code("CVX") | + | print ("Create reverse codes report |
- | print ("Completeness {: | + | rev_file = open(rev_fname,' |
- | print ("Precision {:.1%} " | + | rev_writer = csv.writer(rev_file, delimiter=';') |
+ | rev_writer.writerow([codesystem, | ||
+ | for extcode in revcodes: | ||
+ | rev_writer.writerow([extcode, | ||
+ | revcodes[extcode][' | ||
+ | revcodes[extcode][' | ||
+ | rev_file.close | ||
- | </ | + | nbnuva = len(bestcodes) |
+ | nbcodes = len(revcodes) | ||
+ | print (f" | ||
+ | print (f" | ||
+ | print (" | ||
+ | print (f" | ||
+ | print (" | ||
+ | print (" | ||
+ | </ |