Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
ivci:nuva-utils [2024/01/05 14:21] – fkaag | ivci:nuva-utils [2025/04/25 09:03] (current) – fkaag | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== Python utilities to handle NUVA ====== | ====== Python utilities to handle NUVA ====== | ||
- | A [[https://github.com/fkaag71/ | + | [[https://pypi.org/project/ |
- | The supported functions are so far: | + | It can be installed with the command: | ||
- | < | + | < |
- | get_nuva_version() | + | pip install nuva-utils |
</ | </ | ||
- | Returns the version index for the last publication of NUVA. | ||
+ | The functions supported so far are: | ||
<code python> | <code python> | ||
- | get_nuva(version) | + | def nuva_version(): |
- | </ | + | """ |
- | Uploads in the current | + | |
+ | """ | ||
+ | def nuva_core_graph(): | ||
+ | """ | ||
+ | Returns the core graph of NUVA as a RDFLib graph | ||
+ | :return: the core graph | ||
+ | """ | ||
+ | def nuva_add_codes_to_graph(g, | ||
+ | """ | ||
+ | Adds the alignments for an external code system. | ||
- | <code python> | + | g: The graph where the alignments are to be added |
- | split_nuva() | + | codesystem: The code system of the alignments | ||
- | </ | + | |
- | From the uploaded **nuva_ivci.rdf** file, creates | + | """ |
- | * **nuva_core.ttl** including | + | def nuva_add_lang(g,lang): |
- | * **nuva_lang_XX.ttl** includes | + | """ |
- | * **nuva_refcode_YYY.ttl** includes | + | Adds a language graph to a base graph |
+ | """ | ||
+ | def nuva_get_vaccines(g, | ||
+ | """ | ||
+ | Return a Dict of all NUVA vaccines and their properties | ||
+ | """ | ||
+ | def nuva_translate(g, | ||
+ | """ | ||
+ | Extracts from a graph the translation across 2 languages | ||
+ | """ | ||
+ | def nuva_optimize(g,codesystem,onlyAbstract): | ||
+ | """ | ||
+ | Determines the optimal mapping of a code system to NUVA, either full or limited to abstract vaccines. | ||
+ | Returns a dictionary with three items: | ||
+ | - bestcodes, a dictionary of all NUVA concepts | ||
+ | - revcodes, a dictionary of all codes in the code system | ||
+ | - metrics, the computed metrics of the code system | ||
- | <code python> | + | For each NUVA concept, bestcodes is formed by: |
- | refturtle_to_map(code) | + | - label: the English label of the concept |
- | </ | + | - isAbstract: whether the concept is abstract |
- | Starting from the **nuva_refcode_YYY.ttl** file for the given code, creates a simple CSV file **nuva_refcode_YYY.csv** with alignments between | + | - nbequiv: |
+ | - blur: the number of concepts covered by the narrowest codes for the NUVA concept. If nbequiv is not 0, blur should be 1 | ||
+ | - codes: the list of codes with the given blur | ||
- | <code python> | + | For each code in the code system, revcodes is formed by: |
- | map_to_turtle(code) | + | - label: the English label of the corresponding NUVA concept |
- | </code> | + | - cardinality: |
- | Assuming that the **nuva_refcode_YYY.csv** file has been copied to work file **nuva_code_YYY.csv**, | + | - may: the list of these NUVA concepts |
+ | - blur: the number of NUVA concepts | ||
+ | - best: the list of these NUVA concepts, that is a subset of " | ||
- | Note that the refcode file contains the NUVA English labels | + | The metrics is formed by: |
- | + | - completeness: | |
- | <code python> | + | - precision: the inverse of the average blur over all the codes in the code system, when using the optimal one for each concept. | ||
- | query_core(q) | + | - redundancy: for the NUVA concepts that have exact alignments in the code system, the average number of such alignments. |
+ | """ | ||
</ | </ | ||
- | Runs a SPARQL query q against the core graph loaded from **nuva_core.ttl** | ||
- | <code python> | + | Here is an example of use: | ||
- | query_code(q, | + | - Retrieve the NUVA version |
- | </ | + | - Retrieve the NUVA core graph |
- | Runs a SPARQL query q against a graph formed by merging **nuva_core.ttl** and the work file **nuva_code_YYY.ttl**, | + | - Complement it with ATC alignments |
+ | - Complement it with French labels | ||
+ | - Display the list of vaccines | ||
+ | - Display | ||
+ | - Determine | ||
+ | <code Python> | ||
+ | import os | ||
+ | import nuva_utils | ||
+ | from pathlib import Path | ||
+ | from nuva_utils.nuva_utils import * | ||
- | An example use sequence is included in the file: | ||
- | <code python> | ||
# Here the main program - Adapt the work directory to your environment | # Here the main program - Adapt the work directory to your environment | ||
os.chdir(str(Path.home())+"/ | os.chdir(str(Path.home())+"/ | ||
- | get_nuva(get_nuva_version()) | + | version = nuva_version() |
- | split_nuva() | + | print(version) |
- | refturtle_to_map(" | + | |
- | shutil.copyfile(" | + | |
- | map_to_turtle(" | + | |
- | q1 = """ | + | g = nuva_core_graph() |
- | # All vaccines against smallpox | + | print ("Core graph loaded") |
- | SELECT ?vcode ?vl WHERE { | + | |
- | ?dis rdfs: | + | |
- | ?dis rdfs:label " | + | |
- | ?vac rdfs: | + | |
- | ?vac rdfs:label ?vl . | + | |
- | ?vac skos: | + | |
- | ?vac nuvs: | + | |
- | ?val nuvs: | + | |
- | } | + | |
- | """ | + | |
- | res = query_core(q1) | + | |
- | for row in res: | + | |
- | | + | |
- | q2=""" | + | codes = [] |
- | # List CVX Codes | + | csv_file = open("NUVA_refcode_ATC.csv",' |
- | | + | reader = csv.DictReader(csv_file, |
- | ?vac rdfs: | + | codesystem = reader.fieldnames[0] |
- | ?vac skos:notation ?nuva . | + | for row in reader: |
- | | + | |
- | ?code rdfs: | + | |
- | ?code skos: | + | nuva_add_codes_to_graph(g, |
- | ?vac rdfs:label $lvac | + | nuva_add_lang(g,' |
- | | + | vaccines = nuva_get_vaccines(g,' |
- | """ | + | print(vaccines) |
- | res=query_code(q2,"CVX") | + | trans = nuva_translate(g,' |
- | for row in res: | + | print(trans) |
- | print (f"CVX {row.cvx} | + | eval_codes = nuva_optimize(g, |
- | </code> | + | bestcodes = eval_codes[' |
+ | revcodes = eval_codes[' | ||
+ | metrics = eval_codes[' | ||
+ | |||
+ | rev_fname = f" | ||
+ | best_fname= f" | ||
+ | metrics_fname=f" | ||
+ | |||
+ | print ("Create best codes report | ||
+ | best_file = open(best_fname,' | ||
+ | best_writer | ||
+ | best_writer.writerow([" | ||
+ | for nuva_code | ||
+ | | ||
+ | bestcodes[nuva_code][' | ||
+ | best_file.close | ||
+ | |||
+ | print ("Create reverse codes report " | ||
+ | rev_file | ||
+ | rev_writer = csv.writer(rev_file, | ||
+ | rev_writer.writerow([codesystem," | ||
+ | for extcode in revcodes: | ||
+ | rev_writer.writerow([extcode, | ||
+ | revcodes[extcode][' | ||
+ | revcodes[extcode][' | ||
+ | rev_file.close | ||
+ | nbnuva = len(bestcodes) | ||
+ | nbcodes = len(revcodes) | ||
+ | |||
+ | print (f" | ||
+ | print (f" | ||
+ | print (" | ||
+ | print (f" | ||
+ | print (" | ||
+ | print (" | ||
+ | </ |