Python utilities to handle NUVA
nuva_utils is a Python package available from the PyPI repository.
It can be installed with the command:
pip install nuva-utils
The functions supported so far are:
def nuva_version():
    """
    Returns the current version of the NUVA graph available from
    https://ivci.org/nuva
    """

def nuva_core_graph():
    """
    Returns the core graph of NUVA as a RDFLib graph.

    :return: the core graph
    """

def nuva_add_codes_to_graph(g,codesystem,codes):
    """
    Adds the alignments for an external code system.

    g: The graph where the alignments are to be added
    codesystem: The code system of the alignments
    codes: an array of Dict objects, such as
           {'CVX':'CVX-219','NUVA':'VAC1188'}
    """

def nuva_add_lang(g,lang):
    """
    Adds a language graph to a base graph.

    g: The base graph
    lang: The language to add (the usage example passes 'fr')
    """

def nuva_get_vaccines(g,lang,onlyAbstract= False):
    """
    Return a Dict of all NUVA vaccines and their properties.

    g: The graph to read the vaccines from
    lang: The language to use (the usage example passes 'fr')
    onlyAbstract: presumably restricts the result to abstract vaccines
                  when True - to be confirmed against the implementation
    """

def nuva_translate(g,lang1,lang2):
    """
    Extracts from a graph the translation across 2 languages.

    g: The graph to extract the translation from
    lang1, lang2: The two languages (the usage example passes 'en' and 'fr'
                  to obtain a translation table from English to French)
    """

def nuva_optimize(g,codesystem,onlyAbstract):
    """
    Determines the optimal mapping of a code system to NUVA, either full
    or limited to abstract vaccines.

    Returns a dictionary with three items:
    - bestcodes, a dictionary of all NUVA concepts
    - revcodes, a dictionary of all codes in the code system
    - metrics, the computed metrics of the code system

    For each NUVA concept, bestcodes is formed by:
    - label: the English label of the concept
    - isAbstract: whether the concept is abstract
    - nbequiv: the number of codes that match exactly the NUVA concept
    - blur: the number of concepts covered by the narrowest codes for
      the NUVA concept. If nbequiv is not 0, blur should be 1
    - codes: the list of codes with the given blur

    For each code in the code system, revcodes is formed by:
    - label: the English label of the corresponding NUVA concept
    - cardinality: the number of NUVA concepts covered by the given code
    - may: the list of these NUVA concepts
    - blur: the number of NUVA concepts for which the given code is the
      best possible one
    - best: the list of these NUVA concepts, that is a subset of "may"

    The metrics is formed by:
    - completeness: the share of NUVA concepts that can be represented
      by a code, even roughly
    - precision: the inverse of the average blur over all the codes in
      the code system, when using the optimal one for each concept
    - redundancy: for the NUVA concepts that have exact alignments in
      the code system, the average number of such alignments
    """
Here is an example of use:
- Retrieve the NUVA version
- Retrieve the NUVA core graph
- Complement it with ATC alignments
- Complement it with French labels
- Display the list of vaccines
- Display a translation table from English to French
- Determine the best possible mapping from and to ATC and the corresponding metrics
import csv
import os
from pathlib import Path

import nuva_utils
from nuva_utils.nuva_utils import *

# Here the main program - Adapt the work directory to your environment
os.chdir(Path.home() / "Documents" / "NUVA")

# Retrieve the NUVA version and the core graph
version = nuva_version()
print(version)
g = nuva_core_graph()
print("Core graph loaded")

# Complement the graph with the alignments of an external code system.
# The name of the code system is the header of the first CSV column.
# "utf-8-sig" transparently drops a leading BOM, if any.
with open("NUVA_refcode_ATC.csv", 'r', encoding="utf-8-sig", newline='') as csv_file:
    reader = csv.DictReader(csv_file, delimiter=';')
    codesystem = reader.fieldnames[0]
    codes = list(reader)
nuva_add_codes_to_graph(g, codesystem, codes)

# Complement the graph with French labels
nuva_add_lang(g, 'fr')

# Display the list of vaccines
vaccines = nuva_get_vaccines(g, 'fr')
print(vaccines)

# Display a translation table from English to French
trans = nuva_translate(g, 'en', 'fr')
print(trans)

# Determine the best possible mapping from and to the code system,
# and the corresponding metrics
eval_codes = nuva_optimize(g, codesystem, False)
bestcodes = eval_codes['bestcodes']
revcodes = eval_codes['revcodes']
metrics = eval_codes['metrics']

# The reports are written in a sub-directory named after the code system;
# create it if needed so that opening the report files cannot fail on a
# missing directory.
Path(codesystem).mkdir(parents=True, exist_ok=True)
rev_fname = f"{codesystem}/nuva_reverse_{codesystem}.csv"
best_fname = f"{codesystem}/nuva_best_{codesystem}.csv"

# The "with" statements guarantee that the reports are flushed and closed,
# even if writing a row raises an exception.
print("Create best codes report "+best_fname)
with open(best_fname, 'w', encoding="utf-8", newline='') as best_file:
    best_writer = csv.writer(best_file, delimiter=';')
    best_writer.writerow(["NUVA", "Label", "IsAbstract", f"Best {codesystem}", "Equiv"])
    for nuva_code, concept in bestcodes.items():
        best_writer.writerow([nuva_code, concept['label'], concept['isAbstract'],
                              concept['codes'], concept['nbequiv']])

print("Create reverse codes report "+rev_fname)
with open(rev_fname, 'w', encoding="utf-8", newline='') as rev_file:
    rev_writer = csv.writer(rev_file, delimiter=';')
    rev_writer.writerow([codesystem, "Label", "Cardinality", "May code", "Blur", "Best code for"])
    for extcode, entry in revcodes.items():
        rev_writer.writerow([extcode, entry['label'], entry['cardinality'],
                             entry['may'], entry['blur'], entry['best']])

# Display the metrics of the code system
nbnuva = len(bestcodes)
nbcodes = len(revcodes)

print(f"NUVA version :{version}\n")
print(f"Number of NUVA concepts : {nbnuva}")
print(f"Completeness: {metrics['completeness']:.1%}\n")
print(f"Number of aligned codes: {nbcodes}")
print(f"Precision: {metrics['precision']:.1%}")
print(f"Redundancy: {metrics['redundancy']:.3}")