Python utilities to handle NUVA

nuva_utils is a Python package available from the PyPI repository.

It can be installed with the command:

pip install nuva-utils

The functions supported so far are:

def nuva_version():
    """
    Returns the current version of the NUVA graph available from https://ivci.org/nuva

    Takes no arguments.
    :return: the current NUVA version (the usage example prints it directly)
    """
def nuva_core_graph():
    """
    Returns the core graph of NUVA as an RDFLib graph.

    Takes no arguments.
    :return: the core graph
    """
def nuva_add_codes_to_graph(g,codesystem,codes):
    """
    Adds the alignments for an external code system.

    g: The graph where the alignments are to be added
    codesystem: The code system of the alignments (the usage example takes it
        from the first column header of the alignment CSV file)
    codes: an array of Dict objects, such as {'CVX':'CVX-219','NUVA':'VAC1188'}
        (the usage example builds it from the rows of a csv.DictReader)
    """
def nuva_add_lang(g,lang):
    """
    Adds a language graph to a base graph

    g: The base graph to which the language graph is added
    lang: The language to add (the usage example passes 'fr' to get French labels)
    """
def nuva_get_vaccines(g,lang,onlyAbstract= False):
    """
    Return a Dict of all NUVA vaccines and their properties

    g: The NUVA graph (the usage example passes the core graph complemented
        with external codes and French labels)
    lang: A language code (the usage example passes 'fr')
    onlyAbstract: Defaults to False; presumably restricts the result to
        abstract vaccines - TODO confirm against the implementation
    :return: a Dict of the NUVA vaccines and their properties
    """
def nuva_translate(g,lang1,lang2):
    """
    Extracts from a graph the translation across 2 languages

    g: The graph to extract the translation from
    lang1, lang2: The two languages; the usage example passes 'en' and 'fr'
        to obtain a translation table from English to French
    """
def nuva_optimize(g,codesystem,onlyAbstract):
    """
    Determines the optimal mapping of a code system to NUVA, either full or limited to abstract vaccines.

    g: The graph holding NUVA and the alignments of the code system
    codesystem: The code system to evaluate
    onlyAbstract: Selects between the full mapping and the one limited to
        abstract vaccines (the usage example passes False)

    Returns a dictionary with three items:
    - bestcodes, a dictionary of all NUVA concepts
    - revcodes, a dictionary of all codes in the code system
    - metrics, the computed metrics of the code system

    For each NUVA concept, the bestcodes entry is formed by:
    - label: the English label of the concept
    - isAbstract: whether the concept is abstract
    - nbequiv: the number of codes that match exactly the NUVA concept
    - blur: the number of concepts covered by the narrowest codes for the NUVA concept. If nbequiv is not 0, blur should be 1
    - codes: the list of codes with the given blur

    For each code in the code system, the revcodes entry is formed by:
    - label: the English label of the corresponding NUVA concept
    - cardinality: the number of NUVA concepts covered by the given code
    - may: the list of these NUVA concepts
    - blur: the number of NUVA concepts for which the given code is the best possible one
    - best: the list of these NUVA concepts, that is a subset of "may"

    The metrics item is formed by:
    - completeness: the share of NUVA concepts that can be represented by a code, even roughly
    - precision: the inverse of the average blur over all the codes in the code system, when using the best one for each concept.
    - redundancy: for the NUVA concepts that have exact alignments in the code system, the average number of such alignments.
    """

Here is an example of use:

Retrieve the NUVA version
Retrieve the NUVA core graph
Complement it with ATC alignments
Complement it with French labels
Display the list of vaccines
Display a translation table from English to French
Determine the best possible mapping from and to ATC and the corresponding metrics

import csv
import os
from pathlib import Path

import nuva_utils
from nuva_utils.nuva_utils import *
 
# Main program. Adapt the work directory below to your environment.

# All relative file names used afterwards are resolved against this directory
work_dir = str(Path.home()) + "/Documents/NUVA"
os.chdir(work_dir)

# Retrieve and display the NUVA version
version = nuva_version()
print(version)

# Retrieve the NUVA core graph
g = nuva_core_graph()
print("Core graph loaded")
 
# Load the ATC alignments from a ';'-delimited CSV file.
# "utf-8-sig" drops a leading byte-order mark, if any, so that it does not
# end up glued to the first column name.
# The "with" block closes the file as soon as all rows have been read.
with open("NUVA_refcode_ATC.csv", 'r', encoding="utf-8-sig", newline='') as csv_file:
    reader = csv.DictReader(csv_file, delimiter=';')
    # The header of the first column names the external code system
    codesystem = reader.fieldnames[0]
    # One Dict per CSV row, keyed by the column headers
    codes = list(reader)

# Complement the graph with the alignments that were just read
nuva_add_codes_to_graph(g, codesystem, codes)
# Complement the graph with French labels
nuva_add_lang(g, 'fr')

# Display the list of vaccines
vaccines = nuva_get_vaccines(g, 'fr')
print(vaccines)

# Display a translation table from English to French
trans = nuva_translate(g, 'en', 'fr')
print(trans)

# Determine the best possible mapping from and to the code system,
# then split the result into its three items
eval_codes = nuva_optimize(g, codesystem, False)
bestcodes, revcodes, metrics = (
    eval_codes[item] for item in ('bestcodes', 'revcodes', 'metrics')
)
 
# Report files go to a sub-directory named after the code system
rev_fname = f"{codesystem}/nuva_reverse_{codesystem}.csv"
best_fname = f"{codesystem}/nuva_best_{codesystem}.csv"
metrics_fname = f"{codesystem}/nuva_metrics_{codesystem}.txt"

# Make sure the report directory exists before writing into it
Path(best_fname).parent.mkdir(parents=True, exist_ok=True)

print("Create best codes report " + best_fname)
# The "with" block guarantees that the report is flushed and closed,
# even if writing a row fails
with open(best_fname, 'w', encoding="utf-8", newline='') as best_file:
    best_writer = csv.writer(best_file, delimiter=';')
    best_writer.writerow(["NUVA", "Label", "IsAbstract", f"Best {codesystem}", "Equiv"])
    # One row per NUVA concept, with the best codes found for it
    for nuva_code, best in bestcodes.items():
        best_writer.writerow([nuva_code, best['label'], best['isAbstract'],
                              best['codes'], best['nbequiv']])
 
# Make sure the report directory exists before writing into it
Path(rev_fname).parent.mkdir(parents=True, exist_ok=True)

print("Create reverse codes report " + rev_fname)
# The "with" block guarantees that the report is flushed and closed,
# even if writing a row fails
with open(rev_fname, 'w', encoding="utf-8", newline='') as rev_file:
    rev_writer = csv.writer(rev_file, delimiter=';')
    rev_writer.writerow([codesystem, "Label", "Cardinality", "May code", "Blur", "Best code for"])
    # One row per code of the external code system
    for extcode, rev in revcodes.items():
        rev_writer.writerow([extcode, rev['label'],
                             rev['cardinality'], rev['may'],
                             rev['blur'], rev['best']])
 
# Summary figures: number of NUVA concepts and of aligned external codes
nbnuva = len(bestcodes)
nbcodes = len(revcodes)

# Display the metrics computed for the code system
print(f"NUVA version :{version}\n")
print(f"Number of NUVA concepts : {nbnuva}")
print(f"Completeness: {metrics['completeness']:.1%}\n")
print(f"Number of aligned codes: {nbcodes}")
print(f"Precision: {metrics['precision']:.1%}")
print(f"Redundancy: {metrics['redundancy']:.3}")