Differences

This shows you the differences between two versions of the page.

--- ivci:nuva-utils [2024/03/20 11:47] – fkaag
+++ ivci:nuva-utils [2025/04/25 09:03] (current) – fkaag
@@ Line 1: / Line 1: @@
 ====== Python utilities to handle NUVA ======
-A [[https://github.com/fkaag71/nuva-utils/tree/master/NUVA%20Utils|library available on GitHub]] allows to retrieve and explore NUVA. It is a work in progress, that will be progressively enriched to provide metrics on code systems based upon their mapping to NUVA codes.
+[[https://pypi.org/project/nuva-utils/|nuva_utils]] is a Python package available from the PyPI repository.
-The supported functions are so far:
+It can be installed with the command:
-<code python>
+<code>
-get_nuva_version()
+pip install nuva-utils
 </code>
-Returns the version index for the last publication of NUVA.
+The supported functions are so far:
 <code python>
-get_nuva(version)
+def nuva_version():
-</code>
+    """
-Uploads in the current directory the referenced version in RDF/XML format as **nuva_ans.rdf**, and creates a rebased version **nuva_ivci.rdf**.
+    Returns the current version of the NUVA graph available from https://ivci.org/nuva
+    """
+def nuva_core_graph():
+    """
+    Returns the core graph of NUVA as a RDFLib graph
+    :return: the core graph
+    """
+def nuva_add_codes_to_graph(g,codesystem,codes):
+    """
+    Adds the alignments for an external code system.
-<code python>
+    g: The graph where the alignments are to be added
-split_nuva()
+    codesystem: The code system of the alignments
-</code>
+    codes: an array of Dict objects, such as {'CVX':'CVX-219','NUVA':'VAC1188'}
-From the uploaded **nuva_ivci.rdf** file, creates a split version as a collection of files in RDF/Turtle format:
+    """
-  * **nuva_core.ttl** including the concepts for vaccines, valences, target diseases and their labels in English
+def nuva_add_lang(g,lang):
-  * **nuva_lang_XX.ttl** includes all translations for language XX
+    """
-  * **nuva_refcode_YYY.ttl** includes the concepts and the NUVA alignments for code system YYY
+    Adds a language graph to a base graph
+    """
+def nuva_get_vaccines(g,lang,onlyAbstract= False):
+    """
+    Return a Dict of all NUVA vaccines and their properties
+    """
+def nuva_translate(g,lang1,lang2):
+    """
+    Extracts from a graph the translation across 2 languages
+    """
+def nuva_optimize(g,codesystem,onlyAbstract):
+    """
+    Determines the optimal mapping of a code system to NUVA, either full or limited to abstract vaccines.
+    Returns a dictionary with three items:
+    - bestcodes, a dictionary of all NUVA concepts
+    - revcodes, a dictionary of all codes in the code system
+    - metrics, the computed metrics of the code system
-<code python>
+    For each NUVA concept, bestcodes is formed by:
-refturtle_to_map(code)
+    - label: the English label of the concept
-</code>
+    - isAbstract: whether the concept is abstract
-Starting from the **nuva_refcode_YYY.ttl** file for the given code, creates a simple CSV file **nuva_refcode_YYY.csv** with alignments between the given code and NUVA.
+    - nbequiv: the number of codes that match exactly the NUVA concept
+    - blur: the number of concepts covered by the narrowest codes for the NUVA concept. If nbequiv is not 0, blur should be 1
+    - codes: the list of codes with the given blur
-<code python>
+    For each code in the code system, revcodes is formed by:
-map_to_turtle(code)
+    - label: the English label of the corresponding NUVA concept
+    - cardinality: the number of NUVA concepts covered by the given code
+    - may: the list of these NUVA concepts
+    - blur: the number of NUVA concepts for which the given code is the best possible one
+    - best: the list of these NUVA concepts, that is a subset of "may"
+    The metrics are formed by:
+    - completeness: the share of NUVA concepts that can be represented by a code, even roughly
+    - precision: the inverse of the average blur over all the codes in the code system, when using the optimal one for each concept.
+    - redundancy: for the NUVA concepts that have exact alignments in the code system, the average number of such alignments.
+    """
 </code>
-Assuming that the **nuva_refcode_YYY.csv** file has been copied to work file **nuva_code_YYY.csv**, then edited for enhancing the alignments, creates a Turtle work file **nuva_code_YYY.ttl** for further processing.
-Note that the refcode file contains the NUVA English labels of vaccines for convenience, but these are not required nor processed from the work code file.
+Here is an example of use:
+  - Retrieve the NUVA version
+  - Retrieve the NUVA core graph
+  - Complement it with ATC alignments
+  - Complement it with French labels
+  - Display the list of vaccines
+  - Display a translation table from English to French
+  - Determine the best possible mapping from and to ATC and the corresponding metrics
-<code python>
+<code Python>
-query_core(q)
+import os
-</code>
+import nuva_utils
-Runs a SPARQL query q against the core graph loaded from **nuva_core.ttl**
+from pathlib import Path
+from nuva_utils.nuva_utils import *
-<code python>
+# Here the main program - Adapt the work directory to your environment
-query_code(q,code)
-</code>
-Runs a SPARQL query q against a graph formed by merging **nuva_core.ttl** and the work file **nuva_code_YYY.ttl**, thus allowing to run checks and measures on the alignment.
+os.chdir(str(Path.home())+"/Documents/NUVA")
+version = nuva_version()
+print(version)
-<code>
+g = nuva_core_graph()
-eval_code(code)
+print ("Core graph loaded")
-</code>
-Produces the metrics for a code system, given a nuva_code_YYY.csv file for alignments.
-Subproducts are:
+codes = []
-  * nuva_reverse_YYY.csv : file with all NUVA codes matching a given external code
+csv_file = open("NUVA_refcode_ATC.csv",'r',encoding="utf-8-sig",newline='')
-  * nuva_best_YYY.csv: file with the best possible external code for a given NUVA code
+reader = csv.DictReader(csv_file,delimiter=';')
+codesystem = reader.fieldnames[0]
+for row in reader:
+    codes.append(row)
-An example use sequence is included in the file:
+nuva_add_codes_to_graph(g,codesystem,codes)
-<code python>
+nuva_add_lang(g,'fr')
-# Here the main program - Adapt the work directory to your environment
+vaccines = nuva_get_vaccines(g,'fr')
+print(vaccines)
+trans = nuva_translate(g,'en','fr')
+print(trans)
+eval_codes = nuva_optimize(g,codesystem,False)
+bestcodes = eval_codes['bestcodes']
+revcodes = eval_codes['revcodes']
+metrics = eval_codes['metrics']
-os.chdir(str(Path.home())+"/Documents/NUVA")
+rev_fname = f"{codesystem}/nuva_reverse_{codesystem}.csv"
-get_nuva(get_nuva_version())
+best_fname= f"{codesystem}/nuva_best_{codesystem}.csv"
-split_nuva()
+metrics_fname=f"{codesystem}/nuva_metrics_{codesystem}.txt"
-refturtle_to_map("CVX")
-shutil.copyfile("nuva_refcode_CVX.csv","nuva_code_CVX.csv")
-map_to_turtle("CVX")
-q1 = """
+print ("Create best codes report "+best_fname)
-   # All vaccines against smallpox
+best_file = open(best_fname,'w',encoding="utf-8",newline='')
-    SELECT ?vcode ?vl WHERE {
+best_writer = csv.writer(best_file, delimiter=';')
-    ?dis rdfs:subClassOf nuva:Disease .
+best_writer.writerow(["NUVA","Label","IsAbstract",f"Best {codesystem}", "Equiv"])
-    ?dis rdfs:label "Smallpox-Monkeypox"@en .
+for nuva_code in bestcodes:
-    ?vac rdfs:subClassOf nuva:Vaccine .
+    best_writer.writerow([nuva_code,bestcodes[nuva_code]['label'],bestcodes[nuva_code]['isAbstract'],
-    ?vac rdfs:label ?vl .
+                            bestcodes[nuva_code]['codes'], bestcodes[nuva_code]['nbequiv']])
-    ?vac skos:notation ?vcode .
+best_file.close
-    ?vac nuvs:containsValence ?val .
-    ?val nuvs:prevents ?dis
- }
-"""
-res = query_core(q1)
-for row in res:
-    print (f"{row.vcode} - {row.vl}")
-q2="""
+print ("Create reverse codes report "+rev_fname)
-    # List CVX Codes
+rev_file = open(rev_fname,'w',encoding="utf-8",newline='')
-    SELECT ?cvx ?nuva ?lvac WHERE {
+rev_writer = csv.writer(rev_file, delimiter=';')
-    ?vac rdfs:subClassOf nuva:Vaccine .
+rev_writer.writerow([codesystem,"Label","Cardinality","May code", "Blur", "Best code for"])
-    ?vac skos:notation ?nuva .
+for extcode in revcodes:
-    ?vac skos:exactMatch ?code .
+    rev_writer.writerow([extcode,revcodes[extcode]['label'],
-    ?code rdfs:subClassOf nuva:CVX .
+                            revcodes[extcode]['cardinality'],revcodes[extcode]['may'],
-    ?code skos:notation ?cvx .
+                            revcodes[extcode]['blur'], revcodes[extcode]['best']])
-    ?vac rdfs:label $lvac
+rev_file.close
-    }
-"""
-res=query_code(q2,"CVX")
-for row in res:
-    print (f"CVX {row.cvx} = {row.nuva} - {row.lvac}")
-</code>
+nbnuva = len(bestcodes)
+nbcodes = len(revcodes)
+print (f"NUVA version :{version}\n")
+print (f"Number of NUVA concepts : {nbnuva}")
+print ("Completeness: {:.1%}\n".format(metrics['completeness']))
+print (f"Number of aligned codes: {nbcodes}")
+print ("Precision: {:.1%}".format(metrics['precision']))
+print ("Redundancy: {:.3}".format(metrics['redundancy']))
+</code>