From 3a51d9653b49ad3d94b2bd3afc3eff51d50c5c1d Mon Sep 17 00:00:00 2001
From: Daniele Di Bari <ddibari0@login01.leonardo.local>
Date: Thu, 19 Dec 2024 10:48:32 +0100
Subject: [PATCH] Included all functions in cin_seff.py

---
 cin_seff.py | 321 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 303 insertions(+), 18 deletions(-)

diff --git a/cin_seff.py b/cin_seff.py
index 737b657..4c28b29 100755
--- a/cin_seff.py
+++ b/cin_seff.py
@@ -13,21 +13,306 @@ Report the efficiency of jobs on CINECA Clusters
 ===================================================================
 
 .. versionadded:: 0.1
-
-TODO: Add in-depth discussion.
-
-Helper functions
-----------------
-.. autofunction:: run
-.. autofunction:: strclean
-.. autofunction:: get_args
 """
 
 import subprocess
 import argparse
-import strman
-import sys
-import occupancy_units
+
+#In[0] strman
+
+# String Cleaning
+
+def clean(s, strip=True, rm_all_wspaces=False, rplc={}, crms=["\""," "], case="lower"):
+    """Clean a string: strip it, replace/remove characters and normalise its case.
+
+    Parameters
+    ----------
+    s : string
+        Input string to clean.
+    strip : bool, optional
+        Strip the string (i.e. remove whitespaces from the beginning and the end of the input string).
+        Default: True.
+    rm_all_wspaces : bool, optional
+        Remove all whitespaces (space, tab, vertical tab, newline, carriage return, form feed).
+        Default: False.
+    rplc : dict, optional
+        Pairs of substrings to replace, applied before `crms`. Syntax: {old: new}.
+        Default: {}.
+    crms : list, optional
+        Characters to remove from the input string.
+        Default: ["\"", " "].
+    case : string, optional
+        Case conversion; the value itself is case insensitive. Allowed: 'lower', 'upper',
+        'capitalize' (first character uppercase, the rest lowercase), 'title' (initial
+        character of each word uppercase, the rest lowercase) or '' (case left unchanged).
+        Default: 'lower'.
+
+    Returns
+    -------
+    s : string
+        Cleaned string.
+
+    Raises
+    ------
+    Exception
+        If `case` is not one of the allowed values.
+    """
+
+    def remove_chars(sin, chars):
+        for c in chars:
+            sin = sin.replace(c, "")
+        return sin
+
+    if strip:
+        s = s.strip()
+
+    # Remove all whitespaces
+    _wspaces = (" ", "\t", "\v", "\n", "\r", "\f")
+    if rm_all_wspaces:
+        s = remove_chars(s, _wspaces)
+
+    # Replace specific characters pairs
+    for c_old, c_new in rplc.items():
+        s = s.replace(c_old, c_new)
+
+    # Remove specific characters
+    s = remove_chars(s, crms)
+
+    # Set case of string characters (str methods return a new string: assign the result back)
+    if case != "":
+        converters = {"lower": str.lower, "upper": str.upper, "capitalize": str.capitalize, "title": str.title}
+        case = case.lower()
+        if case not in converters:
+            raise Exception(f"\nWARNING! Unknown parameter: '{case}'. Allowed params are: 'lower', 'upper', 'capitalize', 'title'.")
+        s = converters[case](s)
+
+    return s
+
+## Expand slurm lists
+
+def xpnd_lrange(snums, unsafe=False, verbose=True):
+    """Expand a comma-separated list of numbers and ranges, e.g. "01-03,07" -> ["01", "02", "03", "07"].
+
+    Both limits of a range must have the same number of digits; the expanded values are
+    zero-padded to that width. Items without "-" are returned unchanged. An empty `snums`
+    raises Exception, or with `unsafe=True` is returned as [snums] (with a warning if `verbose`).
+    """
+    if not snums:  # str.split never returns an empty list, so test the input itself
+        out_message = f"No range to expand found in '{snums}'"
+        if not unsafe:
+            raise Exception(out_message)
+        if verbose:
+            print(f"WARNING! {out_message}. The string will be used anyway.")
+        return [snums]
+
+    num_list = []
+    for nr in snums.split(","):
+        if "-" not in nr:
+            num_list.append(nr)
+            continue
+        limits = nr.split("-")
+        if len(limits) != 2:
+            raise Exception(f"Problem with range: {nr}")
+        n_digits = len(limits[0])
+        if len(limits[1]) != n_digits:
+            raise Exception(f"Different number of digits for range limits: {limits[0]} -> {limits[1]}")
+        num_list.extend(f"{i:0{n_digits}d}" for i in range(int(limits[0]), int(limits[1]) + 1))
+    return num_list
+
+def xpnd_first_str_range(srange, unsafe=False, verbose=True):
+    """Expand the first "[...]" group of `srange`, e.g. "node[01-02]x" -> ["node01x", "node02x"].
+
+    The group content is expanded by `xpnd_lrange`. Without any "[" in `srange`: raise
+    Exception, or with `unsafe=True` return [srange] (printing a warning if `verbose`).
+    """
+    opening = srange.find("[")
+    if opening < 0:
+        out_message = f"No range to expand found in '{srange}'"
+        if not unsafe:
+            raise Exception(out_message)
+        if verbose:
+            print(f"WARNING! {out_message}. The string will be used anyway.")
+        return [srange]
+
+    closing = srange.find("]")
+    head, tail = srange[:opening], srange[closing + 1:]
+    numbers = xpnd_lrange(srange[opening + 1:closing], unsafe=unsafe, verbose=verbose)
+    return [head + number + tail for number in numbers]
+    
+def xpnd_full_ranges(sranges, unsafe=False, verbose=True):
+    """Expand every "[...]" group found in `sranges`, one group per pass.
+
+    Raise Exception when the numbers of "[" and "]" differ. Without any "[": raise
+    Exception, or with `unsafe=True` return [sranges] (printing a warning if `verbose`).
+    """
+    n_groups = sranges.count("[")
+    if sranges.count("]") != n_groups:
+        raise Exception(f"Different number of '[' and ']' in: {sranges}")
+    if n_groups == 0:
+        out_message = f"No range to expand found in '{sranges}'"
+        if not unsafe:
+            raise Exception(out_message)
+        if verbose:
+            print(f"WARNING! {out_message}. The string will be used anyway.")
+        return [sranges]
+
+    expanded = [sranges]
+    for _ in range(n_groups):
+        expanded = [item for partial in expanded for item in xpnd_first_str_range(partial, unsafe=unsafe, verbose=verbose)]
+    return expanded
+
+def xpnd_grps_of_ranges(sranges, delimiter=",", unsafe=False, verbose=True):
+    """Split `sranges` at every `delimiter` lying outside "[...]" and expand each piece.
+
+    Pieces are expanded with `xpnd_full_ranges`; the results are concatenated in order.
+    """
+    pieces = []
+    piece_start = 0
+    in_brackets = False
+    for pos, char in enumerate(sranges):
+        if char == delimiter and not in_brackets:
+            pieces.append(sranges[piece_start:pos])
+            piece_start = pos + 1
+        elif char == "[":
+            in_brackets = True
+        elif char == "]":
+            in_brackets = False
+    # Whatever follows the last delimiter is the final piece
+    pieces.append(sranges[piece_start:])
+
+    return [item for piece in pieces for item in xpnd_full_ranges(piece, unsafe=unsafe, verbose=verbose)]
+
+#In[1] occupancy_units
+
+# Definition of the allowed units and their recognized names within the system
+allowed_units = {
+    "B":   {"exp": 0,  "base": 2, "descr": "Byte",     "valid_names": ["",  "b",   "byte"]},
+    # Binary multiples (powers of 2)
+    "KiB": {"exp": 10, "base": 2, "descr": "Kibibyte", "valid_names": ["k", "kib", "kibibytes", "kbytes"]},
+    "MiB": {"exp": 20, "base": 2, "descr": "Mebibyte", "valid_names": ["m", "mib", "mebibytes", "mbytes"]},
+    "GiB": {"exp": 30, "base": 2, "descr": "Gibibyte", "valid_names": ["g", "gib", "gibibytes", "gbytes"]},
+    "TiB": {"exp": 40, "base": 2, "descr": "Tebibyte", "valid_names": ["t", "tib", "tebibytes", "tbytes"]},
+    "PiB": {"exp": 50, "base": 2, "descr": "Pebibyte", "valid_names": ["p", "pib", "pebibytes", "pbytes"]},
+    # Decimal multiples (powers of 10)
+    "kB": {"exp": 3,  "base": 10, "descr": "Kilobyte", "valid_names": ["kb", "kilobytes"]},
+    "MB": {"exp": 6,  "base": 10, "descr": "Megabyte", "valid_names": ["mb", "megabytes"]},
+    "GB": {"exp": 9,  "base": 10, "descr": "Gigabyte", "valid_names": ["gb", "gigabytes"]},
+    "TB": {"exp": 12, "base": 10, "descr": "Terabyte", "valid_names": ["tb", "terabytes"]},
+    "PB": {"exp": 15, "base": 10, "descr": "Petabyte", "valid_names": ["pb", "petabytes"]},
+}
+
+def check(unit):
+    """
+    Return the reference name of a unit, raising if the unit is not recognized.
+
+    Parameters
+    ----------
+    unit : str
+        The name, or acronym, of the unit to check (e.g., "kib", "kibibytes", "kb",
+        "kilobytes"). The lookup is case insensitive.
+
+    Returns
+    -------
+    unit_name : str
+        The key of the `allowed_units` entry whose "valid_names" list contains the
+        lowercased `unit`. If several entries matched, the first one would be returned.
+
+    Raises
+    ------
+    ValueError
+        If no entry of `allowed_units` lists the unit. The message enumerates every
+        reference name together with its accepted spellings.
+    """
+
+    lowered = unit.lower()
+
+    # Look the name up among the accepted spellings of each reference unit
+    for unit_name, unit_features in allowed_units.items():
+        if lowered in unit_features["valid_names"]:
+            return unit_name
+
+    # Unit not found - Raise exception listing the allowed values
+    accepted = "".join(
+        f" - {unit_name}: {unit_features['valid_names']}\n"
+        for unit_name, unit_features in allowed_units.items()
+    )
+
+    raise ValueError(
+        f"Not recognized unit '{unit}'\nAllowed values (case insensitive):\n" + accepted
+    )
+
+
+def conversion_factor(old_unit, new_unit):
+    """
+    Return the factor that converts a value expressed in `old_unit` into `new_unit`:
+    `new_value = old_value * conversion_factor`.
+
+    Parameters
+    ----------
+    old_unit : str
+        The name of the unit to convert from.
+
+    new_unit : str
+        The name of the unit to convert to.
+
+    Returns
+    -------
+    conversion_factor : int or float
+        The factor to be used for converting from `old_unit` to `new_unit`.
+
+    Raises
+    ------
+    ValueError
+        Propagated from `check` when either unit name is not recognized.
+
+    Notes
+    -----
+    - Each unit is described in `allowed_units` by a base and an exponent.
+    - Units sharing the same base are converted through the difference of the exponents.
+    - Units with different bases are converted through the ratio of the two powers.
+    """
+
+    # Validate both names before reading their definitions
+    old_key, new_key = check(old_unit), check(new_unit)
+    source, target = allowed_units[old_key], allowed_units[new_key]
+
+    if source["base"] != target["base"]:
+        return source["base"]**source["exp"] / target["base"]**target["exp"]
+    # Same base: a single power of the exponent difference
+    return source["base"]**(source["exp"] - target["exp"])
+    
+def convert_num(num, old_unit, new_unit):
+    """
+    Convert a numeric value from `old_unit` to `new_unit`:
+    `new_value = num * conversion_factor(old_unit, new_unit)`.
+
+    Parameters
+    ----------
+    num : float
+        The numeric value, expressed in `old_unit`, to convert.
+
+    old_unit : str
+        The name of the unit to convert from.
+
+    new_unit : str
+        The name of the unit to convert to.
+
+    Returns
+    -------
+    converted_num : float
+        The numeric value expressed in `new_unit`.
+
+    Raises
+    ------
+    ValueError
+        Propagated from `conversion_factor` when either unit name is not recognized.
+    """
+
+    # Unit validation happens inside conversion_factor
+    factor = conversion_factor(old_unit, new_unit)
+    return num * factor
+
+#In[2] cin_seff
 
 def run(cmd, timeout=60, splitted_cmd=False):
     """ 
@@ -168,7 +453,7 @@ def split_num_and_unit(full_number):
     unit   = full_number[-1].lower()
     if ord(unit) > 57:
         # not numeric char for unit
-        occupancy_units.check(unit)
+        check(unit)
         val = float(full_number[:-1])
     elif ord(unit) > 47:
         # unit is a digit
@@ -185,9 +470,9 @@ def main():
     if args.verbosity > 2:
         print(f"PARSED VARIABLES:\njobids=={args.jobids}\nconsumed_budget=={args.consumed_budget}\nnode_distribution=={args.node_distribution}\nverbosity=={args.verbosity}")
 
-    groups_of_jobids_ranges = strman.clean(args.jobids, rplc={" ":",", "\n":","})
+    groups_of_jobids_ranges = clean(args.jobids, rplc={" ":",", "\n":","})
     
-    jobids = strman.xpnd_grps_of_ranges(groups_of_jobids_ranges, unsafe=True, verbose=False)
+    jobids = xpnd_grps_of_ranges(groups_of_jobids_ranges, unsafe=True, verbose=False)
     
     for jobid in jobids:
         cmd = "sacct -P -n -a --format JobID,User,Account,State,AllocCPUS,REQMEM,TotalCPU,ElapsedRaw,MaxRSS,ExitCode,NodeList,AllocNodes,Elapsed,Partition,QOS -j {jobid}"
@@ -238,18 +523,18 @@ Nodes:               {data["AllocNodes"]}"""
                 step_info = line.split('|')
                 if len(step_info[8]) > 0:
                     maxrss, unit = split_num_and_unit(step_info[8]) 
-                    step_MAXRSS_kiB = occupancy_units.convert_num(maxrss, unit, "kib")
+                    step_MAXRSS_kiB = convert_num(maxrss, unit, "kib")
                     if step_MAXRSS_kiB > data["TotalRSS_kiB"]:
                         data["TotalRSS_kiB"] = step_MAXRSS_kiB
                 else:
                     step_exitcode = step_info[9]
                     if step_exitcode == "0:0":
                         raise Exception(f"Step termited correctly (exit codfe {step_exitcode}), but no MAXRSS recorded.")
-            data["TotalRSS_GiB"] = occupancy_units.convert_num(data["TotalRSS_kiB"], "kib", "gib")
+            data["TotalRSS_GiB"] = convert_num(data["TotalRSS_kiB"], "kib", "gib")
             
             reqmem, unit = split_num_and_unit(data["REQMEM"])
-            data["REQMEM_kiB"] = occupancy_units.convert_num(reqmem, unit, "kib")
-            data["REQMEM_GiB"] = occupancy_units.convert_num(reqmem, unit, "gib")
+            data["REQMEM_kiB"] = convert_num(reqmem, unit, "kib")
+            data["REQMEM_GiB"] = convert_num(reqmem, unit, "gib")
             
             data["MEMEfficiency"] = 100 * data["TotalRSS_kiB"] / data["REQMEM_kiB"]
 
-- 
GitLab